- refactor emitter next/prev block handling a bit
[libfirm] / ir / be / becopyopt.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Copy minimization driver.
23  * @author      Daniel Grund
24  * @date        12.04.2005
25  * @version     $Id$
26  *
27  * Main file for the optimization reducing the copies needed for:
28  * - Phi coalescing
29  * - Register-constrained nodes
30  * - Two-address code instructions
31  */
32 #ifdef HAVE_CONFIG_H
33 #include "config.h"
34 #endif
35
36 #include "execfreq.h"
37 #include "xmalloc.h"
38 #include "debug.h"
39 #include "pmap.h"
40 #include "raw_bitset.h"
41 #include "irnode.h"
42 #include "irgraph.h"
43 #include "irgwalk.h"
44 #include "irprog.h"
45 #include "irloop_t.h"
46 #include "iredges_t.h"
47 #include "phiclass.h"
48 #include "irbitset.h"
49 #include "irphase_t.h"
50 #include "irprintf_t.h"
51
52 #include "bemodule.h"
53 #include "bearch_t.h"
54 #include "benode_t.h"
55 #include "beutil.h"
56 #include "beifg_t.h"
57 #include "beintlive_t.h"
58 #include "becopyopt_t.h"
59 #include "becopystat.h"
60 #include "belive_t.h"
61 #include "beinsn_t.h"
62 #include "besched_t.h"
63 #include "bejavacoal.h"
64 #include "bestatevent.h"
65 #include "beirg_t.h"
66 #include "error.h"
67
68 #include "lc_opts.h"
69 #include "lc_opts_enum.h"
70
71 #define DUMP_BEFORE 1
72 #define DUMP_AFTER  2
73 #define DUMP_APPEL  4
74 #define DUMP_ALL    2 * DUMP_APPEL - 1
75
76 #define COST_FUNC_FREQ     1
77 #define COST_FUNC_LOOP     2
78 #define COST_FUNC_ALL_ONE  3
79
80 static unsigned   dump_flags  = 0;
81 static unsigned   style_flags = 0;
82 static unsigned   do_stats    = 0;
83 static cost_fct_t cost_func   = co_get_costs_exec_freq;
84 static unsigned   algo        = CO_ALGO_HEUR4;
85 static int        improve     = 1;
86
87 static const lc_opt_enum_mask_items_t dump_items[] = {
88         { "before",  DUMP_BEFORE },
89         { "after",   DUMP_AFTER  },
90         { "appel",   DUMP_APPEL  },
91         { "all",     DUMP_ALL    },
92         { NULL,      0 }
93 };
94
95 static const lc_opt_enum_mask_items_t style_items[] = {
96         { "color",   CO_IFG_DUMP_COLORS },
97         { "labels",  CO_IFG_DUMP_LABELS },
98         { "constr",  CO_IFG_DUMP_CONSTR },
99         { "shape",   CO_IFG_DUMP_SHAPE  },
100         { "full",    2 * CO_IFG_DUMP_SHAPE - 1 },
101         { NULL,      0 }
102 };
103
104 static const lc_opt_enum_mask_items_t algo_items[] = {
105         { "none",   CO_ALGO_NONE  },
106         { "heur",   CO_ALGO_HEUR  },
107         { "heur2",  CO_ALGO_HEUR2 },
108         { "heur3",  CO_ALGO_HEUR3 },
109         { "heur4",  CO_ALGO_HEUR4 },
110         { "ilp",    CO_ALGO_ILP   },
111         { NULL,     0 }
112 };
113
114 typedef int (*opt_funcptr)(void);
115
116 static const lc_opt_enum_func_ptr_items_t cost_func_items[] = {
117         { "freq",   (opt_funcptr) co_get_costs_exec_freq },
118         { "loop",   (opt_funcptr) co_get_costs_loop_depth },
119         { "one",    (opt_funcptr) co_get_costs_all_one },
120         { NULL,     NULL }
121 };
122
123 static lc_opt_enum_mask_var_t dump_var = {
124         &dump_flags, dump_items
125 };
126
127 static lc_opt_enum_mask_var_t style_var = {
128         &style_flags, style_items
129 };
130
131 static lc_opt_enum_mask_var_t algo_var = {
132         &algo, algo_items
133 };
134
135 static lc_opt_enum_func_ptr_var_t cost_func_var = {
136         (opt_funcptr*) &cost_func, cost_func_items
137 };
138
139 static const lc_opt_table_entry_t options[] = {
140         LC_OPT_ENT_ENUM_INT      ("algo",    "select copy optimization algo",                           &algo_var),
141         LC_OPT_ENT_ENUM_FUNC_PTR ("cost",    "select a cost function",                                  &cost_func_var),
142         LC_OPT_ENT_ENUM_MASK     ("dump",    "dump ifg before or after copy optimization",              &dump_var),
143         LC_OPT_ENT_ENUM_MASK     ("style",   "dump style for ifg dumping",                              &style_var),
144         LC_OPT_ENT_BOOL          ("stats",   "dump statistics after each optimization",                 &do_stats),
145         LC_OPT_ENT_BOOL          ("improve", "run heur3 before if algo can exploit start solutions",    &improve),
146         LC_OPT_LAST
147 };
148
149 /* Insert additional options registration functions here. */
150 extern void be_co_ilp_register_options(lc_opt_entry_t *grp);
151 extern void be_co2_register_options(lc_opt_entry_t *grp);
152 extern void be_co3_register_options(lc_opt_entry_t *grp);
153
154 void be_init_copycoal(void)
155 {
156         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
157         lc_opt_entry_t *ra_grp = lc_opt_get_grp(be_grp, "ra");
158         lc_opt_entry_t *chordal_grp = lc_opt_get_grp(ra_grp, "chordal");
159         lc_opt_entry_t *co_grp = lc_opt_get_grp(chordal_grp, "co");
160
161         lc_opt_add_table(co_grp, options);
162 }
163
164 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copycoal);
165
166 #undef QUICK_AND_DIRTY_HACK
167
168 static int nodes_interfere(const be_chordal_env_t *env, const ir_node *a, const ir_node *b)
169 {
170         if (env->ifg)
171                 return be_ifg_connected(env->ifg, a, b);
172         else
173                 return values_interfere(env->birg, a, b);
174 }
175
176
177 /******************************************************************************
178     _____                           _
179    / ____|                         | |
180   | |  __  ___ _ __   ___ _ __ __ _| |
181   | | |_ |/ _ \ '_ \ / _ \ '__/ _` | |
182   | |__| |  __/ | | |  __/ | | (_| | |
183    \_____|\___|_| |_|\___|_|  \__,_|_|
184
185  ******************************************************************************/
186
187 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
188
189
190 copy_opt_t *new_copy_opt(be_chordal_env_t *chordal_env, cost_fct_t get_costs)
191 {
192         const char *s1, *s2, *s3;
193         int len;
194         copy_opt_t *co;
195
196         FIRM_DBG_REGISTER(dbg, "ir.be.copyopt");
197
198         co = xcalloc(1, sizeof(*co));
199         co->cenv      = chordal_env;
200         co->aenv      = chordal_env->birg->main_env->arch_env;
201         co->irg       = chordal_env->irg;
202         co->cls       = chordal_env->cls;
203         co->get_costs = get_costs;
204
205         s1 = get_irp_prog_name();
206         s2 = get_entity_name(get_irg_entity(co->irg));
207         s3 = chordal_env->cls->name;
208         len = strlen(s1) + strlen(s2) + strlen(s3) + 5;
209         co->name = xmalloc(len);
210         snprintf(co->name, len, "%s__%s__%s", s1, s2, s3);
211
212         return co;
213 }
214
215 void free_copy_opt(copy_opt_t *co) {
216         xfree(co->name);
217         free(co);
218 }
219
220 int co_is_optimizable_root(const copy_opt_t *co, ir_node *irn) {
221         const arch_register_req_t *req;
222         const arch_register_t *reg;
223
224         if (arch_irn_is(co->aenv, irn, ignore))
225                 return 0;
226
227         reg = arch_get_irn_register(co->aenv, irn);
228         if (arch_register_type_is(reg, ignore))
229                 return 0;
230
231         req = arch_get_register_req(co->aenv, irn, -1);
232         if (is_Reg_Phi(irn) || is_Perm_Proj(co->aenv, irn) || is_2addr_code(req))
233                 return 1;
234
235         return 0;
236 }
237
238 int co_get_costs_loop_depth(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
239         int cost = 0;
240         ir_loop *loop;
241         ir_node *root_block = get_nodes_block(root);
242         (void) co;
243         (void) arg;
244
245         if (is_Phi(root)) {
246                 /* for phis the copies are placed in the corresponding pred-block */
247                 loop = get_irn_loop(get_Block_cfgpred_block(root_block, pos));
248         } else {
249                 /* a perm places the copy in the same block as it resides */
250                 loop = get_irn_loop(root_block);
251         }
252         if (loop) {
253                 int d = get_loop_depth(loop);
254                 cost = d*d;
255         }
256         return 1+cost;
257 }
258
259 int co_get_costs_exec_freq(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
260         int res;
261         ir_node *root_bl = get_nodes_block(root);
262         ir_node *copy_bl = is_Phi(root) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
263         (void) arg;
264         res = get_block_execfreq_ulong(co->cenv->birg->exec_freq, copy_bl);
265
266         /* don't allow values smaller than one. */
267         return res < 1 ? 1 : res;
268 }
269
270
271 int co_get_costs_all_one(const copy_opt_t *co, ir_node *root, ir_node *arg, int pos) {
272         (void) co;
273         (void) root;
274         (void) arg;
275         (void) pos;
276         return 1;
277 }
278
279 /******************************************************************************
280    ____        _   _    _       _ _          _____ _
281   / __ \      | | | |  | |     (_) |        / ____| |
282  | |  | |_ __ | |_| |  | |_ __  _| |_ ___  | (___ | |_ ___  _ __ __ _  __ _  ___
283  | |  | | '_ \| __| |  | | '_ \| | __/ __|  \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
284  | |__| | |_) | |_| |__| | | | | | |_\__ \  ____) | || (_) | | | (_| | (_| |  __/
285   \____/| .__/ \__|\____/|_| |_|_|\__|___/ |_____/ \__\___/|_|  \__,_|\__, |\___|
286         | |                                                            __/ |
287         |_|                                                           |___/
288  ******************************************************************************/
289
290 /**
291  * Determines a maximum weighted independent set with respect to
292  * the interference and conflict edges of all nodes in a qnode.
293  */
294 static int ou_max_ind_set_costs(unit_t *ou) {
295         be_chordal_env_t *chordal_env = ou->co->cenv;
296         ir_node **safe, **unsafe;
297         int i, o, safe_count, safe_costs, unsafe_count, *unsafe_costs;
298         bitset_t *curr;
299         bitset_pos_t pos;
300         int max, curr_weight, best_weight = 0;
301
302         /* assign the nodes into two groups.
303          * safe: node has no interference, hence it is in every max stable set.
304          * unsafe: node has an interference
305          */
306         safe = alloca((ou->node_count-1) * sizeof(*safe));
307         safe_costs = 0;
308         safe_count = 0;
309         unsafe = alloca((ou->node_count-1) * sizeof(*unsafe));
310         unsafe_costs = alloca((ou->node_count-1) * sizeof(*unsafe_costs));
311         unsafe_count = 0;
312         for(i=1; i<ou->node_count; ++i) {
313                 int is_safe = 1;
314                 for(o=1; o<ou->node_count; ++o) {
315                         if (i==o)
316                                 continue;
317                         if (nodes_interfere(chordal_env, ou->nodes[i], ou->nodes[o])) {
318                                 unsafe_costs[unsafe_count] = ou->costs[i];
319                                 unsafe[unsafe_count] = ou->nodes[i];
320                                 ++unsafe_count;
321                                 is_safe = 0;
322                                 break;
323                         }
324                 }
325                 if (is_safe) {
326                         safe_costs += ou->costs[i];
327                         safe[safe_count++] = ou->nodes[i];
328                 }
329         }
330
331
332         /* now compute the best set out of the unsafe nodes*/
333         if (unsafe_count > MIS_HEUR_TRIGGER) {
334                 bitset_t *best = bitset_alloca(unsafe_count);
335                 /* Heuristik: Greedy trial and error form index 0 to unsafe_count-1 */
336                 for (i=0; i<unsafe_count; ++i) {
337                         bitset_set(best, i);
338                         /* check if it is a stable set */
339                         for (o=bitset_next_set(best, 0); o!=-1 && o<i; o=bitset_next_set(best, o+1))
340                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o])) {
341                                         bitset_clear(best, i); /* clear the bit and try next one */
342                                         break;
343                                 }
344                 }
345                 /* compute the weight */
346                 bitset_foreach(best, pos)
347                         best_weight += unsafe_costs[pos];
348         } else {
349                 /* Exact Algorithm: Brute force */
350                 curr = bitset_alloca(unsafe_count);
351                 bitset_set_all(curr);
352                 while ((max = bitset_popcnt(curr)) != 0) {
353                         /* check if curr is a stable set */
354                         for (i=bitset_next_set(curr, 0); i!=-1; i=bitset_next_set(curr, i+1))
355                                 for (o=bitset_next_set(curr, i+1); o!=-1; o=bitset_next_set(curr, o+1)) /* !!!!! difference to qnode_max_ind_set(): NOT (curr, i) */
356                                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o]))
357                                                         goto no_stable_set;
358
359                         /* if we arrive here, we have a stable set */
360                         /* compute the weigth of the stable set*/
361                         curr_weight = 0;
362                         bitset_foreach(curr, pos)
363                                 curr_weight += unsafe_costs[pos];
364
365                         /* any better ? */
366                         if (curr_weight > best_weight) {
367                                 best_weight = curr_weight;
368                         }
369
370         no_stable_set:
371                         bitset_minus1(curr);
372                 }
373         }
374
375         return safe_costs+best_weight;
376 }
377
378 static void co_collect_units(ir_node *irn, void *env) {
379         copy_opt_t *co = env;
380         unit_t *unit;
381
382         if (!is_curr_reg_class(co, irn))
383                 return;
384         if (!co_is_optimizable_root(co, irn))
385                 return;
386
387         /* Init a new unit */
388         unit = xcalloc(1, sizeof(*unit));
389         unit->co = co;
390         unit->node_count = 1;
391         INIT_LIST_HEAD(&unit->queue);
392
393         /* Phi with some/all of its arguments */
394         if (is_Reg_Phi(irn)) {
395                 int i, arity;
396
397                 /* init */
398                 arity = get_irn_arity(irn);
399                 unit->nodes = xmalloc((arity+1) * sizeof(*unit->nodes));
400                 unit->costs = xmalloc((arity+1) * sizeof(*unit->costs));
401                 unit->nodes[0] = irn;
402
403                 /* fill */
404                 for (i=0; i<arity; ++i) {
405                         int o, arg_pos;
406                         ir_node *arg = get_irn_n(irn, i);
407
408                         assert(is_curr_reg_class(co, arg) && "Argument not in same register class.");
409                         if (arg == irn)
410                                 continue;
411                         if (nodes_interfere(co->cenv, irn, arg)) {
412                                 unit->inevitable_costs += co->get_costs(co, irn, arg, i);
413                                 continue;
414                         }
415
416                         /* Else insert the argument of the phi to the members of this ou */
417                         DBG((dbg, LEVEL_1, "\t   Member: %+F\n", arg));
418
419                         if (! arch_irn_is(co->aenv, arg, ignore)) {
420                                 /* Check if arg has occurred at a prior position in the arg/list */
421                                 arg_pos = 0;
422                                 for (o=1; o<unit->node_count; ++o) {
423                                         if (unit->nodes[o] == arg) {
424                                                 arg_pos = o;
425                                                 break;
426                                         }
427                                 }
428
429                                 if (!arg_pos) { /* a new argument */
430                                         /* insert node, set costs */
431                                         unit->nodes[unit->node_count] = arg;
432                                         unit->costs[unit->node_count] = co->get_costs(co, irn, arg, i);
433                                         unit->node_count++;
434                                 } else { /* arg has occurred before in same phi */
435                                         /* increase costs for existing arg */
436                                         unit->costs[arg_pos] += co->get_costs(co, irn, arg, i);
437                                 }
438                         }
439                 }
440                 unit->nodes = xrealloc(unit->nodes, unit->node_count * sizeof(*unit->nodes));
441                 unit->costs = xrealloc(unit->costs, unit->node_count * sizeof(*unit->costs));
442         } else if (is_Perm_Proj(co->aenv, irn)) {
443                 /* Proj of a perm with corresponding arg */
444                 assert(!nodes_interfere(co->cenv, irn, get_Perm_src(irn)));
445                 unit->nodes = xmalloc(2 * sizeof(*unit->nodes));
446                 unit->costs = xmalloc(2 * sizeof(*unit->costs));
447                 unit->node_count = 2;
448                 unit->nodes[0] = irn;
449                 unit->nodes[1] = get_Perm_src(irn);
450                 unit->costs[1] = co->get_costs(co, irn, unit->nodes[1], -1);
451         } else {
452                 const arch_register_req_t *req =
453                         arch_get_register_req(co->aenv, irn, -1);
454
455                 /* Src == Tgt of a 2-addr-code instruction */
456                 if (is_2addr_code(req)) {
457                         const unsigned other = req->other_same;
458                         int            count = 0;
459                         int            i;
460
461                         for (i = 0; (1U << i) <= other; ++i) {
462                                 if (other & (1U << i)) {
463                                         ir_node *o  = get_irn_n(skip_Proj(irn), i);
464                                         if (!arch_irn_is(co->aenv, o, ignore) &&
465                                                         !nodes_interfere(co->cenv, irn, o)) {
466                                                 ++count;
467                                         }
468                                 }
469                         }
470
471                         if (count != 0) {
472                                 int k = 0;
473                                 ++count;
474                                 unit->nodes = xmalloc(count * sizeof(*unit->nodes));
475                                 unit->costs = xmalloc(count * sizeof(*unit->costs));
476                                 unit->node_count = count;
477                                 unit->nodes[k++] = irn;
478
479                                 for (i = 0; 1U << i <= other; ++i) {
480                                         if (other & (1U << i)) {
481                                                 ir_node *o  = get_irn_n(skip_Proj(irn), i);
482                                                 if (!arch_irn_is(co->aenv, o, ignore) &&
483                                                                 !nodes_interfere(co->cenv, irn, o)) {
484                                                         unit->nodes[k] = o;
485                                                         unit->costs[k] = co->get_costs(co, irn, o, -1);
486                                                         ++k;
487                                                 }
488                                         }
489                                 }
490                         }
491                 } else {
492                         assert(0 && "This is not an optimizable node!");
493                 }
494         }
495
496         /* Insert the new unit at a position according to its costs */
497         if (unit->node_count > 1) {
498                 int i;
499                 struct list_head *tmp;
500
501                 /* Determine the maximum costs this unit can cause: all_nodes_cost */
502                 for(i=1; i<unit->node_count; ++i) {
503                         unit->sort_key = MAX(unit->sort_key, unit->costs[i]);
504                         unit->all_nodes_costs += unit->costs[i];
505                 }
506
507                 /* Determine the minimal costs this unit will cause: min_nodes_costs */
508                 unit->min_nodes_costs += unit->all_nodes_costs - ou_max_ind_set_costs(unit);
509                 /* Insert the new ou according to its sort_key */
510                 tmp = &co->units;
511                 while (tmp->next != &co->units && list_entry_units(tmp->next)->sort_key > unit->sort_key)
512                         tmp = tmp->next;
513                 list_add(&unit->units, tmp);
514         } else {
515                 free(unit);
516         }
517 }
518
519 #ifdef QUICK_AND_DIRTY_HACK
520
521 static int compare_ous(const void *k1, const void *k2) {
522         const unit_t *u1 = *((const unit_t **) k1);
523         const unit_t *u2 = *((const unit_t **) k2);
524         int i, o, u1_has_constr, u2_has_constr;
525         arch_register_req_t req;
526         const arch_env_t *aenv = u1->co->aenv;
527
528         /* Units with constraints come first */
529         u1_has_constr = 0;
530         for (i=0; i<u1->node_count; ++i) {
531                 arch_get_register_req(aenv, &req, u1->nodes[i], -1);
532                 if (arch_register_req_is(&req, limited)) {
533                         u1_has_constr = 1;
534                         break;
535                 }
536         }
537
538         u2_has_constr = 0;
539         for (i=0; i<u2->node_count; ++i) {
540                 arch_get_register_req(aenv, &req, u2->nodes[i], -1);
541                 if (arch_register_req_is(&req, limited)) {
542                         u2_has_constr = 1;
543                         break;
544                 }
545         }
546
547         if (u1_has_constr != u2_has_constr)
548                 return u2_has_constr - u1_has_constr;
549
550         /* Now check, whether the two units are connected */
551 #if 0
552         for (i=0; i<u1->node_count; ++i)
553                 for (o=0; o<u2->node_count; ++o)
554                         if (u1->nodes[i] == u2->nodes[o])
555                                 return 0;
556 #endif
557
558         /* After all, the sort key decides. Greater keys come first. */
559         return u2->sort_key - u1->sort_key;
560
561 }
562
563 /**
564  * Sort the ou's according to constraints and their sort_key
565  */
566 static void co_sort_units(copy_opt_t *co) {
567         int i, count = 0, costs;
568         unit_t *ou, **ous;
569
570         /* get the number of ous, remove them form the list and fill the array */
571         list_for_each_entry(unit_t, ou, &co->units, units)
572                 count++;
573         ous = alloca(count * sizeof(*ous));
574
575         costs = co_get_max_copy_costs(co);
576
577         i = 0;
578         list_for_each_entry(unit_t, ou, &co->units, units)
579                 ous[i++] = ou;
580
581         INIT_LIST_HEAD(&co->units);
582
583         assert(count == i && list_empty(&co->units));
584
585         for (i=0; i<count; ++i)
586                 ir_printf("%+F\n", ous[i]->nodes[0]);
587
588         qsort(ous, count, sizeof(*ous), compare_ous);
589
590         ir_printf("\n\n");
591         for (i=0; i<count; ++i)
592                 ir_printf("%+F\n", ous[i]->nodes[0]);
593
594         /* reinsert into list in correct order */
595         for (i=0; i<count; ++i)
596                 list_add_tail(&ous[i]->units, &co->units);
597
598         assert(costs == co_get_max_copy_costs(co));
599 }
600 #endif
601
602 void co_build_ou_structure(copy_opt_t *co) {
603         DBG((dbg, LEVEL_1, "\tCollecting optimization units\n"));
604         INIT_LIST_HEAD(&co->units);
605         irg_walk_graph(co->irg, co_collect_units, NULL, co);
606 #ifdef QUICK_AND_DIRTY_HACK
607         co_sort_units(co);
608 #endif
609 }
610
611 void co_free_ou_structure(copy_opt_t *co) {
612         unit_t *curr, *tmp;
613         ASSERT_OU_AVAIL(co);
614         list_for_each_entry_safe(unit_t, curr, tmp, &co->units, units) {
615                 xfree(curr->nodes);
616                 xfree(curr->costs);
617                 xfree(curr);
618         }
619         co->units.next = NULL;
620 }
621
622 /* co_solve_heuristic() is implemented in becopyheur.c */
623
624 int co_get_max_copy_costs(const copy_opt_t *co) {
625         int i, res = 0;
626         unit_t *curr;
627
628         ASSERT_OU_AVAIL(co);
629
630         list_for_each_entry(unit_t, curr, &co->units, units) {
631                 res += curr->inevitable_costs;
632                 for (i=1; i<curr->node_count; ++i)
633                         res += curr->costs[i];
634         }
635         return res;
636 }
637
638 int co_get_inevit_copy_costs(const copy_opt_t *co) {
639         int res = 0;
640         unit_t *curr;
641
642         ASSERT_OU_AVAIL(co);
643
644         list_for_each_entry(unit_t, curr, &co->units, units)
645                 res += curr->inevitable_costs;
646         return res;
647 }
648
649 int co_get_copy_costs(const copy_opt_t *co) {
650         int i, res = 0;
651         unit_t *curr;
652
653         ASSERT_OU_AVAIL(co);
654
655         list_for_each_entry(unit_t, curr, &co->units, units) {
656                 int root_col = get_irn_col(co, curr->nodes[0]);
657                 DBG((dbg, LEVEL_1, "  %3d costs for root %+F color %d\n", curr->inevitable_costs, curr->nodes[0], root_col));
658                 res += curr->inevitable_costs;
659                 for (i=1; i<curr->node_count; ++i) {
660                         int arg_col = get_irn_col(co, curr->nodes[i]);
661                         if (root_col != arg_col) {
662                                 DBG((dbg, LEVEL_1, "  %3d for arg %+F color %d\n", curr->costs[i], curr->nodes[i], arg_col));
663                                 res += curr->costs[i];
664                         }
665                 }
666         }
667         return res;
668 }
669
670 int co_get_lower_bound(const copy_opt_t *co) {
671         int res = 0;
672         unit_t *curr;
673
674         ASSERT_OU_AVAIL(co);
675
676         list_for_each_entry(unit_t, curr, &co->units, units)
677                 res += curr->inevitable_costs + curr->min_nodes_costs;
678         return res;
679 }
680
681 void co_complete_stats(const copy_opt_t *co, co_complete_stats_t *stat)
682 {
683         bitset_t *seen = bitset_irg_malloc(co->irg);
684         affinity_node_t *an;
685
686         memset(stat, 0, sizeof(stat[0]));
687
688         /* count affinity edges. */
689         co_gs_foreach_aff_node(co, an) {
690                 neighb_t *neigh;
691                 stat->aff_nodes += 1;
692                 bitset_add_irn(seen, an->irn);
693                 co_gs_foreach_neighb(an, neigh) {
694                         if(!bitset_contains_irn(seen, neigh->irn)) {
695                                 stat->aff_edges += 1;
696                                 stat->max_costs += neigh->costs;
697
698                                 if(get_irn_col(co, an->irn) != get_irn_col(co, neigh->irn)) {
699                                         stat->costs += neigh->costs;
700                                         stat->unsatisfied_edges += 1;
701                                 }
702
703                                 if(nodes_interfere(co->cenv, an->irn, neigh->irn)) {
704                                         stat->aff_int += 1;
705                                         stat->inevit_costs += neigh->costs;
706                                 }
707
708                         }
709                 }
710         }
711
712         bitset_free(seen);
713 }
714
715 /******************************************************************************
716    _____                 _        _____ _
717   / ____|               | |      / ____| |
718  | |  __ _ __ __ _ _ __ | |__   | (___ | |_ ___  _ __ __ _  __ _  ___
719  | | |_ | '__/ _` | '_ \| '_ \   \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
720  | |__| | | | (_| | |_) | | | |  ____) | || (_) | | | (_| | (_| |  __/
721   \_____|_|  \__,_| .__/|_| |_| |_____/ \__\___/|_|  \__,_|\__, |\___|
722                   | |                                       __/ |
723                   |_|                                      |___/
724  ******************************************************************************/
725
726 static int compare_affinity_node_t(const void *k1, const void *k2, size_t size) {
727         const affinity_node_t *n1 = k1;
728         const affinity_node_t *n2 = k2;
729         (void) size;
730
731         return (n1->irn != n2->irn);
732 }
733
734 static void add_edge(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
735         affinity_node_t new_node, *node;
736         neighb_t        *nbr;
737         int             allocnew = 1;
738
739         new_node.irn        = n1;
740         new_node.degree     = 0;
741         new_node.neighbours = NULL;
742         node = set_insert(co->nodes, &new_node, sizeof(new_node), hash_irn(new_node.irn));
743
744         for (nbr = node->neighbours; nbr; nbr = nbr->next)
745                 if (nbr->irn == n2) {
746                         allocnew = 0;
747                         break;
748                 }
749
750         /* if we did not find n2 in n1's neighbourhood insert it */
751         if (allocnew) {
752                 nbr        = obstack_alloc(&co->obst, sizeof(*nbr));
753                 nbr->irn   = n2;
754                 nbr->costs = 0;
755                 nbr->next  = node->neighbours;
756
757                 node->neighbours = nbr;
758                 node->degree++;
759         }
760
761         /* now nbr points to n1's neighbour-entry of n2 */
762         nbr->costs += costs;
763 }
764
765 static INLINE void add_edges(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
766         if (! be_ifg_connected(co->cenv->ifg, n1, n2)) {
767                 add_edge(co, n1, n2, costs);
768                 add_edge(co, n2, n1, costs);
769         }
770 }
771
772 static void build_graph_walker(ir_node *irn, void *env) {
773         copy_opt_t *co = env;
774         int pos, max;
775         const arch_register_t *reg;
776
777         if (!is_curr_reg_class(co, irn) || arch_irn_is(co->aenv, irn, ignore))
778                 return;
779
780         reg = arch_get_irn_register(co->aenv, irn);
781         if (arch_register_type_is(reg, ignore))
782                 return;
783
784         if (is_Reg_Phi(irn)) { /* Phis */
785                 for (pos=0, max=get_irn_arity(irn); pos<max; ++pos) {
786                         ir_node *arg = get_irn_n(irn, pos);
787                         add_edges(co, irn, arg, co->get_costs(co, irn, arg, pos));
788                 }
789         }
790         else if (is_Perm_Proj(co->aenv, irn)) { /* Perms */
791                 ir_node *arg = get_Perm_src(irn);
792                 add_edges(co, irn, arg, co->get_costs(co, irn, arg, 0));
793         }
794         else { /* 2-address code */
795                 const arch_register_req_t *req = arch_get_register_req(co->aenv, irn, -1);
796                 if (is_2addr_code(req)) {
797                         const unsigned other = req->other_same;
798                         int i;
799
800                         for (i = 0; 1U << i <= other; ++i) {
801                                 if (other & (1U << i)) {
802                                         ir_node *other = get_irn_n(skip_Proj(irn), i);
803                                         if (! arch_irn_is(co->aenv, other, ignore))
804                                                 add_edges(co, irn, other, co->get_costs(co, irn, other, 0));
805                                 }
806                         }
807                 }
808         }
809 }
810
811 void co_build_graph_structure(copy_opt_t *co) {
812         obstack_init(&co->obst);
813         co->nodes = new_set(compare_affinity_node_t, 32);
814
815         irg_walk_graph(co->irg, build_graph_walker, NULL, co);
816 }
817
818 void co_free_graph_structure(copy_opt_t *co) {
819         ASSERT_GS_AVAIL(co);
820
821         del_set(co->nodes);
822         obstack_free(&co->obst, NULL);
823         co->nodes = NULL;
824 }
825
826 /* co_solve_ilp1() co_solve_ilp2() are implemented in becopyilpX.c */
827
828 int co_gs_is_optimizable(copy_opt_t *co, ir_node *irn) {
829         affinity_node_t new_node, *n;
830
831         ASSERT_GS_AVAIL(co);
832
833         new_node.irn = irn;
834         n = set_find(co->nodes, &new_node, sizeof(new_node), hash_irn(new_node.irn));
835         if (n) {
836                 return (n->degree > 0);
837         } else
838                 return 0;
839 }
840
841 static int co_dump_appel_disjoint_constraints(const copy_opt_t *co, ir_node *a, ir_node *b)
842 {
843         ir_node *nodes[]  = { a, b };
844         bitset_t *constr[] = { NULL, NULL };
845         const arch_register_req_t *req;
846         int j;
847
848         constr[0] = bitset_alloca(co->cls->n_regs);
849         constr[1] = bitset_alloca(co->cls->n_regs);
850
851         for (j = 0; j < 2; ++j) {
852                 req = arch_get_register_req(co->aenv, nodes[j], BE_OUT_POS(0));
853                 if(arch_register_req_is(req, limited))
854                         rbitset_copy_to_bitset(req->limited, constr[j]);
855                 else
856                         bitset_set_all(constr[j]);
857
858         }
859
860         return !bitset_intersect(constr[0], constr[1]);
861 }
862
863 void co_dump_appel_graph(const copy_opt_t *co, FILE *f)
864 {
865         be_ifg_t *ifg  = co->cenv->ifg;
866         int *color_map = alloca(co->cls->n_regs * sizeof(color_map[0]));
867         int *node_map  = xmalloc((get_irg_last_idx(co->irg) + 1) * sizeof(node_map[0]));
868
869         ir_node *irn;
870         void *it, *nit;
871         int n, n_regs;
872         unsigned i;
873
874         n_regs = 0;
875         for(i = 0; i < co->cls->n_regs; ++i) {
876                 const arch_register_t *reg = &co->cls->regs[i];
877                 color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_regs++;
878         }
879
880         /*
881          * n contains the first node number.
882          * the values below n are the pre-colored register nodes
883          */
884
885         it  = be_ifg_nodes_iter_alloca(ifg);
886         nit = be_ifg_neighbours_iter_alloca(ifg);
887
888         n = n_regs;
889         be_ifg_foreach_node(ifg, it, irn) {
890                 if(!arch_irn_is(co->aenv, irn, ignore))
891                         node_map[get_irn_idx(irn)] = n++;
892         }
893
894         fprintf(f, "%d %d\n", n, n_regs);
895
896         be_ifg_foreach_node(ifg, it, irn) {
897                 if(!arch_irn_is(co->aenv, irn, ignore)) {
898                         int idx            = node_map[get_irn_idx(irn)];
899                         affinity_node_t *a = get_affinity_info(co, irn);
900
901                         const arch_register_req_t *req;
902                         ir_node *adj;
903
904                         req = arch_get_register_req(co->aenv, irn, BE_OUT_POS(0));
905                         if(arch_register_req_is(req, limited)) {
906                                 for(i = 0; i < co->cls->n_regs; ++i) {
907                                         if(!rbitset_is_set(req->limited, i) && color_map[i] >= 0)
908                                                 fprintf(f, "%d %d -1\n", color_map[i], idx);
909                                 }
910                         }
911
912                         be_ifg_foreach_neighbour(ifg, nit, irn, adj) {
913                                 if(!arch_irn_is(co->aenv, adj, ignore) && !co_dump_appel_disjoint_constraints(co, irn, adj)) {
914                                         int adj_idx = node_map[get_irn_idx(adj)];
915                                         if(idx < adj_idx)
916                                                 fprintf(f, "%d %d -1\n", idx, adj_idx);
917                                 }
918                         }
919
920                         if(a) {
921                                 neighb_t *n;
922
923                                 co_gs_foreach_neighb(a, n) {
924                                         if(!arch_irn_is(co->aenv, n->irn, ignore)) {
925                                                 int n_idx = node_map[get_irn_idx(n->irn)];
926                                                 if(idx < n_idx)
927                                                         fprintf(f, "%d %d %d\n", idx, n_idx, (int) n->costs);
928                                         }
929                                 }
930                         }
931                 }
932         }
933
934         xfree(node_map);
935 }
936
937 /*
938          ___ _____ ____   ____   ___ _____   ____                        _
939         |_ _|  ___/ ___| |  _ \ / _ \_   _| |  _ \ _   _ _ __ ___  _ __ (_)_ __   __ _
940          | || |_ | |  _  | | | | | | || |   | | | | | | | '_ ` _ \| '_ \| | '_ \ / _` |
941          | ||  _|| |_| | | |_| | |_| || |   | |_| | |_| | | | | | | |_) | | | | | (_| |
942         |___|_|   \____| |____/ \___/ |_|   |____/ \__,_|_| |_| |_| .__/|_|_| |_|\__, |
943                                                                   |_|            |___/
944 */
945
946 static const char *get_dot_color_name(size_t col)
947 {
948         static const char *names[] = {
949                 "blue",
950                 "red",
951                 "green",
952                 "yellow",
953                 "cyan",
954                 "magenta",
955                 "orange",
956                 "chocolate",
957                 "beige",
958                 "navy",
959                 "darkgreen",
960                 "darkred",
961                 "lightPink",
962                 "chartreuse",
963                 "lightskyblue",
964                 "linen",
965                 "pink",
966                 "lightslateblue",
967                 "mintcream",
968                 "red",
969                 "darkolivegreen",
970                 "mediumblue",
971                 "mistyrose",
972                 "salmon",
973                 "darkseagreen",
974                 "mediumslateblue"
975                 "moccasin",
976                 "tomato",
977                 "forestgreen",
978                 "darkturquoise",
979                 "palevioletred"
980         };
981
982         return col < sizeof(names)/sizeof(names[0]) ? names[col] : "white";
983 }
984
985 typedef struct _co_ifg_dump_t {
986         const copy_opt_t *co;
987         unsigned flags;
988 } co_ifg_dump_t;
989
990 static void ifg_dump_graph_attr(FILE *f, void *self)
991 {
992         (void) self;
993         fprintf(f, "overlap=scale");
994 }
995
996 static int ifg_is_dump_node(void *self, ir_node *irn)
997 {
998         co_ifg_dump_t *cod = self;
999         return !arch_irn_is(cod->co->aenv, irn, ignore);
1000 }
1001
1002 static void ifg_dump_node_attr(FILE *f, void *self, ir_node *irn)
1003 {
1004         co_ifg_dump_t *env         = self;
1005         const arch_register_t *reg = arch_get_irn_register(env->co->aenv, irn);
1006         const arch_register_req_t *req;
1007         int limited;
1008
1009         req = arch_get_register_req(env->co->aenv, irn, BE_OUT_POS(0));
1010         limited = arch_register_req_is(req, limited);
1011
1012         if(env->flags & CO_IFG_DUMP_LABELS) {
1013                 ir_fprintf(f, "label=\"%+F", irn);
1014
1015                 if((env->flags & CO_IFG_DUMP_CONSTR) && limited) {
1016                         bitset_t *bs = bitset_alloca(env->co->cls->n_regs);
1017                         rbitset_copy_to_bitset(req->limited, bs);
1018                         ir_fprintf(f, "\\n%B", bs);
1019                 }
1020                 ir_fprintf(f, "\" ");
1021         } else {
1022                 fprintf(f, "label=\"\" shape=point " );
1023         }
1024
1025         if(env->flags & CO_IFG_DUMP_SHAPE)
1026                 fprintf(f, "shape=%s ", limited ? "diamond" : "ellipse");
1027
1028         if(env->flags & CO_IFG_DUMP_COLORS)
1029                 fprintf(f, "style=filled color=%s ", get_dot_color_name(reg->index));
1030 }
1031
1032 static void ifg_dump_at_end(FILE *file, void *self)
1033 {
1034         co_ifg_dump_t *env = self;
1035         affinity_node_t *a;
1036
1037         co_gs_foreach_aff_node(env->co, a) {
1038                 const arch_register_t *ar = arch_get_irn_register(env->co->aenv, a->irn);
1039                 unsigned aidx = get_irn_idx(a->irn);
1040                 neighb_t *n;
1041
1042                 co_gs_foreach_neighb(a, n) {
1043                         const arch_register_t *nr = arch_get_irn_register(env->co->aenv, n->irn);
1044                         unsigned nidx = get_irn_idx(n->irn);
1045
1046                         if(aidx < nidx) {
1047                                 const char *color = nr == ar ? "blue" : "red";
1048                                 fprintf(file, "\tn%d -- n%d [weight=0.01 ", aidx, nidx);
1049                                 if(env->flags & CO_IFG_DUMP_LABELS)
1050                                         fprintf(file, "label=\"%d\" ", n->costs);
1051                                 if(env->flags & CO_IFG_DUMP_COLORS)
1052                                         fprintf(file, "color=%s ", color);
1053                                 else
1054                                         fprintf(file, "style=dotted");
1055                                 fprintf(file, "];\n");
1056                         }
1057                 }
1058         }
1059 }
1060
1061
1062 static be_ifg_dump_dot_cb_t ifg_dot_cb = {
1063         ifg_is_dump_node,
1064         ifg_dump_graph_attr,
1065         ifg_dump_node_attr,
1066         NULL,
1067         NULL,
1068         ifg_dump_at_end
1069 };
1070
1071
1072
1073 void co_dump_ifg_dot(const copy_opt_t *co, FILE *f, unsigned flags)
1074 {
1075         co_ifg_dump_t cod;
1076
1077         cod.co    = co;
1078         cod.flags = flags;
1079         be_ifg_dump_dot(co->cenv->ifg, co->irg, f, &ifg_dot_cb, &cod);
1080 }
1081
1082
1083 void co_solve_park_moon(copy_opt_t *opt)
1084 {
1085         (void) opt;
1086 }
1087
1088 static int void_algo(copy_opt_t *co)
1089 {
1090         (void) co;
1091         return 0;
1092 }
1093
1094 /*
1095                 _    _                  _ _   _
1096            / \  | | __ _  ___  _ __(_) |_| |__  _ __ ___  ___
1097           / _ \ | |/ _` |/ _ \| '__| | __| '_ \| '_ ` _ \/ __|
1098          / ___ \| | (_| | (_) | |  | | |_| | | | | | | | \__ \
1099         /_/   \_\_|\__, |\___/|_|  |_|\__|_| |_|_| |_| |_|___/
1100                            |___/
1101 */
1102
1103 typedef struct {
1104         co_algo_t  *algo;
1105         const char *name;
1106         int        can_improve_existing;
1107 } co_algo_info_t;
1108
1109 static co_algo_info_t algos[] = {
1110         { void_algo,               "none",  0 },
1111         { co_solve_heuristic,      "heur1", 0 },
1112         { co_solve_heuristic_new,  "heur2", 0 },
1113 #ifdef WITH_JVM
1114         { co_solve_heuristic_java, "heur3", 0 },
1115 #else
1116         { NULL,                    "heur3", 0 },
1117 #endif
1118         { co_solve_heuristic_mst,  "heur4", 0 },
1119 #ifdef WITH_ILP
1120         { co_solve_ilp2,           "ilp",   1 },
1121 #else
1122         { NULL,                    "ilp",   1 },
1123 #endif
1124         { NULL,                    "",      0 }
1125 };
1126
1127 /*
1128     __  __       _         ____       _
1129    |  \/  | __ _(_)_ __   |  _ \ _ __(_)_   _____ _ __
1130    | |\/| |/ _` | | '_ \  | | | | '__| \ \ / / _ \ '__|
1131    | |  | | (_| | | | | | | |_| | |  | |\ V /  __/ |
1132    |_|  |_|\__,_|_|_| |_| |____/|_|  |_| \_/ \___|_|
1133
1134 */
1135
1136 static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char *suffix)
1137 {
1138         FILE *result;
1139         char buf[1024];
1140         size_t i, n;
1141         char *tu_name;
1142
1143         n = strlen(env->birg->main_env->cup_name);
1144         tu_name = xmalloc((n + 1) * sizeof(*tu_name));
1145         strcpy(tu_name, env->birg->main_env->cup_name);
1146         for (i = 0; i < n; ++i)
1147                 if (tu_name[i] == '.')
1148                         tu_name[i] = '_';
1149
1150
1151         ir_snprintf(buf, sizeof(buf), "%s%s_%F_%s%s", prefix, tu_name, env->irg, env->cls->name, suffix);
1152         xfree(tu_name);
1153         result = fopen(buf, "wt");
1154         if(result == NULL) {
1155                 panic("Couldn't open '%s' for writing.", buf);
1156         }
1157
1158         return result;
1159 }
1160
1161 void co_driver(be_chordal_env_t *cenv)
1162 {
1163         ir_timer_t          *timer = ir_timer_register("firm.be.copyopt", "runtime");
1164         co_complete_stats_t before, after;
1165         copy_opt_t          *co;
1166         co_algo_t           *algo_func;
1167         int                 was_optimal = 0;
1168
1169         if (algo >= CO_ALGO_LAST)
1170                 return;
1171
1172         be_liveness_assure_chk(be_get_birg_liveness(cenv->birg));
1173
1174         co = new_copy_opt(cenv, cost_func);
1175         co_build_ou_structure(co);
1176         co_build_graph_structure(co);
1177
1178         co_complete_stats(co, &before);
1179
1180         be_stat_ev_ull("co_aff_nodes",    before.aff_nodes);
1181         be_stat_ev_ull("co_aff_edges",    before.aff_edges);
1182         be_stat_ev_ull("co_max_costs",    before.max_costs);
1183         be_stat_ev_ull("co_inevit_costs", before.inevit_costs);
1184         be_stat_ev_ull("co_aff_int",      before.aff_int);
1185
1186         be_stat_ev_ull("co_init_costs",   before.costs);
1187         be_stat_ev_ull("co_init_unsat",   before.unsatisfied_edges);
1188
1189         if (dump_flags & DUMP_BEFORE) {
1190                 FILE *f = my_open(cenv, "", "-before.dot");
1191                 co_dump_ifg_dot(co, f, style_flags);
1192                 fclose(f);
1193         }
1194
1195         /* if the algo can improve results, provide an initial solution with heur3 */
1196         if (improve && algos[algo].can_improve_existing) {
1197                 co_complete_stats_t stats;
1198
1199                 /* produce a heuristic solution */
1200 #ifdef WITH_JVM
1201                 co_solve_heuristic_java(co);
1202 #else
1203                 co_solve_heuristic(co);
1204 #endif /* WITH_JVM */
1205
1206                 /* do the stats and provide the current costs */
1207                 co_complete_stats(co, &stats);
1208                 be_stat_ev_ull("co_prepare_costs", stats.costs);
1209         }
1210
1211 #ifdef WITH_JVM
1212         /* start the JVM here so that it does not tamper the timing. */
1213         if (algo == CO_ALGO_HEUR3)
1214                 be_java_coal_start_jvm();
1215 #endif /* WITH_JVM */
1216
1217         algo_func = algos[algo].algo;
1218
1219         /* perform actual copy minimization */
1220         ir_timer_reset_and_start(timer);
1221         was_optimal = algo_func(co);
1222         ir_timer_stop(timer);
1223
1224         be_stat_ev("co_time", ir_timer_elapsed_msec(timer));
1225         be_stat_ev_ull("co_optimal", was_optimal);
1226
1227         if (dump_flags & DUMP_AFTER) {
1228                 FILE *f = my_open(cenv, "", "-after.dot");
1229                 co_dump_ifg_dot(co, f, style_flags);
1230                 fclose(f);
1231         }
1232
1233         co_complete_stats(co, &after);
1234
1235         if (do_stats) {
1236                 ulong64 optimizable_costs = after.max_costs - after.inevit_costs;
1237                 ulong64 evitable          = after.costs     - after.inevit_costs;
1238
1239                 ir_printf("%30F ", cenv->irg);
1240                 printf("%10s %10" ULL_FMT "%10" ULL_FMT "%10" ULL_FMT, cenv->cls->name, after.max_costs, before.costs, after.inevit_costs);
1241
1242                 if(optimizable_costs > 0)
1243                         printf("%10" ULL_FMT " %5.2f\n", after.costs, (evitable * 100.0) / optimizable_costs);
1244                 else
1245                         printf("%10" ULL_FMT " %5s\n", after.costs, "-");
1246         }
1247
1248         /* Dump the interference graph in Appel's format. */
1249         if (dump_flags & DUMP_APPEL) {
1250                 FILE *f = my_open(cenv, "", ".apl");
1251                 fprintf(f, "# %lld %lld\n", after.costs, after.unsatisfied_edges);
1252                 co_dump_appel_graph(co, f);
1253                 fclose(f);
1254         }
1255
1256         be_stat_ev_ull("co_after_costs", after.costs);
1257         be_stat_ev_ull("co_after_unsat", after.unsatisfied_edges);
1258
1259         co_free_graph_structure(co);
1260         co_free_ou_structure(co);
1261         free_copy_opt(co);
1262 }