s/\<\(LC_\)\?INLINE\>/inline/.
[libfirm] / ir / be / becopyopt.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Copy minimization driver.
23  * @author      Daniel Grund
24  * @date        12.04.2005
25  * @version     $Id$
26  *
27  * Main file for the optimization reducing the copies needed for:
28  * - Phi coalescing
29  * - Register-constrained nodes
30  * - Two-address code instructions
31  */
32 #include "config.h"
33
34 #include "execfreq.h"
35 #include "xmalloc.h"
36 #include "debug.h"
37 #include "pmap.h"
38 #include "raw_bitset.h"
39 #include "irnode.h"
40 #include "irgraph.h"
41 #include "irgwalk.h"
42 #include "irprog.h"
43 #include "irloop_t.h"
44 #include "iredges_t.h"
45 #include "phiclass.h"
46 #include "irbitset.h"
47 #include "irphase_t.h"
48 #include "irprintf_t.h"
49
50 #include "bemodule.h"
51 #include "bearch_t.h"
52 #include "benode_t.h"
53 #include "beutil.h"
54 #include "beifg_t.h"
55 #include "beintlive_t.h"
56 #include "becopyopt_t.h"
57 #include "becopystat.h"
58 #include "belive_t.h"
59 #include "beinsn_t.h"
60 #include "besched_t.h"
61 #include "bestatevent.h"
62 #include "beirg_t.h"
63 #include "error.h"
64
65 #include "lc_opts.h"
66 #include "lc_opts_enum.h"
67
68 #define DUMP_BEFORE 1
69 #define DUMP_AFTER  2
70 #define DUMP_APPEL  4
71 #define DUMP_ALL    2 * DUMP_APPEL - 1
72
73 #define COST_FUNC_FREQ     1
74 #define COST_FUNC_LOOP     2
75 #define COST_FUNC_ALL_ONE  3
76
77 static unsigned   dump_flags  = 0;
78 static unsigned   style_flags = 0;
79 static unsigned   do_stats    = 0;
80 static cost_fct_t cost_func   = co_get_costs_exec_freq;
81 static unsigned   algo        = CO_ALGO_HEUR4;
82 static int        improve     = 1;
83
84 static const lc_opt_enum_mask_items_t dump_items[] = {
85         { "before",  DUMP_BEFORE },
86         { "after",   DUMP_AFTER  },
87         { "appel",   DUMP_APPEL  },
88         { "all",     DUMP_ALL    },
89         { NULL,      0 }
90 };
91
92 static const lc_opt_enum_mask_items_t style_items[] = {
93         { "color",   CO_IFG_DUMP_COLORS },
94         { "labels",  CO_IFG_DUMP_LABELS },
95         { "constr",  CO_IFG_DUMP_CONSTR },
96         { "shape",   CO_IFG_DUMP_SHAPE  },
97         { "full",    2 * CO_IFG_DUMP_SHAPE - 1 },
98         { NULL,      0 }
99 };
100
101 static const lc_opt_enum_mask_items_t algo_items[] = {
102         { "none",   CO_ALGO_NONE  },
103         { "heur",   CO_ALGO_HEUR  },
104         { "heur2",  CO_ALGO_HEUR2 },
105         { "heur3",  CO_ALGO_HEUR3 },
106         { "heur4",  CO_ALGO_HEUR4 },
107         { "ilp",    CO_ALGO_ILP   },
108         { NULL,     0 }
109 };
110
111 typedef int (*opt_funcptr)(void);
112
113 static const lc_opt_enum_func_ptr_items_t cost_func_items[] = {
114         { "freq",   (opt_funcptr) co_get_costs_exec_freq },
115         { "loop",   (opt_funcptr) co_get_costs_loop_depth },
116         { "one",    (opt_funcptr) co_get_costs_all_one },
117         { NULL,     NULL }
118 };
119
120 static lc_opt_enum_mask_var_t dump_var = {
121         &dump_flags, dump_items
122 };
123
124 static lc_opt_enum_mask_var_t style_var = {
125         &style_flags, style_items
126 };
127
128 static lc_opt_enum_mask_var_t algo_var = {
129         &algo, algo_items
130 };
131
132 static lc_opt_enum_func_ptr_var_t cost_func_var = {
133         (opt_funcptr*) &cost_func, cost_func_items
134 };
135
136 static const lc_opt_table_entry_t options[] = {
137         LC_OPT_ENT_ENUM_INT      ("algo",    "select copy optimization algo",                           &algo_var),
138         LC_OPT_ENT_ENUM_FUNC_PTR ("cost",    "select a cost function",                                  &cost_func_var),
139         LC_OPT_ENT_ENUM_MASK     ("dump",    "dump ifg before or after copy optimization",              &dump_var),
140         LC_OPT_ENT_ENUM_MASK     ("style",   "dump style for ifg dumping",                              &style_var),
141         LC_OPT_ENT_BOOL          ("stats",   "dump statistics after each optimization",                 &do_stats),
142         LC_OPT_ENT_BOOL          ("improve", "run heur3 before if algo can exploit start solutions",    &improve),
143         LC_OPT_LAST
144 };
145
146 /* Insert additional options registration functions here. */
147 extern void be_co_ilp_register_options(lc_opt_entry_t *grp);
148 extern void be_co2_register_options(lc_opt_entry_t *grp);
149 extern void be_co3_register_options(lc_opt_entry_t *grp);
150
151 void be_init_copycoal(void)
152 {
153         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
154         lc_opt_entry_t *ra_grp = lc_opt_get_grp(be_grp, "ra");
155         lc_opt_entry_t *chordal_grp = lc_opt_get_grp(ra_grp, "chordal");
156         lc_opt_entry_t *co_grp = lc_opt_get_grp(chordal_grp, "co");
157
158         lc_opt_add_table(co_grp, options);
159 }
160
161 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copycoal);
162
163 #undef QUICK_AND_DIRTY_HACK
164
165 static int nodes_interfere(const be_chordal_env_t *env, const ir_node *a, const ir_node *b)
166 {
167         if (env->ifg)
168                 return be_ifg_connected(env->ifg, a, b);
169         else
170                 return values_interfere(env->birg, a, b);
171 }
172
173
174 /******************************************************************************
175     _____                           _
176    / ____|                         | |
177   | |  __  ___ _ __   ___ _ __ __ _| |
178   | | |_ |/ _ \ '_ \ / _ \ '__/ _` | |
179   | |__| |  __/ | | |  __/ | | (_| | |
180    \_____|\___|_| |_|\___|_|  \__,_|_|
181
182  ******************************************************************************/
183
184 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
185
186
187 copy_opt_t *new_copy_opt(be_chordal_env_t *chordal_env, cost_fct_t get_costs)
188 {
189         const char *s1, *s2, *s3;
190         int len;
191         copy_opt_t *co;
192
193         FIRM_DBG_REGISTER(dbg, "ir.be.copyopt");
194
195         co = XMALLOCZ(copy_opt_t);
196         co->cenv      = chordal_env;
197         co->irg       = chordal_env->irg;
198         co->cls       = chordal_env->cls;
199         co->get_costs = get_costs;
200
201         s1 = get_irp_prog_name();
202         s2 = get_entity_name(get_irg_entity(co->irg));
203         s3 = chordal_env->cls->name;
204         len = strlen(s1) + strlen(s2) + strlen(s3) + 5;
205         co->name = XMALLOCN(char, len);
206         snprintf(co->name, len, "%s__%s__%s", s1, s2, s3);
207
208         return co;
209 }
210
211 void free_copy_opt(copy_opt_t *co) {
212         xfree(co->name);
213         free(co);
214 }
215
216 /**
217  * Checks if a node is optimizable, viz. has something to do with coalescing
218  * @param irn  The irn to check
219  */
220 static int co_is_optimizable_root(ir_node *irn)
221 {
222         const arch_register_req_t *req;
223         const arch_register_t *reg;
224
225         if (arch_irn_is(irn, ignore))
226                 return 0;
227
228         reg = arch_get_irn_register(irn);
229         if (arch_register_type_is(reg, ignore))
230                 return 0;
231
232         req = arch_get_register_req(irn, -1);
233         if (is_Reg_Phi(irn) || is_Perm_Proj(irn) || is_2addr_code(req))
234                 return 1;
235
236         return 0;
237 }
238
239 int co_get_costs_loop_depth(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
240         int cost = 0;
241         ir_loop *loop;
242         ir_node *root_block = get_nodes_block(root);
243         (void) co;
244         (void) arg;
245
246         if (is_Phi(root)) {
247                 /* for phis the copies are placed in the corresponding pred-block */
248                 loop = get_irn_loop(get_Block_cfgpred_block(root_block, pos));
249         } else {
250                 /* a perm places the copy in the same block as it resides */
251                 loop = get_irn_loop(root_block);
252         }
253         if (loop) {
254                 int d = get_loop_depth(loop);
255                 cost = d*d;
256         }
257         return 1+cost;
258 }
259
260 int co_get_costs_exec_freq(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
261         int res;
262         ir_node *root_bl = get_nodes_block(root);
263         ir_node *copy_bl = is_Phi(root) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
264         (void) arg;
265         res = get_block_execfreq_ulong(co->cenv->birg->exec_freq, copy_bl);
266
267         /* don't allow values smaller than one. */
268         return res < 1 ? 1 : res;
269 }
270
271
272 int co_get_costs_all_one(const copy_opt_t *co, ir_node *root, ir_node *arg, int pos) {
273         (void) co;
274         (void) root;
275         (void) arg;
276         (void) pos;
277         return 1;
278 }
279
280 /******************************************************************************
281    ____        _   _    _       _ _          _____ _
282   / __ \      | | | |  | |     (_) |        / ____| |
283  | |  | |_ __ | |_| |  | |_ __  _| |_ ___  | (___ | |_ ___  _ __ __ _  __ _  ___
284  | |  | | '_ \| __| |  | | '_ \| | __/ __|  \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
285  | |__| | |_) | |_| |__| | | | | | |_\__ \  ____) | || (_) | | | (_| | (_| |  __/
286   \____/| .__/ \__|\____/|_| |_|_|\__|___/ |_____/ \__\___/|_|  \__,_|\__, |\___|
287         | |                                                            __/ |
288         |_|                                                           |___/
289  ******************************************************************************/
290
291 /**
292  * Determines a maximum weighted independent set with respect to
293  * the interference and conflict edges of all nodes in a qnode.
294  */
295 static int ou_max_ind_set_costs(unit_t *ou) {
296         be_chordal_env_t *chordal_env = ou->co->cenv;
297         ir_node **safe, **unsafe;
298         int i, o, safe_count, safe_costs, unsafe_count, *unsafe_costs;
299         bitset_t *curr;
300         bitset_pos_t pos;
301         int max, curr_weight, best_weight = 0;
302
303         /* assign the nodes into two groups.
304          * safe: node has no interference, hence it is in every max stable set.
305          * unsafe: node has an interference
306          */
307         safe = alloca((ou->node_count-1) * sizeof(*safe));
308         safe_costs = 0;
309         safe_count = 0;
310         unsafe = alloca((ou->node_count-1) * sizeof(*unsafe));
311         unsafe_costs = alloca((ou->node_count-1) * sizeof(*unsafe_costs));
312         unsafe_count = 0;
313         for(i=1; i<ou->node_count; ++i) {
314                 int is_safe = 1;
315                 for(o=1; o<ou->node_count; ++o) {
316                         if (i==o)
317                                 continue;
318                         if (nodes_interfere(chordal_env, ou->nodes[i], ou->nodes[o])) {
319                                 unsafe_costs[unsafe_count] = ou->costs[i];
320                                 unsafe[unsafe_count] = ou->nodes[i];
321                                 ++unsafe_count;
322                                 is_safe = 0;
323                                 break;
324                         }
325                 }
326                 if (is_safe) {
327                         safe_costs += ou->costs[i];
328                         safe[safe_count++] = ou->nodes[i];
329                 }
330         }
331
332
333         /* now compute the best set out of the unsafe nodes*/
334         if (unsafe_count > MIS_HEUR_TRIGGER) {
335                 bitset_t *best = bitset_alloca(unsafe_count);
336                 /* Heuristik: Greedy trial and error form index 0 to unsafe_count-1 */
337                 for (i=0; i<unsafe_count; ++i) {
338                         bitset_set(best, i);
339                         /* check if it is a stable set */
340                         for (o=bitset_next_set(best, 0); o!=-1 && o<i; o=bitset_next_set(best, o+1))
341                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o])) {
342                                         bitset_clear(best, i); /* clear the bit and try next one */
343                                         break;
344                                 }
345                 }
346                 /* compute the weight */
347                 bitset_foreach(best, pos)
348                         best_weight += unsafe_costs[pos];
349         } else {
350                 /* Exact Algorithm: Brute force */
351                 curr = bitset_alloca(unsafe_count);
352                 bitset_set_all(curr);
353                 while ((max = bitset_popcnt(curr)) != 0) {
354                         /* check if curr is a stable set */
355                         for (i=bitset_next_set(curr, 0); i!=-1; i=bitset_next_set(curr, i+1))
356                                 for (o=bitset_next_set(curr, i+1); o!=-1; o=bitset_next_set(curr, o+1)) /* !!!!! difference to qnode_max_ind_set(): NOT (curr, i) */
357                                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o]))
358                                                         goto no_stable_set;
359
360                         /* if we arrive here, we have a stable set */
361                         /* compute the weigth of the stable set*/
362                         curr_weight = 0;
363                         bitset_foreach(curr, pos)
364                                 curr_weight += unsafe_costs[pos];
365
366                         /* any better ? */
367                         if (curr_weight > best_weight) {
368                                 best_weight = curr_weight;
369                         }
370
371         no_stable_set:
372                         bitset_minus1(curr);
373                 }
374         }
375
376         return safe_costs+best_weight;
377 }
378
379 static void co_collect_units(ir_node *irn, void *env) {
380         copy_opt_t *co = env;
381         unit_t *unit;
382
383         if (!is_curr_reg_class(co, irn))
384                 return;
385         if (!co_is_optimizable_root(irn))
386                 return;
387
388         /* Init a new unit */
389         unit = XMALLOCZ(unit_t);
390         unit->co = co;
391         unit->node_count = 1;
392         INIT_LIST_HEAD(&unit->queue);
393
394         /* Phi with some/all of its arguments */
395         if (is_Reg_Phi(irn)) {
396                 int i, arity;
397
398                 /* init */
399                 arity = get_irn_arity(irn);
400                 unit->nodes = XMALLOCN(ir_node*, arity + 1);
401                 unit->costs = XMALLOCN(int,      arity + 1);
402                 unit->nodes[0] = irn;
403
404                 /* fill */
405                 for (i=0; i<arity; ++i) {
406                         int o, arg_pos;
407                         ir_node *arg = get_irn_n(irn, i);
408
409                         assert(is_curr_reg_class(co, arg) && "Argument not in same register class.");
410                         if (arg == irn)
411                                 continue;
412                         if (nodes_interfere(co->cenv, irn, arg)) {
413                                 unit->inevitable_costs += co->get_costs(co, irn, arg, i);
414                                 continue;
415                         }
416
417                         /* Else insert the argument of the phi to the members of this ou */
418                         DBG((dbg, LEVEL_1, "\t   Member: %+F\n", arg));
419
420                         if (!arch_irn_is(arg, ignore)) {
421                                 /* Check if arg has occurred at a prior position in the arg/list */
422                                 arg_pos = 0;
423                                 for (o=1; o<unit->node_count; ++o) {
424                                         if (unit->nodes[o] == arg) {
425                                                 arg_pos = o;
426                                                 break;
427                                         }
428                                 }
429
430                                 if (!arg_pos) { /* a new argument */
431                                         /* insert node, set costs */
432                                         unit->nodes[unit->node_count] = arg;
433                                         unit->costs[unit->node_count] = co->get_costs(co, irn, arg, i);
434                                         unit->node_count++;
435                                 } else { /* arg has occurred before in same phi */
436                                         /* increase costs for existing arg */
437                                         unit->costs[arg_pos] += co->get_costs(co, irn, arg, i);
438                                 }
439                         }
440                 }
441                 unit->nodes = XREALLOC(unit->nodes, ir_node*, unit->node_count);
442                 unit->costs = XREALLOC(unit->costs, int,      unit->node_count);
443         } else if (is_Perm_Proj(irn)) {
444                 /* Proj of a perm with corresponding arg */
445                 assert(!nodes_interfere(co->cenv, irn, get_Perm_src(irn)));
446                 unit->nodes = XMALLOCN(ir_node*, 2);
447                 unit->costs = XMALLOCN(int,      2);
448                 unit->node_count = 2;
449                 unit->nodes[0] = irn;
450                 unit->nodes[1] = get_Perm_src(irn);
451                 unit->costs[1] = co->get_costs(co, irn, unit->nodes[1], -1);
452         } else {
453                 const arch_register_req_t *req = arch_get_register_req(irn, -1);
454
455                 /* Src == Tgt of a 2-addr-code instruction */
456                 if (is_2addr_code(req)) {
457                         const unsigned other = req->other_same;
458                         int            count = 0;
459                         int            i;
460
461                         for (i = 0; (1U << i) <= other; ++i) {
462                                 if (other & (1U << i)) {
463                                         ir_node *o  = get_irn_n(skip_Proj(irn), i);
464                                         if (!arch_irn_is(o, ignore) &&
465                                                         !nodes_interfere(co->cenv, irn, o)) {
466                                                 ++count;
467                                         }
468                                 }
469                         }
470
471                         if (count != 0) {
472                                 int k = 0;
473                                 ++count;
474                                 unit->nodes = XMALLOCN(ir_node*, count);
475                                 unit->costs = XMALLOCN(int,      count);
476                                 unit->node_count = count;
477                                 unit->nodes[k++] = irn;
478
479                                 for (i = 0; 1U << i <= other; ++i) {
480                                         if (other & (1U << i)) {
481                                                 ir_node *o  = get_irn_n(skip_Proj(irn), i);
482                                                 if (!arch_irn_is(o, ignore) &&
483                                                                 !nodes_interfere(co->cenv, irn, o)) {
484                                                         unit->nodes[k] = o;
485                                                         unit->costs[k] = co->get_costs(co, irn, o, -1);
486                                                         ++k;
487                                                 }
488                                         }
489                                 }
490                         }
491                 } else {
492                         assert(0 && "This is not an optimizable node!");
493                 }
494         }
495
496         /* Insert the new unit at a position according to its costs */
497         if (unit->node_count > 1) {
498                 int i;
499                 struct list_head *tmp;
500
501                 /* Determine the maximum costs this unit can cause: all_nodes_cost */
502                 for(i=1; i<unit->node_count; ++i) {
503                         unit->sort_key = MAX(unit->sort_key, unit->costs[i]);
504                         unit->all_nodes_costs += unit->costs[i];
505                 }
506
507                 /* Determine the minimal costs this unit will cause: min_nodes_costs */
508                 unit->min_nodes_costs += unit->all_nodes_costs - ou_max_ind_set_costs(unit);
509                 /* Insert the new ou according to its sort_key */
510                 tmp = &co->units;
511                 while (tmp->next != &co->units && list_entry_units(tmp->next)->sort_key > unit->sort_key)
512                         tmp = tmp->next;
513                 list_add(&unit->units, tmp);
514         } else {
515                 free(unit);
516         }
517 }
518
519 #ifdef QUICK_AND_DIRTY_HACK
520
521 static int compare_ous(const void *k1, const void *k2) {
522         const unit_t *u1 = *((const unit_t **) k1);
523         const unit_t *u2 = *((const unit_t **) k2);
524         int i, o, u1_has_constr, u2_has_constr;
525         arch_register_req_t req;
526
527         /* Units with constraints come first */
528         u1_has_constr = 0;
529         for (i=0; i<u1->node_count; ++i) {
530                 arch_get_register_req(&req, u1->nodes[i], -1);
531                 if (arch_register_req_is(&req, limited)) {
532                         u1_has_constr = 1;
533                         break;
534                 }
535         }
536
537         u2_has_constr = 0;
538         for (i=0; i<u2->node_count; ++i) {
539                 arch_get_register_req(&req, u2->nodes[i], -1);
540                 if (arch_register_req_is(&req, limited)) {
541                         u2_has_constr = 1;
542                         break;
543                 }
544         }
545
546         if (u1_has_constr != u2_has_constr)
547                 return u2_has_constr - u1_has_constr;
548
549         /* Now check, whether the two units are connected */
550 #if 0
551         for (i=0; i<u1->node_count; ++i)
552                 for (o=0; o<u2->node_count; ++o)
553                         if (u1->nodes[i] == u2->nodes[o])
554                                 return 0;
555 #endif
556
557         /* After all, the sort key decides. Greater keys come first. */
558         return u2->sort_key - u1->sort_key;
559
560 }
561
562 /**
563  * Sort the ou's according to constraints and their sort_key
564  */
565 static void co_sort_units(copy_opt_t *co) {
566         int i, count = 0, costs;
567         unit_t *ou, **ous;
568
569         /* get the number of ous, remove them form the list and fill the array */
570         list_for_each_entry(unit_t, ou, &co->units, units)
571                 count++;
572         ous = alloca(count * sizeof(*ous));
573
574         costs = co_get_max_copy_costs(co);
575
576         i = 0;
577         list_for_each_entry(unit_t, ou, &co->units, units)
578                 ous[i++] = ou;
579
580         INIT_LIST_HEAD(&co->units);
581
582         assert(count == i && list_empty(&co->units));
583
584         for (i=0; i<count; ++i)
585                 ir_printf("%+F\n", ous[i]->nodes[0]);
586
587         qsort(ous, count, sizeof(*ous), compare_ous);
588
589         ir_printf("\n\n");
590         for (i=0; i<count; ++i)
591                 ir_printf("%+F\n", ous[i]->nodes[0]);
592
593         /* reinsert into list in correct order */
594         for (i=0; i<count; ++i)
595                 list_add_tail(&ous[i]->units, &co->units);
596
597         assert(costs == co_get_max_copy_costs(co));
598 }
599 #endif
600
601 void co_build_ou_structure(copy_opt_t *co) {
602         DBG((dbg, LEVEL_1, "\tCollecting optimization units\n"));
603         INIT_LIST_HEAD(&co->units);
604         irg_walk_graph(co->irg, co_collect_units, NULL, co);
605 #ifdef QUICK_AND_DIRTY_HACK
606         co_sort_units(co);
607 #endif
608 }
609
610 void co_free_ou_structure(copy_opt_t *co) {
611         unit_t *curr, *tmp;
612         ASSERT_OU_AVAIL(co);
613         list_for_each_entry_safe(unit_t, curr, tmp, &co->units, units) {
614                 xfree(curr->nodes);
615                 xfree(curr->costs);
616                 xfree(curr);
617         }
618         co->units.next = NULL;
619 }
620
621 /* co_solve_heuristic() is implemented in becopyheur.c */
622
623 int co_get_max_copy_costs(const copy_opt_t *co) {
624         int i, res = 0;
625         unit_t *curr;
626
627         ASSERT_OU_AVAIL(co);
628
629         list_for_each_entry(unit_t, curr, &co->units, units) {
630                 res += curr->inevitable_costs;
631                 for (i=1; i<curr->node_count; ++i)
632                         res += curr->costs[i];
633         }
634         return res;
635 }
636
637 int co_get_inevit_copy_costs(const copy_opt_t *co) {
638         int res = 0;
639         unit_t *curr;
640
641         ASSERT_OU_AVAIL(co);
642
643         list_for_each_entry(unit_t, curr, &co->units, units)
644                 res += curr->inevitable_costs;
645         return res;
646 }
647
648 int co_get_copy_costs(const copy_opt_t *co) {
649         int i, res = 0;
650         unit_t *curr;
651
652         ASSERT_OU_AVAIL(co);
653
654         list_for_each_entry(unit_t, curr, &co->units, units) {
655                 int root_col = get_irn_col(curr->nodes[0]);
656                 DBG((dbg, LEVEL_1, "  %3d costs for root %+F color %d\n", curr->inevitable_costs, curr->nodes[0], root_col));
657                 res += curr->inevitable_costs;
658                 for (i=1; i<curr->node_count; ++i) {
659                         int arg_col = get_irn_col(curr->nodes[i]);
660                         if (root_col != arg_col) {
661                                 DBG((dbg, LEVEL_1, "  %3d for arg %+F color %d\n", curr->costs[i], curr->nodes[i], arg_col));
662                                 res += curr->costs[i];
663                         }
664                 }
665         }
666         return res;
667 }
668
669 int co_get_lower_bound(const copy_opt_t *co) {
670         int res = 0;
671         unit_t *curr;
672
673         ASSERT_OU_AVAIL(co);
674
675         list_for_each_entry(unit_t, curr, &co->units, units)
676                 res += curr->inevitable_costs + curr->min_nodes_costs;
677         return res;
678 }
679
680 void co_complete_stats(const copy_opt_t *co, co_complete_stats_t *stat)
681 {
682         bitset_t *seen = bitset_irg_malloc(co->irg);
683         affinity_node_t *an;
684
685         memset(stat, 0, sizeof(stat[0]));
686
687         /* count affinity edges. */
688         co_gs_foreach_aff_node(co, an) {
689                 neighb_t *neigh;
690                 stat->aff_nodes += 1;
691                 bitset_add_irn(seen, an->irn);
692                 co_gs_foreach_neighb(an, neigh) {
693                         if(!bitset_contains_irn(seen, neigh->irn)) {
694                                 stat->aff_edges += 1;
695                                 stat->max_costs += neigh->costs;
696
697                                 if (get_irn_col(an->irn) != get_irn_col(neigh->irn)) {
698                                         stat->costs += neigh->costs;
699                                         stat->unsatisfied_edges += 1;
700                                 }
701
702                                 if(nodes_interfere(co->cenv, an->irn, neigh->irn)) {
703                                         stat->aff_int += 1;
704                                         stat->inevit_costs += neigh->costs;
705                                 }
706
707                         }
708                 }
709         }
710
711         bitset_free(seen);
712 }
713
714 /******************************************************************************
715    _____                 _        _____ _
716   / ____|               | |      / ____| |
717  | |  __ _ __ __ _ _ __ | |__   | (___ | |_ ___  _ __ __ _  __ _  ___
718  | | |_ | '__/ _` | '_ \| '_ \   \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
719  | |__| | | | (_| | |_) | | | |  ____) | || (_) | | | (_| | (_| |  __/
720   \_____|_|  \__,_| .__/|_| |_| |_____/ \__\___/|_|  \__,_|\__, |\___|
721                   | |                                       __/ |
722                   |_|                                      |___/
723  ******************************************************************************/
724
725 static int compare_affinity_node_t(const void *k1, const void *k2, size_t size) {
726         const affinity_node_t *n1 = k1;
727         const affinity_node_t *n2 = k2;
728         (void) size;
729
730         return (n1->irn != n2->irn);
731 }
732
733 static void add_edge(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
734         affinity_node_t new_node, *node;
735         neighb_t        *nbr;
736         int             allocnew = 1;
737
738         new_node.irn        = n1;
739         new_node.degree     = 0;
740         new_node.neighbours = NULL;
741         node = set_insert(co->nodes, &new_node, sizeof(new_node), hash_irn(new_node.irn));
742
743         for (nbr = node->neighbours; nbr; nbr = nbr->next)
744                 if (nbr->irn == n2) {
745                         allocnew = 0;
746                         break;
747                 }
748
749         /* if we did not find n2 in n1's neighbourhood insert it */
750         if (allocnew) {
751                 nbr        = obstack_alloc(&co->obst, sizeof(*nbr));
752                 nbr->irn   = n2;
753                 nbr->costs = 0;
754                 nbr->next  = node->neighbours;
755
756                 node->neighbours = nbr;
757                 node->degree++;
758         }
759
760         /* now nbr points to n1's neighbour-entry of n2 */
761         nbr->costs += costs;
762 }
763
764 static inline void add_edges(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
765         if (! be_ifg_connected(co->cenv->ifg, n1, n2)) {
766                 add_edge(co, n1, n2, costs);
767                 add_edge(co, n2, n1, costs);
768         }
769 }
770
771 static void build_graph_walker(ir_node *irn, void *env) {
772         copy_opt_t *co = env;
773         int pos, max;
774         const arch_register_t *reg;
775
776         if (!is_curr_reg_class(co, irn) || arch_irn_is(irn, ignore))
777                 return;
778
779         reg = arch_get_irn_register(irn);
780         if (arch_register_type_is(reg, ignore))
781                 return;
782
783         if (is_Reg_Phi(irn)) { /* Phis */
784                 for (pos=0, max=get_irn_arity(irn); pos<max; ++pos) {
785                         ir_node *arg = get_irn_n(irn, pos);
786                         add_edges(co, irn, arg, co->get_costs(co, irn, arg, pos));
787                 }
788         } else if (is_Perm_Proj(irn)) { /* Perms */
789                 ir_node *arg = get_Perm_src(irn);
790                 add_edges(co, irn, arg, co->get_costs(co, irn, arg, 0));
791         }
792         else { /* 2-address code */
793                 const arch_register_req_t *req = arch_get_register_req(irn, -1);
794                 if (is_2addr_code(req)) {
795                         const unsigned other = req->other_same;
796                         int i;
797
798                         for (i = 0; 1U << i <= other; ++i) {
799                                 if (other & (1U << i)) {
800                                         ir_node *other = get_irn_n(skip_Proj(irn), i);
801                                         if (!arch_irn_is(other, ignore))
802                                                 add_edges(co, irn, other, co->get_costs(co, irn, other, 0));
803                                 }
804                         }
805                 }
806         }
807 }
808
809 void co_build_graph_structure(copy_opt_t *co) {
810         obstack_init(&co->obst);
811         co->nodes = new_set(compare_affinity_node_t, 32);
812
813         irg_walk_graph(co->irg, build_graph_walker, NULL, co);
814 }
815
816 void co_free_graph_structure(copy_opt_t *co) {
817         ASSERT_GS_AVAIL(co);
818
819         del_set(co->nodes);
820         obstack_free(&co->obst, NULL);
821         co->nodes = NULL;
822 }
823
824 /* co_solve_ilp1() co_solve_ilp2() are implemented in becopyilpX.c */
825
826 int co_gs_is_optimizable(copy_opt_t *co, ir_node *irn) {
827         affinity_node_t new_node, *n;
828
829         ASSERT_GS_AVAIL(co);
830
831         new_node.irn = irn;
832         n = set_find(co->nodes, &new_node, sizeof(new_node), hash_irn(new_node.irn));
833         if (n) {
834                 return (n->degree > 0);
835         } else
836                 return 0;
837 }
838
839 static int co_dump_appel_disjoint_constraints(const copy_opt_t *co, ir_node *a, ir_node *b)
840 {
841         ir_node *nodes[]  = { a, b };
842         bitset_t *constr[] = { NULL, NULL };
843         const arch_register_req_t *req;
844         int j;
845
846         constr[0] = bitset_alloca(co->cls->n_regs);
847         constr[1] = bitset_alloca(co->cls->n_regs);
848
849         for (j = 0; j < 2; ++j) {
850                 req = arch_get_register_req(nodes[j], BE_OUT_POS(0));
851                 if(arch_register_req_is(req, limited))
852                         rbitset_copy_to_bitset(req->limited, constr[j]);
853                 else
854                         bitset_set_all(constr[j]);
855
856         }
857
858         return !bitset_intersect(constr[0], constr[1]);
859 }
860
861 void co_dump_appel_graph(const copy_opt_t *co, FILE *f)
862 {
863         be_ifg_t *ifg  = co->cenv->ifg;
864         int *color_map = alloca(co->cls->n_regs * sizeof(color_map[0]));
865         int *node_map  = XMALLOCN(int, get_irg_last_idx(co->irg) + 1);
866
867         ir_node *irn;
868         void *it, *nit;
869         int n, n_regs;
870         unsigned i;
871
872         n_regs = 0;
873         for(i = 0; i < co->cls->n_regs; ++i) {
874                 const arch_register_t *reg = &co->cls->regs[i];
875                 color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_regs++;
876         }
877
878         /*
879          * n contains the first node number.
880          * the values below n are the pre-colored register nodes
881          */
882
883         it  = be_ifg_nodes_iter_alloca(ifg);
884         nit = be_ifg_neighbours_iter_alloca(ifg);
885
886         n = n_regs;
887         be_ifg_foreach_node(ifg, it, irn) {
888                 if (!arch_irn_is(irn, ignore))
889                         node_map[get_irn_idx(irn)] = n++;
890         }
891
892         fprintf(f, "%d %d\n", n, n_regs);
893
894         be_ifg_foreach_node(ifg, it, irn) {
895                 if (!arch_irn_is(irn, ignore)) {
896                         int idx            = node_map[get_irn_idx(irn)];
897                         affinity_node_t *a = get_affinity_info(co, irn);
898
899                         const arch_register_req_t *req;
900                         ir_node *adj;
901
902                         req = arch_get_register_req(irn, BE_OUT_POS(0));
903                         if(arch_register_req_is(req, limited)) {
904                                 for(i = 0; i < co->cls->n_regs; ++i) {
905                                         if(!rbitset_is_set(req->limited, i) && color_map[i] >= 0)
906                                                 fprintf(f, "%d %d -1\n", color_map[i], idx);
907                                 }
908                         }
909
910                         be_ifg_foreach_neighbour(ifg, nit, irn, adj) {
911                                 if (!arch_irn_is(adj, ignore) &&
912                                                 !co_dump_appel_disjoint_constraints(co, irn, adj)) {
913                                         int adj_idx = node_map[get_irn_idx(adj)];
914                                         if(idx < adj_idx)
915                                                 fprintf(f, "%d %d -1\n", idx, adj_idx);
916                                 }
917                         }
918
919                         if(a) {
920                                 neighb_t *n;
921
922                                 co_gs_foreach_neighb(a, n) {
923                                         if (!arch_irn_is(n->irn, ignore)) {
924                                                 int n_idx = node_map[get_irn_idx(n->irn)];
925                                                 if(idx < n_idx)
926                                                         fprintf(f, "%d %d %d\n", idx, n_idx, (int) n->costs);
927                                         }
928                                 }
929                         }
930                 }
931         }
932
933         xfree(node_map);
934 }
935
936 /*
937          ___ _____ ____   ____   ___ _____   ____                        _
938         |_ _|  ___/ ___| |  _ \ / _ \_   _| |  _ \ _   _ _ __ ___  _ __ (_)_ __   __ _
939          | || |_ | |  _  | | | | | | || |   | | | | | | | '_ ` _ \| '_ \| | '_ \ / _` |
940          | ||  _|| |_| | | |_| | |_| || |   | |_| | |_| | | | | | | |_) | | | | | (_| |
941         |___|_|   \____| |____/ \___/ |_|   |____/ \__,_|_| |_| |_| .__/|_|_| |_|\__, |
942                                                                   |_|            |___/
943 */
944
945 static const char *get_dot_color_name(size_t col)
946 {
947         static const char *names[] = {
948                 "blue",
949                 "red",
950                 "green",
951                 "yellow",
952                 "cyan",
953                 "magenta",
954                 "orange",
955                 "chocolate",
956                 "beige",
957                 "navy",
958                 "darkgreen",
959                 "darkred",
960                 "lightPink",
961                 "chartreuse",
962                 "lightskyblue",
963                 "linen",
964                 "pink",
965                 "lightslateblue",
966                 "mintcream",
967                 "red",
968                 "darkolivegreen",
969                 "mediumblue",
970                 "mistyrose",
971                 "salmon",
972                 "darkseagreen",
973                 "mediumslateblue"
974                 "moccasin",
975                 "tomato",
976                 "forestgreen",
977                 "darkturquoise",
978                 "palevioletred"
979         };
980
981         return col < sizeof(names)/sizeof(names[0]) ? names[col] : "white";
982 }
983
984 typedef struct _co_ifg_dump_t {
985         const copy_opt_t *co;
986         unsigned flags;
987 } co_ifg_dump_t;
988
989 static void ifg_dump_graph_attr(FILE *f, void *self)
990 {
991         (void) self;
992         fprintf(f, "overlap=scale");
993 }
994
995 static int ifg_is_dump_node(void *self, ir_node *irn)
996 {
997         (void)self;
998         return !arch_irn_is(irn, ignore);
999 }
1000
1001 static void ifg_dump_node_attr(FILE *f, void *self, ir_node *irn)
1002 {
1003         co_ifg_dump_t *env         = self;
1004         const arch_register_t *reg = arch_get_irn_register(irn);
1005         const arch_register_req_t *req;
1006         int limited;
1007
1008         req = arch_get_register_req(irn, BE_OUT_POS(0));
1009         limited = arch_register_req_is(req, limited);
1010
1011         if(env->flags & CO_IFG_DUMP_LABELS) {
1012                 ir_fprintf(f, "label=\"%+F", irn);
1013
1014                 if((env->flags & CO_IFG_DUMP_CONSTR) && limited) {
1015                         bitset_t *bs = bitset_alloca(env->co->cls->n_regs);
1016                         rbitset_copy_to_bitset(req->limited, bs);
1017                         ir_fprintf(f, "\\n%B", bs);
1018                 }
1019                 ir_fprintf(f, "\" ");
1020         } else {
1021                 fprintf(f, "label=\"\" shape=point " );
1022         }
1023
1024         if(env->flags & CO_IFG_DUMP_SHAPE)
1025                 fprintf(f, "shape=%s ", limited ? "diamond" : "ellipse");
1026
1027         if(env->flags & CO_IFG_DUMP_COLORS)
1028                 fprintf(f, "style=filled color=%s ", get_dot_color_name(reg->index));
1029 }
1030
1031 static void ifg_dump_at_end(FILE *file, void *self)
1032 {
1033         co_ifg_dump_t *env = self;
1034         affinity_node_t *a;
1035
1036         co_gs_foreach_aff_node(env->co, a) {
1037                 const arch_register_t *ar = arch_get_irn_register(a->irn);
1038                 unsigned aidx = get_irn_idx(a->irn);
1039                 neighb_t *n;
1040
1041                 co_gs_foreach_neighb(a, n) {
1042                         const arch_register_t *nr = arch_get_irn_register(n->irn);
1043                         unsigned nidx = get_irn_idx(n->irn);
1044
1045                         if(aidx < nidx) {
1046                                 const char *color = nr == ar ? "blue" : "red";
1047                                 fprintf(file, "\tn%d -- n%d [weight=0.01 ", aidx, nidx);
1048                                 if(env->flags & CO_IFG_DUMP_LABELS)
1049                                         fprintf(file, "label=\"%d\" ", n->costs);
1050                                 if(env->flags & CO_IFG_DUMP_COLORS)
1051                                         fprintf(file, "color=%s ", color);
1052                                 else
1053                                         fprintf(file, "style=dotted");
1054                                 fprintf(file, "];\n");
1055                         }
1056                 }
1057         }
1058 }
1059
1060
1061 static be_ifg_dump_dot_cb_t ifg_dot_cb = {
1062         ifg_is_dump_node,
1063         ifg_dump_graph_attr,
1064         ifg_dump_node_attr,
1065         NULL,
1066         NULL,
1067         ifg_dump_at_end
1068 };
1069
1070
1071
1072 void co_dump_ifg_dot(const copy_opt_t *co, FILE *f, unsigned flags)
1073 {
1074         co_ifg_dump_t cod;
1075
1076         cod.co    = co;
1077         cod.flags = flags;
1078         be_ifg_dump_dot(co->cenv->ifg, co->irg, f, &ifg_dot_cb, &cod);
1079 }
1080
1081
1082 void co_solve_park_moon(copy_opt_t *opt)
1083 {
1084         (void) opt;
1085 }
1086
1087 static int void_algo(copy_opt_t *co)
1088 {
1089         (void) co;
1090         return 0;
1091 }
1092
1093 /*
1094                 _    _                  _ _   _
1095            / \  | | __ _  ___  _ __(_) |_| |__  _ __ ___  ___
1096           / _ \ | |/ _` |/ _ \| '__| | __| '_ \| '_ ` _ \/ __|
1097          / ___ \| | (_| | (_) | |  | | |_| | | | | | | | \__ \
1098         /_/   \_\_|\__, |\___/|_|  |_|\__|_| |_|_| |_| |_|___/
1099                            |___/
1100 */
1101
1102 typedef struct {
1103         co_algo_t  *algo;
1104         const char *name;
1105         int        can_improve_existing;
1106 } co_algo_info_t;
1107
1108 static co_algo_info_t algos[] = {
1109         { void_algo,               "none",  0 },
1110         { co_solve_heuristic,      "heur1", 0 },
1111         { co_solve_heuristic_new,  "heur2", 0 },
1112 #ifdef WITH_JVM
1113         { co_solve_heuristic_java, "heur3", 0 },
1114 #else
1115         { NULL,                    "heur3", 0 },
1116 #endif
1117         { co_solve_heuristic_mst,  "heur4", 0 },
1118 #ifdef WITH_ILP
1119         { co_solve_ilp2,           "ilp",   1 },
1120 #else
1121         { NULL,                    "ilp",   1 },
1122 #endif
1123         { NULL,                    "",      0 }
1124 };
1125
1126 /*
1127     __  __       _         ____       _
1128    |  \/  | __ _(_)_ __   |  _ \ _ __(_)_   _____ _ __
1129    | |\/| |/ _` | | '_ \  | | | | '__| \ \ / / _ \ '__|
1130    | |  | | (_| | | | | | | |_| | |  | |\ V /  __/ |
1131    |_|  |_|\__,_|_|_| |_| |____/|_|  |_| \_/ \___|_|
1132
1133 */
1134
1135 static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char *suffix)
1136 {
1137         FILE *result;
1138         char buf[1024];
1139         size_t i, n;
1140         char *tu_name;
1141
1142         n = strlen(env->birg->main_env->cup_name);
1143         tu_name = XMALLOCN(char, n + 1);
1144         strcpy(tu_name, env->birg->main_env->cup_name);
1145         for (i = 0; i < n; ++i)
1146                 if (tu_name[i] == '.')
1147                         tu_name[i] = '_';
1148
1149
1150         ir_snprintf(buf, sizeof(buf), "%s%s_%F_%s%s", prefix, tu_name, env->irg, env->cls->name, suffix);
1151         xfree(tu_name);
1152         result = fopen(buf, "wt");
1153         if(result == NULL) {
1154                 panic("Couldn't open '%s' for writing.", buf);
1155         }
1156
1157         return result;
1158 }
1159
1160 void co_driver(be_chordal_env_t *cenv)
1161 {
1162         ir_timer_t          *timer = ir_timer_register("firm.be.copyopt", "runtime");
1163         co_complete_stats_t before, after;
1164         copy_opt_t          *co;
1165         co_algo_t           *algo_func;
1166         int                 was_optimal = 0;
1167
1168         if (algo >= CO_ALGO_LAST)
1169                 return;
1170
1171         be_liveness_assure_chk(be_get_birg_liveness(cenv->birg));
1172
1173         co = new_copy_opt(cenv, cost_func);
1174         co_build_ou_structure(co);
1175         co_build_graph_structure(co);
1176
1177         co_complete_stats(co, &before);
1178
1179         be_stat_ev_ull("co_aff_nodes",    before.aff_nodes);
1180         be_stat_ev_ull("co_aff_edges",    before.aff_edges);
1181         be_stat_ev_ull("co_max_costs",    before.max_costs);
1182         be_stat_ev_ull("co_inevit_costs", before.inevit_costs);
1183         be_stat_ev_ull("co_aff_int",      before.aff_int);
1184
1185         be_stat_ev_ull("co_init_costs",   before.costs);
1186         be_stat_ev_ull("co_init_unsat",   before.unsatisfied_edges);
1187
1188         if (dump_flags & DUMP_BEFORE) {
1189                 FILE *f = my_open(cenv, "", "-before.dot");
1190                 co_dump_ifg_dot(co, f, style_flags);
1191                 fclose(f);
1192         }
1193
1194         /* if the algo can improve results, provide an initial solution with heur3 */
1195         if (improve && algos[algo].can_improve_existing) {
1196                 co_complete_stats_t stats;
1197
1198                 /* produce a heuristic solution */
1199 #ifdef WITH_JVM
1200                 co_solve_heuristic_java(co);
1201 #else
1202                 co_solve_heuristic(co);
1203 #endif /* WITH_JVM */
1204
1205                 /* do the stats and provide the current costs */
1206                 co_complete_stats(co, &stats);
1207                 be_stat_ev_ull("co_prepare_costs", stats.costs);
1208         }
1209
1210 #ifdef WITH_JVM
1211         /* start the JVM here so that it does not tamper the timing. */
1212         if (algo == CO_ALGO_HEUR3)
1213                 be_java_coal_start_jvm();
1214 #endif /* WITH_JVM */
1215
1216         algo_func = algos[algo].algo;
1217
1218         /* perform actual copy minimization */
1219         ir_timer_reset_and_start(timer);
1220         was_optimal = algo_func(co);
1221         ir_timer_stop(timer);
1222
1223         be_stat_ev("co_time", ir_timer_elapsed_msec(timer));
1224         be_stat_ev_ull("co_optimal", was_optimal);
1225
1226         if (dump_flags & DUMP_AFTER) {
1227                 FILE *f = my_open(cenv, "", "-after.dot");
1228                 co_dump_ifg_dot(co, f, style_flags);
1229                 fclose(f);
1230         }
1231
1232         co_complete_stats(co, &after);
1233
1234         if (do_stats) {
1235                 ulong64 optimizable_costs = after.max_costs - after.inevit_costs;
1236                 ulong64 evitable          = after.costs     - after.inevit_costs;
1237
1238                 ir_printf("%30F ", cenv->irg);
1239                 printf("%10s %10" ULL_FMT "%10" ULL_FMT "%10" ULL_FMT, cenv->cls->name, after.max_costs, before.costs, after.inevit_costs);
1240
1241                 if(optimizable_costs > 0)
1242                         printf("%10" ULL_FMT " %5.2f\n", after.costs, (evitable * 100.0) / optimizable_costs);
1243                 else
1244                         printf("%10" ULL_FMT " %5s\n", after.costs, "-");
1245         }
1246
1247         /* Dump the interference graph in Appel's format. */
1248         if (dump_flags & DUMP_APPEL) {
1249                 FILE *f = my_open(cenv, "", ".apl");
1250                 fprintf(f, "# %lld %lld\n", after.costs, after.unsatisfied_edges);
1251                 co_dump_appel_graph(co, f);
1252                 fclose(f);
1253         }
1254
1255         be_stat_ev_ull("co_after_costs", after.costs);
1256         be_stat_ev_ull("co_after_unsat", after.unsatisfied_edges);
1257
1258         co_free_graph_structure(co);
1259         co_free_ou_structure(co);
1260         free_copy_opt(co);
1261 }