Remove the unused parameter const arch_env_t *env from arch_get_irn_register().
[libfirm] / ir / be / becopyopt.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Copy minimization driver.
23  * @author      Daniel Grund
24  * @date        12.04.2005
25  * @version     $Id$
26  *
27  * Main file for the optimization reducing the copies needed for:
28  * - Phi coalescing
29  * - Register-constrained nodes
30  * - Two-address code instructions
31  */
32 #ifdef HAVE_CONFIG_H
33 #include "config.h"
34 #endif
35
36 #include "execfreq.h"
37 #include "xmalloc.h"
38 #include "debug.h"
39 #include "pmap.h"
40 #include "raw_bitset.h"
41 #include "irnode.h"
42 #include "irgraph.h"
43 #include "irgwalk.h"
44 #include "irprog.h"
45 #include "irloop_t.h"
46 #include "iredges_t.h"
47 #include "phiclass.h"
48 #include "irbitset.h"
49 #include "irphase_t.h"
50 #include "irprintf_t.h"
51
52 #include "bemodule.h"
53 #include "bearch_t.h"
54 #include "benode_t.h"
55 #include "beutil.h"
56 #include "beifg_t.h"
57 #include "beintlive_t.h"
58 #include "becopyopt_t.h"
59 #include "becopystat.h"
60 #include "belive_t.h"
61 #include "beinsn_t.h"
62 #include "besched_t.h"
63 #include "bestatevent.h"
64 #include "beirg_t.h"
65 #include "error.h"
66
67 #include "lc_opts.h"
68 #include "lc_opts_enum.h"
69
70 #define DUMP_BEFORE 1
71 #define DUMP_AFTER  2
72 #define DUMP_APPEL  4
73 #define DUMP_ALL    2 * DUMP_APPEL - 1
74
75 #define COST_FUNC_FREQ     1
76 #define COST_FUNC_LOOP     2
77 #define COST_FUNC_ALL_ONE  3
78
79 static unsigned   dump_flags  = 0;
80 static unsigned   style_flags = 0;
81 static unsigned   do_stats    = 0;
82 static cost_fct_t cost_func   = co_get_costs_exec_freq;
83 static unsigned   algo        = CO_ALGO_HEUR4;
84 static int        improve     = 1;
85
86 static const lc_opt_enum_mask_items_t dump_items[] = {
87         { "before",  DUMP_BEFORE },
88         { "after",   DUMP_AFTER  },
89         { "appel",   DUMP_APPEL  },
90         { "all",     DUMP_ALL    },
91         { NULL,      0 }
92 };
93
94 static const lc_opt_enum_mask_items_t style_items[] = {
95         { "color",   CO_IFG_DUMP_COLORS },
96         { "labels",  CO_IFG_DUMP_LABELS },
97         { "constr",  CO_IFG_DUMP_CONSTR },
98         { "shape",   CO_IFG_DUMP_SHAPE  },
99         { "full",    2 * CO_IFG_DUMP_SHAPE - 1 },
100         { NULL,      0 }
101 };
102
103 static const lc_opt_enum_mask_items_t algo_items[] = {
104         { "none",   CO_ALGO_NONE  },
105         { "heur",   CO_ALGO_HEUR  },
106         { "heur2",  CO_ALGO_HEUR2 },
107         { "heur3",  CO_ALGO_HEUR3 },
108         { "heur4",  CO_ALGO_HEUR4 },
109         { "ilp",    CO_ALGO_ILP   },
110         { NULL,     0 }
111 };
112
113 typedef int (*opt_funcptr)(void);
114
115 static const lc_opt_enum_func_ptr_items_t cost_func_items[] = {
116         { "freq",   (opt_funcptr) co_get_costs_exec_freq },
117         { "loop",   (opt_funcptr) co_get_costs_loop_depth },
118         { "one",    (opt_funcptr) co_get_costs_all_one },
119         { NULL,     NULL }
120 };
121
122 static lc_opt_enum_mask_var_t dump_var = {
123         &dump_flags, dump_items
124 };
125
126 static lc_opt_enum_mask_var_t style_var = {
127         &style_flags, style_items
128 };
129
130 static lc_opt_enum_mask_var_t algo_var = {
131         &algo, algo_items
132 };
133
134 static lc_opt_enum_func_ptr_var_t cost_func_var = {
135         (opt_funcptr*) &cost_func, cost_func_items
136 };
137
138 static const lc_opt_table_entry_t options[] = {
139         LC_OPT_ENT_ENUM_INT      ("algo",    "select copy optimization algo",                           &algo_var),
140         LC_OPT_ENT_ENUM_FUNC_PTR ("cost",    "select a cost function",                                  &cost_func_var),
141         LC_OPT_ENT_ENUM_MASK     ("dump",    "dump ifg before or after copy optimization",              &dump_var),
142         LC_OPT_ENT_ENUM_MASK     ("style",   "dump style for ifg dumping",                              &style_var),
143         LC_OPT_ENT_BOOL          ("stats",   "dump statistics after each optimization",                 &do_stats),
144         LC_OPT_ENT_BOOL          ("improve", "run heur3 before if algo can exploit start solutions",    &improve),
145         LC_OPT_LAST
146 };
147
148 /* Insert additional options registration functions here. */
149 extern void be_co_ilp_register_options(lc_opt_entry_t *grp);
150 extern void be_co2_register_options(lc_opt_entry_t *grp);
151 extern void be_co3_register_options(lc_opt_entry_t *grp);
152
153 void be_init_copycoal(void)
154 {
155         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
156         lc_opt_entry_t *ra_grp = lc_opt_get_grp(be_grp, "ra");
157         lc_opt_entry_t *chordal_grp = lc_opt_get_grp(ra_grp, "chordal");
158         lc_opt_entry_t *co_grp = lc_opt_get_grp(chordal_grp, "co");
159
160         lc_opt_add_table(co_grp, options);
161 }
162
163 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copycoal);
164
165 #undef QUICK_AND_DIRTY_HACK
166
167 static int nodes_interfere(const be_chordal_env_t *env, const ir_node *a, const ir_node *b)
168 {
169         if (env->ifg)
170                 return be_ifg_connected(env->ifg, a, b);
171         else
172                 return values_interfere(env->birg, a, b);
173 }
174
175
176 /******************************************************************************
177     _____                           _
178    / ____|                         | |
179   | |  __  ___ _ __   ___ _ __ __ _| |
180   | | |_ |/ _ \ '_ \ / _ \ '__/ _` | |
181   | |__| |  __/ | | |  __/ | | (_| | |
182    \_____|\___|_| |_|\___|_|  \__,_|_|
183
184  ******************************************************************************/
185
186 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
187
188
189 copy_opt_t *new_copy_opt(be_chordal_env_t *chordal_env, cost_fct_t get_costs)
190 {
191         const char *s1, *s2, *s3;
192         int len;
193         copy_opt_t *co;
194
195         FIRM_DBG_REGISTER(dbg, "ir.be.copyopt");
196
197         co = XMALLOCZ(copy_opt_t);
198         co->cenv      = chordal_env;
199         co->aenv      = chordal_env->birg->main_env->arch_env;
200         co->irg       = chordal_env->irg;
201         co->cls       = chordal_env->cls;
202         co->get_costs = get_costs;
203
204         s1 = get_irp_prog_name();
205         s2 = get_entity_name(get_irg_entity(co->irg));
206         s3 = chordal_env->cls->name;
207         len = strlen(s1) + strlen(s2) + strlen(s3) + 5;
208         co->name = XMALLOCN(char, len);
209         snprintf(co->name, len, "%s__%s__%s", s1, s2, s3);
210
211         return co;
212 }
213
214 void free_copy_opt(copy_opt_t *co) {
215         xfree(co->name);
216         free(co);
217 }
218
219 int co_is_optimizable_root(const copy_opt_t *co, ir_node *irn) {
220         const arch_register_req_t *req;
221         const arch_register_t *reg;
222
223         if (arch_irn_is(co->aenv, irn, ignore))
224                 return 0;
225
226         reg = arch_get_irn_register(irn);
227         if (arch_register_type_is(reg, ignore))
228                 return 0;
229
230         req = arch_get_register_req(irn, -1);
231         if (is_Reg_Phi(irn) || is_Perm_Proj(co->aenv, irn) || is_2addr_code(req))
232                 return 1;
233
234         return 0;
235 }
236
237 int co_get_costs_loop_depth(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
238         int cost = 0;
239         ir_loop *loop;
240         ir_node *root_block = get_nodes_block(root);
241         (void) co;
242         (void) arg;
243
244         if (is_Phi(root)) {
245                 /* for phis the copies are placed in the corresponding pred-block */
246                 loop = get_irn_loop(get_Block_cfgpred_block(root_block, pos));
247         } else {
248                 /* a perm places the copy in the same block as it resides */
249                 loop = get_irn_loop(root_block);
250         }
251         if (loop) {
252                 int d = get_loop_depth(loop);
253                 cost = d*d;
254         }
255         return 1+cost;
256 }
257
258 int co_get_costs_exec_freq(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
259         int res;
260         ir_node *root_bl = get_nodes_block(root);
261         ir_node *copy_bl = is_Phi(root) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
262         (void) arg;
263         res = get_block_execfreq_ulong(co->cenv->birg->exec_freq, copy_bl);
264
265         /* don't allow values smaller than one. */
266         return res < 1 ? 1 : res;
267 }
268
269
270 int co_get_costs_all_one(const copy_opt_t *co, ir_node *root, ir_node *arg, int pos) {
271         (void) co;
272         (void) root;
273         (void) arg;
274         (void) pos;
275         return 1;
276 }
277
278 /******************************************************************************
279    ____        _   _    _       _ _          _____ _
280   / __ \      | | | |  | |     (_) |        / ____| |
281  | |  | |_ __ | |_| |  | |_ __  _| |_ ___  | (___ | |_ ___  _ __ __ _  __ _  ___
282  | |  | | '_ \| __| |  | | '_ \| | __/ __|  \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
283  | |__| | |_) | |_| |__| | | | | | |_\__ \  ____) | || (_) | | | (_| | (_| |  __/
284   \____/| .__/ \__|\____/|_| |_|_|\__|___/ |_____/ \__\___/|_|  \__,_|\__, |\___|
285         | |                                                            __/ |
286         |_|                                                           |___/
287  ******************************************************************************/
288
289 /**
290  * Determines a maximum weighted independent set with respect to
291  * the interference and conflict edges of all nodes in a qnode.
292  */
293 static int ou_max_ind_set_costs(unit_t *ou) {
294         be_chordal_env_t *chordal_env = ou->co->cenv;
295         ir_node **safe, **unsafe;
296         int i, o, safe_count, safe_costs, unsafe_count, *unsafe_costs;
297         bitset_t *curr;
298         bitset_pos_t pos;
299         int max, curr_weight, best_weight = 0;
300
301         /* assign the nodes into two groups.
302          * safe: node has no interference, hence it is in every max stable set.
303          * unsafe: node has an interference
304          */
305         safe = alloca((ou->node_count-1) * sizeof(*safe));
306         safe_costs = 0;
307         safe_count = 0;
308         unsafe = alloca((ou->node_count-1) * sizeof(*unsafe));
309         unsafe_costs = alloca((ou->node_count-1) * sizeof(*unsafe_costs));
310         unsafe_count = 0;
311         for(i=1; i<ou->node_count; ++i) {
312                 int is_safe = 1;
313                 for(o=1; o<ou->node_count; ++o) {
314                         if (i==o)
315                                 continue;
316                         if (nodes_interfere(chordal_env, ou->nodes[i], ou->nodes[o])) {
317                                 unsafe_costs[unsafe_count] = ou->costs[i];
318                                 unsafe[unsafe_count] = ou->nodes[i];
319                                 ++unsafe_count;
320                                 is_safe = 0;
321                                 break;
322                         }
323                 }
324                 if (is_safe) {
325                         safe_costs += ou->costs[i];
326                         safe[safe_count++] = ou->nodes[i];
327                 }
328         }
329
330
331         /* now compute the best set out of the unsafe nodes*/
332         if (unsafe_count > MIS_HEUR_TRIGGER) {
333                 bitset_t *best = bitset_alloca(unsafe_count);
334                 /* Heuristik: Greedy trial and error form index 0 to unsafe_count-1 */
335                 for (i=0; i<unsafe_count; ++i) {
336                         bitset_set(best, i);
337                         /* check if it is a stable set */
338                         for (o=bitset_next_set(best, 0); o!=-1 && o<i; o=bitset_next_set(best, o+1))
339                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o])) {
340                                         bitset_clear(best, i); /* clear the bit and try next one */
341                                         break;
342                                 }
343                 }
344                 /* compute the weight */
345                 bitset_foreach(best, pos)
346                         best_weight += unsafe_costs[pos];
347         } else {
348                 /* Exact Algorithm: Brute force */
349                 curr = bitset_alloca(unsafe_count);
350                 bitset_set_all(curr);
351                 while ((max = bitset_popcnt(curr)) != 0) {
352                         /* check if curr is a stable set */
353                         for (i=bitset_next_set(curr, 0); i!=-1; i=bitset_next_set(curr, i+1))
354                                 for (o=bitset_next_set(curr, i+1); o!=-1; o=bitset_next_set(curr, o+1)) /* !!!!! difference to qnode_max_ind_set(): NOT (curr, i) */
355                                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o]))
356                                                         goto no_stable_set;
357
358                         /* if we arrive here, we have a stable set */
359                         /* compute the weigth of the stable set*/
360                         curr_weight = 0;
361                         bitset_foreach(curr, pos)
362                                 curr_weight += unsafe_costs[pos];
363
364                         /* any better ? */
365                         if (curr_weight > best_weight) {
366                                 best_weight = curr_weight;
367                         }
368
369         no_stable_set:
370                         bitset_minus1(curr);
371                 }
372         }
373
374         return safe_costs+best_weight;
375 }
376
377 static void co_collect_units(ir_node *irn, void *env) {
378         copy_opt_t *co = env;
379         unit_t *unit;
380
381         if (!is_curr_reg_class(co, irn))
382                 return;
383         if (!co_is_optimizable_root(co, irn))
384                 return;
385
386         /* Init a new unit */
387         unit = XMALLOCZ(unit_t);
388         unit->co = co;
389         unit->node_count = 1;
390         INIT_LIST_HEAD(&unit->queue);
391
392         /* Phi with some/all of its arguments */
393         if (is_Reg_Phi(irn)) {
394                 int i, arity;
395
396                 /* init */
397                 arity = get_irn_arity(irn);
398                 unit->nodes = XMALLOCN(ir_node*, arity + 1);
399                 unit->costs = XMALLOCN(int,      arity + 1);
400                 unit->nodes[0] = irn;
401
402                 /* fill */
403                 for (i=0; i<arity; ++i) {
404                         int o, arg_pos;
405                         ir_node *arg = get_irn_n(irn, i);
406
407                         assert(is_curr_reg_class(co, arg) && "Argument not in same register class.");
408                         if (arg == irn)
409                                 continue;
410                         if (nodes_interfere(co->cenv, irn, arg)) {
411                                 unit->inevitable_costs += co->get_costs(co, irn, arg, i);
412                                 continue;
413                         }
414
415                         /* Else insert the argument of the phi to the members of this ou */
416                         DBG((dbg, LEVEL_1, "\t   Member: %+F\n", arg));
417
418                         if (! arch_irn_is(co->aenv, arg, ignore)) {
419                                 /* Check if arg has occurred at a prior position in the arg/list */
420                                 arg_pos = 0;
421                                 for (o=1; o<unit->node_count; ++o) {
422                                         if (unit->nodes[o] == arg) {
423                                                 arg_pos = o;
424                                                 break;
425                                         }
426                                 }
427
428                                 if (!arg_pos) { /* a new argument */
429                                         /* insert node, set costs */
430                                         unit->nodes[unit->node_count] = arg;
431                                         unit->costs[unit->node_count] = co->get_costs(co, irn, arg, i);
432                                         unit->node_count++;
433                                 } else { /* arg has occurred before in same phi */
434                                         /* increase costs for existing arg */
435                                         unit->costs[arg_pos] += co->get_costs(co, irn, arg, i);
436                                 }
437                         }
438                 }
439                 unit->nodes = XREALLOC(unit->nodes, ir_node*, unit->node_count);
440                 unit->costs = XREALLOC(unit->costs, int,      unit->node_count);
441         } else if (is_Perm_Proj(co->aenv, irn)) {
442                 /* Proj of a perm with corresponding arg */
443                 assert(!nodes_interfere(co->cenv, irn, get_Perm_src(irn)));
444                 unit->nodes = XMALLOCN(ir_node*, 2);
445                 unit->costs = XMALLOCN(int,      2);
446                 unit->node_count = 2;
447                 unit->nodes[0] = irn;
448                 unit->nodes[1] = get_Perm_src(irn);
449                 unit->costs[1] = co->get_costs(co, irn, unit->nodes[1], -1);
450         } else {
451                 const arch_register_req_t *req = arch_get_register_req(irn, -1);
452
453                 /* Src == Tgt of a 2-addr-code instruction */
454                 if (is_2addr_code(req)) {
455                         const unsigned other = req->other_same;
456                         int            count = 0;
457                         int            i;
458
459                         for (i = 0; (1U << i) <= other; ++i) {
460                                 if (other & (1U << i)) {
461                                         ir_node *o  = get_irn_n(skip_Proj(irn), i);
462                                         if (!arch_irn_is(co->aenv, o, ignore) &&
463                                                         !nodes_interfere(co->cenv, irn, o)) {
464                                                 ++count;
465                                         }
466                                 }
467                         }
468
469                         if (count != 0) {
470                                 int k = 0;
471                                 ++count;
472                                 unit->nodes = XMALLOCN(ir_node*, count);
473                                 unit->costs = XMALLOCN(int,      count);
474                                 unit->node_count = count;
475                                 unit->nodes[k++] = irn;
476
477                                 for (i = 0; 1U << i <= other; ++i) {
478                                         if (other & (1U << i)) {
479                                                 ir_node *o  = get_irn_n(skip_Proj(irn), i);
480                                                 if (!arch_irn_is(co->aenv, o, ignore) &&
481                                                                 !nodes_interfere(co->cenv, irn, o)) {
482                                                         unit->nodes[k] = o;
483                                                         unit->costs[k] = co->get_costs(co, irn, o, -1);
484                                                         ++k;
485                                                 }
486                                         }
487                                 }
488                         }
489                 } else {
490                         assert(0 && "This is not an optimizable node!");
491                 }
492         }
493
494         /* Insert the new unit at a position according to its costs */
495         if (unit->node_count > 1) {
496                 int i;
497                 struct list_head *tmp;
498
499                 /* Determine the maximum costs this unit can cause: all_nodes_cost */
500                 for(i=1; i<unit->node_count; ++i) {
501                         unit->sort_key = MAX(unit->sort_key, unit->costs[i]);
502                         unit->all_nodes_costs += unit->costs[i];
503                 }
504
505                 /* Determine the minimal costs this unit will cause: min_nodes_costs */
506                 unit->min_nodes_costs += unit->all_nodes_costs - ou_max_ind_set_costs(unit);
507                 /* Insert the new ou according to its sort_key */
508                 tmp = &co->units;
509                 while (tmp->next != &co->units && list_entry_units(tmp->next)->sort_key > unit->sort_key)
510                         tmp = tmp->next;
511                 list_add(&unit->units, tmp);
512         } else {
513                 free(unit);
514         }
515 }
516
517 #ifdef QUICK_AND_DIRTY_HACK
518
519 static int compare_ous(const void *k1, const void *k2) {
520         const unit_t *u1 = *((const unit_t **) k1);
521         const unit_t *u2 = *((const unit_t **) k2);
522         int i, o, u1_has_constr, u2_has_constr;
523         arch_register_req_t req;
524
525         /* Units with constraints come first */
526         u1_has_constr = 0;
527         for (i=0; i<u1->node_count; ++i) {
528                 arch_get_register_req(&req, u1->nodes[i], -1);
529                 if (arch_register_req_is(&req, limited)) {
530                         u1_has_constr = 1;
531                         break;
532                 }
533         }
534
535         u2_has_constr = 0;
536         for (i=0; i<u2->node_count; ++i) {
537                 arch_get_register_req(&req, u2->nodes[i], -1);
538                 if (arch_register_req_is(&req, limited)) {
539                         u2_has_constr = 1;
540                         break;
541                 }
542         }
543
544         if (u1_has_constr != u2_has_constr)
545                 return u2_has_constr - u1_has_constr;
546
547         /* Now check, whether the two units are connected */
548 #if 0
549         for (i=0; i<u1->node_count; ++i)
550                 for (o=0; o<u2->node_count; ++o)
551                         if (u1->nodes[i] == u2->nodes[o])
552                                 return 0;
553 #endif
554
555         /* After all, the sort key decides. Greater keys come first. */
556         return u2->sort_key - u1->sort_key;
557
558 }
559
560 /**
561  * Sort the ou's according to constraints and their sort_key
562  */
563 static void co_sort_units(copy_opt_t *co) {
564         int i, count = 0, costs;
565         unit_t *ou, **ous;
566
567         /* get the number of ous, remove them form the list and fill the array */
568         list_for_each_entry(unit_t, ou, &co->units, units)
569                 count++;
570         ous = alloca(count * sizeof(*ous));
571
572         costs = co_get_max_copy_costs(co);
573
574         i = 0;
575         list_for_each_entry(unit_t, ou, &co->units, units)
576                 ous[i++] = ou;
577
578         INIT_LIST_HEAD(&co->units);
579
580         assert(count == i && list_empty(&co->units));
581
582         for (i=0; i<count; ++i)
583                 ir_printf("%+F\n", ous[i]->nodes[0]);
584
585         qsort(ous, count, sizeof(*ous), compare_ous);
586
587         ir_printf("\n\n");
588         for (i=0; i<count; ++i)
589                 ir_printf("%+F\n", ous[i]->nodes[0]);
590
591         /* reinsert into list in correct order */
592         for (i=0; i<count; ++i)
593                 list_add_tail(&ous[i]->units, &co->units);
594
595         assert(costs == co_get_max_copy_costs(co));
596 }
597 #endif
598
599 void co_build_ou_structure(copy_opt_t *co) {
600         DBG((dbg, LEVEL_1, "\tCollecting optimization units\n"));
601         INIT_LIST_HEAD(&co->units);
602         irg_walk_graph(co->irg, co_collect_units, NULL, co);
603 #ifdef QUICK_AND_DIRTY_HACK
604         co_sort_units(co);
605 #endif
606 }
607
608 void co_free_ou_structure(copy_opt_t *co) {
609         unit_t *curr, *tmp;
610         ASSERT_OU_AVAIL(co);
611         list_for_each_entry_safe(unit_t, curr, tmp, &co->units, units) {
612                 xfree(curr->nodes);
613                 xfree(curr->costs);
614                 xfree(curr);
615         }
616         co->units.next = NULL;
617 }
618
619 /* co_solve_heuristic() is implemented in becopyheur.c */
620
621 int co_get_max_copy_costs(const copy_opt_t *co) {
622         int i, res = 0;
623         unit_t *curr;
624
625         ASSERT_OU_AVAIL(co);
626
627         list_for_each_entry(unit_t, curr, &co->units, units) {
628                 res += curr->inevitable_costs;
629                 for (i=1; i<curr->node_count; ++i)
630                         res += curr->costs[i];
631         }
632         return res;
633 }
634
635 int co_get_inevit_copy_costs(const copy_opt_t *co) {
636         int res = 0;
637         unit_t *curr;
638
639         ASSERT_OU_AVAIL(co);
640
641         list_for_each_entry(unit_t, curr, &co->units, units)
642                 res += curr->inevitable_costs;
643         return res;
644 }
645
646 int co_get_copy_costs(const copy_opt_t *co) {
647         int i, res = 0;
648         unit_t *curr;
649
650         ASSERT_OU_AVAIL(co);
651
652         list_for_each_entry(unit_t, curr, &co->units, units) {
653                 int root_col = get_irn_col(curr->nodes[0]);
654                 DBG((dbg, LEVEL_1, "  %3d costs for root %+F color %d\n", curr->inevitable_costs, curr->nodes[0], root_col));
655                 res += curr->inevitable_costs;
656                 for (i=1; i<curr->node_count; ++i) {
657                         int arg_col = get_irn_col(curr->nodes[i]);
658                         if (root_col != arg_col) {
659                                 DBG((dbg, LEVEL_1, "  %3d for arg %+F color %d\n", curr->costs[i], curr->nodes[i], arg_col));
660                                 res += curr->costs[i];
661                         }
662                 }
663         }
664         return res;
665 }
666
667 int co_get_lower_bound(const copy_opt_t *co) {
668         int res = 0;
669         unit_t *curr;
670
671         ASSERT_OU_AVAIL(co);
672
673         list_for_each_entry(unit_t, curr, &co->units, units)
674                 res += curr->inevitable_costs + curr->min_nodes_costs;
675         return res;
676 }
677
678 void co_complete_stats(const copy_opt_t *co, co_complete_stats_t *stat)
679 {
680         bitset_t *seen = bitset_irg_malloc(co->irg);
681         affinity_node_t *an;
682
683         memset(stat, 0, sizeof(stat[0]));
684
685         /* count affinity edges. */
686         co_gs_foreach_aff_node(co, an) {
687                 neighb_t *neigh;
688                 stat->aff_nodes += 1;
689                 bitset_add_irn(seen, an->irn);
690                 co_gs_foreach_neighb(an, neigh) {
691                         if(!bitset_contains_irn(seen, neigh->irn)) {
692                                 stat->aff_edges += 1;
693                                 stat->max_costs += neigh->costs;
694
695                                 if (get_irn_col(an->irn) != get_irn_col(neigh->irn)) {
696                                         stat->costs += neigh->costs;
697                                         stat->unsatisfied_edges += 1;
698                                 }
699
700                                 if(nodes_interfere(co->cenv, an->irn, neigh->irn)) {
701                                         stat->aff_int += 1;
702                                         stat->inevit_costs += neigh->costs;
703                                 }
704
705                         }
706                 }
707         }
708
709         bitset_free(seen);
710 }
711
712 /******************************************************************************
713    _____                 _        _____ _
714   / ____|               | |      / ____| |
715  | |  __ _ __ __ _ _ __ | |__   | (___ | |_ ___  _ __ __ _  __ _  ___
716  | | |_ | '__/ _` | '_ \| '_ \   \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
717  | |__| | | | (_| | |_) | | | |  ____) | || (_) | | | (_| | (_| |  __/
718   \_____|_|  \__,_| .__/|_| |_| |_____/ \__\___/|_|  \__,_|\__, |\___|
719                   | |                                       __/ |
720                   |_|                                      |___/
721  ******************************************************************************/
722
723 static int compare_affinity_node_t(const void *k1, const void *k2, size_t size) {
724         const affinity_node_t *n1 = k1;
725         const affinity_node_t *n2 = k2;
726         (void) size;
727
728         return (n1->irn != n2->irn);
729 }
730
731 static void add_edge(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
732         affinity_node_t new_node, *node;
733         neighb_t        *nbr;
734         int             allocnew = 1;
735
736         new_node.irn        = n1;
737         new_node.degree     = 0;
738         new_node.neighbours = NULL;
739         node = set_insert(co->nodes, &new_node, sizeof(new_node), hash_irn(new_node.irn));
740
741         for (nbr = node->neighbours; nbr; nbr = nbr->next)
742                 if (nbr->irn == n2) {
743                         allocnew = 0;
744                         break;
745                 }
746
747         /* if we did not find n2 in n1's neighbourhood insert it */
748         if (allocnew) {
749                 nbr        = obstack_alloc(&co->obst, sizeof(*nbr));
750                 nbr->irn   = n2;
751                 nbr->costs = 0;
752                 nbr->next  = node->neighbours;
753
754                 node->neighbours = nbr;
755                 node->degree++;
756         }
757
758         /* now nbr points to n1's neighbour-entry of n2 */
759         nbr->costs += costs;
760 }
761
762 static INLINE void add_edges(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
763         if (! be_ifg_connected(co->cenv->ifg, n1, n2)) {
764                 add_edge(co, n1, n2, costs);
765                 add_edge(co, n2, n1, costs);
766         }
767 }
768
769 static void build_graph_walker(ir_node *irn, void *env) {
770         copy_opt_t *co = env;
771         int pos, max;
772         const arch_register_t *reg;
773
774         if (!is_curr_reg_class(co, irn) || arch_irn_is(co->aenv, irn, ignore))
775                 return;
776
777         reg = arch_get_irn_register(irn);
778         if (arch_register_type_is(reg, ignore))
779                 return;
780
781         if (is_Reg_Phi(irn)) { /* Phis */
782                 for (pos=0, max=get_irn_arity(irn); pos<max; ++pos) {
783                         ir_node *arg = get_irn_n(irn, pos);
784                         add_edges(co, irn, arg, co->get_costs(co, irn, arg, pos));
785                 }
786         }
787         else if (is_Perm_Proj(co->aenv, irn)) { /* Perms */
788                 ir_node *arg = get_Perm_src(irn);
789                 add_edges(co, irn, arg, co->get_costs(co, irn, arg, 0));
790         }
791         else { /* 2-address code */
792                 const arch_register_req_t *req = arch_get_register_req(irn, -1);
793                 if (is_2addr_code(req)) {
794                         const unsigned other = req->other_same;
795                         int i;
796
797                         for (i = 0; 1U << i <= other; ++i) {
798                                 if (other & (1U << i)) {
799                                         ir_node *other = get_irn_n(skip_Proj(irn), i);
800                                         if (! arch_irn_is(co->aenv, other, ignore))
801                                                 add_edges(co, irn, other, co->get_costs(co, irn, other, 0));
802                                 }
803                         }
804                 }
805         }
806 }
807
808 void co_build_graph_structure(copy_opt_t *co) {
809         obstack_init(&co->obst);
810         co->nodes = new_set(compare_affinity_node_t, 32);
811
812         irg_walk_graph(co->irg, build_graph_walker, NULL, co);
813 }
814
815 void co_free_graph_structure(copy_opt_t *co) {
816         ASSERT_GS_AVAIL(co);
817
818         del_set(co->nodes);
819         obstack_free(&co->obst, NULL);
820         co->nodes = NULL;
821 }
822
823 /* co_solve_ilp1() co_solve_ilp2() are implemented in becopyilpX.c */
824
825 int co_gs_is_optimizable(copy_opt_t *co, ir_node *irn) {
826         affinity_node_t new_node, *n;
827
828         ASSERT_GS_AVAIL(co);
829
830         new_node.irn = irn;
831         n = set_find(co->nodes, &new_node, sizeof(new_node), hash_irn(new_node.irn));
832         if (n) {
833                 return (n->degree > 0);
834         } else
835                 return 0;
836 }
837
838 static int co_dump_appel_disjoint_constraints(const copy_opt_t *co, ir_node *a, ir_node *b)
839 {
840         ir_node *nodes[]  = { a, b };
841         bitset_t *constr[] = { NULL, NULL };
842         const arch_register_req_t *req;
843         int j;
844
845         constr[0] = bitset_alloca(co->cls->n_regs);
846         constr[1] = bitset_alloca(co->cls->n_regs);
847
848         for (j = 0; j < 2; ++j) {
849                 req = arch_get_register_req(nodes[j], BE_OUT_POS(0));
850                 if(arch_register_req_is(req, limited))
851                         rbitset_copy_to_bitset(req->limited, constr[j]);
852                 else
853                         bitset_set_all(constr[j]);
854
855         }
856
857         return !bitset_intersect(constr[0], constr[1]);
858 }
859
860 void co_dump_appel_graph(const copy_opt_t *co, FILE *f)
861 {
862         be_ifg_t *ifg  = co->cenv->ifg;
863         int *color_map = alloca(co->cls->n_regs * sizeof(color_map[0]));
864         int *node_map  = XMALLOCN(int, get_irg_last_idx(co->irg) + 1);
865
866         ir_node *irn;
867         void *it, *nit;
868         int n, n_regs;
869         unsigned i;
870
871         n_regs = 0;
872         for(i = 0; i < co->cls->n_regs; ++i) {
873                 const arch_register_t *reg = &co->cls->regs[i];
874                 color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_regs++;
875         }
876
877         /*
878          * n contains the first node number.
879          * the values below n are the pre-colored register nodes
880          */
881
882         it  = be_ifg_nodes_iter_alloca(ifg);
883         nit = be_ifg_neighbours_iter_alloca(ifg);
884
885         n = n_regs;
886         be_ifg_foreach_node(ifg, it, irn) {
887                 if(!arch_irn_is(co->aenv, irn, ignore))
888                         node_map[get_irn_idx(irn)] = n++;
889         }
890
891         fprintf(f, "%d %d\n", n, n_regs);
892
893         be_ifg_foreach_node(ifg, it, irn) {
894                 if(!arch_irn_is(co->aenv, irn, ignore)) {
895                         int idx            = node_map[get_irn_idx(irn)];
896                         affinity_node_t *a = get_affinity_info(co, irn);
897
898                         const arch_register_req_t *req;
899                         ir_node *adj;
900
901                         req = arch_get_register_req(irn, BE_OUT_POS(0));
902                         if(arch_register_req_is(req, limited)) {
903                                 for(i = 0; i < co->cls->n_regs; ++i) {
904                                         if(!rbitset_is_set(req->limited, i) && color_map[i] >= 0)
905                                                 fprintf(f, "%d %d -1\n", color_map[i], idx);
906                                 }
907                         }
908
909                         be_ifg_foreach_neighbour(ifg, nit, irn, adj) {
910                                 if(!arch_irn_is(co->aenv, adj, ignore) && !co_dump_appel_disjoint_constraints(co, irn, adj)) {
911                                         int adj_idx = node_map[get_irn_idx(adj)];
912                                         if(idx < adj_idx)
913                                                 fprintf(f, "%d %d -1\n", idx, adj_idx);
914                                 }
915                         }
916
917                         if(a) {
918                                 neighb_t *n;
919
920                                 co_gs_foreach_neighb(a, n) {
921                                         if(!arch_irn_is(co->aenv, n->irn, ignore)) {
922                                                 int n_idx = node_map[get_irn_idx(n->irn)];
923                                                 if(idx < n_idx)
924                                                         fprintf(f, "%d %d %d\n", idx, n_idx, (int) n->costs);
925                                         }
926                                 }
927                         }
928                 }
929         }
930
931         xfree(node_map);
932 }
933
934 /*
935          ___ _____ ____   ____   ___ _____   ____                        _
936         |_ _|  ___/ ___| |  _ \ / _ \_   _| |  _ \ _   _ _ __ ___  _ __ (_)_ __   __ _
937          | || |_ | |  _  | | | | | | || |   | | | | | | | '_ ` _ \| '_ \| | '_ \ / _` |
938          | ||  _|| |_| | | |_| | |_| || |   | |_| | |_| | | | | | | |_) | | | | | (_| |
939         |___|_|   \____| |____/ \___/ |_|   |____/ \__,_|_| |_| |_| .__/|_|_| |_|\__, |
940                                                                   |_|            |___/
941 */
942
943 static const char *get_dot_color_name(size_t col)
944 {
945         static const char *names[] = {
946                 "blue",
947                 "red",
948                 "green",
949                 "yellow",
950                 "cyan",
951                 "magenta",
952                 "orange",
953                 "chocolate",
954                 "beige",
955                 "navy",
956                 "darkgreen",
957                 "darkred",
958                 "lightPink",
959                 "chartreuse",
960                 "lightskyblue",
961                 "linen",
962                 "pink",
963                 "lightslateblue",
964                 "mintcream",
965                 "red",
966                 "darkolivegreen",
967                 "mediumblue",
968                 "mistyrose",
969                 "salmon",
970                 "darkseagreen",
971                 "mediumslateblue"
972                 "moccasin",
973                 "tomato",
974                 "forestgreen",
975                 "darkturquoise",
976                 "palevioletred"
977         };
978
979         return col < sizeof(names)/sizeof(names[0]) ? names[col] : "white";
980 }
981
982 typedef struct _co_ifg_dump_t {
983         const copy_opt_t *co;
984         unsigned flags;
985 } co_ifg_dump_t;
986
987 static void ifg_dump_graph_attr(FILE *f, void *self)
988 {
989         (void) self;
990         fprintf(f, "overlap=scale");
991 }
992
993 static int ifg_is_dump_node(void *self, ir_node *irn)
994 {
995         co_ifg_dump_t *cod = self;
996         return !arch_irn_is(cod->co->aenv, irn, ignore);
997 }
998
999 static void ifg_dump_node_attr(FILE *f, void *self, ir_node *irn)
1000 {
1001         co_ifg_dump_t *env         = self;
1002         const arch_register_t *reg = arch_get_irn_register(irn);
1003         const arch_register_req_t *req;
1004         int limited;
1005
1006         req = arch_get_register_req(irn, BE_OUT_POS(0));
1007         limited = arch_register_req_is(req, limited);
1008
1009         if(env->flags & CO_IFG_DUMP_LABELS) {
1010                 ir_fprintf(f, "label=\"%+F", irn);
1011
1012                 if((env->flags & CO_IFG_DUMP_CONSTR) && limited) {
1013                         bitset_t *bs = bitset_alloca(env->co->cls->n_regs);
1014                         rbitset_copy_to_bitset(req->limited, bs);
1015                         ir_fprintf(f, "\\n%B", bs);
1016                 }
1017                 ir_fprintf(f, "\" ");
1018         } else {
1019                 fprintf(f, "label=\"\" shape=point " );
1020         }
1021
1022         if(env->flags & CO_IFG_DUMP_SHAPE)
1023                 fprintf(f, "shape=%s ", limited ? "diamond" : "ellipse");
1024
1025         if(env->flags & CO_IFG_DUMP_COLORS)
1026                 fprintf(f, "style=filled color=%s ", get_dot_color_name(reg->index));
1027 }
1028
1029 static void ifg_dump_at_end(FILE *file, void *self)
1030 {
1031         co_ifg_dump_t *env = self;
1032         affinity_node_t *a;
1033
1034         co_gs_foreach_aff_node(env->co, a) {
1035                 const arch_register_t *ar = arch_get_irn_register(a->irn);
1036                 unsigned aidx = get_irn_idx(a->irn);
1037                 neighb_t *n;
1038
1039                 co_gs_foreach_neighb(a, n) {
1040                         const arch_register_t *nr = arch_get_irn_register(n->irn);
1041                         unsigned nidx = get_irn_idx(n->irn);
1042
1043                         if(aidx < nidx) {
1044                                 const char *color = nr == ar ? "blue" : "red";
1045                                 fprintf(file, "\tn%d -- n%d [weight=0.01 ", aidx, nidx);
1046                                 if(env->flags & CO_IFG_DUMP_LABELS)
1047                                         fprintf(file, "label=\"%d\" ", n->costs);
1048                                 if(env->flags & CO_IFG_DUMP_COLORS)
1049                                         fprintf(file, "color=%s ", color);
1050                                 else
1051                                         fprintf(file, "style=dotted");
1052                                 fprintf(file, "];\n");
1053                         }
1054                 }
1055         }
1056 }
1057
1058
1059 static be_ifg_dump_dot_cb_t ifg_dot_cb = {
1060         ifg_is_dump_node,
1061         ifg_dump_graph_attr,
1062         ifg_dump_node_attr,
1063         NULL,
1064         NULL,
1065         ifg_dump_at_end
1066 };
1067
1068
1069
1070 void co_dump_ifg_dot(const copy_opt_t *co, FILE *f, unsigned flags)
1071 {
1072         co_ifg_dump_t cod;
1073
1074         cod.co    = co;
1075         cod.flags = flags;
1076         be_ifg_dump_dot(co->cenv->ifg, co->irg, f, &ifg_dot_cb, &cod);
1077 }
1078
1079
1080 void co_solve_park_moon(copy_opt_t *opt)
1081 {
1082         (void) opt;
1083 }
1084
1085 static int void_algo(copy_opt_t *co)
1086 {
1087         (void) co;
1088         return 0;
1089 }
1090
1091 /*
1092                 _    _                  _ _   _
1093            / \  | | __ _  ___  _ __(_) |_| |__  _ __ ___  ___
1094           / _ \ | |/ _` |/ _ \| '__| | __| '_ \| '_ ` _ \/ __|
1095          / ___ \| | (_| | (_) | |  | | |_| | | | | | | | \__ \
1096         /_/   \_\_|\__, |\___/|_|  |_|\__|_| |_|_| |_| |_|___/
1097                            |___/
1098 */
1099
1100 typedef struct {
1101         co_algo_t  *algo;
1102         const char *name;
1103         int        can_improve_existing;
1104 } co_algo_info_t;
1105
1106 static co_algo_info_t algos[] = {
1107         { void_algo,               "none",  0 },
1108         { co_solve_heuristic,      "heur1", 0 },
1109         { co_solve_heuristic_new,  "heur2", 0 },
1110 #ifdef WITH_JVM
1111         { co_solve_heuristic_java, "heur3", 0 },
1112 #else
1113         { NULL,                    "heur3", 0 },
1114 #endif
1115         { co_solve_heuristic_mst,  "heur4", 0 },
1116 #ifdef WITH_ILP
1117         { co_solve_ilp2,           "ilp",   1 },
1118 #else
1119         { NULL,                    "ilp",   1 },
1120 #endif
1121         { NULL,                    "",      0 }
1122 };
1123
1124 /*
1125     __  __       _         ____       _
1126    |  \/  | __ _(_)_ __   |  _ \ _ __(_)_   _____ _ __
1127    | |\/| |/ _` | | '_ \  | | | | '__| \ \ / / _ \ '__|
1128    | |  | | (_| | | | | | | |_| | |  | |\ V /  __/ |
1129    |_|  |_|\__,_|_|_| |_| |____/|_|  |_| \_/ \___|_|
1130
1131 */
1132
1133 static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char *suffix)
1134 {
1135         FILE *result;
1136         char buf[1024];
1137         size_t i, n;
1138         char *tu_name;
1139
1140         n = strlen(env->birg->main_env->cup_name);
1141         tu_name = XMALLOCN(char, n + 1);
1142         strcpy(tu_name, env->birg->main_env->cup_name);
1143         for (i = 0; i < n; ++i)
1144                 if (tu_name[i] == '.')
1145                         tu_name[i] = '_';
1146
1147
1148         ir_snprintf(buf, sizeof(buf), "%s%s_%F_%s%s", prefix, tu_name, env->irg, env->cls->name, suffix);
1149         xfree(tu_name);
1150         result = fopen(buf, "wt");
1151         if(result == NULL) {
1152                 panic("Couldn't open '%s' for writing.", buf);
1153         }
1154
1155         return result;
1156 }
1157
1158 void co_driver(be_chordal_env_t *cenv)
1159 {
1160         ir_timer_t          *timer = ir_timer_register("firm.be.copyopt", "runtime");
1161         co_complete_stats_t before, after;
1162         copy_opt_t          *co;
1163         co_algo_t           *algo_func;
1164         int                 was_optimal = 0;
1165
1166         if (algo >= CO_ALGO_LAST)
1167                 return;
1168
1169         be_liveness_assure_chk(be_get_birg_liveness(cenv->birg));
1170
1171         co = new_copy_opt(cenv, cost_func);
1172         co_build_ou_structure(co);
1173         co_build_graph_structure(co);
1174
1175         co_complete_stats(co, &before);
1176
1177         be_stat_ev_ull("co_aff_nodes",    before.aff_nodes);
1178         be_stat_ev_ull("co_aff_edges",    before.aff_edges);
1179         be_stat_ev_ull("co_max_costs",    before.max_costs);
1180         be_stat_ev_ull("co_inevit_costs", before.inevit_costs);
1181         be_stat_ev_ull("co_aff_int",      before.aff_int);
1182
1183         be_stat_ev_ull("co_init_costs",   before.costs);
1184         be_stat_ev_ull("co_init_unsat",   before.unsatisfied_edges);
1185
1186         if (dump_flags & DUMP_BEFORE) {
1187                 FILE *f = my_open(cenv, "", "-before.dot");
1188                 co_dump_ifg_dot(co, f, style_flags);
1189                 fclose(f);
1190         }
1191
1192         /* if the algo can improve results, provide an initial solution with heur3 */
1193         if (improve && algos[algo].can_improve_existing) {
1194                 co_complete_stats_t stats;
1195
1196                 /* produce a heuristic solution */
1197 #ifdef WITH_JVM
1198                 co_solve_heuristic_java(co);
1199 #else
1200                 co_solve_heuristic(co);
1201 #endif /* WITH_JVM */
1202
1203                 /* do the stats and provide the current costs */
1204                 co_complete_stats(co, &stats);
1205                 be_stat_ev_ull("co_prepare_costs", stats.costs);
1206         }
1207
1208 #ifdef WITH_JVM
1209         /* start the JVM here so that it does not tamper the timing. */
1210         if (algo == CO_ALGO_HEUR3)
1211                 be_java_coal_start_jvm();
1212 #endif /* WITH_JVM */
1213
1214         algo_func = algos[algo].algo;
1215
1216         /* perform actual copy minimization */
1217         ir_timer_reset_and_start(timer);
1218         was_optimal = algo_func(co);
1219         ir_timer_stop(timer);
1220
1221         be_stat_ev("co_time", ir_timer_elapsed_msec(timer));
1222         be_stat_ev_ull("co_optimal", was_optimal);
1223
1224         if (dump_flags & DUMP_AFTER) {
1225                 FILE *f = my_open(cenv, "", "-after.dot");
1226                 co_dump_ifg_dot(co, f, style_flags);
1227                 fclose(f);
1228         }
1229
1230         co_complete_stats(co, &after);
1231
1232         if (do_stats) {
1233                 ulong64 optimizable_costs = after.max_costs - after.inevit_costs;
1234                 ulong64 evitable          = after.costs     - after.inevit_costs;
1235
1236                 ir_printf("%30F ", cenv->irg);
1237                 printf("%10s %10" ULL_FMT "%10" ULL_FMT "%10" ULL_FMT, cenv->cls->name, after.max_costs, before.costs, after.inevit_costs);
1238
1239                 if(optimizable_costs > 0)
1240                         printf("%10" ULL_FMT " %5.2f\n", after.costs, (evitable * 100.0) / optimizable_costs);
1241                 else
1242                         printf("%10" ULL_FMT " %5s\n", after.costs, "-");
1243         }
1244
1245         /* Dump the interference graph in Appel's format. */
1246         if (dump_flags & DUMP_APPEL) {
1247                 FILE *f = my_open(cenv, "", ".apl");
1248                 fprintf(f, "# %lld %lld\n", after.costs, after.unsatisfied_edges);
1249                 co_dump_appel_graph(co, f);
1250                 fclose(f);
1251         }
1252
1253         be_stat_ev_ull("co_after_costs", after.costs);
1254         be_stat_ev_ull("co_after_unsat", after.unsatisfied_edges);
1255
1256         co_free_graph_structure(co);
1257         co_free_ou_structure(co);
1258         free_copy_opt(co);
1259 }