remove old unused function
[libfirm] / ir / be / becopyopt.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Copy minimization driver.
23  * @author      Daniel Grund
24  * @date        12.04.2005
25  * @version     $Id$
26  *
27  * Main file for the optimization reducing the copies needed for:
28  * - Phi coalescing
29  * - Register-constrained nodes
30  * - Two-address code instructions
31  */
32 #include "config.h"
33
34 #include "execfreq.h"
35 #include "xmalloc.h"
36 #include "debug.h"
37 #include "pmap.h"
38 #include "raw_bitset.h"
39 #include "irnode.h"
40 #include "irgraph.h"
41 #include "irgwalk.h"
42 #include "irprog.h"
43 #include "irloop_t.h"
44 #include "iredges_t.h"
45 #include "phiclass.h"
46 #include "irbitset.h"
47 #include "irphase_t.h"
48 #include "irprintf_t.h"
49
50 #include "bemodule.h"
51 #include "bearch_t.h"
52 #include "benode_t.h"
53 #include "beutil.h"
54 #include "beifg_t.h"
55 #include "beintlive_t.h"
56 #include "becopyopt_t.h"
57 #include "becopystat.h"
58 #include "belive_t.h"
59 #include "beinsn_t.h"
60 #include "besched_t.h"
61 #include "bestatevent.h"
62 #include "beirg_t.h"
63 #include "error.h"
64
65 #include "lc_opts.h"
66 #include "lc_opts_enum.h"
67
68 #define DUMP_BEFORE 1
69 #define DUMP_AFTER  2
70 #define DUMP_APPEL  4
71 #define DUMP_ALL    2 * DUMP_APPEL - 1
72
73 #define COST_FUNC_FREQ     1
74 #define COST_FUNC_LOOP     2
75 #define COST_FUNC_ALL_ONE  3
76
77 static unsigned   dump_flags  = 0;
78 static unsigned   style_flags = 0;
79 static unsigned   do_stats    = 0;
80 static cost_fct_t cost_func   = co_get_costs_exec_freq;
81 static unsigned   algo        = CO_ALGO_HEUR4;
82 static int        improve     = 1;
83
84 static const lc_opt_enum_mask_items_t dump_items[] = {
85         { "before",  DUMP_BEFORE },
86         { "after",   DUMP_AFTER  },
87         { "appel",   DUMP_APPEL  },
88         { "all",     DUMP_ALL    },
89         { NULL,      0 }
90 };
91
92 static const lc_opt_enum_mask_items_t style_items[] = {
93         { "color",   CO_IFG_DUMP_COLORS },
94         { "labels",  CO_IFG_DUMP_LABELS },
95         { "constr",  CO_IFG_DUMP_CONSTR },
96         { "shape",   CO_IFG_DUMP_SHAPE  },
97         { "full",    2 * CO_IFG_DUMP_SHAPE - 1 },
98         { NULL,      0 }
99 };
100
101 static const lc_opt_enum_mask_items_t algo_items[] = {
102         { "none",   CO_ALGO_NONE  },
103         { "heur",   CO_ALGO_HEUR  },
104         { "heur2",  CO_ALGO_HEUR2 },
105         { "heur3",  CO_ALGO_HEUR3 },
106         { "heur4",  CO_ALGO_HEUR4 },
107         { "ilp",    CO_ALGO_ILP   },
108         { NULL,     0 }
109 };
110
111 typedef int (*opt_funcptr)(void);
112
113 static const lc_opt_enum_func_ptr_items_t cost_func_items[] = {
114         { "freq",   (opt_funcptr) co_get_costs_exec_freq },
115         { "loop",   (opt_funcptr) co_get_costs_loop_depth },
116         { "one",    (opt_funcptr) co_get_costs_all_one },
117         { NULL,     NULL }
118 };
119
120 static lc_opt_enum_mask_var_t dump_var = {
121         &dump_flags, dump_items
122 };
123
124 static lc_opt_enum_mask_var_t style_var = {
125         &style_flags, style_items
126 };
127
128 static lc_opt_enum_mask_var_t algo_var = {
129         &algo, algo_items
130 };
131
132 static lc_opt_enum_func_ptr_var_t cost_func_var = {
133         (opt_funcptr*) &cost_func, cost_func_items
134 };
135
136 static const lc_opt_table_entry_t options[] = {
137         LC_OPT_ENT_ENUM_INT      ("algo",    "select copy optimization algo",                           &algo_var),
138         LC_OPT_ENT_ENUM_FUNC_PTR ("cost",    "select a cost function",                                  &cost_func_var),
139         LC_OPT_ENT_ENUM_MASK     ("dump",    "dump ifg before or after copy optimization",              &dump_var),
140         LC_OPT_ENT_ENUM_MASK     ("style",   "dump style for ifg dumping",                              &style_var),
141         LC_OPT_ENT_BOOL          ("stats",   "dump statistics after each optimization",                 &do_stats),
142         LC_OPT_ENT_BOOL          ("improve", "run heur3 before if algo can exploit start solutions",    &improve),
143         LC_OPT_LAST
144 };
145
146 /* Insert additional options registration functions here. */
147 extern void be_co_ilp_register_options(lc_opt_entry_t *grp);
148 extern void be_co2_register_options(lc_opt_entry_t *grp);
149 extern void be_co3_register_options(lc_opt_entry_t *grp);
150
151 void be_init_copycoal(void)
152 {
153         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
154         lc_opt_entry_t *ra_grp = lc_opt_get_grp(be_grp, "ra");
155         lc_opt_entry_t *chordal_grp = lc_opt_get_grp(ra_grp, "chordal");
156         lc_opt_entry_t *co_grp = lc_opt_get_grp(chordal_grp, "co");
157
158         lc_opt_add_table(co_grp, options);
159 }
160
161 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copycoal);
162
163 #undef QUICK_AND_DIRTY_HACK
164
165 static int nodes_interfere(const be_chordal_env_t *env, const ir_node *a, const ir_node *b)
166 {
167         if (env->ifg)
168                 return be_ifg_connected(env->ifg, a, b);
169         else
170                 return values_interfere(env->birg, a, b);
171 }
172
173
174 /******************************************************************************
175     _____                           _
176    / ____|                         | |
177   | |  __  ___ _ __   ___ _ __ __ _| |
178   | | |_ |/ _ \ '_ \ / _ \ '__/ _` | |
179   | |__| |  __/ | | |  __/ | | (_| | |
180    \_____|\___|_| |_|\___|_|  \__,_|_|
181
182  ******************************************************************************/
183
184 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
185
186
187 copy_opt_t *new_copy_opt(be_chordal_env_t *chordal_env, cost_fct_t get_costs)
188 {
189         const char *s1, *s2, *s3;
190         int len;
191         copy_opt_t *co;
192
193         FIRM_DBG_REGISTER(dbg, "ir.be.copyopt");
194
195         co = XMALLOCZ(copy_opt_t);
196         co->cenv      = chordal_env;
197         co->irg       = chordal_env->irg;
198         co->cls       = chordal_env->cls;
199         co->get_costs = get_costs;
200
201         s1 = get_irp_prog_name();
202         s2 = get_entity_name(get_irg_entity(co->irg));
203         s3 = chordal_env->cls->name;
204         len = strlen(s1) + strlen(s2) + strlen(s3) + 5;
205         co->name = XMALLOCN(char, len);
206         snprintf(co->name, len, "%s__%s__%s", s1, s2, s3);
207
208         return co;
209 }
210
211 void free_copy_opt(copy_opt_t *co) {
212         xfree(co->name);
213         free(co);
214 }
215
216 int co_is_optimizable_root(const copy_opt_t *co, ir_node *irn) {
217         const arch_register_req_t *req;
218         const arch_register_t *reg;
219         (void)co; // TODO remove parameter
220
221         if (arch_irn_is(irn, ignore))
222                 return 0;
223
224         reg = arch_get_irn_register(irn);
225         if (arch_register_type_is(reg, ignore))
226                 return 0;
227
228         req = arch_get_register_req(irn, -1);
229         if (is_Reg_Phi(irn) || is_Perm_Proj(irn) || is_2addr_code(req))
230                 return 1;
231
232         return 0;
233 }
234
235 int co_get_costs_loop_depth(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
236         int cost = 0;
237         ir_loop *loop;
238         ir_node *root_block = get_nodes_block(root);
239         (void) co;
240         (void) arg;
241
242         if (is_Phi(root)) {
243                 /* for phis the copies are placed in the corresponding pred-block */
244                 loop = get_irn_loop(get_Block_cfgpred_block(root_block, pos));
245         } else {
246                 /* a perm places the copy in the same block as it resides */
247                 loop = get_irn_loop(root_block);
248         }
249         if (loop) {
250                 int d = get_loop_depth(loop);
251                 cost = d*d;
252         }
253         return 1+cost;
254 }
255
256 int co_get_costs_exec_freq(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
257         int res;
258         ir_node *root_bl = get_nodes_block(root);
259         ir_node *copy_bl = is_Phi(root) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
260         (void) arg;
261         res = get_block_execfreq_ulong(co->cenv->birg->exec_freq, copy_bl);
262
263         /* don't allow values smaller than one. */
264         return res < 1 ? 1 : res;
265 }
266
267
268 int co_get_costs_all_one(const copy_opt_t *co, ir_node *root, ir_node *arg, int pos) {
269         (void) co;
270         (void) root;
271         (void) arg;
272         (void) pos;
273         return 1;
274 }
275
276 /******************************************************************************
277    ____        _   _    _       _ _          _____ _
278   / __ \      | | | |  | |     (_) |        / ____| |
279  | |  | |_ __ | |_| |  | |_ __  _| |_ ___  | (___ | |_ ___  _ __ __ _  __ _  ___
280  | |  | | '_ \| __| |  | | '_ \| | __/ __|  \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
281  | |__| | |_) | |_| |__| | | | | | |_\__ \  ____) | || (_) | | | (_| | (_| |  __/
282   \____/| .__/ \__|\____/|_| |_|_|\__|___/ |_____/ \__\___/|_|  \__,_|\__, |\___|
283         | |                                                            __/ |
284         |_|                                                           |___/
285  ******************************************************************************/
286
287 /**
288  * Determines a maximum weighted independent set with respect to
289  * the interference and conflict edges of all nodes in a qnode.
290  */
291 static int ou_max_ind_set_costs(unit_t *ou) {
292         be_chordal_env_t *chordal_env = ou->co->cenv;
293         ir_node **safe, **unsafe;
294         int i, o, safe_count, safe_costs, unsafe_count, *unsafe_costs;
295         bitset_t *curr;
296         bitset_pos_t pos;
297         int max, curr_weight, best_weight = 0;
298
299         /* assign the nodes into two groups.
300          * safe: node has no interference, hence it is in every max stable set.
301          * unsafe: node has an interference
302          */
303         safe = alloca((ou->node_count-1) * sizeof(*safe));
304         safe_costs = 0;
305         safe_count = 0;
306         unsafe = alloca((ou->node_count-1) * sizeof(*unsafe));
307         unsafe_costs = alloca((ou->node_count-1) * sizeof(*unsafe_costs));
308         unsafe_count = 0;
309         for(i=1; i<ou->node_count; ++i) {
310                 int is_safe = 1;
311                 for(o=1; o<ou->node_count; ++o) {
312                         if (i==o)
313                                 continue;
314                         if (nodes_interfere(chordal_env, ou->nodes[i], ou->nodes[o])) {
315                                 unsafe_costs[unsafe_count] = ou->costs[i];
316                                 unsafe[unsafe_count] = ou->nodes[i];
317                                 ++unsafe_count;
318                                 is_safe = 0;
319                                 break;
320                         }
321                 }
322                 if (is_safe) {
323                         safe_costs += ou->costs[i];
324                         safe[safe_count++] = ou->nodes[i];
325                 }
326         }
327
328
329         /* now compute the best set out of the unsafe nodes*/
330         if (unsafe_count > MIS_HEUR_TRIGGER) {
331                 bitset_t *best = bitset_alloca(unsafe_count);
332                 /* Heuristik: Greedy trial and error form index 0 to unsafe_count-1 */
333                 for (i=0; i<unsafe_count; ++i) {
334                         bitset_set(best, i);
335                         /* check if it is a stable set */
336                         for (o=bitset_next_set(best, 0); o!=-1 && o<i; o=bitset_next_set(best, o+1))
337                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o])) {
338                                         bitset_clear(best, i); /* clear the bit and try next one */
339                                         break;
340                                 }
341                 }
342                 /* compute the weight */
343                 bitset_foreach(best, pos)
344                         best_weight += unsafe_costs[pos];
345         } else {
346                 /* Exact Algorithm: Brute force */
347                 curr = bitset_alloca(unsafe_count);
348                 bitset_set_all(curr);
349                 while ((max = bitset_popcnt(curr)) != 0) {
350                         /* check if curr is a stable set */
351                         for (i=bitset_next_set(curr, 0); i!=-1; i=bitset_next_set(curr, i+1))
352                                 for (o=bitset_next_set(curr, i+1); o!=-1; o=bitset_next_set(curr, o+1)) /* !!!!! difference to qnode_max_ind_set(): NOT (curr, i) */
353                                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o]))
354                                                         goto no_stable_set;
355
356                         /* if we arrive here, we have a stable set */
357                         /* compute the weigth of the stable set*/
358                         curr_weight = 0;
359                         bitset_foreach(curr, pos)
360                                 curr_weight += unsafe_costs[pos];
361
362                         /* any better ? */
363                         if (curr_weight > best_weight) {
364                                 best_weight = curr_weight;
365                         }
366
367         no_stable_set:
368                         bitset_minus1(curr);
369                 }
370         }
371
372         return safe_costs+best_weight;
373 }
374
375 static void co_collect_units(ir_node *irn, void *env) {
376         copy_opt_t *co = env;
377         unit_t *unit;
378
379         if (!is_curr_reg_class(co, irn))
380                 return;
381         if (!co_is_optimizable_root(co, irn))
382                 return;
383
384         /* Init a new unit */
385         unit = XMALLOCZ(unit_t);
386         unit->co = co;
387         unit->node_count = 1;
388         INIT_LIST_HEAD(&unit->queue);
389
390         /* Phi with some/all of its arguments */
391         if (is_Reg_Phi(irn)) {
392                 int i, arity;
393
394                 /* init */
395                 arity = get_irn_arity(irn);
396                 unit->nodes = XMALLOCN(ir_node*, arity + 1);
397                 unit->costs = XMALLOCN(int,      arity + 1);
398                 unit->nodes[0] = irn;
399
400                 /* fill */
401                 for (i=0; i<arity; ++i) {
402                         int o, arg_pos;
403                         ir_node *arg = get_irn_n(irn, i);
404
405                         assert(is_curr_reg_class(co, arg) && "Argument not in same register class.");
406                         if (arg == irn)
407                                 continue;
408                         if (nodes_interfere(co->cenv, irn, arg)) {
409                                 unit->inevitable_costs += co->get_costs(co, irn, arg, i);
410                                 continue;
411                         }
412
413                         /* Else insert the argument of the phi to the members of this ou */
414                         DBG((dbg, LEVEL_1, "\t   Member: %+F\n", arg));
415
416                         if (!arch_irn_is(arg, ignore)) {
417                                 /* Check if arg has occurred at a prior position in the arg/list */
418                                 arg_pos = 0;
419                                 for (o=1; o<unit->node_count; ++o) {
420                                         if (unit->nodes[o] == arg) {
421                                                 arg_pos = o;
422                                                 break;
423                                         }
424                                 }
425
426                                 if (!arg_pos) { /* a new argument */
427                                         /* insert node, set costs */
428                                         unit->nodes[unit->node_count] = arg;
429                                         unit->costs[unit->node_count] = co->get_costs(co, irn, arg, i);
430                                         unit->node_count++;
431                                 } else { /* arg has occurred before in same phi */
432                                         /* increase costs for existing arg */
433                                         unit->costs[arg_pos] += co->get_costs(co, irn, arg, i);
434                                 }
435                         }
436                 }
437                 unit->nodes = XREALLOC(unit->nodes, ir_node*, unit->node_count);
438                 unit->costs = XREALLOC(unit->costs, int,      unit->node_count);
439         } else if (is_Perm_Proj(irn)) {
440                 /* Proj of a perm with corresponding arg */
441                 assert(!nodes_interfere(co->cenv, irn, get_Perm_src(irn)));
442                 unit->nodes = XMALLOCN(ir_node*, 2);
443                 unit->costs = XMALLOCN(int,      2);
444                 unit->node_count = 2;
445                 unit->nodes[0] = irn;
446                 unit->nodes[1] = get_Perm_src(irn);
447                 unit->costs[1] = co->get_costs(co, irn, unit->nodes[1], -1);
448         } else {
449                 const arch_register_req_t *req = arch_get_register_req(irn, -1);
450
451                 /* Src == Tgt of a 2-addr-code instruction */
452                 if (is_2addr_code(req)) {
453                         const unsigned other = req->other_same;
454                         int            count = 0;
455                         int            i;
456
457                         for (i = 0; (1U << i) <= other; ++i) {
458                                 if (other & (1U << i)) {
459                                         ir_node *o  = get_irn_n(skip_Proj(irn), i);
460                                         if (!arch_irn_is(o, ignore) &&
461                                                         !nodes_interfere(co->cenv, irn, o)) {
462                                                 ++count;
463                                         }
464                                 }
465                         }
466
467                         if (count != 0) {
468                                 int k = 0;
469                                 ++count;
470                                 unit->nodes = XMALLOCN(ir_node*, count);
471                                 unit->costs = XMALLOCN(int,      count);
472                                 unit->node_count = count;
473                                 unit->nodes[k++] = irn;
474
475                                 for (i = 0; 1U << i <= other; ++i) {
476                                         if (other & (1U << i)) {
477                                                 ir_node *o  = get_irn_n(skip_Proj(irn), i);
478                                                 if (!arch_irn_is(o, ignore) &&
479                                                                 !nodes_interfere(co->cenv, irn, o)) {
480                                                         unit->nodes[k] = o;
481                                                         unit->costs[k] = co->get_costs(co, irn, o, -1);
482                                                         ++k;
483                                                 }
484                                         }
485                                 }
486                         }
487                 } else {
488                         assert(0 && "This is not an optimizable node!");
489                 }
490         }
491
492         /* Insert the new unit at a position according to its costs */
493         if (unit->node_count > 1) {
494                 int i;
495                 struct list_head *tmp;
496
497                 /* Determine the maximum costs this unit can cause: all_nodes_cost */
498                 for(i=1; i<unit->node_count; ++i) {
499                         unit->sort_key = MAX(unit->sort_key, unit->costs[i]);
500                         unit->all_nodes_costs += unit->costs[i];
501                 }
502
503                 /* Determine the minimal costs this unit will cause: min_nodes_costs */
504                 unit->min_nodes_costs += unit->all_nodes_costs - ou_max_ind_set_costs(unit);
505                 /* Insert the new ou according to its sort_key */
506                 tmp = &co->units;
507                 while (tmp->next != &co->units && list_entry_units(tmp->next)->sort_key > unit->sort_key)
508                         tmp = tmp->next;
509                 list_add(&unit->units, tmp);
510         } else {
511                 free(unit);
512         }
513 }
514
515 #ifdef QUICK_AND_DIRTY_HACK
516
517 static int compare_ous(const void *k1, const void *k2) {
518         const unit_t *u1 = *((const unit_t **) k1);
519         const unit_t *u2 = *((const unit_t **) k2);
520         int i, o, u1_has_constr, u2_has_constr;
521         arch_register_req_t req;
522
523         /* Units with constraints come first */
524         u1_has_constr = 0;
525         for (i=0; i<u1->node_count; ++i) {
526                 arch_get_register_req(&req, u1->nodes[i], -1);
527                 if (arch_register_req_is(&req, limited)) {
528                         u1_has_constr = 1;
529                         break;
530                 }
531         }
532
533         u2_has_constr = 0;
534         for (i=0; i<u2->node_count; ++i) {
535                 arch_get_register_req(&req, u2->nodes[i], -1);
536                 if (arch_register_req_is(&req, limited)) {
537                         u2_has_constr = 1;
538                         break;
539                 }
540         }
541
542         if (u1_has_constr != u2_has_constr)
543                 return u2_has_constr - u1_has_constr;
544
545         /* Now check, whether the two units are connected */
546 #if 0
547         for (i=0; i<u1->node_count; ++i)
548                 for (o=0; o<u2->node_count; ++o)
549                         if (u1->nodes[i] == u2->nodes[o])
550                                 return 0;
551 #endif
552
553         /* After all, the sort key decides. Greater keys come first. */
554         return u2->sort_key - u1->sort_key;
555
556 }
557
558 /**
559  * Sort the ou's according to constraints and their sort_key
560  */
561 static void co_sort_units(copy_opt_t *co) {
562         int i, count = 0, costs;
563         unit_t *ou, **ous;
564
565         /* get the number of ous, remove them form the list and fill the array */
566         list_for_each_entry(unit_t, ou, &co->units, units)
567                 count++;
568         ous = alloca(count * sizeof(*ous));
569
570         costs = co_get_max_copy_costs(co);
571
572         i = 0;
573         list_for_each_entry(unit_t, ou, &co->units, units)
574                 ous[i++] = ou;
575
576         INIT_LIST_HEAD(&co->units);
577
578         assert(count == i && list_empty(&co->units));
579
580         for (i=0; i<count; ++i)
581                 ir_printf("%+F\n", ous[i]->nodes[0]);
582
583         qsort(ous, count, sizeof(*ous), compare_ous);
584
585         ir_printf("\n\n");
586         for (i=0; i<count; ++i)
587                 ir_printf("%+F\n", ous[i]->nodes[0]);
588
589         /* reinsert into list in correct order */
590         for (i=0; i<count; ++i)
591                 list_add_tail(&ous[i]->units, &co->units);
592
593         assert(costs == co_get_max_copy_costs(co));
594 }
595 #endif
596
597 void co_build_ou_structure(copy_opt_t *co) {
598         DBG((dbg, LEVEL_1, "\tCollecting optimization units\n"));
599         INIT_LIST_HEAD(&co->units);
600         irg_walk_graph(co->irg, co_collect_units, NULL, co);
601 #ifdef QUICK_AND_DIRTY_HACK
602         co_sort_units(co);
603 #endif
604 }
605
606 void co_free_ou_structure(copy_opt_t *co) {
607         unit_t *curr, *tmp;
608         ASSERT_OU_AVAIL(co);
609         list_for_each_entry_safe(unit_t, curr, tmp, &co->units, units) {
610                 xfree(curr->nodes);
611                 xfree(curr->costs);
612                 xfree(curr);
613         }
614         co->units.next = NULL;
615 }
616
617 /* co_solve_heuristic() is implemented in becopyheur.c */
618
619 int co_get_max_copy_costs(const copy_opt_t *co) {
620         int i, res = 0;
621         unit_t *curr;
622
623         ASSERT_OU_AVAIL(co);
624
625         list_for_each_entry(unit_t, curr, &co->units, units) {
626                 res += curr->inevitable_costs;
627                 for (i=1; i<curr->node_count; ++i)
628                         res += curr->costs[i];
629         }
630         return res;
631 }
632
633 int co_get_inevit_copy_costs(const copy_opt_t *co) {
634         int res = 0;
635         unit_t *curr;
636
637         ASSERT_OU_AVAIL(co);
638
639         list_for_each_entry(unit_t, curr, &co->units, units)
640                 res += curr->inevitable_costs;
641         return res;
642 }
643
644 int co_get_copy_costs(const copy_opt_t *co) {
645         int i, res = 0;
646         unit_t *curr;
647
648         ASSERT_OU_AVAIL(co);
649
650         list_for_each_entry(unit_t, curr, &co->units, units) {
651                 int root_col = get_irn_col(curr->nodes[0]);
652                 DBG((dbg, LEVEL_1, "  %3d costs for root %+F color %d\n", curr->inevitable_costs, curr->nodes[0], root_col));
653                 res += curr->inevitable_costs;
654                 for (i=1; i<curr->node_count; ++i) {
655                         int arg_col = get_irn_col(curr->nodes[i]);
656                         if (root_col != arg_col) {
657                                 DBG((dbg, LEVEL_1, "  %3d for arg %+F color %d\n", curr->costs[i], curr->nodes[i], arg_col));
658                                 res += curr->costs[i];
659                         }
660                 }
661         }
662         return res;
663 }
664
665 int co_get_lower_bound(const copy_opt_t *co) {
666         int res = 0;
667         unit_t *curr;
668
669         ASSERT_OU_AVAIL(co);
670
671         list_for_each_entry(unit_t, curr, &co->units, units)
672                 res += curr->inevitable_costs + curr->min_nodes_costs;
673         return res;
674 }
675
676 void co_complete_stats(const copy_opt_t *co, co_complete_stats_t *stat)
677 {
678         bitset_t *seen = bitset_irg_malloc(co->irg);
679         affinity_node_t *an;
680
681         memset(stat, 0, sizeof(stat[0]));
682
683         /* count affinity edges. */
684         co_gs_foreach_aff_node(co, an) {
685                 neighb_t *neigh;
686                 stat->aff_nodes += 1;
687                 bitset_add_irn(seen, an->irn);
688                 co_gs_foreach_neighb(an, neigh) {
689                         if(!bitset_contains_irn(seen, neigh->irn)) {
690                                 stat->aff_edges += 1;
691                                 stat->max_costs += neigh->costs;
692
693                                 if (get_irn_col(an->irn) != get_irn_col(neigh->irn)) {
694                                         stat->costs += neigh->costs;
695                                         stat->unsatisfied_edges += 1;
696                                 }
697
698                                 if(nodes_interfere(co->cenv, an->irn, neigh->irn)) {
699                                         stat->aff_int += 1;
700                                         stat->inevit_costs += neigh->costs;
701                                 }
702
703                         }
704                 }
705         }
706
707         bitset_free(seen);
708 }
709
710 /******************************************************************************
711    _____                 _        _____ _
712   / ____|               | |      / ____| |
713  | |  __ _ __ __ _ _ __ | |__   | (___ | |_ ___  _ __ __ _  __ _  ___
714  | | |_ | '__/ _` | '_ \| '_ \   \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
715  | |__| | | | (_| | |_) | | | |  ____) | || (_) | | | (_| | (_| |  __/
716   \_____|_|  \__,_| .__/|_| |_| |_____/ \__\___/|_|  \__,_|\__, |\___|
717                   | |                                       __/ |
718                   |_|                                      |___/
719  ******************************************************************************/
720
721 static int compare_affinity_node_t(const void *k1, const void *k2, size_t size) {
722         const affinity_node_t *n1 = k1;
723         const affinity_node_t *n2 = k2;
724         (void) size;
725
726         return (n1->irn != n2->irn);
727 }
728
729 static void add_edge(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
730         affinity_node_t new_node, *node;
731         neighb_t        *nbr;
732         int             allocnew = 1;
733
734         new_node.irn        = n1;
735         new_node.degree     = 0;
736         new_node.neighbours = NULL;
737         node = set_insert(co->nodes, &new_node, sizeof(new_node), hash_irn(new_node.irn));
738
739         for (nbr = node->neighbours; nbr; nbr = nbr->next)
740                 if (nbr->irn == n2) {
741                         allocnew = 0;
742                         break;
743                 }
744
745         /* if we did not find n2 in n1's neighbourhood insert it */
746         if (allocnew) {
747                 nbr        = obstack_alloc(&co->obst, sizeof(*nbr));
748                 nbr->irn   = n2;
749                 nbr->costs = 0;
750                 nbr->next  = node->neighbours;
751
752                 node->neighbours = nbr;
753                 node->degree++;
754         }
755
756         /* now nbr points to n1's neighbour-entry of n2 */
757         nbr->costs += costs;
758 }
759
760 static INLINE void add_edges(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
761         if (! be_ifg_connected(co->cenv->ifg, n1, n2)) {
762                 add_edge(co, n1, n2, costs);
763                 add_edge(co, n2, n1, costs);
764         }
765 }
766
767 static void build_graph_walker(ir_node *irn, void *env) {
768         copy_opt_t *co = env;
769         int pos, max;
770         const arch_register_t *reg;
771
772         if (!is_curr_reg_class(co, irn) || arch_irn_is(irn, ignore))
773                 return;
774
775         reg = arch_get_irn_register(irn);
776         if (arch_register_type_is(reg, ignore))
777                 return;
778
779         if (is_Reg_Phi(irn)) { /* Phis */
780                 for (pos=0, max=get_irn_arity(irn); pos<max; ++pos) {
781                         ir_node *arg = get_irn_n(irn, pos);
782                         add_edges(co, irn, arg, co->get_costs(co, irn, arg, pos));
783                 }
784         } else if (is_Perm_Proj(irn)) { /* Perms */
785                 ir_node *arg = get_Perm_src(irn);
786                 add_edges(co, irn, arg, co->get_costs(co, irn, arg, 0));
787         }
788         else { /* 2-address code */
789                 const arch_register_req_t *req = arch_get_register_req(irn, -1);
790                 if (is_2addr_code(req)) {
791                         const unsigned other = req->other_same;
792                         int i;
793
794                         for (i = 0; 1U << i <= other; ++i) {
795                                 if (other & (1U << i)) {
796                                         ir_node *other = get_irn_n(skip_Proj(irn), i);
797                                         if (!arch_irn_is(other, ignore))
798                                                 add_edges(co, irn, other, co->get_costs(co, irn, other, 0));
799                                 }
800                         }
801                 }
802         }
803 }
804
805 void co_build_graph_structure(copy_opt_t *co) {
806         obstack_init(&co->obst);
807         co->nodes = new_set(compare_affinity_node_t, 32);
808
809         irg_walk_graph(co->irg, build_graph_walker, NULL, co);
810 }
811
812 void co_free_graph_structure(copy_opt_t *co) {
813         ASSERT_GS_AVAIL(co);
814
815         del_set(co->nodes);
816         obstack_free(&co->obst, NULL);
817         co->nodes = NULL;
818 }
819
820 /* co_solve_ilp1() co_solve_ilp2() are implemented in becopyilpX.c */
821
822 int co_gs_is_optimizable(copy_opt_t *co, ir_node *irn) {
823         affinity_node_t new_node, *n;
824
825         ASSERT_GS_AVAIL(co);
826
827         new_node.irn = irn;
828         n = set_find(co->nodes, &new_node, sizeof(new_node), hash_irn(new_node.irn));
829         if (n) {
830                 return (n->degree > 0);
831         } else
832                 return 0;
833 }
834
835 static int co_dump_appel_disjoint_constraints(const copy_opt_t *co, ir_node *a, ir_node *b)
836 {
837         ir_node *nodes[]  = { a, b };
838         bitset_t *constr[] = { NULL, NULL };
839         const arch_register_req_t *req;
840         int j;
841
842         constr[0] = bitset_alloca(co->cls->n_regs);
843         constr[1] = bitset_alloca(co->cls->n_regs);
844
845         for (j = 0; j < 2; ++j) {
846                 req = arch_get_register_req(nodes[j], BE_OUT_POS(0));
847                 if(arch_register_req_is(req, limited))
848                         rbitset_copy_to_bitset(req->limited, constr[j]);
849                 else
850                         bitset_set_all(constr[j]);
851
852         }
853
854         return !bitset_intersect(constr[0], constr[1]);
855 }
856
857 void co_dump_appel_graph(const copy_opt_t *co, FILE *f)
858 {
859         be_ifg_t *ifg  = co->cenv->ifg;
860         int *color_map = alloca(co->cls->n_regs * sizeof(color_map[0]));
861         int *node_map  = XMALLOCN(int, get_irg_last_idx(co->irg) + 1);
862
863         ir_node *irn;
864         void *it, *nit;
865         int n, n_regs;
866         unsigned i;
867
868         n_regs = 0;
869         for(i = 0; i < co->cls->n_regs; ++i) {
870                 const arch_register_t *reg = &co->cls->regs[i];
871                 color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_regs++;
872         }
873
874         /*
875          * n contains the first node number.
876          * the values below n are the pre-colored register nodes
877          */
878
879         it  = be_ifg_nodes_iter_alloca(ifg);
880         nit = be_ifg_neighbours_iter_alloca(ifg);
881
882         n = n_regs;
883         be_ifg_foreach_node(ifg, it, irn) {
884                 if (!arch_irn_is(irn, ignore))
885                         node_map[get_irn_idx(irn)] = n++;
886         }
887
888         fprintf(f, "%d %d\n", n, n_regs);
889
890         be_ifg_foreach_node(ifg, it, irn) {
891                 if (!arch_irn_is(irn, ignore)) {
892                         int idx            = node_map[get_irn_idx(irn)];
893                         affinity_node_t *a = get_affinity_info(co, irn);
894
895                         const arch_register_req_t *req;
896                         ir_node *adj;
897
898                         req = arch_get_register_req(irn, BE_OUT_POS(0));
899                         if(arch_register_req_is(req, limited)) {
900                                 for(i = 0; i < co->cls->n_regs; ++i) {
901                                         if(!rbitset_is_set(req->limited, i) && color_map[i] >= 0)
902                                                 fprintf(f, "%d %d -1\n", color_map[i], idx);
903                                 }
904                         }
905
906                         be_ifg_foreach_neighbour(ifg, nit, irn, adj) {
907                                 if (!arch_irn_is(adj, ignore) &&
908                                                 !co_dump_appel_disjoint_constraints(co, irn, adj)) {
909                                         int adj_idx = node_map[get_irn_idx(adj)];
910                                         if(idx < adj_idx)
911                                                 fprintf(f, "%d %d -1\n", idx, adj_idx);
912                                 }
913                         }
914
915                         if(a) {
916                                 neighb_t *n;
917
918                                 co_gs_foreach_neighb(a, n) {
919                                         if (!arch_irn_is(n->irn, ignore)) {
920                                                 int n_idx = node_map[get_irn_idx(n->irn)];
921                                                 if(idx < n_idx)
922                                                         fprintf(f, "%d %d %d\n", idx, n_idx, (int) n->costs);
923                                         }
924                                 }
925                         }
926                 }
927         }
928
929         xfree(node_map);
930 }
931
932 /*
933          ___ _____ ____   ____   ___ _____   ____                        _
934         |_ _|  ___/ ___| |  _ \ / _ \_   _| |  _ \ _   _ _ __ ___  _ __ (_)_ __   __ _
935          | || |_ | |  _  | | | | | | || |   | | | | | | | '_ ` _ \| '_ \| | '_ \ / _` |
936          | ||  _|| |_| | | |_| | |_| || |   | |_| | |_| | | | | | | |_) | | | | | (_| |
937         |___|_|   \____| |____/ \___/ |_|   |____/ \__,_|_| |_| |_| .__/|_|_| |_|\__, |
938                                                                   |_|            |___/
939 */
940
941 static const char *get_dot_color_name(size_t col)
942 {
943         static const char *names[] = {
944                 "blue",
945                 "red",
946                 "green",
947                 "yellow",
948                 "cyan",
949                 "magenta",
950                 "orange",
951                 "chocolate",
952                 "beige",
953                 "navy",
954                 "darkgreen",
955                 "darkred",
956                 "lightPink",
957                 "chartreuse",
958                 "lightskyblue",
959                 "linen",
960                 "pink",
961                 "lightslateblue",
962                 "mintcream",
963                 "red",
964                 "darkolivegreen",
965                 "mediumblue",
966                 "mistyrose",
967                 "salmon",
968                 "darkseagreen",
969                 "mediumslateblue"
970                 "moccasin",
971                 "tomato",
972                 "forestgreen",
973                 "darkturquoise",
974                 "palevioletred"
975         };
976
977         return col < sizeof(names)/sizeof(names[0]) ? names[col] : "white";
978 }
979
980 typedef struct _co_ifg_dump_t {
981         const copy_opt_t *co;
982         unsigned flags;
983 } co_ifg_dump_t;
984
985 static void ifg_dump_graph_attr(FILE *f, void *self)
986 {
987         (void) self;
988         fprintf(f, "overlap=scale");
989 }
990
991 static int ifg_is_dump_node(void *self, ir_node *irn)
992 {
993         (void)self;
994         return !arch_irn_is(irn, ignore);
995 }
996
997 static void ifg_dump_node_attr(FILE *f, void *self, ir_node *irn)
998 {
999         co_ifg_dump_t *env         = self;
1000         const arch_register_t *reg = arch_get_irn_register(irn);
1001         const arch_register_req_t *req;
1002         int limited;
1003
1004         req = arch_get_register_req(irn, BE_OUT_POS(0));
1005         limited = arch_register_req_is(req, limited);
1006
1007         if(env->flags & CO_IFG_DUMP_LABELS) {
1008                 ir_fprintf(f, "label=\"%+F", irn);
1009
1010                 if((env->flags & CO_IFG_DUMP_CONSTR) && limited) {
1011                         bitset_t *bs = bitset_alloca(env->co->cls->n_regs);
1012                         rbitset_copy_to_bitset(req->limited, bs);
1013                         ir_fprintf(f, "\\n%B", bs);
1014                 }
1015                 ir_fprintf(f, "\" ");
1016         } else {
1017                 fprintf(f, "label=\"\" shape=point " );
1018         }
1019
1020         if(env->flags & CO_IFG_DUMP_SHAPE)
1021                 fprintf(f, "shape=%s ", limited ? "diamond" : "ellipse");
1022
1023         if(env->flags & CO_IFG_DUMP_COLORS)
1024                 fprintf(f, "style=filled color=%s ", get_dot_color_name(reg->index));
1025 }
1026
1027 static void ifg_dump_at_end(FILE *file, void *self)
1028 {
1029         co_ifg_dump_t *env = self;
1030         affinity_node_t *a;
1031
1032         co_gs_foreach_aff_node(env->co, a) {
1033                 const arch_register_t *ar = arch_get_irn_register(a->irn);
1034                 unsigned aidx = get_irn_idx(a->irn);
1035                 neighb_t *n;
1036
1037                 co_gs_foreach_neighb(a, n) {
1038                         const arch_register_t *nr = arch_get_irn_register(n->irn);
1039                         unsigned nidx = get_irn_idx(n->irn);
1040
1041                         if(aidx < nidx) {
1042                                 const char *color = nr == ar ? "blue" : "red";
1043                                 fprintf(file, "\tn%d -- n%d [weight=0.01 ", aidx, nidx);
1044                                 if(env->flags & CO_IFG_DUMP_LABELS)
1045                                         fprintf(file, "label=\"%d\" ", n->costs);
1046                                 if(env->flags & CO_IFG_DUMP_COLORS)
1047                                         fprintf(file, "color=%s ", color);
1048                                 else
1049                                         fprintf(file, "style=dotted");
1050                                 fprintf(file, "];\n");
1051                         }
1052                 }
1053         }
1054 }
1055
1056
1057 static be_ifg_dump_dot_cb_t ifg_dot_cb = {
1058         ifg_is_dump_node,
1059         ifg_dump_graph_attr,
1060         ifg_dump_node_attr,
1061         NULL,
1062         NULL,
1063         ifg_dump_at_end
1064 };
1065
1066
1067
1068 void co_dump_ifg_dot(const copy_opt_t *co, FILE *f, unsigned flags)
1069 {
1070         co_ifg_dump_t cod;
1071
1072         cod.co    = co;
1073         cod.flags = flags;
1074         be_ifg_dump_dot(co->cenv->ifg, co->irg, f, &ifg_dot_cb, &cod);
1075 }
1076
1077
1078 void co_solve_park_moon(copy_opt_t *opt)
1079 {
1080         (void) opt;
1081 }
1082
1083 static int void_algo(copy_opt_t *co)
1084 {
1085         (void) co;
1086         return 0;
1087 }
1088
1089 /*
1090                 _    _                  _ _   _
1091            / \  | | __ _  ___  _ __(_) |_| |__  _ __ ___  ___
1092           / _ \ | |/ _` |/ _ \| '__| | __| '_ \| '_ ` _ \/ __|
1093          / ___ \| | (_| | (_) | |  | | |_| | | | | | | | \__ \
1094         /_/   \_\_|\__, |\___/|_|  |_|\__|_| |_|_| |_| |_|___/
1095                            |___/
1096 */
1097
1098 typedef struct {
1099         co_algo_t  *algo;
1100         const char *name;
1101         int        can_improve_existing;
1102 } co_algo_info_t;
1103
1104 static co_algo_info_t algos[] = {
1105         { void_algo,               "none",  0 },
1106         { co_solve_heuristic,      "heur1", 0 },
1107         { co_solve_heuristic_new,  "heur2", 0 },
1108 #ifdef WITH_JVM
1109         { co_solve_heuristic_java, "heur3", 0 },
1110 #else
1111         { NULL,                    "heur3", 0 },
1112 #endif
1113         { co_solve_heuristic_mst,  "heur4", 0 },
1114 #ifdef WITH_ILP
1115         { co_solve_ilp2,           "ilp",   1 },
1116 #else
1117         { NULL,                    "ilp",   1 },
1118 #endif
1119         { NULL,                    "",      0 }
1120 };
1121
1122 /*
1123     __  __       _         ____       _
1124    |  \/  | __ _(_)_ __   |  _ \ _ __(_)_   _____ _ __
1125    | |\/| |/ _` | | '_ \  | | | | '__| \ \ / / _ \ '__|
1126    | |  | | (_| | | | | | | |_| | |  | |\ V /  __/ |
1127    |_|  |_|\__,_|_|_| |_| |____/|_|  |_| \_/ \___|_|
1128
1129 */
1130
1131 static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char *suffix)
1132 {
1133         FILE *result;
1134         char buf[1024];
1135         size_t i, n;
1136         char *tu_name;
1137
1138         n = strlen(env->birg->main_env->cup_name);
1139         tu_name = XMALLOCN(char, n + 1);
1140         strcpy(tu_name, env->birg->main_env->cup_name);
1141         for (i = 0; i < n; ++i)
1142                 if (tu_name[i] == '.')
1143                         tu_name[i] = '_';
1144
1145
1146         ir_snprintf(buf, sizeof(buf), "%s%s_%F_%s%s", prefix, tu_name, env->irg, env->cls->name, suffix);
1147         xfree(tu_name);
1148         result = fopen(buf, "wt");
1149         if(result == NULL) {
1150                 panic("Couldn't open '%s' for writing.", buf);
1151         }
1152
1153         return result;
1154 }
1155
1156 void co_driver(be_chordal_env_t *cenv)
1157 {
1158         ir_timer_t          *timer = ir_timer_register("firm.be.copyopt", "runtime");
1159         co_complete_stats_t before, after;
1160         copy_opt_t          *co;
1161         co_algo_t           *algo_func;
1162         int                 was_optimal = 0;
1163
1164         if (algo >= CO_ALGO_LAST)
1165                 return;
1166
1167         be_liveness_assure_chk(be_get_birg_liveness(cenv->birg));
1168
1169         co = new_copy_opt(cenv, cost_func);
1170         co_build_ou_structure(co);
1171         co_build_graph_structure(co);
1172
1173         co_complete_stats(co, &before);
1174
1175         be_stat_ev_ull("co_aff_nodes",    before.aff_nodes);
1176         be_stat_ev_ull("co_aff_edges",    before.aff_edges);
1177         be_stat_ev_ull("co_max_costs",    before.max_costs);
1178         be_stat_ev_ull("co_inevit_costs", before.inevit_costs);
1179         be_stat_ev_ull("co_aff_int",      before.aff_int);
1180
1181         be_stat_ev_ull("co_init_costs",   before.costs);
1182         be_stat_ev_ull("co_init_unsat",   before.unsatisfied_edges);
1183
1184         if (dump_flags & DUMP_BEFORE) {
1185                 FILE *f = my_open(cenv, "", "-before.dot");
1186                 co_dump_ifg_dot(co, f, style_flags);
1187                 fclose(f);
1188         }
1189
1190         /* if the algo can improve results, provide an initial solution with heur3 */
1191         if (improve && algos[algo].can_improve_existing) {
1192                 co_complete_stats_t stats;
1193
1194                 /* produce a heuristic solution */
1195 #ifdef WITH_JVM
1196                 co_solve_heuristic_java(co);
1197 #else
1198                 co_solve_heuristic(co);
1199 #endif /* WITH_JVM */
1200
1201                 /* do the stats and provide the current costs */
1202                 co_complete_stats(co, &stats);
1203                 be_stat_ev_ull("co_prepare_costs", stats.costs);
1204         }
1205
1206 #ifdef WITH_JVM
1207         /* start the JVM here so that it does not tamper the timing. */
1208         if (algo == CO_ALGO_HEUR3)
1209                 be_java_coal_start_jvm();
1210 #endif /* WITH_JVM */
1211
1212         algo_func = algos[algo].algo;
1213
1214         /* perform actual copy minimization */
1215         ir_timer_reset_and_start(timer);
1216         was_optimal = algo_func(co);
1217         ir_timer_stop(timer);
1218
1219         be_stat_ev("co_time", ir_timer_elapsed_msec(timer));
1220         be_stat_ev_ull("co_optimal", was_optimal);
1221
1222         if (dump_flags & DUMP_AFTER) {
1223                 FILE *f = my_open(cenv, "", "-after.dot");
1224                 co_dump_ifg_dot(co, f, style_flags);
1225                 fclose(f);
1226         }
1227
1228         co_complete_stats(co, &after);
1229
1230         if (do_stats) {
1231                 ulong64 optimizable_costs = after.max_costs - after.inevit_costs;
1232                 ulong64 evitable          = after.costs     - after.inevit_costs;
1233
1234                 ir_printf("%30F ", cenv->irg);
1235                 printf("%10s %10" ULL_FMT "%10" ULL_FMT "%10" ULL_FMT, cenv->cls->name, after.max_costs, before.costs, after.inevit_costs);
1236
1237                 if(optimizable_costs > 0)
1238                         printf("%10" ULL_FMT " %5.2f\n", after.costs, (evitable * 100.0) / optimizable_costs);
1239                 else
1240                         printf("%10" ULL_FMT " %5s\n", after.costs, "-");
1241         }
1242
1243         /* Dump the interference graph in Appel's format. */
1244         if (dump_flags & DUMP_APPEL) {
1245                 FILE *f = my_open(cenv, "", ".apl");
1246                 fprintf(f, "# %lld %lld\n", after.costs, after.unsatisfied_edges);
1247                 co_dump_appel_graph(co, f);
1248                 fclose(f);
1249         }
1250
1251         be_stat_ev_ull("co_after_costs", after.costs);
1252         be_stat_ev_ull("co_after_unsat", after.unsatisfied_edges);
1253
1254         co_free_graph_structure(co);
1255         co_free_ou_structure(co);
1256         free_copy_opt(co);
1257 }