065b43d34a6fa396567b288af446caca78d1e5df
[libfirm] / ir / be / becopyopt.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Copy minimization driver.
23  * @author      Daniel Grund
24  * @date        12.04.2005
25  * @version     $Id$
26  *
27  * Main file for the optimization reducing the copies needed for:
28  * - Phi coalescing
29  * - Register-constrained nodes
30  * - Two-address code instructions
31  */
32 #ifdef HAVE_CONFIG_H
33 #include "config.h"
34 #endif
35
36 #include "execfreq.h"
37 #include "xmalloc.h"
38 #include "debug.h"
39 #include "pmap.h"
40 #include "raw_bitset.h"
41 #include "irnode.h"
42 #include "irgraph.h"
43 #include "irgwalk.h"
44 #include "irprog.h"
45 #include "irloop_t.h"
46 #include "iredges_t.h"
47 #include "phiclass.h"
48 #include "irbitset.h"
49 #include "irphase_t.h"
50 #include "irprintf_t.h"
51
52 #include "bemodule.h"
53 #include "bearch_t.h"
54 #include "benode_t.h"
55 #include "beutil.h"
56 #include "beifg_t.h"
57 #include "beintlive_t.h"
58 #include "becopyopt_t.h"
59 #include "becopystat.h"
60 #include "belive_t.h"
61 #include "beinsn_t.h"
62 #include "besched_t.h"
63 #include "bestatevent.h"
64 #include "beirg_t.h"
65 #include "error.h"
66
67 #include "lc_opts.h"
68 #include "lc_opts_enum.h"
69
70 #define DUMP_BEFORE 1
71 #define DUMP_AFTER  2
72 #define DUMP_APPEL  4
73 #define DUMP_ALL    2 * DUMP_APPEL - 1
74
75 #define COST_FUNC_FREQ     1
76 #define COST_FUNC_LOOP     2
77 #define COST_FUNC_ALL_ONE  3
78
79 static unsigned   dump_flags  = 0;
80 static unsigned   style_flags = 0;
81 static unsigned   do_stats    = 0;
82 static cost_fct_t cost_func   = co_get_costs_exec_freq;
83 static unsigned   algo        = CO_ALGO_HEUR4;
84 static int        improve     = 1;
85
86 static const lc_opt_enum_mask_items_t dump_items[] = {
87         { "before",  DUMP_BEFORE },
88         { "after",   DUMP_AFTER  },
89         { "appel",   DUMP_APPEL  },
90         { "all",     DUMP_ALL    },
91         { NULL,      0 }
92 };
93
94 static const lc_opt_enum_mask_items_t style_items[] = {
95         { "color",   CO_IFG_DUMP_COLORS },
96         { "labels",  CO_IFG_DUMP_LABELS },
97         { "constr",  CO_IFG_DUMP_CONSTR },
98         { "shape",   CO_IFG_DUMP_SHAPE  },
99         { "full",    2 * CO_IFG_DUMP_SHAPE - 1 },
100         { NULL,      0 }
101 };
102
103 static const lc_opt_enum_mask_items_t algo_items[] = {
104         { "none",   CO_ALGO_NONE  },
105         { "heur",   CO_ALGO_HEUR  },
106         { "heur2",  CO_ALGO_HEUR2 },
107         { "heur3",  CO_ALGO_HEUR3 },
108         { "heur4",  CO_ALGO_HEUR4 },
109         { "ilp",    CO_ALGO_ILP   },
110         { NULL,     0 }
111 };
112
113 typedef int (*opt_funcptr)(void);
114
115 static const lc_opt_enum_func_ptr_items_t cost_func_items[] = {
116         { "freq",   (opt_funcptr) co_get_costs_exec_freq },
117         { "loop",   (opt_funcptr) co_get_costs_loop_depth },
118         { "one",    (opt_funcptr) co_get_costs_all_one },
119         { NULL,     NULL }
120 };
121
122 static lc_opt_enum_mask_var_t dump_var = {
123         &dump_flags, dump_items
124 };
125
126 static lc_opt_enum_mask_var_t style_var = {
127         &style_flags, style_items
128 };
129
130 static lc_opt_enum_mask_var_t algo_var = {
131         &algo, algo_items
132 };
133
134 static lc_opt_enum_func_ptr_var_t cost_func_var = {
135         (opt_funcptr*) &cost_func, cost_func_items
136 };
137
138 static const lc_opt_table_entry_t options[] = {
139         LC_OPT_ENT_ENUM_INT      ("algo",    "select copy optimization algo",                           &algo_var),
140         LC_OPT_ENT_ENUM_FUNC_PTR ("cost",    "select a cost function",                                  &cost_func_var),
141         LC_OPT_ENT_ENUM_MASK     ("dump",    "dump ifg before or after copy optimization",              &dump_var),
142         LC_OPT_ENT_ENUM_MASK     ("style",   "dump style for ifg dumping",                              &style_var),
143         LC_OPT_ENT_BOOL          ("stats",   "dump statistics after each optimization",                 &do_stats),
144         LC_OPT_ENT_BOOL          ("improve", "run heur3 before if algo can exploit start solutions",    &improve),
145         LC_OPT_LAST
146 };
147
148 /* Insert additional options registration functions here. */
149 extern void be_co_ilp_register_options(lc_opt_entry_t *grp);
150 extern void be_co2_register_options(lc_opt_entry_t *grp);
151 extern void be_co3_register_options(lc_opt_entry_t *grp);
152
153 void be_init_copycoal(void)
154 {
155         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
156         lc_opt_entry_t *ra_grp = lc_opt_get_grp(be_grp, "ra");
157         lc_opt_entry_t *chordal_grp = lc_opt_get_grp(ra_grp, "chordal");
158         lc_opt_entry_t *co_grp = lc_opt_get_grp(chordal_grp, "co");
159
160         lc_opt_add_table(co_grp, options);
161 }
162
163 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copycoal);
164
165 #undef QUICK_AND_DIRTY_HACK
166
167 static int nodes_interfere(const be_chordal_env_t *env, const ir_node *a, const ir_node *b)
168 {
169         if (env->ifg)
170                 return be_ifg_connected(env->ifg, a, b);
171         else
172                 return values_interfere(env->birg, a, b);
173 }
174
175
176 /******************************************************************************
177     _____                           _
178    / ____|                         | |
179   | |  __  ___ _ __   ___ _ __ __ _| |
180   | | |_ |/ _ \ '_ \ / _ \ '__/ _` | |
181   | |__| |  __/ | | |  __/ | | (_| | |
182    \_____|\___|_| |_|\___|_|  \__,_|_|
183
184  ******************************************************************************/
185
186 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
187
188
189 copy_opt_t *new_copy_opt(be_chordal_env_t *chordal_env, cost_fct_t get_costs)
190 {
191         const char *s1, *s2, *s3;
192         int len;
193         copy_opt_t *co;
194
195         FIRM_DBG_REGISTER(dbg, "ir.be.copyopt");
196
197         co = XMALLOCZ(copy_opt_t);
198         co->cenv      = chordal_env;
199         co->aenv      = chordal_env->birg->main_env->arch_env;
200         co->irg       = chordal_env->irg;
201         co->cls       = chordal_env->cls;
202         co->get_costs = get_costs;
203
204         s1 = get_irp_prog_name();
205         s2 = get_entity_name(get_irg_entity(co->irg));
206         s3 = chordal_env->cls->name;
207         len = strlen(s1) + strlen(s2) + strlen(s3) + 5;
208         co->name = XMALLOCN(char, len);
209         snprintf(co->name, len, "%s__%s__%s", s1, s2, s3);
210
211         return co;
212 }
213
214 void free_copy_opt(copy_opt_t *co) {
215         xfree(co->name);
216         free(co);
217 }
218
219 int co_is_optimizable_root(const copy_opt_t *co, ir_node *irn) {
220         const arch_register_req_t *req;
221         const arch_register_t *reg;
222
223         if (arch_irn_is(co->aenv, irn, ignore))
224                 return 0;
225
226         reg = arch_get_irn_register(irn);
227         if (arch_register_type_is(reg, ignore))
228                 return 0;
229
230         req = arch_get_register_req(irn, -1);
231         if (is_Reg_Phi(irn) || is_Perm_Proj(irn) || is_2addr_code(req))
232                 return 1;
233
234         return 0;
235 }
236
237 int co_get_costs_loop_depth(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
238         int cost = 0;
239         ir_loop *loop;
240         ir_node *root_block = get_nodes_block(root);
241         (void) co;
242         (void) arg;
243
244         if (is_Phi(root)) {
245                 /* for phis the copies are placed in the corresponding pred-block */
246                 loop = get_irn_loop(get_Block_cfgpred_block(root_block, pos));
247         } else {
248                 /* a perm places the copy in the same block as it resides */
249                 loop = get_irn_loop(root_block);
250         }
251         if (loop) {
252                 int d = get_loop_depth(loop);
253                 cost = d*d;
254         }
255         return 1+cost;
256 }
257
258 int co_get_costs_exec_freq(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
259         int res;
260         ir_node *root_bl = get_nodes_block(root);
261         ir_node *copy_bl = is_Phi(root) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
262         (void) arg;
263         res = get_block_execfreq_ulong(co->cenv->birg->exec_freq, copy_bl);
264
265         /* don't allow values smaller than one. */
266         return res < 1 ? 1 : res;
267 }
268
269
270 int co_get_costs_all_one(const copy_opt_t *co, ir_node *root, ir_node *arg, int pos) {
271         (void) co;
272         (void) root;
273         (void) arg;
274         (void) pos;
275         return 1;
276 }
277
278 /******************************************************************************
279    ____        _   _    _       _ _          _____ _
280   / __ \      | | | |  | |     (_) |        / ____| |
281  | |  | |_ __ | |_| |  | |_ __  _| |_ ___  | (___ | |_ ___  _ __ __ _  __ _  ___
282  | |  | | '_ \| __| |  | | '_ \| | __/ __|  \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
283  | |__| | |_) | |_| |__| | | | | | |_\__ \  ____) | || (_) | | | (_| | (_| |  __/
284   \____/| .__/ \__|\____/|_| |_|_|\__|___/ |_____/ \__\___/|_|  \__,_|\__, |\___|
285         | |                                                            __/ |
286         |_|                                                           |___/
287  ******************************************************************************/
288
289 /**
290  * Determines a maximum weighted independent set with respect to
291  * the interference and conflict edges of all nodes in a qnode.
292  */
293 static int ou_max_ind_set_costs(unit_t *ou) {
294         be_chordal_env_t *chordal_env = ou->co->cenv;
295         ir_node **safe, **unsafe;
296         int i, o, safe_count, safe_costs, unsafe_count, *unsafe_costs;
297         bitset_t *curr;
298         bitset_pos_t pos;
299         int max, curr_weight, best_weight = 0;
300
301         /* assign the nodes into two groups.
302          * safe: node has no interference, hence it is in every max stable set.
303          * unsafe: node has an interference
304          */
305         safe = alloca((ou->node_count-1) * sizeof(*safe));
306         safe_costs = 0;
307         safe_count = 0;
308         unsafe = alloca((ou->node_count-1) * sizeof(*unsafe));
309         unsafe_costs = alloca((ou->node_count-1) * sizeof(*unsafe_costs));
310         unsafe_count = 0;
311         for(i=1; i<ou->node_count; ++i) {
312                 int is_safe = 1;
313                 for(o=1; o<ou->node_count; ++o) {
314                         if (i==o)
315                                 continue;
316                         if (nodes_interfere(chordal_env, ou->nodes[i], ou->nodes[o])) {
317                                 unsafe_costs[unsafe_count] = ou->costs[i];
318                                 unsafe[unsafe_count] = ou->nodes[i];
319                                 ++unsafe_count;
320                                 is_safe = 0;
321                                 break;
322                         }
323                 }
324                 if (is_safe) {
325                         safe_costs += ou->costs[i];
326                         safe[safe_count++] = ou->nodes[i];
327                 }
328         }
329
330
331         /* now compute the best set out of the unsafe nodes*/
332         if (unsafe_count > MIS_HEUR_TRIGGER) {
333                 bitset_t *best = bitset_alloca(unsafe_count);
334                 /* Heuristik: Greedy trial and error form index 0 to unsafe_count-1 */
335                 for (i=0; i<unsafe_count; ++i) {
336                         bitset_set(best, i);
337                         /* check if it is a stable set */
338                         for (o=bitset_next_set(best, 0); o!=-1 && o<i; o=bitset_next_set(best, o+1))
339                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o])) {
340                                         bitset_clear(best, i); /* clear the bit and try next one */
341                                         break;
342                                 }
343                 }
344                 /* compute the weight */
345                 bitset_foreach(best, pos)
346                         best_weight += unsafe_costs[pos];
347         } else {
348                 /* Exact Algorithm: Brute force */
349                 curr = bitset_alloca(unsafe_count);
350                 bitset_set_all(curr);
351                 while ((max = bitset_popcnt(curr)) != 0) {
352                         /* check if curr is a stable set */
353                         for (i=bitset_next_set(curr, 0); i!=-1; i=bitset_next_set(curr, i+1))
354                                 for (o=bitset_next_set(curr, i+1); o!=-1; o=bitset_next_set(curr, o+1)) /* !!!!! difference to qnode_max_ind_set(): NOT (curr, i) */
355                                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o]))
356                                                         goto no_stable_set;
357
358                         /* if we arrive here, we have a stable set */
359                         /* compute the weigth of the stable set*/
360                         curr_weight = 0;
361                         bitset_foreach(curr, pos)
362                                 curr_weight += unsafe_costs[pos];
363
364                         /* any better ? */
365                         if (curr_weight > best_weight) {
366                                 best_weight = curr_weight;
367                         }
368
369         no_stable_set:
370                         bitset_minus1(curr);
371                 }
372         }
373
374         return safe_costs+best_weight;
375 }
376
377 static void co_collect_units(ir_node *irn, void *env) {
378         copy_opt_t *co = env;
379         unit_t *unit;
380
381         if (!is_curr_reg_class(co, irn))
382                 return;
383         if (!co_is_optimizable_root(co, irn))
384                 return;
385
386         /* Init a new unit */
387         unit = XMALLOCZ(unit_t);
388         unit->co = co;
389         unit->node_count = 1;
390         INIT_LIST_HEAD(&unit->queue);
391
392         /* Phi with some/all of its arguments */
393         if (is_Reg_Phi(irn)) {
394                 int i, arity;
395
396                 /* init */
397                 arity = get_irn_arity(irn);
398                 unit->nodes = XMALLOCN(ir_node*, arity + 1);
399                 unit->costs = XMALLOCN(int,      arity + 1);
400                 unit->nodes[0] = irn;
401
402                 /* fill */
403                 for (i=0; i<arity; ++i) {
404                         int o, arg_pos;
405                         ir_node *arg = get_irn_n(irn, i);
406
407                         assert(is_curr_reg_class(co, arg) && "Argument not in same register class.");
408                         if (arg == irn)
409                                 continue;
410                         if (nodes_interfere(co->cenv, irn, arg)) {
411                                 unit->inevitable_costs += co->get_costs(co, irn, arg, i);
412                                 continue;
413                         }
414
415                         /* Else insert the argument of the phi to the members of this ou */
416                         DBG((dbg, LEVEL_1, "\t   Member: %+F\n", arg));
417
418                         if (! arch_irn_is(co->aenv, arg, ignore)) {
419                                 /* Check if arg has occurred at a prior position in the arg/list */
420                                 arg_pos = 0;
421                                 for (o=1; o<unit->node_count; ++o) {
422                                         if (unit->nodes[o] == arg) {
423                                                 arg_pos = o;
424                                                 break;
425                                         }
426                                 }
427
428                                 if (!arg_pos) { /* a new argument */
429                                         /* insert node, set costs */
430                                         unit->nodes[unit->node_count] = arg;
431                                         unit->costs[unit->node_count] = co->get_costs(co, irn, arg, i);
432                                         unit->node_count++;
433                                 } else { /* arg has occurred before in same phi */
434                                         /* increase costs for existing arg */
435                                         unit->costs[arg_pos] += co->get_costs(co, irn, arg, i);
436                                 }
437                         }
438                 }
439                 unit->nodes = XREALLOC(unit->nodes, ir_node*, unit->node_count);
440                 unit->costs = XREALLOC(unit->costs, int,      unit->node_count);
441         } else if (is_Perm_Proj(irn)) {
442                 /* Proj of a perm with corresponding arg */
443                 assert(!nodes_interfere(co->cenv, irn, get_Perm_src(irn)));
444                 unit->nodes = XMALLOCN(ir_node*, 2);
445                 unit->costs = XMALLOCN(int,      2);
446                 unit->node_count = 2;
447                 unit->nodes[0] = irn;
448                 unit->nodes[1] = get_Perm_src(irn);
449                 unit->costs[1] = co->get_costs(co, irn, unit->nodes[1], -1);
450         } else {
451                 const arch_register_req_t *req = arch_get_register_req(irn, -1);
452
453                 /* Src == Tgt of a 2-addr-code instruction */
454                 if (is_2addr_code(req)) {
455                         const unsigned other = req->other_same;
456                         int            count = 0;
457                         int            i;
458
459                         for (i = 0; (1U << i) <= other; ++i) {
460                                 if (other & (1U << i)) {
461                                         ir_node *o  = get_irn_n(skip_Proj(irn), i);
462                                         if (!arch_irn_is(co->aenv, o, ignore) &&
463                                                         !nodes_interfere(co->cenv, irn, o)) {
464                                                 ++count;
465                                         }
466                                 }
467                         }
468
469                         if (count != 0) {
470                                 int k = 0;
471                                 ++count;
472                                 unit->nodes = XMALLOCN(ir_node*, count);
473                                 unit->costs = XMALLOCN(int,      count);
474                                 unit->node_count = count;
475                                 unit->nodes[k++] = irn;
476
477                                 for (i = 0; 1U << i <= other; ++i) {
478                                         if (other & (1U << i)) {
479                                                 ir_node *o  = get_irn_n(skip_Proj(irn), i);
480                                                 if (!arch_irn_is(co->aenv, o, ignore) &&
481                                                                 !nodes_interfere(co->cenv, irn, o)) {
482                                                         unit->nodes[k] = o;
483                                                         unit->costs[k] = co->get_costs(co, irn, o, -1);
484                                                         ++k;
485                                                 }
486                                         }
487                                 }
488                         }
489                 } else {
490                         assert(0 && "This is not an optimizable node!");
491                 }
492         }
493
494         /* Insert the new unit at a position according to its costs */
495         if (unit->node_count > 1) {
496                 int i;
497                 struct list_head *tmp;
498
499                 /* Determine the maximum costs this unit can cause: all_nodes_cost */
500                 for(i=1; i<unit->node_count; ++i) {
501                         unit->sort_key = MAX(unit->sort_key, unit->costs[i]);
502                         unit->all_nodes_costs += unit->costs[i];
503                 }
504
505                 /* Determine the minimal costs this unit will cause: min_nodes_costs */
506                 unit->min_nodes_costs += unit->all_nodes_costs - ou_max_ind_set_costs(unit);
507                 /* Insert the new ou according to its sort_key */
508                 tmp = &co->units;
509                 while (tmp->next != &co->units && list_entry_units(tmp->next)->sort_key > unit->sort_key)
510                         tmp = tmp->next;
511                 list_add(&unit->units, tmp);
512         } else {
513                 free(unit);
514         }
515 }
516
517 #ifdef QUICK_AND_DIRTY_HACK
518
519 static int compare_ous(const void *k1, const void *k2) {
520         const unit_t *u1 = *((const unit_t **) k1);
521         const unit_t *u2 = *((const unit_t **) k2);
522         int i, o, u1_has_constr, u2_has_constr;
523         arch_register_req_t req;
524
525         /* Units with constraints come first */
526         u1_has_constr = 0;
527         for (i=0; i<u1->node_count; ++i) {
528                 arch_get_register_req(&req, u1->nodes[i], -1);
529                 if (arch_register_req_is(&req, limited)) {
530                         u1_has_constr = 1;
531                         break;
532                 }
533         }
534
535         u2_has_constr = 0;
536         for (i=0; i<u2->node_count; ++i) {
537                 arch_get_register_req(&req, u2->nodes[i], -1);
538                 if (arch_register_req_is(&req, limited)) {
539                         u2_has_constr = 1;
540                         break;
541                 }
542         }
543
544         if (u1_has_constr != u2_has_constr)
545                 return u2_has_constr - u1_has_constr;
546
547         /* Now check, whether the two units are connected */
548 #if 0
549         for (i=0; i<u1->node_count; ++i)
550                 for (o=0; o<u2->node_count; ++o)
551                         if (u1->nodes[i] == u2->nodes[o])
552                                 return 0;
553 #endif
554
555         /* After all, the sort key decides. Greater keys come first. */
556         return u2->sort_key - u1->sort_key;
557
558 }
559
560 /**
561  * Sort the ou's according to constraints and their sort_key
562  */
563 static void co_sort_units(copy_opt_t *co) {
564         int i, count = 0, costs;
565         unit_t *ou, **ous;
566
567         /* get the number of ous, remove them form the list and fill the array */
568         list_for_each_entry(unit_t, ou, &co->units, units)
569                 count++;
570         ous = alloca(count * sizeof(*ous));
571
572         costs = co_get_max_copy_costs(co);
573
574         i = 0;
575         list_for_each_entry(unit_t, ou, &co->units, units)
576                 ous[i++] = ou;
577
578         INIT_LIST_HEAD(&co->units);
579
580         assert(count == i && list_empty(&co->units));
581
582         for (i=0; i<count; ++i)
583                 ir_printf("%+F\n", ous[i]->nodes[0]);
584
585         qsort(ous, count, sizeof(*ous), compare_ous);
586
587         ir_printf("\n\n");
588         for (i=0; i<count; ++i)
589                 ir_printf("%+F\n", ous[i]->nodes[0]);
590
591         /* reinsert into list in correct order */
592         for (i=0; i<count; ++i)
593                 list_add_tail(&ous[i]->units, &co->units);
594
595         assert(costs == co_get_max_copy_costs(co));
596 }
597 #endif
598
599 void co_build_ou_structure(copy_opt_t *co) {
600         DBG((dbg, LEVEL_1, "\tCollecting optimization units\n"));
601         INIT_LIST_HEAD(&co->units);
602         irg_walk_graph(co->irg, co_collect_units, NULL, co);
603 #ifdef QUICK_AND_DIRTY_HACK
604         co_sort_units(co);
605 #endif
606 }
607
608 void co_free_ou_structure(copy_opt_t *co) {
609         unit_t *curr, *tmp;
610         ASSERT_OU_AVAIL(co);
611         list_for_each_entry_safe(unit_t, curr, tmp, &co->units, units) {
612                 xfree(curr->nodes);
613                 xfree(curr->costs);
614                 xfree(curr);
615         }
616         co->units.next = NULL;
617 }
618
619 /* co_solve_heuristic() is implemented in becopyheur.c */
620
621 int co_get_max_copy_costs(const copy_opt_t *co) {
622         int i, res = 0;
623         unit_t *curr;
624
625         ASSERT_OU_AVAIL(co);
626
627         list_for_each_entry(unit_t, curr, &co->units, units) {
628                 res += curr->inevitable_costs;
629                 for (i=1; i<curr->node_count; ++i)
630                         res += curr->costs[i];
631         }
632         return res;
633 }
634
635 int co_get_inevit_copy_costs(const copy_opt_t *co) {
636         int res = 0;
637         unit_t *curr;
638
639         ASSERT_OU_AVAIL(co);
640
641         list_for_each_entry(unit_t, curr, &co->units, units)
642                 res += curr->inevitable_costs;
643         return res;
644 }
645
646 int co_get_copy_costs(const copy_opt_t *co) {
647         int i, res = 0;
648         unit_t *curr;
649
650         ASSERT_OU_AVAIL(co);
651
652         list_for_each_entry(unit_t, curr, &co->units, units) {
653                 int root_col = get_irn_col(curr->nodes[0]);
654                 DBG((dbg, LEVEL_1, "  %3d costs for root %+F color %d\n", curr->inevitable_costs, curr->nodes[0], root_col));
655                 res += curr->inevitable_costs;
656                 for (i=1; i<curr->node_count; ++i) {
657                         int arg_col = get_irn_col(curr->nodes[i]);
658                         if (root_col != arg_col) {
659                                 DBG((dbg, LEVEL_1, "  %3d for arg %+F color %d\n", curr->costs[i], curr->nodes[i], arg_col));
660                                 res += curr->costs[i];
661                         }
662                 }
663         }
664         return res;
665 }
666
667 int co_get_lower_bound(const copy_opt_t *co) {
668         int res = 0;
669         unit_t *curr;
670
671         ASSERT_OU_AVAIL(co);
672
673         list_for_each_entry(unit_t, curr, &co->units, units)
674                 res += curr->inevitable_costs + curr->min_nodes_costs;
675         return res;
676 }
677
678 void co_complete_stats(const copy_opt_t *co, co_complete_stats_t *stat)
679 {
680         bitset_t *seen = bitset_irg_malloc(co->irg);
681         affinity_node_t *an;
682
683         memset(stat, 0, sizeof(stat[0]));
684
685         /* count affinity edges. */
686         co_gs_foreach_aff_node(co, an) {
687                 neighb_t *neigh;
688                 stat->aff_nodes += 1;
689                 bitset_add_irn(seen, an->irn);
690                 co_gs_foreach_neighb(an, neigh) {
691                         if(!bitset_contains_irn(seen, neigh->irn)) {
692                                 stat->aff_edges += 1;
693                                 stat->max_costs += neigh->costs;
694
695                                 if (get_irn_col(an->irn) != get_irn_col(neigh->irn)) {
696                                         stat->costs += neigh->costs;
697                                         stat->unsatisfied_edges += 1;
698                                 }
699
700                                 if(nodes_interfere(co->cenv, an->irn, neigh->irn)) {
701                                         stat->aff_int += 1;
702                                         stat->inevit_costs += neigh->costs;
703                                 }
704
705                         }
706                 }
707         }
708
709         bitset_free(seen);
710 }
711
712 /******************************************************************************
713    _____                 _        _____ _
714   / ____|               | |      / ____| |
715  | |  __ _ __ __ _ _ __ | |__   | (___ | |_ ___  _ __ __ _  __ _  ___
716  | | |_ | '__/ _` | '_ \| '_ \   \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
717  | |__| | | | (_| | |_) | | | |  ____) | || (_) | | | (_| | (_| |  __/
718   \_____|_|  \__,_| .__/|_| |_| |_____/ \__\___/|_|  \__,_|\__, |\___|
719                   | |                                       __/ |
720                   |_|                                      |___/
721  ******************************************************************************/
722
723 static int compare_affinity_node_t(const void *k1, const void *k2, size_t size) {
724         const affinity_node_t *n1 = k1;
725         const affinity_node_t *n2 = k2;
726         (void) size;
727
728         return (n1->irn != n2->irn);
729 }
730
731 static void add_edge(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
732         affinity_node_t new_node, *node;
733         neighb_t        *nbr;
734         int             allocnew = 1;
735
736         new_node.irn        = n1;
737         new_node.degree     = 0;
738         new_node.neighbours = NULL;
739         node = set_insert(co->nodes, &new_node, sizeof(new_node), hash_irn(new_node.irn));
740
741         for (nbr = node->neighbours; nbr; nbr = nbr->next)
742                 if (nbr->irn == n2) {
743                         allocnew = 0;
744                         break;
745                 }
746
747         /* if we did not find n2 in n1's neighbourhood insert it */
748         if (allocnew) {
749                 nbr        = obstack_alloc(&co->obst, sizeof(*nbr));
750                 nbr->irn   = n2;
751                 nbr->costs = 0;
752                 nbr->next  = node->neighbours;
753
754                 node->neighbours = nbr;
755                 node->degree++;
756         }
757
758         /* now nbr points to n1's neighbour-entry of n2 */
759         nbr->costs += costs;
760 }
761
762 static INLINE void add_edges(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
763         if (! be_ifg_connected(co->cenv->ifg, n1, n2)) {
764                 add_edge(co, n1, n2, costs);
765                 add_edge(co, n2, n1, costs);
766         }
767 }
768
769 static void build_graph_walker(ir_node *irn, void *env) {
770         copy_opt_t *co = env;
771         int pos, max;
772         const arch_register_t *reg;
773
774         if (!is_curr_reg_class(co, irn) || arch_irn_is(co->aenv, irn, ignore))
775                 return;
776
777         reg = arch_get_irn_register(irn);
778         if (arch_register_type_is(reg, ignore))
779                 return;
780
781         if (is_Reg_Phi(irn)) { /* Phis */
782                 for (pos=0, max=get_irn_arity(irn); pos<max; ++pos) {
783                         ir_node *arg = get_irn_n(irn, pos);
784                         add_edges(co, irn, arg, co->get_costs(co, irn, arg, pos));
785                 }
786         } else if (is_Perm_Proj(irn)) { /* Perms */
787                 ir_node *arg = get_Perm_src(irn);
788                 add_edges(co, irn, arg, co->get_costs(co, irn, arg, 0));
789         }
790         else { /* 2-address code */
791                 const arch_register_req_t *req = arch_get_register_req(irn, -1);
792                 if (is_2addr_code(req)) {
793                         const unsigned other = req->other_same;
794                         int i;
795
796                         for (i = 0; 1U << i <= other; ++i) {
797                                 if (other & (1U << i)) {
798                                         ir_node *other = get_irn_n(skip_Proj(irn), i);
799                                         if (! arch_irn_is(co->aenv, other, ignore))
800                                                 add_edges(co, irn, other, co->get_costs(co, irn, other, 0));
801                                 }
802                         }
803                 }
804         }
805 }
806
807 void co_build_graph_structure(copy_opt_t *co) {
808         obstack_init(&co->obst);
809         co->nodes = new_set(compare_affinity_node_t, 32);
810
811         irg_walk_graph(co->irg, build_graph_walker, NULL, co);
812 }
813
814 void co_free_graph_structure(copy_opt_t *co) {
815         ASSERT_GS_AVAIL(co);
816
817         del_set(co->nodes);
818         obstack_free(&co->obst, NULL);
819         co->nodes = NULL;
820 }
821
822 /* co_solve_ilp1() co_solve_ilp2() are implemented in becopyilpX.c */
823
824 int co_gs_is_optimizable(copy_opt_t *co, ir_node *irn) {
825         affinity_node_t new_node, *n;
826
827         ASSERT_GS_AVAIL(co);
828
829         new_node.irn = irn;
830         n = set_find(co->nodes, &new_node, sizeof(new_node), hash_irn(new_node.irn));
831         if (n) {
832                 return (n->degree > 0);
833         } else
834                 return 0;
835 }
836
837 static int co_dump_appel_disjoint_constraints(const copy_opt_t *co, ir_node *a, ir_node *b)
838 {
839         ir_node *nodes[]  = { a, b };
840         bitset_t *constr[] = { NULL, NULL };
841         const arch_register_req_t *req;
842         int j;
843
844         constr[0] = bitset_alloca(co->cls->n_regs);
845         constr[1] = bitset_alloca(co->cls->n_regs);
846
847         for (j = 0; j < 2; ++j) {
848                 req = arch_get_register_req(nodes[j], BE_OUT_POS(0));
849                 if(arch_register_req_is(req, limited))
850                         rbitset_copy_to_bitset(req->limited, constr[j]);
851                 else
852                         bitset_set_all(constr[j]);
853
854         }
855
856         return !bitset_intersect(constr[0], constr[1]);
857 }
858
859 void co_dump_appel_graph(const copy_opt_t *co, FILE *f)
860 {
861         be_ifg_t *ifg  = co->cenv->ifg;
862         int *color_map = alloca(co->cls->n_regs * sizeof(color_map[0]));
863         int *node_map  = XMALLOCN(int, get_irg_last_idx(co->irg) + 1);
864
865         ir_node *irn;
866         void *it, *nit;
867         int n, n_regs;
868         unsigned i;
869
870         n_regs = 0;
871         for(i = 0; i < co->cls->n_regs; ++i) {
872                 const arch_register_t *reg = &co->cls->regs[i];
873                 color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_regs++;
874         }
875
876         /*
877          * n contains the first node number.
878          * the values below n are the pre-colored register nodes
879          */
880
881         it  = be_ifg_nodes_iter_alloca(ifg);
882         nit = be_ifg_neighbours_iter_alloca(ifg);
883
884         n = n_regs;
885         be_ifg_foreach_node(ifg, it, irn) {
886                 if(!arch_irn_is(co->aenv, irn, ignore))
887                         node_map[get_irn_idx(irn)] = n++;
888         }
889
890         fprintf(f, "%d %d\n", n, n_regs);
891
892         be_ifg_foreach_node(ifg, it, irn) {
893                 if(!arch_irn_is(co->aenv, irn, ignore)) {
894                         int idx            = node_map[get_irn_idx(irn)];
895                         affinity_node_t *a = get_affinity_info(co, irn);
896
897                         const arch_register_req_t *req;
898                         ir_node *adj;
899
900                         req = arch_get_register_req(irn, BE_OUT_POS(0));
901                         if(arch_register_req_is(req, limited)) {
902                                 for(i = 0; i < co->cls->n_regs; ++i) {
903                                         if(!rbitset_is_set(req->limited, i) && color_map[i] >= 0)
904                                                 fprintf(f, "%d %d -1\n", color_map[i], idx);
905                                 }
906                         }
907
908                         be_ifg_foreach_neighbour(ifg, nit, irn, adj) {
909                                 if(!arch_irn_is(co->aenv, adj, ignore) && !co_dump_appel_disjoint_constraints(co, irn, adj)) {
910                                         int adj_idx = node_map[get_irn_idx(adj)];
911                                         if(idx < adj_idx)
912                                                 fprintf(f, "%d %d -1\n", idx, adj_idx);
913                                 }
914                         }
915
916                         if(a) {
917                                 neighb_t *n;
918
919                                 co_gs_foreach_neighb(a, n) {
920                                         if(!arch_irn_is(co->aenv, n->irn, ignore)) {
921                                                 int n_idx = node_map[get_irn_idx(n->irn)];
922                                                 if(idx < n_idx)
923                                                         fprintf(f, "%d %d %d\n", idx, n_idx, (int) n->costs);
924                                         }
925                                 }
926                         }
927                 }
928         }
929
930         xfree(node_map);
931 }
932
933 /*
934          ___ _____ ____   ____   ___ _____   ____                        _
935         |_ _|  ___/ ___| |  _ \ / _ \_   _| |  _ \ _   _ _ __ ___  _ __ (_)_ __   __ _
936          | || |_ | |  _  | | | | | | || |   | | | | | | | '_ ` _ \| '_ \| | '_ \ / _` |
937          | ||  _|| |_| | | |_| | |_| || |   | |_| | |_| | | | | | | |_) | | | | | (_| |
938         |___|_|   \____| |____/ \___/ |_|   |____/ \__,_|_| |_| |_| .__/|_|_| |_|\__, |
939                                                                   |_|            |___/
940 */
941
942 static const char *get_dot_color_name(size_t col)
943 {
944         static const char *names[] = {
945                 "blue",
946                 "red",
947                 "green",
948                 "yellow",
949                 "cyan",
950                 "magenta",
951                 "orange",
952                 "chocolate",
953                 "beige",
954                 "navy",
955                 "darkgreen",
956                 "darkred",
957                 "lightPink",
958                 "chartreuse",
959                 "lightskyblue",
960                 "linen",
961                 "pink",
962                 "lightslateblue",
963                 "mintcream",
964                 "red",
965                 "darkolivegreen",
966                 "mediumblue",
967                 "mistyrose",
968                 "salmon",
969                 "darkseagreen",
970                 "mediumslateblue"
971                 "moccasin",
972                 "tomato",
973                 "forestgreen",
974                 "darkturquoise",
975                 "palevioletred"
976         };
977
978         return col < sizeof(names)/sizeof(names[0]) ? names[col] : "white";
979 }
980
981 typedef struct _co_ifg_dump_t {
982         const copy_opt_t *co;
983         unsigned flags;
984 } co_ifg_dump_t;
985
986 static void ifg_dump_graph_attr(FILE *f, void *self)
987 {
988         (void) self;
989         fprintf(f, "overlap=scale");
990 }
991
992 static int ifg_is_dump_node(void *self, ir_node *irn)
993 {
994         co_ifg_dump_t *cod = self;
995         return !arch_irn_is(cod->co->aenv, irn, ignore);
996 }
997
998 static void ifg_dump_node_attr(FILE *f, void *self, ir_node *irn)
999 {
1000         co_ifg_dump_t *env         = self;
1001         const arch_register_t *reg = arch_get_irn_register(irn);
1002         const arch_register_req_t *req;
1003         int limited;
1004
1005         req = arch_get_register_req(irn, BE_OUT_POS(0));
1006         limited = arch_register_req_is(req, limited);
1007
1008         if(env->flags & CO_IFG_DUMP_LABELS) {
1009                 ir_fprintf(f, "label=\"%+F", irn);
1010
1011                 if((env->flags & CO_IFG_DUMP_CONSTR) && limited) {
1012                         bitset_t *bs = bitset_alloca(env->co->cls->n_regs);
1013                         rbitset_copy_to_bitset(req->limited, bs);
1014                         ir_fprintf(f, "\\n%B", bs);
1015                 }
1016                 ir_fprintf(f, "\" ");
1017         } else {
1018                 fprintf(f, "label=\"\" shape=point " );
1019         }
1020
1021         if(env->flags & CO_IFG_DUMP_SHAPE)
1022                 fprintf(f, "shape=%s ", limited ? "diamond" : "ellipse");
1023
1024         if(env->flags & CO_IFG_DUMP_COLORS)
1025                 fprintf(f, "style=filled color=%s ", get_dot_color_name(reg->index));
1026 }
1027
1028 static void ifg_dump_at_end(FILE *file, void *self)
1029 {
1030         co_ifg_dump_t *env = self;
1031         affinity_node_t *a;
1032
1033         co_gs_foreach_aff_node(env->co, a) {
1034                 const arch_register_t *ar = arch_get_irn_register(a->irn);
1035                 unsigned aidx = get_irn_idx(a->irn);
1036                 neighb_t *n;
1037
1038                 co_gs_foreach_neighb(a, n) {
1039                         const arch_register_t *nr = arch_get_irn_register(n->irn);
1040                         unsigned nidx = get_irn_idx(n->irn);
1041
1042                         if(aidx < nidx) {
1043                                 const char *color = nr == ar ? "blue" : "red";
1044                                 fprintf(file, "\tn%d -- n%d [weight=0.01 ", aidx, nidx);
1045                                 if(env->flags & CO_IFG_DUMP_LABELS)
1046                                         fprintf(file, "label=\"%d\" ", n->costs);
1047                                 if(env->flags & CO_IFG_DUMP_COLORS)
1048                                         fprintf(file, "color=%s ", color);
1049                                 else
1050                                         fprintf(file, "style=dotted");
1051                                 fprintf(file, "];\n");
1052                         }
1053                 }
1054         }
1055 }
1056
1057
1058 static be_ifg_dump_dot_cb_t ifg_dot_cb = {
1059         ifg_is_dump_node,
1060         ifg_dump_graph_attr,
1061         ifg_dump_node_attr,
1062         NULL,
1063         NULL,
1064         ifg_dump_at_end
1065 };
1066
1067
1068
1069 void co_dump_ifg_dot(const copy_opt_t *co, FILE *f, unsigned flags)
1070 {
1071         co_ifg_dump_t cod;
1072
1073         cod.co    = co;
1074         cod.flags = flags;
1075         be_ifg_dump_dot(co->cenv->ifg, co->irg, f, &ifg_dot_cb, &cod);
1076 }
1077
1078
1079 void co_solve_park_moon(copy_opt_t *opt)
1080 {
1081         (void) opt;
1082 }
1083
1084 static int void_algo(copy_opt_t *co)
1085 {
1086         (void) co;
1087         return 0;
1088 }
1089
1090 /*
1091                 _    _                  _ _   _
1092            / \  | | __ _  ___  _ __(_) |_| |__  _ __ ___  ___
1093           / _ \ | |/ _` |/ _ \| '__| | __| '_ \| '_ ` _ \/ __|
1094          / ___ \| | (_| | (_) | |  | | |_| | | | | | | | \__ \
1095         /_/   \_\_|\__, |\___/|_|  |_|\__|_| |_|_| |_| |_|___/
1096                            |___/
1097 */
1098
1099 typedef struct {
1100         co_algo_t  *algo;
1101         const char *name;
1102         int        can_improve_existing;
1103 } co_algo_info_t;
1104
1105 static co_algo_info_t algos[] = {
1106         { void_algo,               "none",  0 },
1107         { co_solve_heuristic,      "heur1", 0 },
1108         { co_solve_heuristic_new,  "heur2", 0 },
1109 #ifdef WITH_JVM
1110         { co_solve_heuristic_java, "heur3", 0 },
1111 #else
1112         { NULL,                    "heur3", 0 },
1113 #endif
1114         { co_solve_heuristic_mst,  "heur4", 0 },
1115 #ifdef WITH_ILP
1116         { co_solve_ilp2,           "ilp",   1 },
1117 #else
1118         { NULL,                    "ilp",   1 },
1119 #endif
1120         { NULL,                    "",      0 }
1121 };
1122
1123 /*
1124     __  __       _         ____       _
1125    |  \/  | __ _(_)_ __   |  _ \ _ __(_)_   _____ _ __
1126    | |\/| |/ _` | | '_ \  | | | | '__| \ \ / / _ \ '__|
1127    | |  | | (_| | | | | | | |_| | |  | |\ V /  __/ |
1128    |_|  |_|\__,_|_|_| |_| |____/|_|  |_| \_/ \___|_|
1129
1130 */
1131
1132 static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char *suffix)
1133 {
1134         FILE *result;
1135         char buf[1024];
1136         size_t i, n;
1137         char *tu_name;
1138
1139         n = strlen(env->birg->main_env->cup_name);
1140         tu_name = XMALLOCN(char, n + 1);
1141         strcpy(tu_name, env->birg->main_env->cup_name);
1142         for (i = 0; i < n; ++i)
1143                 if (tu_name[i] == '.')
1144                         tu_name[i] = '_';
1145
1146
1147         ir_snprintf(buf, sizeof(buf), "%s%s_%F_%s%s", prefix, tu_name, env->irg, env->cls->name, suffix);
1148         xfree(tu_name);
1149         result = fopen(buf, "wt");
1150         if(result == NULL) {
1151                 panic("Couldn't open '%s' for writing.", buf);
1152         }
1153
1154         return result;
1155 }
1156
1157 void co_driver(be_chordal_env_t *cenv)
1158 {
1159         ir_timer_t          *timer = ir_timer_register("firm.be.copyopt", "runtime");
1160         co_complete_stats_t before, after;
1161         copy_opt_t          *co;
1162         co_algo_t           *algo_func;
1163         int                 was_optimal = 0;
1164
1165         if (algo >= CO_ALGO_LAST)
1166                 return;
1167
1168         be_liveness_assure_chk(be_get_birg_liveness(cenv->birg));
1169
1170         co = new_copy_opt(cenv, cost_func);
1171         co_build_ou_structure(co);
1172         co_build_graph_structure(co);
1173
1174         co_complete_stats(co, &before);
1175
1176         be_stat_ev_ull("co_aff_nodes",    before.aff_nodes);
1177         be_stat_ev_ull("co_aff_edges",    before.aff_edges);
1178         be_stat_ev_ull("co_max_costs",    before.max_costs);
1179         be_stat_ev_ull("co_inevit_costs", before.inevit_costs);
1180         be_stat_ev_ull("co_aff_int",      before.aff_int);
1181
1182         be_stat_ev_ull("co_init_costs",   before.costs);
1183         be_stat_ev_ull("co_init_unsat",   before.unsatisfied_edges);
1184
1185         if (dump_flags & DUMP_BEFORE) {
1186                 FILE *f = my_open(cenv, "", "-before.dot");
1187                 co_dump_ifg_dot(co, f, style_flags);
1188                 fclose(f);
1189         }
1190
1191         /* if the algo can improve results, provide an initial solution with heur3 */
1192         if (improve && algos[algo].can_improve_existing) {
1193                 co_complete_stats_t stats;
1194
1195                 /* produce a heuristic solution */
1196 #ifdef WITH_JVM
1197                 co_solve_heuristic_java(co);
1198 #else
1199                 co_solve_heuristic(co);
1200 #endif /* WITH_JVM */
1201
1202                 /* do the stats and provide the current costs */
1203                 co_complete_stats(co, &stats);
1204                 be_stat_ev_ull("co_prepare_costs", stats.costs);
1205         }
1206
1207 #ifdef WITH_JVM
1208         /* start the JVM here so that it does not tamper the timing. */
1209         if (algo == CO_ALGO_HEUR3)
1210                 be_java_coal_start_jvm();
1211 #endif /* WITH_JVM */
1212
1213         algo_func = algos[algo].algo;
1214
1215         /* perform actual copy minimization */
1216         ir_timer_reset_and_start(timer);
1217         was_optimal = algo_func(co);
1218         ir_timer_stop(timer);
1219
1220         be_stat_ev("co_time", ir_timer_elapsed_msec(timer));
1221         be_stat_ev_ull("co_optimal", was_optimal);
1222
1223         if (dump_flags & DUMP_AFTER) {
1224                 FILE *f = my_open(cenv, "", "-after.dot");
1225                 co_dump_ifg_dot(co, f, style_flags);
1226                 fclose(f);
1227         }
1228
1229         co_complete_stats(co, &after);
1230
1231         if (do_stats) {
1232                 ulong64 optimizable_costs = after.max_costs - after.inevit_costs;
1233                 ulong64 evitable          = after.costs     - after.inevit_costs;
1234
1235                 ir_printf("%30F ", cenv->irg);
1236                 printf("%10s %10" ULL_FMT "%10" ULL_FMT "%10" ULL_FMT, cenv->cls->name, after.max_costs, before.costs, after.inevit_costs);
1237
1238                 if(optimizable_costs > 0)
1239                         printf("%10" ULL_FMT " %5.2f\n", after.costs, (evitable * 100.0) / optimizable_costs);
1240                 else
1241                         printf("%10" ULL_FMT " %5s\n", after.costs, "-");
1242         }
1243
1244         /* Dump the interference graph in Appel's format. */
1245         if (dump_flags & DUMP_APPEL) {
1246                 FILE *f = my_open(cenv, "", ".apl");
1247                 fprintf(f, "# %lld %lld\n", after.costs, after.unsatisfied_edges);
1248                 co_dump_appel_graph(co, f);
1249                 fclose(f);
1250         }
1251
1252         be_stat_ev_ull("co_after_costs", after.costs);
1253         be_stat_ev_ull("co_after_unsat", after.unsatisfied_edges);
1254
1255         co_free_graph_structure(co);
1256         co_free_ou_structure(co);
1257         free_copy_opt(co);
1258 }