Add ALLOCAN() and ALLOCANZ().
[libfirm] / ir / be / becopyopt.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Copy minimization driver.
23  * @author      Daniel Grund
24  * @date        12.04.2005
25  * @version     $Id$
26  *
27  * Main file for the optimization reducing the copies needed for:
28  * - Phi coalescing
29  * - Register-constrained nodes
30  * - Two-address code instructions
31  */
32 #include "config.h"
33
34 #include "execfreq.h"
35 #include "xmalloc.h"
36 #include "debug.h"
37 #include "pmap.h"
38 #include "raw_bitset.h"
39 #include "irnode.h"
40 #include "irgraph.h"
41 #include "irgwalk.h"
42 #include "irprog.h"
43 #include "irloop_t.h"
44 #include "iredges_t.h"
45 #include "phiclass.h"
46 #include "irbitset.h"
47 #include "irphase_t.h"
48 #include "irprintf_t.h"
49
50 #include "bemodule.h"
51 #include "bearch_t.h"
52 #include "benode_t.h"
53 #include "beutil.h"
54 #include "beifg_t.h"
55 #include "beintlive_t.h"
56 #include "becopyopt_t.h"
57 #include "becopystat.h"
58 #include "belive_t.h"
59 #include "beinsn_t.h"
60 #include "besched_t.h"
61 #include "bestatevent.h"
62 #include "beirg_t.h"
63 #include "error.h"
64
65 #include "lc_opts.h"
66 #include "lc_opts_enum.h"
67
68 #define DUMP_BEFORE 1
69 #define DUMP_AFTER  2
70 #define DUMP_APPEL  4
71 #define DUMP_ALL    2 * DUMP_APPEL - 1
72
73 #define COST_FUNC_FREQ     1
74 #define COST_FUNC_LOOP     2
75 #define COST_FUNC_ALL_ONE  3
76
77 static unsigned   dump_flags  = 0;
78 static unsigned   style_flags = 0;
79 static unsigned   do_stats    = 0;
80 static cost_fct_t cost_func   = co_get_costs_exec_freq;
81 static unsigned   algo        = CO_ALGO_HEUR4;
82 static int        improve     = 1;
83
84 static const lc_opt_enum_mask_items_t dump_items[] = {
85         { "before",  DUMP_BEFORE },
86         { "after",   DUMP_AFTER  },
87         { "appel",   DUMP_APPEL  },
88         { "all",     DUMP_ALL    },
89         { NULL,      0 }
90 };
91
92 static const lc_opt_enum_mask_items_t style_items[] = {
93         { "color",   CO_IFG_DUMP_COLORS },
94         { "labels",  CO_IFG_DUMP_LABELS },
95         { "constr",  CO_IFG_DUMP_CONSTR },
96         { "shape",   CO_IFG_DUMP_SHAPE  },
97         { "full",    2 * CO_IFG_DUMP_SHAPE - 1 },
98         { NULL,      0 }
99 };
100
101 static const lc_opt_enum_mask_items_t algo_items[] = {
102         { "none",   CO_ALGO_NONE  },
103         { "heur",   CO_ALGO_HEUR  },
104         { "heur2",  CO_ALGO_HEUR2 },
105         { "heur3",  CO_ALGO_HEUR3 },
106         { "heur4",  CO_ALGO_HEUR4 },
107         { "ilp",    CO_ALGO_ILP   },
108         { NULL,     0 }
109 };
110
111 typedef int (*opt_funcptr)(void);
112
113 static const lc_opt_enum_func_ptr_items_t cost_func_items[] = {
114         { "freq",   (opt_funcptr) co_get_costs_exec_freq },
115         { "loop",   (opt_funcptr) co_get_costs_loop_depth },
116         { "one",    (opt_funcptr) co_get_costs_all_one },
117         { NULL,     NULL }
118 };
119
120 static lc_opt_enum_mask_var_t dump_var = {
121         &dump_flags, dump_items
122 };
123
124 static lc_opt_enum_mask_var_t style_var = {
125         &style_flags, style_items
126 };
127
128 static lc_opt_enum_mask_var_t algo_var = {
129         &algo, algo_items
130 };
131
132 static lc_opt_enum_func_ptr_var_t cost_func_var = {
133         (opt_funcptr*) &cost_func, cost_func_items
134 };
135
136 static const lc_opt_table_entry_t options[] = {
137         LC_OPT_ENT_ENUM_INT      ("algo",    "select copy optimization algo",                           &algo_var),
138         LC_OPT_ENT_ENUM_FUNC_PTR ("cost",    "select a cost function",                                  &cost_func_var),
139         LC_OPT_ENT_ENUM_MASK     ("dump",    "dump ifg before or after copy optimization",              &dump_var),
140         LC_OPT_ENT_ENUM_MASK     ("style",   "dump style for ifg dumping",                              &style_var),
141         LC_OPT_ENT_BOOL          ("stats",   "dump statistics after each optimization",                 &do_stats),
142         LC_OPT_ENT_BOOL          ("improve", "run heur3 before if algo can exploit start solutions",    &improve),
143         LC_OPT_LAST
144 };
145
146 /* Insert additional options registration functions here. */
147 extern void be_co_ilp_register_options(lc_opt_entry_t *grp);
148 extern void be_co2_register_options(lc_opt_entry_t *grp);
149 extern void be_co3_register_options(lc_opt_entry_t *grp);
150
151 void be_init_copycoal(void)
152 {
153         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
154         lc_opt_entry_t *ra_grp = lc_opt_get_grp(be_grp, "ra");
155         lc_opt_entry_t *chordal_grp = lc_opt_get_grp(ra_grp, "chordal");
156         lc_opt_entry_t *co_grp = lc_opt_get_grp(chordal_grp, "co");
157
158         lc_opt_add_table(co_grp, options);
159 }
160
161 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copycoal);
162
163 #undef QUICK_AND_DIRTY_HACK
164
165 static int nodes_interfere(const be_chordal_env_t *env, const ir_node *a, const ir_node *b)
166 {
167         if (env->ifg)
168                 return be_ifg_connected(env->ifg, a, b);
169         else
170                 return values_interfere(env->birg, a, b);
171 }
172
173
174 /******************************************************************************
175     _____                           _
176    / ____|                         | |
177   | |  __  ___ _ __   ___ _ __ __ _| |
178   | | |_ |/ _ \ '_ \ / _ \ '__/ _` | |
179   | |__| |  __/ | | |  __/ | | (_| | |
180    \_____|\___|_| |_|\___|_|  \__,_|_|
181
182  ******************************************************************************/
183
184 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
185
186
187 copy_opt_t *new_copy_opt(be_chordal_env_t *chordal_env, cost_fct_t get_costs)
188 {
189         const char *s1, *s2, *s3;
190         int len;
191         copy_opt_t *co;
192
193         FIRM_DBG_REGISTER(dbg, "ir.be.copyopt");
194
195         co = XMALLOCZ(copy_opt_t);
196         co->cenv      = chordal_env;
197         co->irg       = chordal_env->irg;
198         co->cls       = chordal_env->cls;
199         co->get_costs = get_costs;
200
201         s1 = get_irp_prog_name();
202         s2 = get_entity_name(get_irg_entity(co->irg));
203         s3 = chordal_env->cls->name;
204         len = strlen(s1) + strlen(s2) + strlen(s3) + 5;
205         co->name = XMALLOCN(char, len);
206         snprintf(co->name, len, "%s__%s__%s", s1, s2, s3);
207
208         return co;
209 }
210
211 void free_copy_opt(copy_opt_t *co) {
212         xfree(co->name);
213         free(co);
214 }
215
216 /**
217  * Checks if a node is optimizable, viz. has something to do with coalescing
218  * @param irn  The irn to check
219  */
220 static int co_is_optimizable_root(ir_node *irn)
221 {
222         const arch_register_req_t *req;
223         const arch_register_t *reg;
224
225         if (arch_irn_is(irn, ignore))
226                 return 0;
227
228         reg = arch_get_irn_register(irn);
229         if (arch_register_type_is(reg, ignore))
230                 return 0;
231
232         if (is_Reg_Phi(irn) || is_Perm_Proj(irn))
233                 return 1;
234
235         req = arch_get_register_req_out(irn);
236         if (is_2addr_code(req))
237                 return 1;
238
239         return 0;
240 }
241
242 int co_get_costs_loop_depth(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
243         int cost = 0;
244         ir_loop *loop;
245         ir_node *root_block = get_nodes_block(root);
246         (void) co;
247         (void) arg;
248
249         if (is_Phi(root)) {
250                 /* for phis the copies are placed in the corresponding pred-block */
251                 loop = get_irn_loop(get_Block_cfgpred_block(root_block, pos));
252         } else {
253                 /* a perm places the copy in the same block as it resides */
254                 loop = get_irn_loop(root_block);
255         }
256         if (loop) {
257                 int d = get_loop_depth(loop);
258                 cost = d*d;
259         }
260         return 1+cost;
261 }
262
263 int co_get_costs_exec_freq(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
264         int res;
265         ir_node *root_bl = get_nodes_block(root);
266         ir_node *copy_bl = is_Phi(root) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
267         (void) arg;
268         res = get_block_execfreq_ulong(co->cenv->birg->exec_freq, copy_bl);
269
270         /* don't allow values smaller than one. */
271         return res < 1 ? 1 : res;
272 }
273
274
275 int co_get_costs_all_one(const copy_opt_t *co, ir_node *root, ir_node *arg, int pos) {
276         (void) co;
277         (void) root;
278         (void) arg;
279         (void) pos;
280         return 1;
281 }
282
283 /******************************************************************************
284    ____        _   _    _       _ _          _____ _
285   / __ \      | | | |  | |     (_) |        / ____| |
286  | |  | |_ __ | |_| |  | |_ __  _| |_ ___  | (___ | |_ ___  _ __ __ _  __ _  ___
287  | |  | | '_ \| __| |  | | '_ \| | __/ __|  \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
288  | |__| | |_) | |_| |__| | | | | | |_\__ \  ____) | || (_) | | | (_| | (_| |  __/
289   \____/| .__/ \__|\____/|_| |_|_|\__|___/ |_____/ \__\___/|_|  \__,_|\__, |\___|
290         | |                                                            __/ |
291         |_|                                                           |___/
292  ******************************************************************************/
293
294 /**
295  * Determines a maximum weighted independent set with respect to
296  * the interference and conflict edges of all nodes in a qnode.
297  */
298 static int ou_max_ind_set_costs(unit_t *ou) {
299         be_chordal_env_t *chordal_env = ou->co->cenv;
300         ir_node **safe, **unsafe;
301         int i, o, safe_count, safe_costs, unsafe_count, *unsafe_costs;
302         bitset_t *curr;
303         bitset_pos_t pos;
304         int max, curr_weight, best_weight = 0;
305
306         /* assign the nodes into two groups.
307          * safe: node has no interference, hence it is in every max stable set.
308          * unsafe: node has an interference
309          */
310         safe         = ALLOCAN(ir_node*, ou->node_count - 1);
311         safe_costs   = 0;
312         safe_count   = 0;
313         unsafe       = ALLOCAN(ir_node*, ou->node_count - 1);
314         unsafe_costs = ALLOCAN(int,      ou->node_count - 1);
315         unsafe_count = 0;
316         for(i=1; i<ou->node_count; ++i) {
317                 int is_safe = 1;
318                 for(o=1; o<ou->node_count; ++o) {
319                         if (i==o)
320                                 continue;
321                         if (nodes_interfere(chordal_env, ou->nodes[i], ou->nodes[o])) {
322                                 unsafe_costs[unsafe_count] = ou->costs[i];
323                                 unsafe[unsafe_count] = ou->nodes[i];
324                                 ++unsafe_count;
325                                 is_safe = 0;
326                                 break;
327                         }
328                 }
329                 if (is_safe) {
330                         safe_costs += ou->costs[i];
331                         safe[safe_count++] = ou->nodes[i];
332                 }
333         }
334
335
336         /* now compute the best set out of the unsafe nodes*/
337         if (unsafe_count > MIS_HEUR_TRIGGER) {
338                 bitset_t *best = bitset_alloca(unsafe_count);
339                 /* Heuristik: Greedy trial and error form index 0 to unsafe_count-1 */
340                 for (i=0; i<unsafe_count; ++i) {
341                         bitset_set(best, i);
342                         /* check if it is a stable set */
343                         for (o=bitset_next_set(best, 0); o!=-1 && o<i; o=bitset_next_set(best, o+1))
344                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o])) {
345                                         bitset_clear(best, i); /* clear the bit and try next one */
346                                         break;
347                                 }
348                 }
349                 /* compute the weight */
350                 bitset_foreach(best, pos)
351                         best_weight += unsafe_costs[pos];
352         } else {
353                 /* Exact Algorithm: Brute force */
354                 curr = bitset_alloca(unsafe_count);
355                 bitset_set_all(curr);
356                 while ((max = bitset_popcnt(curr)) != 0) {
357                         /* check if curr is a stable set */
358                         for (i=bitset_next_set(curr, 0); i!=-1; i=bitset_next_set(curr, i+1))
359                                 for (o=bitset_next_set(curr, i+1); o!=-1; o=bitset_next_set(curr, o+1)) /* !!!!! difference to qnode_max_ind_set(): NOT (curr, i) */
360                                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o]))
361                                                         goto no_stable_set;
362
363                         /* if we arrive here, we have a stable set */
364                         /* compute the weigth of the stable set*/
365                         curr_weight = 0;
366                         bitset_foreach(curr, pos)
367                                 curr_weight += unsafe_costs[pos];
368
369                         /* any better ? */
370                         if (curr_weight > best_weight) {
371                                 best_weight = curr_weight;
372                         }
373
374         no_stable_set:
375                         bitset_minus1(curr);
376                 }
377         }
378
379         return safe_costs+best_weight;
380 }
381
382 static void co_collect_units(ir_node *irn, void *env)
383 {
384         const arch_register_req_t *req = arch_get_register_req_out(irn);
385         copy_opt_t                *co  = env;
386         unit_t *unit;
387
388         if (req->cls != co->cls)
389                 return;
390         if (!co_is_optimizable_root(irn))
391                 return;
392
393         /* Init a new unit */
394         unit = XMALLOCZ(unit_t);
395         unit->co = co;
396         unit->node_count = 1;
397         INIT_LIST_HEAD(&unit->queue);
398
399         /* Phi with some/all of its arguments */
400         if (is_Reg_Phi(irn)) {
401                 int i, arity;
402
403                 /* init */
404                 arity = get_irn_arity(irn);
405                 unit->nodes = XMALLOCN(ir_node*, arity + 1);
406                 unit->costs = XMALLOCN(int,      arity + 1);
407                 unit->nodes[0] = irn;
408
409                 /* fill */
410                 for (i=0; i<arity; ++i) {
411                         int o, arg_pos;
412                         ir_node *arg = get_irn_n(irn, i);
413
414                         assert(arch_get_irn_reg_class_out(arg) == co->cls && "Argument not in same register class.");
415                         if (arg == irn)
416                                 continue;
417                         if (nodes_interfere(co->cenv, irn, arg)) {
418                                 unit->inevitable_costs += co->get_costs(co, irn, arg, i);
419                                 continue;
420                         }
421
422                         /* Else insert the argument of the phi to the members of this ou */
423                         DBG((dbg, LEVEL_1, "\t   Member: %+F\n", arg));
424
425                         if (!arch_irn_is(arg, ignore)) {
426                                 /* Check if arg has occurred at a prior position in the arg/list */
427                                 arg_pos = 0;
428                                 for (o=1; o<unit->node_count; ++o) {
429                                         if (unit->nodes[o] == arg) {
430                                                 arg_pos = o;
431                                                 break;
432                                         }
433                                 }
434
435                                 if (!arg_pos) { /* a new argument */
436                                         /* insert node, set costs */
437                                         unit->nodes[unit->node_count] = arg;
438                                         unit->costs[unit->node_count] = co->get_costs(co, irn, arg, i);
439                                         unit->node_count++;
440                                 } else { /* arg has occurred before in same phi */
441                                         /* increase costs for existing arg */
442                                         unit->costs[arg_pos] += co->get_costs(co, irn, arg, i);
443                                 }
444                         }
445                 }
446                 unit->nodes = XREALLOC(unit->nodes, ir_node*, unit->node_count);
447                 unit->costs = XREALLOC(unit->costs, int,      unit->node_count);
448         } else if (is_Perm_Proj(irn)) {
449                 /* Proj of a perm with corresponding arg */
450                 assert(!nodes_interfere(co->cenv, irn, get_Perm_src(irn)));
451                 unit->nodes = XMALLOCN(ir_node*, 2);
452                 unit->costs = XMALLOCN(int,      2);
453                 unit->node_count = 2;
454                 unit->nodes[0] = irn;
455                 unit->nodes[1] = get_Perm_src(irn);
456                 unit->costs[1] = co->get_costs(co, irn, unit->nodes[1], -1);
457         } else {
458                 /* Src == Tgt of a 2-addr-code instruction */
459                 if (is_2addr_code(req)) {
460                         const unsigned other = req->other_same;
461                         int            count = 0;
462                         int            i;
463
464                         for (i = 0; (1U << i) <= other; ++i) {
465                                 if (other & (1U << i)) {
466                                         ir_node *o  = get_irn_n(skip_Proj(irn), i);
467                                         if (!arch_irn_is(o, ignore) &&
468                                                         !nodes_interfere(co->cenv, irn, o)) {
469                                                 ++count;
470                                         }
471                                 }
472                         }
473
474                         if (count != 0) {
475                                 int k = 0;
476                                 ++count;
477                                 unit->nodes = XMALLOCN(ir_node*, count);
478                                 unit->costs = XMALLOCN(int,      count);
479                                 unit->node_count = count;
480                                 unit->nodes[k++] = irn;
481
482                                 for (i = 0; 1U << i <= other; ++i) {
483                                         if (other & (1U << i)) {
484                                                 ir_node *o  = get_irn_n(skip_Proj(irn), i);
485                                                 if (!arch_irn_is(o, ignore) &&
486                                                                 !nodes_interfere(co->cenv, irn, o)) {
487                                                         unit->nodes[k] = o;
488                                                         unit->costs[k] = co->get_costs(co, irn, o, -1);
489                                                         ++k;
490                                                 }
491                                         }
492                                 }
493                         }
494                 } else {
495                         assert(0 && "This is not an optimizable node!");
496                 }
497         }
498
499         /* Insert the new unit at a position according to its costs */
500         if (unit->node_count > 1) {
501                 int i;
502                 struct list_head *tmp;
503
504                 /* Determine the maximum costs this unit can cause: all_nodes_cost */
505                 for(i=1; i<unit->node_count; ++i) {
506                         unit->sort_key = MAX(unit->sort_key, unit->costs[i]);
507                         unit->all_nodes_costs += unit->costs[i];
508                 }
509
510                 /* Determine the minimal costs this unit will cause: min_nodes_costs */
511                 unit->min_nodes_costs += unit->all_nodes_costs - ou_max_ind_set_costs(unit);
512                 /* Insert the new ou according to its sort_key */
513                 tmp = &co->units;
514                 while (tmp->next != &co->units && list_entry_units(tmp->next)->sort_key > unit->sort_key)
515                         tmp = tmp->next;
516                 list_add(&unit->units, tmp);
517         } else {
518                 free(unit);
519         }
520 }
521
522 #ifdef QUICK_AND_DIRTY_HACK
523
524 static int compare_ous(const void *k1, const void *k2) {
525         const unit_t *u1 = *((const unit_t **) k1);
526         const unit_t *u2 = *((const unit_t **) k2);
527         int i, o, u1_has_constr, u2_has_constr;
528         arch_register_req_t req;
529
530         /* Units with constraints come first */
531         u1_has_constr = 0;
532         for (i=0; i<u1->node_count; ++i) {
533                 arch_get_register_req_out(&req, u1->nodes[i]);
534                 if (arch_register_req_is(&req, limited)) {
535                         u1_has_constr = 1;
536                         break;
537                 }
538         }
539
540         u2_has_constr = 0;
541         for (i=0; i<u2->node_count; ++i) {
542                 arch_get_register_req_out(&req, u2->nodes[i]);
543                 if (arch_register_req_is(&req, limited)) {
544                         u2_has_constr = 1;
545                         break;
546                 }
547         }
548
549         if (u1_has_constr != u2_has_constr)
550                 return u2_has_constr - u1_has_constr;
551
552         /* Now check, whether the two units are connected */
553 #if 0
554         for (i=0; i<u1->node_count; ++i)
555                 for (o=0; o<u2->node_count; ++o)
556                         if (u1->nodes[i] == u2->nodes[o])
557                                 return 0;
558 #endif
559
560         /* After all, the sort key decides. Greater keys come first. */
561         return u2->sort_key - u1->sort_key;
562
563 }
564
565 /**
566  * Sort the ou's according to constraints and their sort_key
567  */
568 static void co_sort_units(copy_opt_t *co) {
569         int i, count = 0, costs;
570         unit_t *ou, **ous;
571
572         /* get the number of ous, remove them form the list and fill the array */
573         list_for_each_entry(unit_t, ou, &co->units, units)
574                 count++;
575         ous = ALLOCAN(unit_t, count);
576
577         costs = co_get_max_copy_costs(co);
578
579         i = 0;
580         list_for_each_entry(unit_t, ou, &co->units, units)
581                 ous[i++] = ou;
582
583         INIT_LIST_HEAD(&co->units);
584
585         assert(count == i && list_empty(&co->units));
586
587         for (i=0; i<count; ++i)
588                 ir_printf("%+F\n", ous[i]->nodes[0]);
589
590         qsort(ous, count, sizeof(*ous), compare_ous);
591
592         ir_printf("\n\n");
593         for (i=0; i<count; ++i)
594                 ir_printf("%+F\n", ous[i]->nodes[0]);
595
596         /* reinsert into list in correct order */
597         for (i=0; i<count; ++i)
598                 list_add_tail(&ous[i]->units, &co->units);
599
600         assert(costs == co_get_max_copy_costs(co));
601 }
602 #endif
603
604 void co_build_ou_structure(copy_opt_t *co) {
605         DBG((dbg, LEVEL_1, "\tCollecting optimization units\n"));
606         INIT_LIST_HEAD(&co->units);
607         irg_walk_graph(co->irg, co_collect_units, NULL, co);
608 #ifdef QUICK_AND_DIRTY_HACK
609         co_sort_units(co);
610 #endif
611 }
612
613 void co_free_ou_structure(copy_opt_t *co) {
614         unit_t *curr, *tmp;
615         ASSERT_OU_AVAIL(co);
616         list_for_each_entry_safe(unit_t, curr, tmp, &co->units, units) {
617                 xfree(curr->nodes);
618                 xfree(curr->costs);
619                 xfree(curr);
620         }
621         co->units.next = NULL;
622 }
623
624 /* co_solve_heuristic() is implemented in becopyheur.c */
625
626 int co_get_max_copy_costs(const copy_opt_t *co) {
627         int i, res = 0;
628         unit_t *curr;
629
630         ASSERT_OU_AVAIL(co);
631
632         list_for_each_entry(unit_t, curr, &co->units, units) {
633                 res += curr->inevitable_costs;
634                 for (i=1; i<curr->node_count; ++i)
635                         res += curr->costs[i];
636         }
637         return res;
638 }
639
640 int co_get_inevit_copy_costs(const copy_opt_t *co) {
641         int res = 0;
642         unit_t *curr;
643
644         ASSERT_OU_AVAIL(co);
645
646         list_for_each_entry(unit_t, curr, &co->units, units)
647                 res += curr->inevitable_costs;
648         return res;
649 }
650
651 int co_get_copy_costs(const copy_opt_t *co) {
652         int i, res = 0;
653         unit_t *curr;
654
655         ASSERT_OU_AVAIL(co);
656
657         list_for_each_entry(unit_t, curr, &co->units, units) {
658                 int root_col = get_irn_col(curr->nodes[0]);
659                 DBG((dbg, LEVEL_1, "  %3d costs for root %+F color %d\n", curr->inevitable_costs, curr->nodes[0], root_col));
660                 res += curr->inevitable_costs;
661                 for (i=1; i<curr->node_count; ++i) {
662                         int arg_col = get_irn_col(curr->nodes[i]);
663                         if (root_col != arg_col) {
664                                 DBG((dbg, LEVEL_1, "  %3d for arg %+F color %d\n", curr->costs[i], curr->nodes[i], arg_col));
665                                 res += curr->costs[i];
666                         }
667                 }
668         }
669         return res;
670 }
671
672 int co_get_lower_bound(const copy_opt_t *co) {
673         int res = 0;
674         unit_t *curr;
675
676         ASSERT_OU_AVAIL(co);
677
678         list_for_each_entry(unit_t, curr, &co->units, units)
679                 res += curr->inevitable_costs + curr->min_nodes_costs;
680         return res;
681 }
682
683 void co_complete_stats(const copy_opt_t *co, co_complete_stats_t *stat)
684 {
685         bitset_t *seen = bitset_irg_malloc(co->irg);
686         affinity_node_t *an;
687
688         memset(stat, 0, sizeof(stat[0]));
689
690         /* count affinity edges. */
691         co_gs_foreach_aff_node(co, an) {
692                 neighb_t *neigh;
693                 stat->aff_nodes += 1;
694                 bitset_add_irn(seen, an->irn);
695                 co_gs_foreach_neighb(an, neigh) {
696                         if(!bitset_contains_irn(seen, neigh->irn)) {
697                                 stat->aff_edges += 1;
698                                 stat->max_costs += neigh->costs;
699
700                                 if (get_irn_col(an->irn) != get_irn_col(neigh->irn)) {
701                                         stat->costs += neigh->costs;
702                                         stat->unsatisfied_edges += 1;
703                                 }
704
705                                 if(nodes_interfere(co->cenv, an->irn, neigh->irn)) {
706                                         stat->aff_int += 1;
707                                         stat->inevit_costs += neigh->costs;
708                                 }
709
710                         }
711                 }
712         }
713
714         bitset_free(seen);
715 }
716
717 /******************************************************************************
718    _____                 _        _____ _
719   / ____|               | |      / ____| |
720  | |  __ _ __ __ _ _ __ | |__   | (___ | |_ ___  _ __ __ _  __ _  ___
721  | | |_ | '__/ _` | '_ \| '_ \   \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
722  | |__| | | | (_| | |_) | | | |  ____) | || (_) | | | (_| | (_| |  __/
723   \_____|_|  \__,_| .__/|_| |_| |_____/ \__\___/|_|  \__,_|\__, |\___|
724                   | |                                       __/ |
725                   |_|                                      |___/
726  ******************************************************************************/
727
728 static int compare_affinity_node_t(const void *k1, const void *k2, size_t size) {
729         const affinity_node_t *n1 = k1;
730         const affinity_node_t *n2 = k2;
731         (void) size;
732
733         return (n1->irn != n2->irn);
734 }
735
736 static void add_edge(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
737         affinity_node_t new_node, *node;
738         neighb_t        *nbr;
739         int             allocnew = 1;
740
741         new_node.irn        = n1;
742         new_node.degree     = 0;
743         new_node.neighbours = NULL;
744         node = set_insert(co->nodes, &new_node, sizeof(new_node), hash_irn(new_node.irn));
745
746         for (nbr = node->neighbours; nbr; nbr = nbr->next)
747                 if (nbr->irn == n2) {
748                         allocnew = 0;
749                         break;
750                 }
751
752         /* if we did not find n2 in n1's neighbourhood insert it */
753         if (allocnew) {
754                 nbr        = obstack_alloc(&co->obst, sizeof(*nbr));
755                 nbr->irn   = n2;
756                 nbr->costs = 0;
757                 nbr->next  = node->neighbours;
758
759                 node->neighbours = nbr;
760                 node->degree++;
761         }
762
763         /* now nbr points to n1's neighbour-entry of n2 */
764         nbr->costs += costs;
765 }
766
767 static inline void add_edges(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
768         if (! be_ifg_connected(co->cenv->ifg, n1, n2)) {
769                 add_edge(co, n1, n2, costs);
770                 add_edge(co, n2, n1, costs);
771         }
772 }
773
774 static void build_graph_walker(ir_node *irn, void *env) {
775         const arch_register_req_t *req = arch_get_register_req_out(irn);
776         copy_opt_t                *co  = env;
777         int pos, max;
778         const arch_register_t *reg;
779
780         if (req->cls != co->cls || arch_irn_is(irn, ignore))
781                 return;
782
783         reg = arch_get_irn_register(irn);
784         if (arch_register_type_is(reg, ignore))
785                 return;
786
787         if (is_Reg_Phi(irn)) { /* Phis */
788                 for (pos=0, max=get_irn_arity(irn); pos<max; ++pos) {
789                         ir_node *arg = get_irn_n(irn, pos);
790                         add_edges(co, irn, arg, co->get_costs(co, irn, arg, pos));
791                 }
792         } else if (is_Perm_Proj(irn)) { /* Perms */
793                 ir_node *arg = get_Perm_src(irn);
794                 add_edges(co, irn, arg, co->get_costs(co, irn, arg, 0));
795         } else { /* 2-address code */
796                 if (is_2addr_code(req)) {
797                         const unsigned other = req->other_same;
798                         int i;
799
800                         for (i = 0; 1U << i <= other; ++i) {
801                                 if (other & (1U << i)) {
802                                         ir_node *other = get_irn_n(skip_Proj(irn), i);
803                                         if (!arch_irn_is(other, ignore))
804                                                 add_edges(co, irn, other, co->get_costs(co, irn, other, 0));
805                                 }
806                         }
807                 }
808         }
809 }
810
811 void co_build_graph_structure(copy_opt_t *co) {
812         obstack_init(&co->obst);
813         co->nodes = new_set(compare_affinity_node_t, 32);
814
815         irg_walk_graph(co->irg, build_graph_walker, NULL, co);
816 }
817
818 void co_free_graph_structure(copy_opt_t *co) {
819         ASSERT_GS_AVAIL(co);
820
821         del_set(co->nodes);
822         obstack_free(&co->obst, NULL);
823         co->nodes = NULL;
824 }
825
826 /* co_solve_ilp1() co_solve_ilp2() are implemented in becopyilpX.c */
827
828 int co_gs_is_optimizable(copy_opt_t *co, ir_node *irn) {
829         affinity_node_t new_node, *n;
830
831         ASSERT_GS_AVAIL(co);
832
833         new_node.irn = irn;
834         n = set_find(co->nodes, &new_node, sizeof(new_node), hash_irn(new_node.irn));
835         if (n) {
836                 return (n->degree > 0);
837         } else
838                 return 0;
839 }
840
841 static int co_dump_appel_disjoint_constraints(const copy_opt_t *co, ir_node *a, ir_node *b)
842 {
843         ir_node *nodes[]  = { a, b };
844         bitset_t *constr[] = { NULL, NULL };
845         int j;
846
847         constr[0] = bitset_alloca(co->cls->n_regs);
848         constr[1] = bitset_alloca(co->cls->n_regs);
849
850         for (j = 0; j < 2; ++j) {
851                 const arch_register_req_t *req = arch_get_register_req_out(nodes[j]);
852                 if(arch_register_req_is(req, limited))
853                         rbitset_copy_to_bitset(req->limited, constr[j]);
854                 else
855                         bitset_set_all(constr[j]);
856
857         }
858
859         return !bitset_intersect(constr[0], constr[1]);
860 }
861
862 void co_dump_appel_graph(const copy_opt_t *co, FILE *f)
863 {
864         be_ifg_t *ifg       = co->cenv->ifg;
865         int      *color_map = ALLOCAN(int, co->cls->n_regs);
866         int      *node_map  = XMALLOCN(int, get_irg_last_idx(co->irg) + 1);
867
868         ir_node *irn;
869         void *it, *nit;
870         int n, n_regs;
871         unsigned i;
872
873         n_regs = 0;
874         for(i = 0; i < co->cls->n_regs; ++i) {
875                 const arch_register_t *reg = &co->cls->regs[i];
876                 color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_regs++;
877         }
878
879         /*
880          * n contains the first node number.
881          * the values below n are the pre-colored register nodes
882          */
883
884         it  = be_ifg_nodes_iter_alloca(ifg);
885         nit = be_ifg_neighbours_iter_alloca(ifg);
886
887         n = n_regs;
888         be_ifg_foreach_node(ifg, it, irn) {
889                 if (!arch_irn_is(irn, ignore))
890                         node_map[get_irn_idx(irn)] = n++;
891         }
892
893         fprintf(f, "%d %d\n", n, n_regs);
894
895         be_ifg_foreach_node(ifg, it, irn) {
896                 if (!arch_irn_is(irn, ignore)) {
897                         int idx                        = node_map[get_irn_idx(irn)];
898                         affinity_node_t           *a   = get_affinity_info(co, irn);
899                         const arch_register_req_t *req = arch_get_register_req_out(irn);
900                         ir_node                   *adj;
901
902                         if(arch_register_req_is(req, limited)) {
903                                 for(i = 0; i < co->cls->n_regs; ++i) {
904                                         if(!rbitset_is_set(req->limited, i) && color_map[i] >= 0)
905                                                 fprintf(f, "%d %d -1\n", color_map[i], idx);
906                                 }
907                         }
908
909                         be_ifg_foreach_neighbour(ifg, nit, irn, adj) {
910                                 if (!arch_irn_is(adj, ignore) &&
911                                                 !co_dump_appel_disjoint_constraints(co, irn, adj)) {
912                                         int adj_idx = node_map[get_irn_idx(adj)];
913                                         if(idx < adj_idx)
914                                                 fprintf(f, "%d %d -1\n", idx, adj_idx);
915                                 }
916                         }
917
918                         if(a) {
919                                 neighb_t *n;
920
921                                 co_gs_foreach_neighb(a, n) {
922                                         if (!arch_irn_is(n->irn, ignore)) {
923                                                 int n_idx = node_map[get_irn_idx(n->irn)];
924                                                 if(idx < n_idx)
925                                                         fprintf(f, "%d %d %d\n", idx, n_idx, (int) n->costs);
926                                         }
927                                 }
928                         }
929                 }
930         }
931
932         xfree(node_map);
933 }
934
935 /*
936          ___ _____ ____   ____   ___ _____   ____                        _
937         |_ _|  ___/ ___| |  _ \ / _ \_   _| |  _ \ _   _ _ __ ___  _ __ (_)_ __   __ _
938          | || |_ | |  _  | | | | | | || |   | | | | | | | '_ ` _ \| '_ \| | '_ \ / _` |
939          | ||  _|| |_| | | |_| | |_| || |   | |_| | |_| | | | | | | |_) | | | | | (_| |
940         |___|_|   \____| |____/ \___/ |_|   |____/ \__,_|_| |_| |_| .__/|_|_| |_|\__, |
941                                                                   |_|            |___/
942 */
943
944 static const char *get_dot_color_name(size_t col)
945 {
946         static const char *names[] = {
947                 "blue",
948                 "red",
949                 "green",
950                 "yellow",
951                 "cyan",
952                 "magenta",
953                 "orange",
954                 "chocolate",
955                 "beige",
956                 "navy",
957                 "darkgreen",
958                 "darkred",
959                 "lightPink",
960                 "chartreuse",
961                 "lightskyblue",
962                 "linen",
963                 "pink",
964                 "lightslateblue",
965                 "mintcream",
966                 "red",
967                 "darkolivegreen",
968                 "mediumblue",
969                 "mistyrose",
970                 "salmon",
971                 "darkseagreen",
972                 "mediumslateblue"
973                 "moccasin",
974                 "tomato",
975                 "forestgreen",
976                 "darkturquoise",
977                 "palevioletred"
978         };
979
980         return col < sizeof(names)/sizeof(names[0]) ? names[col] : "white";
981 }
982
983 typedef struct _co_ifg_dump_t {
984         const copy_opt_t *co;
985         unsigned flags;
986 } co_ifg_dump_t;
987
988 static void ifg_dump_graph_attr(FILE *f, void *self)
989 {
990         (void) self;
991         fprintf(f, "overlap=scale");
992 }
993
994 static int ifg_is_dump_node(void *self, ir_node *irn)
995 {
996         (void)self;
997         return !arch_irn_is(irn, ignore);
998 }
999
1000 static void ifg_dump_node_attr(FILE *f, void *self, ir_node *irn)
1001 {
1002         co_ifg_dump_t             *env     = self;
1003         const arch_register_t     *reg     = arch_get_irn_register(irn);
1004         const arch_register_req_t *req     = arch_get_register_req_out(irn);
1005         int                        limited = arch_register_req_is(req, limited);
1006
1007         if(env->flags & CO_IFG_DUMP_LABELS) {
1008                 ir_fprintf(f, "label=\"%+F", irn);
1009
1010                 if((env->flags & CO_IFG_DUMP_CONSTR) && limited) {
1011                         bitset_t *bs = bitset_alloca(env->co->cls->n_regs);
1012                         rbitset_copy_to_bitset(req->limited, bs);
1013                         ir_fprintf(f, "\\n%B", bs);
1014                 }
1015                 ir_fprintf(f, "\" ");
1016         } else {
1017                 fprintf(f, "label=\"\" shape=point " );
1018         }
1019
1020         if(env->flags & CO_IFG_DUMP_SHAPE)
1021                 fprintf(f, "shape=%s ", limited ? "diamond" : "ellipse");
1022
1023         if(env->flags & CO_IFG_DUMP_COLORS)
1024                 fprintf(f, "style=filled color=%s ", get_dot_color_name(reg->index));
1025 }
1026
1027 static void ifg_dump_at_end(FILE *file, void *self)
1028 {
1029         co_ifg_dump_t *env = self;
1030         affinity_node_t *a;
1031
1032         co_gs_foreach_aff_node(env->co, a) {
1033                 const arch_register_t *ar = arch_get_irn_register(a->irn);
1034                 unsigned aidx = get_irn_idx(a->irn);
1035                 neighb_t *n;
1036
1037                 co_gs_foreach_neighb(a, n) {
1038                         const arch_register_t *nr = arch_get_irn_register(n->irn);
1039                         unsigned nidx = get_irn_idx(n->irn);
1040
1041                         if(aidx < nidx) {
1042                                 const char *color = nr == ar ? "blue" : "red";
1043                                 fprintf(file, "\tn%d -- n%d [weight=0.01 ", aidx, nidx);
1044                                 if(env->flags & CO_IFG_DUMP_LABELS)
1045                                         fprintf(file, "label=\"%d\" ", n->costs);
1046                                 if(env->flags & CO_IFG_DUMP_COLORS)
1047                                         fprintf(file, "color=%s ", color);
1048                                 else
1049                                         fprintf(file, "style=dotted");
1050                                 fprintf(file, "];\n");
1051                         }
1052                 }
1053         }
1054 }
1055
1056
1057 static be_ifg_dump_dot_cb_t ifg_dot_cb = {
1058         ifg_is_dump_node,
1059         ifg_dump_graph_attr,
1060         ifg_dump_node_attr,
1061         NULL,
1062         NULL,
1063         ifg_dump_at_end
1064 };
1065
1066
1067
1068 void co_dump_ifg_dot(const copy_opt_t *co, FILE *f, unsigned flags)
1069 {
1070         co_ifg_dump_t cod;
1071
1072         cod.co    = co;
1073         cod.flags = flags;
1074         be_ifg_dump_dot(co->cenv->ifg, co->irg, f, &ifg_dot_cb, &cod);
1075 }
1076
1077
1078 void co_solve_park_moon(copy_opt_t *opt)
1079 {
1080         (void) opt;
1081 }
1082
1083 static int void_algo(copy_opt_t *co)
1084 {
1085         (void) co;
1086         return 0;
1087 }
1088
1089 /*
1090                 _    _                  _ _   _
1091            / \  | | __ _  ___  _ __(_) |_| |__  _ __ ___  ___
1092           / _ \ | |/ _` |/ _ \| '__| | __| '_ \| '_ ` _ \/ __|
1093          / ___ \| | (_| | (_) | |  | | |_| | | | | | | | \__ \
1094         /_/   \_\_|\__, |\___/|_|  |_|\__|_| |_|_| |_| |_|___/
1095                            |___/
1096 */
1097
1098 typedef struct {
1099         co_algo_t  *algo;
1100         const char *name;
1101         int        can_improve_existing;
1102 } co_algo_info_t;
1103
1104 static co_algo_info_t algos[] = {
1105         { void_algo,               "none",  0 },
1106         { co_solve_heuristic,      "heur1", 0 },
1107         { co_solve_heuristic_new,  "heur2", 0 },
1108 #ifdef WITH_JVM
1109         { co_solve_heuristic_java, "heur3", 0 },
1110 #else
1111         { NULL,                    "heur3", 0 },
1112 #endif
1113         { co_solve_heuristic_mst,  "heur4", 0 },
1114 #ifdef WITH_ILP
1115         { co_solve_ilp2,           "ilp",   1 },
1116 #else
1117         { NULL,                    "ilp",   1 },
1118 #endif
1119         { NULL,                    "",      0 }
1120 };
1121
1122 /*
1123     __  __       _         ____       _
1124    |  \/  | __ _(_)_ __   |  _ \ _ __(_)_   _____ _ __
1125    | |\/| |/ _` | | '_ \  | | | | '__| \ \ / / _ \ '__|
1126    | |  | | (_| | | | | | | |_| | |  | |\ V /  __/ |
1127    |_|  |_|\__,_|_|_| |_| |____/|_|  |_| \_/ \___|_|
1128
1129 */
1130
1131 static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char *suffix)
1132 {
1133         FILE *result;
1134         char buf[1024];
1135         size_t i, n;
1136         char *tu_name;
1137
1138         n = strlen(env->birg->main_env->cup_name);
1139         tu_name = XMALLOCN(char, n + 1);
1140         strcpy(tu_name, env->birg->main_env->cup_name);
1141         for (i = 0; i < n; ++i)
1142                 if (tu_name[i] == '.')
1143                         tu_name[i] = '_';
1144
1145
1146         ir_snprintf(buf, sizeof(buf), "%s%s_%F_%s%s", prefix, tu_name, env->irg, env->cls->name, suffix);
1147         xfree(tu_name);
1148         result = fopen(buf, "wt");
1149         if(result == NULL) {
1150                 panic("Couldn't open '%s' for writing.", buf);
1151         }
1152
1153         return result;
1154 }
1155
1156 void co_driver(be_chordal_env_t *cenv)
1157 {
1158         ir_timer_t          *timer = ir_timer_register("firm.be.copyopt", "runtime");
1159         co_complete_stats_t before, after;
1160         copy_opt_t          *co;
1161         co_algo_t           *algo_func;
1162         int                 was_optimal = 0;
1163
1164         if (algo >= CO_ALGO_LAST)
1165                 return;
1166
1167         be_liveness_assure_chk(be_get_birg_liveness(cenv->birg));
1168
1169         co = new_copy_opt(cenv, cost_func);
1170         co_build_ou_structure(co);
1171         co_build_graph_structure(co);
1172
1173         co_complete_stats(co, &before);
1174
1175         be_stat_ev_ull("co_aff_nodes",    before.aff_nodes);
1176         be_stat_ev_ull("co_aff_edges",    before.aff_edges);
1177         be_stat_ev_ull("co_max_costs",    before.max_costs);
1178         be_stat_ev_ull("co_inevit_costs", before.inevit_costs);
1179         be_stat_ev_ull("co_aff_int",      before.aff_int);
1180
1181         be_stat_ev_ull("co_init_costs",   before.costs);
1182         be_stat_ev_ull("co_init_unsat",   before.unsatisfied_edges);
1183
1184         if (dump_flags & DUMP_BEFORE) {
1185                 FILE *f = my_open(cenv, "", "-before.dot");
1186                 co_dump_ifg_dot(co, f, style_flags);
1187                 fclose(f);
1188         }
1189
1190         /* if the algo can improve results, provide an initial solution with heur3 */
1191         if (improve && algos[algo].can_improve_existing) {
1192                 co_complete_stats_t stats;
1193
1194                 /* produce a heuristic solution */
1195 #ifdef WITH_JVM
1196                 co_solve_heuristic_java(co);
1197 #else
1198                 co_solve_heuristic(co);
1199 #endif /* WITH_JVM */
1200
1201                 /* do the stats and provide the current costs */
1202                 co_complete_stats(co, &stats);
1203                 be_stat_ev_ull("co_prepare_costs", stats.costs);
1204         }
1205
1206 #ifdef WITH_JVM
1207         /* start the JVM here so that it does not tamper the timing. */
1208         if (algo == CO_ALGO_HEUR3)
1209                 be_java_coal_start_jvm();
1210 #endif /* WITH_JVM */
1211
1212         algo_func = algos[algo].algo;
1213
1214         /* perform actual copy minimization */
1215         ir_timer_reset_and_start(timer);
1216         was_optimal = algo_func(co);
1217         ir_timer_stop(timer);
1218
1219         be_stat_ev("co_time", ir_timer_elapsed_msec(timer));
1220         be_stat_ev_ull("co_optimal", was_optimal);
1221
1222         if (dump_flags & DUMP_AFTER) {
1223                 FILE *f = my_open(cenv, "", "-after.dot");
1224                 co_dump_ifg_dot(co, f, style_flags);
1225                 fclose(f);
1226         }
1227
1228         co_complete_stats(co, &after);
1229
1230         if (do_stats) {
1231                 ulong64 optimizable_costs = after.max_costs - after.inevit_costs;
1232                 ulong64 evitable          = after.costs     - after.inevit_costs;
1233
1234                 ir_printf("%30F ", cenv->irg);
1235                 printf("%10s %10" ULL_FMT "%10" ULL_FMT "%10" ULL_FMT, cenv->cls->name, after.max_costs, before.costs, after.inevit_costs);
1236
1237                 if(optimizable_costs > 0)
1238                         printf("%10" ULL_FMT " %5.2f\n", after.costs, (evitable * 100.0) / optimizable_costs);
1239                 else
1240                         printf("%10" ULL_FMT " %5s\n", after.costs, "-");
1241         }
1242
1243         /* Dump the interference graph in Appel's format. */
1244         if (dump_flags & DUMP_APPEL) {
1245                 FILE *f = my_open(cenv, "", ".apl");
1246                 fprintf(f, "# %lld %lld\n", after.costs, after.unsatisfied_edges);
1247                 co_dump_appel_graph(co, f);
1248                 fclose(f);
1249         }
1250
1251         be_stat_ev_ull("co_after_costs", after.costs);
1252         be_stat_ev_ull("co_after_unsat", after.unsatisfied_edges);
1253
1254         co_free_graph_structure(co);
1255         co_free_ou_structure(co);
1256         free_copy_opt(co);
1257 }