Reuse already fetched values.
[libfirm] / ir / be / becopyopt.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Copy minimization driver.
23  * @author      Daniel Grund
24  * @date        12.04.2005
25  * @version     $Id$
26  *
27  * Main file for the optimization reducing the copies needed for:
28  * - Phi coalescing
29  * - Register-constrained nodes
30  * - Two-address code instructions
31  */
32 #include "config.h"
33
34 #include "execfreq.h"
35 #include "xmalloc.h"
36 #include "debug.h"
37 #include "pmap.h"
38 #include "raw_bitset.h"
39 #include "irnode.h"
40 #include "irgraph.h"
41 #include "irgwalk.h"
42 #include "irprog.h"
43 #include "irloop_t.h"
44 #include "iredges_t.h"
45 #include "irbitset.h"
46 #include "irphase_t.h"
47 #include "irprintf_t.h"
48
49 #include "bemodule.h"
50 #include "bearch.h"
51 #include "benode_t.h"
52 #include "beutil.h"
53 #include "beifg_t.h"
54 #include "beintlive_t.h"
55 #include "becopyopt_t.h"
56 #include "becopystat.h"
57 #include "belive_t.h"
58 #include "beinsn_t.h"
59 #include "besched.h"
60 #include "bestatevent.h"
61 #include "beirg.h"
62 #include "error.h"
63
64 #include "lc_opts.h"
65 #include "lc_opts_enum.h"
66
67 #define DUMP_BEFORE 1
68 #define DUMP_AFTER  2
69 #define DUMP_APPEL  4
70 #define DUMP_ALL    2 * DUMP_APPEL - 1
71
72 #define COST_FUNC_FREQ     1
73 #define COST_FUNC_LOOP     2
74 #define COST_FUNC_ALL_ONE  3
75
76 static unsigned   dump_flags  = 0;
77 static unsigned   style_flags = 0;
78 static unsigned   do_stats    = 0;
79 static cost_fct_t cost_func   = co_get_costs_exec_freq;
80 static unsigned   algo        = CO_ALGO_HEUR4;
81 static int        improve     = 1;
82
83 static const lc_opt_enum_mask_items_t dump_items[] = {
84         { "before",  DUMP_BEFORE },
85         { "after",   DUMP_AFTER  },
86         { "appel",   DUMP_APPEL  },
87         { "all",     DUMP_ALL    },
88         { NULL,      0 }
89 };
90
91 static const lc_opt_enum_mask_items_t style_items[] = {
92         { "color",   CO_IFG_DUMP_COLORS },
93         { "labels",  CO_IFG_DUMP_LABELS },
94         { "constr",  CO_IFG_DUMP_CONSTR },
95         { "shape",   CO_IFG_DUMP_SHAPE  },
96         { "full",    2 * CO_IFG_DUMP_SHAPE - 1 },
97         { NULL,      0 }
98 };
99
100 static const lc_opt_enum_mask_items_t algo_items[] = {
101         { "none",   CO_ALGO_NONE  },
102         { "heur",   CO_ALGO_HEUR  },
103         { "heur2",  CO_ALGO_HEUR2 },
104         { "heur3",  CO_ALGO_HEUR3 },
105         { "heur4",  CO_ALGO_HEUR4 },
106         { "ilp",    CO_ALGO_ILP   },
107         { NULL,     0 }
108 };
109
110 typedef int (*opt_funcptr)(void);
111
112 static const lc_opt_enum_func_ptr_items_t cost_func_items[] = {
113         { "freq",   (opt_funcptr) co_get_costs_exec_freq },
114         { "loop",   (opt_funcptr) co_get_costs_loop_depth },
115         { "one",    (opt_funcptr) co_get_costs_all_one },
116         { NULL,     NULL }
117 };
118
119 static lc_opt_enum_mask_var_t dump_var = {
120         &dump_flags, dump_items
121 };
122
123 static lc_opt_enum_mask_var_t style_var = {
124         &style_flags, style_items
125 };
126
127 static lc_opt_enum_mask_var_t algo_var = {
128         &algo, algo_items
129 };
130
131 static lc_opt_enum_func_ptr_var_t cost_func_var = {
132         (opt_funcptr*) &cost_func, cost_func_items
133 };
134
135 static const lc_opt_table_entry_t options[] = {
136         LC_OPT_ENT_ENUM_INT      ("algo",    "select copy optimization algo",                           &algo_var),
137         LC_OPT_ENT_ENUM_FUNC_PTR ("cost",    "select a cost function",                                  &cost_func_var),
138         LC_OPT_ENT_ENUM_MASK     ("dump",    "dump ifg before or after copy optimization",              &dump_var),
139         LC_OPT_ENT_ENUM_MASK     ("style",   "dump style for ifg dumping",                              &style_var),
140         LC_OPT_ENT_BOOL          ("stats",   "dump statistics after each optimization",                 &do_stats),
141         LC_OPT_ENT_BOOL          ("improve", "run heur3 before if algo can exploit start solutions",    &improve),
142         LC_OPT_LAST
143 };
144
145 /* Insert additional options registration functions here. */
146 extern void be_co_ilp_register_options(lc_opt_entry_t *grp);
147 extern void be_co2_register_options(lc_opt_entry_t *grp);
148 extern void be_co3_register_options(lc_opt_entry_t *grp);
149
150 void be_init_copycoal(void)
151 {
152         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
153         lc_opt_entry_t *ra_grp = lc_opt_get_grp(be_grp, "ra");
154         lc_opt_entry_t *chordal_grp = lc_opt_get_grp(ra_grp, "chordal");
155         lc_opt_entry_t *co_grp = lc_opt_get_grp(chordal_grp, "co");
156
157         lc_opt_add_table(co_grp, options);
158 }
159
160 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copycoal);
161
162 #undef QUICK_AND_DIRTY_HACK
163
164 static int nodes_interfere(const be_chordal_env_t *env, const ir_node *a, const ir_node *b)
165 {
166         if (env->ifg)
167                 return be_ifg_connected(env->ifg, a, b);
168         else
169                 return be_values_interfere(env->birg->lv, a, b);
170 }
171
172
173 /******************************************************************************
174     _____                           _
175    / ____|                         | |
176   | |  __  ___ _ __   ___ _ __ __ _| |
177   | | |_ |/ _ \ '_ \ / _ \ '__/ _` | |
178   | |__| |  __/ | | |  __/ | | (_| | |
179    \_____|\___|_| |_|\___|_|  \__,_|_|
180
181  ******************************************************************************/
182
183 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
184
185
186 copy_opt_t *new_copy_opt(be_chordal_env_t *chordal_env, cost_fct_t get_costs)
187 {
188         const char *s1, *s2, *s3;
189         int len;
190         copy_opt_t *co;
191
192         FIRM_DBG_REGISTER(dbg, "ir.be.copyopt");
193
194         co = XMALLOCZ(copy_opt_t);
195         co->cenv      = chordal_env;
196         co->irg       = chordal_env->irg;
197         co->cls       = chordal_env->cls;
198         co->get_costs = get_costs;
199
200         s1 = get_irp_name();
201         s2 = get_entity_name(get_irg_entity(co->irg));
202         s3 = chordal_env->cls->name;
203         len = strlen(s1) + strlen(s2) + strlen(s3) + 5;
204         co->name = XMALLOCN(char, len);
205         snprintf(co->name, len, "%s__%s__%s", s1, s2, s3);
206
207         return co;
208 }
209
210 void free_copy_opt(copy_opt_t *co) {
211         xfree(co->name);
212         free(co);
213 }
214
215 /**
216  * Checks if a node is optimizable, viz. has something to do with coalescing
217  * @param irn  The irn to check
218  */
219 static int co_is_optimizable_root(ir_node *irn)
220 {
221         const arch_register_req_t *req;
222         const arch_register_t     *reg;
223
224         if (arch_irn_is_ignore(irn))
225                 return 0;
226
227         reg = arch_get_irn_register(irn);
228         if (arch_register_type_is(reg, ignore))
229                 return 0;
230
231         if (is_Reg_Phi(irn) || is_Perm_Proj(irn))
232                 return 1;
233
234         req = arch_get_register_req_out(irn);
235         if (is_2addr_code(req))
236                 return 1;
237
238         return 0;
239 }
240
241 int co_get_costs_loop_depth(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
242         int cost = 0;
243         ir_loop *loop;
244         ir_node *root_block = get_nodes_block(root);
245         (void) co;
246         (void) arg;
247
248         if (is_Phi(root)) {
249                 /* for phis the copies are placed in the corresponding pred-block */
250                 loop = get_irn_loop(get_Block_cfgpred_block(root_block, pos));
251         } else {
252                 /* a perm places the copy in the same block as it resides */
253                 loop = get_irn_loop(root_block);
254         }
255         if (loop) {
256                 int d = get_loop_depth(loop);
257                 cost = d*d;
258         }
259         return 1+cost;
260 }
261
262 int co_get_costs_exec_freq(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
263         int res;
264         ir_node *root_bl = get_nodes_block(root);
265         ir_node *copy_bl = is_Phi(root) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
266         (void) arg;
267         res = get_block_execfreq_ulong(co->cenv->birg->exec_freq, copy_bl);
268
269         /* don't allow values smaller than one. */
270         return res < 1 ? 1 : res;
271 }
272
273
274 int co_get_costs_all_one(const copy_opt_t *co, ir_node *root, ir_node *arg, int pos) {
275         (void) co;
276         (void) root;
277         (void) arg;
278         (void) pos;
279         return 1;
280 }
281
282 /******************************************************************************
283    ____        _   _    _       _ _          _____ _
284   / __ \      | | | |  | |     (_) |        / ____| |
285  | |  | |_ __ | |_| |  | |_ __  _| |_ ___  | (___ | |_ ___  _ __ __ _  __ _  ___
286  | |  | | '_ \| __| |  | | '_ \| | __/ __|  \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
287  | |__| | |_) | |_| |__| | | | | | |_\__ \  ____) | || (_) | | | (_| | (_| |  __/
288   \____/| .__/ \__|\____/|_| |_|_|\__|___/ |_____/ \__\___/|_|  \__,_|\__, |\___|
289         | |                                                            __/ |
290         |_|                                                           |___/
291  ******************************************************************************/
292
293 /**
294  * Determines a maximum weighted independent set with respect to
295  * the interference and conflict edges of all nodes in a qnode.
296  */
297 static int ou_max_ind_set_costs(unit_t *ou) {
298         be_chordal_env_t *chordal_env = ou->co->cenv;
299         ir_node **safe, **unsafe;
300         int i, o, safe_count, safe_costs, unsafe_count, *unsafe_costs;
301         bitset_t *curr;
302         bitset_pos_t pos;
303         int max, curr_weight, best_weight = 0;
304
305         /* assign the nodes into two groups.
306          * safe: node has no interference, hence it is in every max stable set.
307          * unsafe: node has an interference
308          */
309         safe         = ALLOCAN(ir_node*, ou->node_count - 1);
310         safe_costs   = 0;
311         safe_count   = 0;
312         unsafe       = ALLOCAN(ir_node*, ou->node_count - 1);
313         unsafe_costs = ALLOCAN(int,      ou->node_count - 1);
314         unsafe_count = 0;
315         for(i=1; i<ou->node_count; ++i) {
316                 int is_safe = 1;
317                 for(o=1; o<ou->node_count; ++o) {
318                         if (i==o)
319                                 continue;
320                         if (nodes_interfere(chordal_env, ou->nodes[i], ou->nodes[o])) {
321                                 unsafe_costs[unsafe_count] = ou->costs[i];
322                                 unsafe[unsafe_count] = ou->nodes[i];
323                                 ++unsafe_count;
324                                 is_safe = 0;
325                                 break;
326                         }
327                 }
328                 if (is_safe) {
329                         safe_costs += ou->costs[i];
330                         safe[safe_count++] = ou->nodes[i];
331                 }
332         }
333
334
335         /* now compute the best set out of the unsafe nodes*/
336         if (unsafe_count > MIS_HEUR_TRIGGER) {
337                 bitset_t *best = bitset_alloca(unsafe_count);
338                 /* Heuristik: Greedy trial and error form index 0 to unsafe_count-1 */
339                 for (i=0; i<unsafe_count; ++i) {
340                         bitset_set(best, i);
341                         /* check if it is a stable set */
342                         for (o=bitset_next_set(best, 0); o!=-1 && o<i; o=bitset_next_set(best, o+1))
343                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o])) {
344                                         bitset_clear(best, i); /* clear the bit and try next one */
345                                         break;
346                                 }
347                 }
348                 /* compute the weight */
349                 bitset_foreach(best, pos)
350                         best_weight += unsafe_costs[pos];
351         } else {
352                 /* Exact Algorithm: Brute force */
353                 curr = bitset_alloca(unsafe_count);
354                 bitset_set_all(curr);
355                 while ((max = bitset_popcnt(curr)) != 0) {
356                         /* check if curr is a stable set */
357                         for (i=bitset_next_set(curr, 0); i!=-1; i=bitset_next_set(curr, i+1))
358                                 for (o=bitset_next_set(curr, i+1); o!=-1; o=bitset_next_set(curr, o+1)) /* !!!!! difference to qnode_max_ind_set(): NOT (curr, i) */
359                                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o]))
360                                                         goto no_stable_set;
361
362                         /* if we arrive here, we have a stable set */
363                         /* compute the weigth of the stable set*/
364                         curr_weight = 0;
365                         bitset_foreach(curr, pos)
366                                 curr_weight += unsafe_costs[pos];
367
368                         /* any better ? */
369                         if (curr_weight > best_weight) {
370                                 best_weight = curr_weight;
371                         }
372
373         no_stable_set:
374                         bitset_minus1(curr);
375                 }
376         }
377
378         return safe_costs+best_weight;
379 }
380
381 static void co_collect_units(ir_node *irn, void *env)
382 {
383         const arch_register_req_t *req = arch_get_register_req_out(irn);
384         copy_opt_t                *co  = env;
385         unit_t *unit;
386
387         if (req->cls != co->cls)
388                 return;
389         if (!co_is_optimizable_root(irn))
390                 return;
391
392         /* Init a new unit */
393         unit = XMALLOCZ(unit_t);
394         unit->co = co;
395         unit->node_count = 1;
396         INIT_LIST_HEAD(&unit->queue);
397
398         /* Phi with some/all of its arguments */
399         if (is_Reg_Phi(irn)) {
400                 int i, arity;
401
402                 /* init */
403                 arity = get_irn_arity(irn);
404                 unit->nodes = XMALLOCN(ir_node*, arity + 1);
405                 unit->costs = XMALLOCN(int,      arity + 1);
406                 unit->nodes[0] = irn;
407
408                 /* fill */
409                 for (i=0; i<arity; ++i) {
410                         int o, arg_pos;
411                         ir_node *arg = get_irn_n(irn, i);
412
413                         assert(arch_get_irn_reg_class_out(arg) == co->cls && "Argument not in same register class.");
414                         if (arg == irn)
415                                 continue;
416                         if (nodes_interfere(co->cenv, irn, arg)) {
417                                 unit->inevitable_costs += co->get_costs(co, irn, arg, i);
418                                 continue;
419                         }
420
421                         /* Else insert the argument of the phi to the members of this ou */
422                         DBG((dbg, LEVEL_1, "\t   Member: %+F\n", arg));
423
424                         if (arch_irn_is_ignore(arg))
425                                 continue;
426
427                         /* Check if arg has occurred at a prior position in the arg/list */
428                         arg_pos = 0;
429                         for (o=1; o<unit->node_count; ++o) {
430                                 if (unit->nodes[o] == arg) {
431                                         arg_pos = o;
432                                         break;
433                                 }
434                         }
435
436                         if (!arg_pos) { /* a new argument */
437                                 /* insert node, set costs */
438                                 unit->nodes[unit->node_count] = arg;
439                                 unit->costs[unit->node_count] = co->get_costs(co, irn, arg, i);
440                                 unit->node_count++;
441                         } else { /* arg has occurred before in same phi */
442                                 /* increase costs for existing arg */
443                                 unit->costs[arg_pos] += co->get_costs(co, irn, arg, i);
444                         }
445                 }
446                 unit->nodes = XREALLOC(unit->nodes, ir_node*, unit->node_count);
447                 unit->costs = XREALLOC(unit->costs, int,      unit->node_count);
448         } else if (is_Perm_Proj(irn)) {
449                 /* Proj of a perm with corresponding arg */
450                 assert(!nodes_interfere(co->cenv, irn, get_Perm_src(irn)));
451                 unit->nodes = XMALLOCN(ir_node*, 2);
452                 unit->costs = XMALLOCN(int,      2);
453                 unit->node_count = 2;
454                 unit->nodes[0] = irn;
455                 unit->nodes[1] = get_Perm_src(irn);
456                 unit->costs[1] = co->get_costs(co, irn, unit->nodes[1], -1);
457         } else {
458                 /* Src == Tgt of a 2-addr-code instruction */
459                 if (is_2addr_code(req)) {
460                         const unsigned other = req->other_same;
461                         int            count = 0;
462                         int            i;
463
464                         for (i = 0; (1U << i) <= other; ++i) {
465                                 if (other & (1U << i)) {
466                                         ir_node *o  = get_irn_n(skip_Proj(irn), i);
467                                         if (arch_irn_is_ignore(o))
468                                                 continue;
469                                         if (nodes_interfere(co->cenv, irn, o))
470                                                 continue;
471                                         ++count;
472                                 }
473                         }
474
475                         if (count != 0) {
476                                 int k = 0;
477                                 ++count;
478                                 unit->nodes = XMALLOCN(ir_node*, count);
479                                 unit->costs = XMALLOCN(int,      count);
480                                 unit->node_count = count;
481                                 unit->nodes[k++] = irn;
482
483                                 for (i = 0; 1U << i <= other; ++i) {
484                                         if (other & (1U << i)) {
485                                                 ir_node *o  = get_irn_n(skip_Proj(irn), i);
486                                                 if (!arch_irn_is_ignore(o) &&
487                                                                 !nodes_interfere(co->cenv, irn, o)) {
488                                                         unit->nodes[k] = o;
489                                                         unit->costs[k] = co->get_costs(co, irn, o, -1);
490                                                         ++k;
491                                                 }
492                                         }
493                                 }
494                         }
495                 } else {
496                         assert(0 && "This is not an optimizable node!");
497                 }
498         }
499
500         /* Insert the new unit at a position according to its costs */
501         if (unit->node_count > 1) {
502                 int i;
503                 struct list_head *tmp;
504
505                 /* Determine the maximum costs this unit can cause: all_nodes_cost */
506                 for(i=1; i<unit->node_count; ++i) {
507                         unit->sort_key = MAX(unit->sort_key, unit->costs[i]);
508                         unit->all_nodes_costs += unit->costs[i];
509                 }
510
511                 /* Determine the minimal costs this unit will cause: min_nodes_costs */
512                 unit->min_nodes_costs += unit->all_nodes_costs - ou_max_ind_set_costs(unit);
513                 /* Insert the new ou according to its sort_key */
514                 tmp = &co->units;
515                 while (tmp->next != &co->units && list_entry_units(tmp->next)->sort_key > unit->sort_key)
516                         tmp = tmp->next;
517                 list_add(&unit->units, tmp);
518         } else {
519                 free(unit);
520         }
521 }
522
523 #ifdef QUICK_AND_DIRTY_HACK
524
525 static int compare_ous(const void *k1, const void *k2) {
526         const unit_t *u1 = *((const unit_t **) k1);
527         const unit_t *u2 = *((const unit_t **) k2);
528         int i, o, u1_has_constr, u2_has_constr;
529         arch_register_req_t req;
530
531         /* Units with constraints come first */
532         u1_has_constr = 0;
533         for (i=0; i<u1->node_count; ++i) {
534                 arch_get_register_req_out(&req, u1->nodes[i]);
535                 if (arch_register_req_is(&req, limited)) {
536                         u1_has_constr = 1;
537                         break;
538                 }
539         }
540
541         u2_has_constr = 0;
542         for (i=0; i<u2->node_count; ++i) {
543                 arch_get_register_req_out(&req, u2->nodes[i]);
544                 if (arch_register_req_is(&req, limited)) {
545                         u2_has_constr = 1;
546                         break;
547                 }
548         }
549
550         if (u1_has_constr != u2_has_constr)
551                 return u2_has_constr - u1_has_constr;
552
553         /* Now check, whether the two units are connected */
554 #if 0
555         for (i=0; i<u1->node_count; ++i)
556                 for (o=0; o<u2->node_count; ++o)
557                         if (u1->nodes[i] == u2->nodes[o])
558                                 return 0;
559 #endif
560
561         /* After all, the sort key decides. Greater keys come first. */
562         return u2->sort_key - u1->sort_key;
563
564 }
565
566 /**
567  * Sort the ou's according to constraints and their sort_key
568  */
569 static void co_sort_units(copy_opt_t *co) {
570         int i, count = 0, costs;
571         unit_t *ou, **ous;
572
573         /* get the number of ous, remove them form the list and fill the array */
574         list_for_each_entry(unit_t, ou, &co->units, units)
575                 count++;
576         ous = ALLOCAN(unit_t, count);
577
578         costs = co_get_max_copy_costs(co);
579
580         i = 0;
581         list_for_each_entry(unit_t, ou, &co->units, units)
582                 ous[i++] = ou;
583
584         INIT_LIST_HEAD(&co->units);
585
586         assert(count == i && list_empty(&co->units));
587
588         for (i=0; i<count; ++i)
589                 ir_printf("%+F\n", ous[i]->nodes[0]);
590
591         qsort(ous, count, sizeof(*ous), compare_ous);
592
593         ir_printf("\n\n");
594         for (i=0; i<count; ++i)
595                 ir_printf("%+F\n", ous[i]->nodes[0]);
596
597         /* reinsert into list in correct order */
598         for (i=0; i<count; ++i)
599                 list_add_tail(&ous[i]->units, &co->units);
600
601         assert(costs == co_get_max_copy_costs(co));
602 }
603 #endif
604
605 void co_build_ou_structure(copy_opt_t *co) {
606         DBG((dbg, LEVEL_1, "\tCollecting optimization units\n"));
607         INIT_LIST_HEAD(&co->units);
608         irg_walk_graph(co->irg, co_collect_units, NULL, co);
609 #ifdef QUICK_AND_DIRTY_HACK
610         co_sort_units(co);
611 #endif
612 }
613
614 void co_free_ou_structure(copy_opt_t *co) {
615         unit_t *curr, *tmp;
616         ASSERT_OU_AVAIL(co);
617         list_for_each_entry_safe(unit_t, curr, tmp, &co->units, units) {
618                 xfree(curr->nodes);
619                 xfree(curr->costs);
620                 xfree(curr);
621         }
622         co->units.next = NULL;
623 }
624
625 /* co_solve_heuristic() is implemented in becopyheur.c */
626
627 int co_get_max_copy_costs(const copy_opt_t *co) {
628         int i, res = 0;
629         unit_t *curr;
630
631         ASSERT_OU_AVAIL(co);
632
633         list_for_each_entry(unit_t, curr, &co->units, units) {
634                 res += curr->inevitable_costs;
635                 for (i=1; i<curr->node_count; ++i)
636                         res += curr->costs[i];
637         }
638         return res;
639 }
640
641 int co_get_inevit_copy_costs(const copy_opt_t *co) {
642         int res = 0;
643         unit_t *curr;
644
645         ASSERT_OU_AVAIL(co);
646
647         list_for_each_entry(unit_t, curr, &co->units, units)
648                 res += curr->inevitable_costs;
649         return res;
650 }
651
652 int co_get_copy_costs(const copy_opt_t *co) {
653         int i, res = 0;
654         unit_t *curr;
655
656         ASSERT_OU_AVAIL(co);
657
658         list_for_each_entry(unit_t, curr, &co->units, units) {
659                 int root_col = get_irn_col(curr->nodes[0]);
660                 DBG((dbg, LEVEL_1, "  %3d costs for root %+F color %d\n", curr->inevitable_costs, curr->nodes[0], root_col));
661                 res += curr->inevitable_costs;
662                 for (i=1; i<curr->node_count; ++i) {
663                         int arg_col = get_irn_col(curr->nodes[i]);
664                         if (root_col != arg_col) {
665                                 DBG((dbg, LEVEL_1, "  %3d for arg %+F color %d\n", curr->costs[i], curr->nodes[i], arg_col));
666                                 res += curr->costs[i];
667                         }
668                 }
669         }
670         return res;
671 }
672
673 int co_get_lower_bound(const copy_opt_t *co) {
674         int res = 0;
675         unit_t *curr;
676
677         ASSERT_OU_AVAIL(co);
678
679         list_for_each_entry(unit_t, curr, &co->units, units)
680                 res += curr->inevitable_costs + curr->min_nodes_costs;
681         return res;
682 }
683
684 void co_complete_stats(const copy_opt_t *co, co_complete_stats_t *stat)
685 {
686         bitset_t *seen = bitset_irg_malloc(co->irg);
687         affinity_node_t *an;
688
689         memset(stat, 0, sizeof(stat[0]));
690
691         /* count affinity edges. */
692         co_gs_foreach_aff_node(co, an) {
693                 neighb_t *neigh;
694                 stat->aff_nodes += 1;
695                 bitset_add_irn(seen, an->irn);
696                 co_gs_foreach_neighb(an, neigh) {
697                         if(!bitset_contains_irn(seen, neigh->irn)) {
698                                 stat->aff_edges += 1;
699                                 stat->max_costs += neigh->costs;
700
701                                 if (get_irn_col(an->irn) != get_irn_col(neigh->irn)) {
702                                         stat->costs += neigh->costs;
703                                         stat->unsatisfied_edges += 1;
704                                 }
705
706                                 if(nodes_interfere(co->cenv, an->irn, neigh->irn)) {
707                                         stat->aff_int += 1;
708                                         stat->inevit_costs += neigh->costs;
709                                 }
710
711                         }
712                 }
713         }
714
715         bitset_free(seen);
716 }
717
718 /******************************************************************************
719    _____                 _        _____ _
720   / ____|               | |      / ____| |
721  | |  __ _ __ __ _ _ __ | |__   | (___ | |_ ___  _ __ __ _  __ _  ___
722  | | |_ | '__/ _` | '_ \| '_ \   \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
723  | |__| | | | (_| | |_) | | | |  ____) | || (_) | | | (_| | (_| |  __/
724   \_____|_|  \__,_| .__/|_| |_| |_____/ \__\___/|_|  \__,_|\__, |\___|
725                   | |                                       __/ |
726                   |_|                                      |___/
727  ******************************************************************************/
728
729 static int compare_affinity_node_t(const void *k1, const void *k2, size_t size) {
730         const affinity_node_t *n1 = k1;
731         const affinity_node_t *n2 = k2;
732         (void) size;
733
734         return (n1->irn != n2->irn);
735 }
736
737 static void add_edge(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
738         affinity_node_t new_node, *node;
739         neighb_t        *nbr;
740         int             allocnew = 1;
741
742         new_node.irn        = n1;
743         new_node.degree     = 0;
744         new_node.neighbours = NULL;
745         node = set_insert(co->nodes, &new_node, sizeof(new_node), hash_irn(new_node.irn));
746
747         for (nbr = node->neighbours; nbr; nbr = nbr->next)
748                 if (nbr->irn == n2) {
749                         allocnew = 0;
750                         break;
751                 }
752
753         /* if we did not find n2 in n1's neighbourhood insert it */
754         if (allocnew) {
755                 nbr        = obstack_alloc(&co->obst, sizeof(*nbr));
756                 nbr->irn   = n2;
757                 nbr->costs = 0;
758                 nbr->next  = node->neighbours;
759
760                 node->neighbours = nbr;
761                 node->degree++;
762         }
763
764         /* now nbr points to n1's neighbour-entry of n2 */
765         nbr->costs += costs;
766 }
767
768 static inline void add_edges(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
769         if (! be_ifg_connected(co->cenv->ifg, n1, n2)) {
770                 add_edge(co, n1, n2, costs);
771                 add_edge(co, n2, n1, costs);
772         }
773 }
774
775 static void build_graph_walker(ir_node *irn, void *env) {
776         const arch_register_req_t *req = arch_get_register_req_out(irn);
777         copy_opt_t                *co  = env;
778         int pos, max;
779         const arch_register_t *reg;
780
781         if (req->cls != co->cls || arch_irn_is_ignore(irn))
782                 return;
783
784         reg = arch_get_irn_register(irn);
785         if (arch_register_type_is(reg, ignore))
786                 return;
787
788         if (is_Reg_Phi(irn)) { /* Phis */
789                 for (pos=0, max=get_irn_arity(irn); pos<max; ++pos) {
790                         ir_node *arg = get_irn_n(irn, pos);
791                         add_edges(co, irn, arg, co->get_costs(co, irn, arg, pos));
792                 }
793         } else if (is_Perm_Proj(irn)) { /* Perms */
794                 ir_node *arg = get_Perm_src(irn);
795                 add_edges(co, irn, arg, co->get_costs(co, irn, arg, 0));
796         } else { /* 2-address code */
797                 if (is_2addr_code(req)) {
798                         const unsigned other = req->other_same;
799                         int i;
800
801                         for (i = 0; 1U << i <= other; ++i) {
802                                 if (other & (1U << i)) {
803                                         ir_node *other = get_irn_n(skip_Proj(irn), i);
804                                         if (!arch_irn_is_ignore(other))
805                                                 add_edges(co, irn, other, co->get_costs(co, irn, other, 0));
806                                 }
807                         }
808                 }
809         }
810 }
811
812 void co_build_graph_structure(copy_opt_t *co) {
813         obstack_init(&co->obst);
814         co->nodes = new_set(compare_affinity_node_t, 32);
815
816         irg_walk_graph(co->irg, build_graph_walker, NULL, co);
817 }
818
819 void co_free_graph_structure(copy_opt_t *co) {
820         ASSERT_GS_AVAIL(co);
821
822         del_set(co->nodes);
823         obstack_free(&co->obst, NULL);
824         co->nodes = NULL;
825 }
826
827 /* co_solve_ilp1() co_solve_ilp2() are implemented in becopyilpX.c */
828
829 int co_gs_is_optimizable(copy_opt_t *co, ir_node *irn) {
830         affinity_node_t new_node, *n;
831
832         ASSERT_GS_AVAIL(co);
833
834         new_node.irn = irn;
835         n = set_find(co->nodes, &new_node, sizeof(new_node), hash_irn(new_node.irn));
836         if (n) {
837                 return (n->degree > 0);
838         } else
839                 return 0;
840 }
841
842 static int co_dump_appel_disjoint_constraints(const copy_opt_t *co, ir_node *a, ir_node *b)
843 {
844         ir_node *nodes[]  = { a, b };
845         bitset_t *constr[] = { NULL, NULL };
846         int j;
847
848         constr[0] = bitset_alloca(co->cls->n_regs);
849         constr[1] = bitset_alloca(co->cls->n_regs);
850
851         for (j = 0; j < 2; ++j) {
852                 const arch_register_req_t *req = arch_get_register_req_out(nodes[j]);
853                 if(arch_register_req_is(req, limited))
854                         rbitset_copy_to_bitset(req->limited, constr[j]);
855                 else
856                         bitset_set_all(constr[j]);
857
858         }
859
860         return !bitset_intersect(constr[0], constr[1]);
861 }
862
863 void co_dump_appel_graph(const copy_opt_t *co, FILE *f)
864 {
865         be_ifg_t *ifg       = co->cenv->ifg;
866         int      *color_map = ALLOCAN(int, co->cls->n_regs);
867         int      *node_map  = XMALLOCN(int, get_irg_last_idx(co->irg) + 1);
868
869         ir_node *irn;
870         void *it, *nit;
871         int n, n_regs;
872         unsigned i;
873
874         n_regs = 0;
875         for(i = 0; i < co->cls->n_regs; ++i) {
876                 const arch_register_t *reg = &co->cls->regs[i];
877                 color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_regs++;
878         }
879
880         /*
881          * n contains the first node number.
882          * the values below n are the pre-colored register nodes
883          */
884
885         it  = be_ifg_nodes_iter_alloca(ifg);
886         nit = be_ifg_neighbours_iter_alloca(ifg);
887
888         n = n_regs;
889         be_ifg_foreach_node(ifg, it, irn) {
890                 if (arch_irn_is_ignore(irn))
891                         continue;
892                 node_map[get_irn_idx(irn)] = n++;
893         }
894
895         fprintf(f, "%d %d\n", n, n_regs);
896
897         be_ifg_foreach_node(ifg, it, irn) {
898                 if (!arch_irn_is_ignore(irn)) {
899                         int idx                        = node_map[get_irn_idx(irn)];
900                         affinity_node_t           *a   = get_affinity_info(co, irn);
901                         const arch_register_req_t *req = arch_get_register_req_out(irn);
902                         ir_node                   *adj;
903
904                         if(arch_register_req_is(req, limited)) {
905                                 for(i = 0; i < co->cls->n_regs; ++i) {
906                                         if(!rbitset_is_set(req->limited, i) && color_map[i] >= 0)
907                                                 fprintf(f, "%d %d -1\n", color_map[i], idx);
908                                 }
909                         }
910
911                         be_ifg_foreach_neighbour(ifg, nit, irn, adj) {
912                                 if (!arch_irn_is_ignore(adj) &&
913                                                 !co_dump_appel_disjoint_constraints(co, irn, adj)) {
914                                         int adj_idx = node_map[get_irn_idx(adj)];
915                                         if(idx < adj_idx)
916                                                 fprintf(f, "%d %d -1\n", idx, adj_idx);
917                                 }
918                         }
919
920                         if(a) {
921                                 neighb_t *n;
922
923                                 co_gs_foreach_neighb(a, n) {
924                                         if (!arch_irn_is_ignore(n->irn)) {
925                                                 int n_idx = node_map[get_irn_idx(n->irn)];
926                                                 if(idx < n_idx)
927                                                         fprintf(f, "%d %d %d\n", idx, n_idx, (int) n->costs);
928                                         }
929                                 }
930                         }
931                 }
932         }
933
934         xfree(node_map);
935 }
936
937 /*
938          ___ _____ ____   ____   ___ _____   ____                        _
939         |_ _|  ___/ ___| |  _ \ / _ \_   _| |  _ \ _   _ _ __ ___  _ __ (_)_ __   __ _
940          | || |_ | |  _  | | | | | | || |   | | | | | | | '_ ` _ \| '_ \| | '_ \ / _` |
941          | ||  _|| |_| | | |_| | |_| || |   | |_| | |_| | | | | | | |_) | | | | | (_| |
942         |___|_|   \____| |____/ \___/ |_|   |____/ \__,_|_| |_| |_| .__/|_|_| |_|\__, |
943                                                                   |_|            |___/
944 */
945
946 static const char *get_dot_color_name(size_t col)
947 {
948         static const char *names[] = {
949                 "blue",
950                 "red",
951                 "green",
952                 "yellow",
953                 "cyan",
954                 "magenta",
955                 "orange",
956                 "chocolate",
957                 "beige",
958                 "navy",
959                 "darkgreen",
960                 "darkred",
961                 "lightPink",
962                 "chartreuse",
963                 "lightskyblue",
964                 "linen",
965                 "pink",
966                 "lightslateblue",
967                 "mintcream",
968                 "red",
969                 "darkolivegreen",
970                 "mediumblue",
971                 "mistyrose",
972                 "salmon",
973                 "darkseagreen",
974                 "mediumslateblue"
975                 "moccasin",
976                 "tomato",
977                 "forestgreen",
978                 "darkturquoise",
979                 "palevioletred"
980         };
981
982         return col < sizeof(names)/sizeof(names[0]) ? names[col] : "white";
983 }
984
985 typedef struct _co_ifg_dump_t {
986         const copy_opt_t *co;
987         unsigned flags;
988 } co_ifg_dump_t;
989
990 static void ifg_dump_graph_attr(FILE *f, void *self)
991 {
992         (void) self;
993         fprintf(f, "overlap=scale");
994 }
995
996 static int ifg_is_dump_node(void *self, ir_node *irn)
997 {
998         (void)self;
999         return !arch_irn_is_ignore(irn);
1000 }
1001
1002 static void ifg_dump_node_attr(FILE *f, void *self, ir_node *irn)
1003 {
1004         co_ifg_dump_t             *env     = self;
1005         const arch_register_t     *reg     = arch_get_irn_register(irn);
1006         const arch_register_req_t *req     = arch_get_register_req_out(irn);
1007         int                        limited = arch_register_req_is(req, limited);
1008
1009         if(env->flags & CO_IFG_DUMP_LABELS) {
1010                 ir_fprintf(f, "label=\"%+F", irn);
1011
1012                 if((env->flags & CO_IFG_DUMP_CONSTR) && limited) {
1013                         bitset_t *bs = bitset_alloca(env->co->cls->n_regs);
1014                         rbitset_copy_to_bitset(req->limited, bs);
1015                         ir_fprintf(f, "\\n%B", bs);
1016                 }
1017                 ir_fprintf(f, "\" ");
1018         } else {
1019                 fprintf(f, "label=\"\" shape=point " );
1020         }
1021
1022         if(env->flags & CO_IFG_DUMP_SHAPE)
1023                 fprintf(f, "shape=%s ", limited ? "diamond" : "ellipse");
1024
1025         if(env->flags & CO_IFG_DUMP_COLORS)
1026                 fprintf(f, "style=filled color=%s ", get_dot_color_name(reg->index));
1027 }
1028
1029 static void ifg_dump_at_end(FILE *file, void *self)
1030 {
1031         co_ifg_dump_t *env = self;
1032         affinity_node_t *a;
1033
1034         co_gs_foreach_aff_node(env->co, a) {
1035                 const arch_register_t *ar = arch_get_irn_register(a->irn);
1036                 unsigned aidx = get_irn_idx(a->irn);
1037                 neighb_t *n;
1038
1039                 co_gs_foreach_neighb(a, n) {
1040                         const arch_register_t *nr = arch_get_irn_register(n->irn);
1041                         unsigned nidx = get_irn_idx(n->irn);
1042
1043                         if(aidx < nidx) {
1044                                 const char *color = nr == ar ? "blue" : "red";
1045                                 fprintf(file, "\tn%d -- n%d [weight=0.01 ", aidx, nidx);
1046                                 if(env->flags & CO_IFG_DUMP_LABELS)
1047                                         fprintf(file, "label=\"%d\" ", n->costs);
1048                                 if(env->flags & CO_IFG_DUMP_COLORS)
1049                                         fprintf(file, "color=%s ", color);
1050                                 else
1051                                         fprintf(file, "style=dotted");
1052                                 fprintf(file, "];\n");
1053                         }
1054                 }
1055         }
1056 }
1057
1058
1059 static be_ifg_dump_dot_cb_t ifg_dot_cb = {
1060         ifg_is_dump_node,
1061         ifg_dump_graph_attr,
1062         ifg_dump_node_attr,
1063         NULL,
1064         NULL,
1065         ifg_dump_at_end
1066 };
1067
1068
1069
1070 void co_dump_ifg_dot(const copy_opt_t *co, FILE *f, unsigned flags)
1071 {
1072         co_ifg_dump_t cod;
1073
1074         cod.co    = co;
1075         cod.flags = flags;
1076         be_ifg_dump_dot(co->cenv->ifg, co->irg, f, &ifg_dot_cb, &cod);
1077 }
1078
1079
1080 void co_solve_park_moon(copy_opt_t *opt)
1081 {
1082         (void) opt;
1083 }
1084
1085 static int void_algo(copy_opt_t *co)
1086 {
1087         (void) co;
1088         return 0;
1089 }
1090
1091 /*
1092                 _    _                  _ _   _
1093            / \  | | __ _  ___  _ __(_) |_| |__  _ __ ___  ___
1094           / _ \ | |/ _` |/ _ \| '__| | __| '_ \| '_ ` _ \/ __|
1095          / ___ \| | (_| | (_) | |  | | |_| | | | | | | | \__ \
1096         /_/   \_\_|\__, |\___/|_|  |_|\__|_| |_|_| |_| |_|___/
1097                            |___/
1098 */
1099
1100 typedef struct {
1101         co_algo_t  *algo;
1102         const char *name;
1103         int        can_improve_existing;
1104 } co_algo_info_t;
1105
1106 static co_algo_info_t algos[] = {
1107         { void_algo,               "none",  0 },
1108         { co_solve_heuristic,      "heur1", 0 },
1109         { co_solve_heuristic_new,  "heur2", 0 },
1110 #ifdef WITH_JVM
1111         { co_solve_heuristic_java, "heur3", 0 },
1112 #else
1113         { NULL,                    "heur3", 0 },
1114 #endif
1115         { co_solve_heuristic_mst,  "heur4", 0 },
1116 #ifdef WITH_ILP
1117         { co_solve_ilp2,           "ilp",   1 },
1118 #else
1119         { NULL,                    "ilp",   1 },
1120 #endif
1121         { NULL,                    "",      0 }
1122 };
1123
1124 /*
1125     __  __       _         ____       _
1126    |  \/  | __ _(_)_ __   |  _ \ _ __(_)_   _____ _ __
1127    | |\/| |/ _` | | '_ \  | | | | '__| \ \ / / _ \ '__|
1128    | |  | | (_| | | | | | | |_| | |  | |\ V /  __/ |
1129    |_|  |_|\__,_|_|_| |_| |____/|_|  |_| \_/ \___|_|
1130
1131 */
1132
1133 static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char *suffix)
1134 {
1135         FILE *result;
1136         char buf[1024];
1137         size_t i, n;
1138         char *tu_name;
1139
1140         n = strlen(env->birg->main_env->cup_name);
1141         tu_name = XMALLOCN(char, n + 1);
1142         strcpy(tu_name, env->birg->main_env->cup_name);
1143         for (i = 0; i < n; ++i)
1144                 if (tu_name[i] == '.')
1145                         tu_name[i] = '_';
1146
1147
1148         ir_snprintf(buf, sizeof(buf), "%s%s_%F_%s%s", prefix, tu_name, env->irg, env->cls->name, suffix);
1149         xfree(tu_name);
1150         result = fopen(buf, "wt");
1151         if(result == NULL) {
1152                 panic("Couldn't open '%s' for writing.", buf);
1153         }
1154
1155         return result;
1156 }
1157
1158 void co_driver(be_chordal_env_t *cenv)
1159 {
1160         ir_timer_t          *timer = ir_timer_register("firm.be.copyopt", "runtime");
1161         co_complete_stats_t before, after;
1162         copy_opt_t          *co;
1163         co_algo_t           *algo_func;
1164         int                 was_optimal = 0;
1165
1166         if (algo >= CO_ALGO_LAST)
1167                 return;
1168
1169         be_liveness_assure_chk(be_get_birg_liveness(cenv->birg));
1170
1171         co = new_copy_opt(cenv, cost_func);
1172         co_build_ou_structure(co);
1173         co_build_graph_structure(co);
1174
1175         co_complete_stats(co, &before);
1176
1177         be_stat_ev_ull("co_aff_nodes",    before.aff_nodes);
1178         be_stat_ev_ull("co_aff_edges",    before.aff_edges);
1179         be_stat_ev_ull("co_max_costs",    before.max_costs);
1180         be_stat_ev_ull("co_inevit_costs", before.inevit_costs);
1181         be_stat_ev_ull("co_aff_int",      before.aff_int);
1182
1183         be_stat_ev_ull("co_init_costs",   before.costs);
1184         be_stat_ev_ull("co_init_unsat",   before.unsatisfied_edges);
1185
1186         if (dump_flags & DUMP_BEFORE) {
1187                 FILE *f = my_open(cenv, "", "-before.dot");
1188                 co_dump_ifg_dot(co, f, style_flags);
1189                 fclose(f);
1190         }
1191
1192         /* if the algo can improve results, provide an initial solution with heur3 */
1193         if (improve && algos[algo].can_improve_existing) {
1194                 co_complete_stats_t stats;
1195
1196                 /* produce a heuristic solution */
1197 #ifdef WITH_JVM
1198                 co_solve_heuristic_java(co);
1199 #else
1200                 co_solve_heuristic(co);
1201 #endif /* WITH_JVM */
1202
1203                 /* do the stats and provide the current costs */
1204                 co_complete_stats(co, &stats);
1205                 be_stat_ev_ull("co_prepare_costs", stats.costs);
1206         }
1207
1208 #ifdef WITH_JVM
1209         /* start the JVM here so that it does not tamper the timing. */
1210         if (algo == CO_ALGO_HEUR3)
1211                 be_java_coal_start_jvm();
1212 #endif /* WITH_JVM */
1213
1214         algo_func = algos[algo].algo;
1215
1216         /* perform actual copy minimization */
1217         ir_timer_reset_and_start(timer);
1218         was_optimal = algo_func(co);
1219         ir_timer_stop(timer);
1220
1221         be_stat_ev("co_time", ir_timer_elapsed_msec(timer));
1222         be_stat_ev_ull("co_optimal", was_optimal);
1223
1224         if (dump_flags & DUMP_AFTER) {
1225                 FILE *f = my_open(cenv, "", "-after.dot");
1226                 co_dump_ifg_dot(co, f, style_flags);
1227                 fclose(f);
1228         }
1229
1230         co_complete_stats(co, &after);
1231
1232         if (do_stats) {
1233                 ulong64 optimizable_costs = after.max_costs - after.inevit_costs;
1234                 ulong64 evitable          = after.costs     - after.inevit_costs;
1235
1236                 ir_printf("%30F ", cenv->irg);
1237                 printf("%10s %10" ULL_FMT "%10" ULL_FMT "%10" ULL_FMT, cenv->cls->name, after.max_costs, before.costs, after.inevit_costs);
1238
1239                 if(optimizable_costs > 0)
1240                         printf("%10" ULL_FMT " %5.2f\n", after.costs, (evitable * 100.0) / optimizable_costs);
1241                 else
1242                         printf("%10" ULL_FMT " %5s\n", after.costs, "-");
1243         }
1244
1245         /* Dump the interference graph in Appel's format. */
1246         if (dump_flags & DUMP_APPEL) {
1247                 FILE *f = my_open(cenv, "", ".apl");
1248                 fprintf(f, "# %lld %lld\n", after.costs, after.unsatisfied_edges);
1249                 co_dump_appel_graph(co, f);
1250                 fclose(f);
1251         }
1252
1253         be_stat_ev_ull("co_after_costs", after.costs);
1254         be_stat_ev_ull("co_after_unsat", after.unsatisfied_edges);
1255
1256         co_free_graph_structure(co);
1257         co_free_ou_structure(co);
1258         free_copy_opt(co);
1259 }