becopyilp: code cleanups
[libfirm] / ir / be / becopyopt.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Copy minimization driver.
23  * @author      Daniel Grund
24  * @date        12.04.2005
25  *
26  * Main file for the optimization reducing the copies needed for:
27  * - Phi coalescing
28  * - Register-constrained nodes
29  * - Two-address code instructions
30  */
31 #include "config.h"
32
33 #include "execfreq.h"
34 #include "xmalloc.h"
35 #include "debug.h"
36 #include "pmap.h"
37 #include "raw_bitset.h"
38 #include "irnode.h"
39 #include "irgraph.h"
40 #include "irgwalk.h"
41 #include "irprog.h"
42 #include "irloop_t.h"
43 #include "iredges_t.h"
44 #include "irprintf_t.h"
45 #include "irtools.h"
46 #include "util.h"
47
48 #include "bemodule.h"
49 #include "bearch.h"
50 #include "benode.h"
51 #include "beutil.h"
52 #include "beifg.h"
53 #include "beintlive_t.h"
54 #include "becopyopt_t.h"
55 #include "becopystat.h"
56 #include "belive_t.h"
57 #include "beinsn_t.h"
58 #include "besched.h"
59 #include "bestatevent.h"
60 #include "beirg.h"
61 #include "error.h"
62
63 #include "lc_opts.h"
64 #include "lc_opts_enum.h"
65
66 #define DUMP_BEFORE 1
67 #define DUMP_AFTER  2
68 #define DUMP_APPEL  4
69 #define DUMP_ALL    2 * DUMP_APPEL - 1
70
71 #define COST_FUNC_FREQ     1
72 #define COST_FUNC_LOOP     2
73 #define COST_FUNC_ALL_ONE  3
74
75 /**
76  * Flags for dumping the IFG.
77  */
78 enum {
79         CO_IFG_DUMP_COLORS = 1 << 0, /**< Dump the graph colored. */
80         CO_IFG_DUMP_LABELS = 1 << 1, /**< Dump node/edge labels. */
81         CO_IFG_DUMP_SHAPE  = 1 << 2, /**< Give constrained nodes special shapes. */
82         CO_IFG_DUMP_CONSTR = 1 << 3, /**< Dump the node constraints in the label. */
83 };
84
85 static int co_get_costs_loop_depth(const ir_node *root, int pos);
86 static int co_get_costs_exec_freq(const ir_node *root, int pos);
87 static int co_get_costs_all_one(const ir_node *root, int pos);
88
89 static unsigned   dump_flags  = 0;
90 static unsigned   style_flags = 0;
91 static int        do_stats    = 0;
92 static cost_fct_t cost_func   = co_get_costs_exec_freq;
93 static int        improve     = 1;
94
95 static const lc_opt_enum_mask_items_t dump_items[] = {
96         { "before",  DUMP_BEFORE },
97         { "after",   DUMP_AFTER  },
98         { "appel",   DUMP_APPEL  },
99         { "all",     DUMP_ALL    },
100         { NULL,      0 }
101 };
102
103 /**
104  * Flags for dumping the IFG.
105  */
106 enum {
107         CO_IFG_DUMP_COLORS = 1 << 0, /**< Dump the graph colored. */
108         CO_IFG_DUMP_LABELS = 1 << 1, /**< Dump node/edge labels. */
109         CO_IFG_DUMP_SHAPE  = 1 << 2, /**< Give constrained nodes special shapes. */
110         CO_IFG_DUMP_CONSTR = 1 << 3, /**< Dump the node constraints in the label. */
111 };
112
113 static const lc_opt_enum_mask_items_t style_items[] = {
114         { "color",   CO_IFG_DUMP_COLORS },
115         { "labels",  CO_IFG_DUMP_LABELS },
116         { "constr",  CO_IFG_DUMP_CONSTR },
117         { "shape",   CO_IFG_DUMP_SHAPE  },
118         { "full",    2 * CO_IFG_DUMP_SHAPE - 1 },
119         { NULL,      0 }
120 };
121
122 typedef int (*opt_funcptr)(void);
123 static const lc_opt_enum_func_ptr_items_t cost_func_items[] = {
124         { "freq",   (opt_funcptr) co_get_costs_exec_freq },
125         { "loop",   (opt_funcptr) co_get_costs_loop_depth },
126         { "one",    (opt_funcptr) co_get_costs_all_one },
127         { NULL,     NULL }
128 };
129
130 static lc_opt_enum_mask_var_t dump_var = {
131         &dump_flags, dump_items
132 };
133
134 static lc_opt_enum_mask_var_t style_var = {
135         &style_flags, style_items
136 };
137
138 static lc_opt_enum_func_ptr_var_t cost_func_var = {
139         (opt_funcptr*) &cost_func, cost_func_items
140 };
141
142 static const lc_opt_table_entry_t options[] = {
143         LC_OPT_ENT_ENUM_FUNC_PTR ("cost",    "select a cost function",                                  &cost_func_var),
144         LC_OPT_ENT_ENUM_MASK     ("dump",    "dump ifg before or after copy optimization",              &dump_var),
145         LC_OPT_ENT_ENUM_MASK     ("style",   "dump style for ifg dumping",                              &style_var),
146         LC_OPT_ENT_BOOL          ("stats",   "dump statistics after each optimization",                 &do_stats),
147         LC_OPT_ENT_BOOL          ("improve", "run heur1 before if algo can exploit start solutions",    &improve),
148         LC_OPT_LAST
149 };
150
151 static be_module_list_entry_t *copyopts = NULL;
152 static const co_algo_info *selected_copyopt = NULL;
153
154 void be_register_copyopt(const char *name, co_algo_info *copyopt)
155 {
156         if (selected_copyopt == NULL)
157                 selected_copyopt = copyopt;
158         be_add_module_to_list(&copyopts, name, copyopt);
159 }
160
161 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copyopt)
162 void be_init_copyopt(void)
163 {
164         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
165         lc_opt_entry_t *ra_grp = lc_opt_get_grp(be_grp, "ra");
166         lc_opt_entry_t *chordal_grp = lc_opt_get_grp(ra_grp, "chordal");
167         lc_opt_entry_t *co_grp = lc_opt_get_grp(chordal_grp, "co");
168
169         lc_opt_add_table(co_grp, options);
170         be_add_module_list_opt(co_grp, "algo", "select copy optimization algo",
171                                        &copyopts, (void**) &selected_copyopt);
172 }
173
174 static int void_algo(copy_opt_t *co)
175 {
176         (void) co;
177         return 0;
178 }
179
180 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copynone)
181 void be_init_copynone(void)
182 {
183         static co_algo_info copyheur = {
184                 void_algo, 0
185         };
186
187         be_register_copyopt("none", &copyheur);
188 }
189
190 #undef QUICK_AND_DIRTY_HACK
191
192 static int nodes_interfere(const be_chordal_env_t *env, const ir_node *a, const ir_node *b)
193 {
194         if (env->ifg)
195                 return be_ifg_connected(env->ifg, a, b);
196         else {
197                 be_lv_t *lv = be_get_irg_liveness(env->irg);
198                 return be_values_interfere(lv, a, b);
199         }
200 }
201
202
203 /******************************************************************************
204     _____                           _
205    / ____|                         | |
206   | |  __  ___ _ __   ___ _ __ __ _| |
207   | | |_ |/ _ \ '_ \ / _ \ '__/ _` | |
208   | |__| |  __/ | | |  __/ | | (_| | |
209    \_____|\___|_| |_|\___|_|  \__,_|_|
210
211  ******************************************************************************/
212
213 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
214
215
216 copy_opt_t *new_copy_opt(be_chordal_env_t *chordal_env, cost_fct_t get_costs)
217 {
218         const char *s1, *s2, *s3;
219         size_t len;
220         copy_opt_t *co;
221
222         FIRM_DBG_REGISTER(dbg, "ir.be.copyopt");
223
224         co = XMALLOCZ(copy_opt_t);
225         co->cenv      = chordal_env;
226         co->irg       = chordal_env->irg;
227         co->cls       = chordal_env->cls;
228         co->get_costs = get_costs;
229
230         s1 = get_irp_name();
231         s2 = get_entity_name(get_irg_entity(co->irg));
232         s3 = chordal_env->cls->name;
233         len = strlen(s1) + strlen(s2) + strlen(s3) + 5;
234         co->name = XMALLOCN(char, len);
235         snprintf(co->name, len, "%s__%s__%s", s1, s2, s3);
236
237         return co;
238 }
239
240 void free_copy_opt(copy_opt_t *co)
241 {
242         xfree(co->name);
243         free(co);
244 }
245
246 /**
247  * Checks if a node is optimizable, viz. has something to do with coalescing
248  * @param irn  The irn to check
249  */
250 static int co_is_optimizable_root(ir_node *irn)
251 {
252         const arch_register_req_t *req;
253
254         if (arch_irn_is_ignore(irn))
255                 return 0;
256
257         if (is_Reg_Phi(irn) || is_Perm_Proj(irn))
258                 return 1;
259
260         req = arch_get_irn_register_req(irn);
261         if (is_2addr_code(req))
262                 return 1;
263
264         return 0;
265 }
266
267 /**
268  * Computes the costs of a copy according to loop depth
269  * @param pos  the argument position of arg in the root arguments
270  * @return     Must be >= 0 in all cases.
271  */
272 static int co_get_costs_loop_depth(const ir_node *root, int pos)
273 {
274         ir_node *block = get_nodes_block(root);
275         ir_loop *loop;
276         int      cost;
277
278         if (is_Phi(root)) {
279                 block = get_Block_cfgpred_block(block, pos);
280         }
281         loop = get_irn_loop(block);
282         if (loop) {
283                 int d = get_loop_depth(loop);
284                 cost = d*d;
285         } else {
286                 cost = 0;
287         }
288         return 1+cost;
289 }
290
291 /**
292  * Computes the costs of a copy according to execution frequency
293  * @param pos  the argument position of arg in the root arguments
294  * @return Must be >= 0 in all cases.
295  */
296 static int co_get_costs_exec_freq(const ir_node *root, int pos)
297 {
298         ir_graph     *irg       = get_irn_irg(root);
299         ir_node      *root_bl   = get_nodes_block(root);
300         ir_node      *copy_bl
301                 = is_Phi(root) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
302         ir_exec_freq *exec_freq = be_get_irg_exec_freq(irg);
303         int           res       = get_block_execfreq_ulong(exec_freq, copy_bl);
304
305         /* don't allow values smaller than one. */
306         return res < 1 ? 1 : res;
307 }
308
309 /**
310  * All costs equal 1. Using this will reduce the _number_ of copies.
311  * @param co   The copy opt object.
312  * @return Must be >= 0 in all cases.
313  */
314 static int co_get_costs_all_one(const ir_node *root, int pos)
315 {
316         (void) root;
317         (void) pos;
318         return 1;
319 }
320
321 /******************************************************************************
322    ____        _   _    _       _ _          _____ _
323   / __ \      | | | |  | |     (_) |        / ____| |
324  | |  | |_ __ | |_| |  | |_ __  _| |_ ___  | (___ | |_ ___  _ __ __ _  __ _  ___
325  | |  | | '_ \| __| |  | | '_ \| | __/ __|  \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
326  | |__| | |_) | |_| |__| | | | | | |_\__ \  ____) | || (_) | | | (_| | (_| |  __/
327   \____/| .__/ \__|\____/|_| |_|_|\__|___/ |_____/ \__\___/|_|  \__,_|\__, |\___|
328         | |                                                            __/ |
329         |_|                                                           |___/
330  ******************************************************************************/
331
332 /**
333  * Determines a maximum weighted independent set with respect to
334  * the interference and conflict edges of all nodes in a qnode.
335  */
336 static int ou_max_ind_set_costs(unit_t *ou)
337 {
338         be_chordal_env_t *chordal_env = ou->co->cenv;
339         ir_node **safe, **unsafe;
340         int i, o, safe_count, safe_costs, unsafe_count, *unsafe_costs;
341         bitset_t *curr;
342         size_t  pos;
343         int curr_weight, best_weight = 0;
344
345         /* assign the nodes into two groups.
346          * safe: node has no interference, hence it is in every max stable set.
347          * unsafe: node has an interference
348          */
349         safe         = ALLOCAN(ir_node*, ou->node_count - 1);
350         safe_costs   = 0;
351         safe_count   = 0;
352         unsafe       = ALLOCAN(ir_node*, ou->node_count - 1);
353         unsafe_costs = ALLOCAN(int,      ou->node_count - 1);
354         unsafe_count = 0;
355         for (i=1; i<ou->node_count; ++i) {
356                 int is_safe = 1;
357                 for (o=1; o<ou->node_count; ++o) {
358                         if (i==o)
359                                 continue;
360                         if (nodes_interfere(chordal_env, ou->nodes[i], ou->nodes[o])) {
361                                 unsafe_costs[unsafe_count] = ou->costs[i];
362                                 unsafe[unsafe_count] = ou->nodes[i];
363                                 ++unsafe_count;
364                                 is_safe = 0;
365                                 break;
366                         }
367                 }
368                 if (is_safe) {
369                         safe_costs += ou->costs[i];
370                         safe[safe_count++] = ou->nodes[i];
371                 }
372         }
373
374
375         /* now compute the best set out of the unsafe nodes*/
376         if (unsafe_count > MIS_HEUR_TRIGGER) {
377                 bitset_t *best = bitset_alloca(unsafe_count);
378                 /* Heuristic: Greedy trial and error form index 0 to unsafe_count-1 */
379                 for (i=0; i<unsafe_count; ++i) {
380                         bitset_set(best, i);
381                         /* check if it is a stable set */
382                         for (o=bitset_next_set(best, 0); o!=-1 && o<i; o=bitset_next_set(best, o+1))
383                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o])) {
384                                         bitset_clear(best, i); /* clear the bit and try next one */
385                                         break;
386                                 }
387                 }
388                 /* compute the weight */
389                 bitset_foreach(best, pos)
390                         best_weight += unsafe_costs[pos];
391         } else {
392                 /* Exact Algorithm: Brute force */
393                 curr = bitset_alloca(unsafe_count);
394                 bitset_set_all(curr);
395                 while (bitset_popcount(curr) != 0) {
396                         /* check if curr is a stable set */
397                         for (i=bitset_next_set(curr, 0); i!=-1; i=bitset_next_set(curr, i+1))
398                                 for (o=bitset_next_set(curr, i+1); o!=-1; o=bitset_next_set(curr, o+1)) /* !!!!! difference to qnode_max_ind_set(): NOT (curr, i) */
399                                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o]))
400                                                         goto no_stable_set;
401
402                         /* if we arrive here, we have a stable set */
403                         /* compute the weight of the stable set*/
404                         curr_weight = 0;
405                         bitset_foreach(curr, pos)
406                                 curr_weight += unsafe_costs[pos];
407
408                         /* any better ? */
409                         if (curr_weight > best_weight) {
410                                 best_weight = curr_weight;
411                         }
412
413         no_stable_set:
414                         bitset_minus1(curr);
415                 }
416         }
417
418         return safe_costs+best_weight;
419 }
420
421 static void co_collect_units(ir_node *irn, void *env)
422 {
423         const arch_register_req_t *req;
424         copy_opt_t                *co  = (copy_opt_t*)env;
425         unit_t *unit;
426
427         if (get_irn_mode(irn) == mode_T)
428                 return;
429         req = arch_get_irn_register_req(irn);
430         if (req->cls != co->cls)
431                 return;
432         if (!co_is_optimizable_root(irn))
433                 return;
434
435         /* Init a new unit */
436         unit = XMALLOCZ(unit_t);
437         unit->co = co;
438         unit->node_count = 1;
439         INIT_LIST_HEAD(&unit->queue);
440
441         /* Phi with some/all of its arguments */
442         if (is_Reg_Phi(irn)) {
443                 int i, arity;
444
445                 /* init */
446                 arity = get_irn_arity(irn);
447                 unit->nodes = XMALLOCN(ir_node*, arity + 1);
448                 unit->costs = XMALLOCN(int,      arity + 1);
449                 unit->nodes[0] = irn;
450
451                 /* fill */
452                 for (i=0; i<arity; ++i) {
453                         int o, arg_pos;
454                         ir_node *arg = get_irn_n(irn, i);
455
456                         assert(arch_get_irn_reg_class(arg) == co->cls && "Argument not in same register class.");
457                         if (arg == irn)
458                                 continue;
459                         if (nodes_interfere(co->cenv, irn, arg)) {
460                                 unit->inevitable_costs += co->get_costs(irn, i);
461                                 continue;
462                         }
463
464                         /* Else insert the argument of the phi to the members of this ou */
465                         DBG((dbg, LEVEL_1, "\t   Member: %+F\n", arg));
466
467                         if (arch_irn_is_ignore(arg))
468                                 continue;
469
470                         /* Check if arg has occurred at a prior position in the arg/list */
471                         arg_pos = 0;
472                         for (o=1; o<unit->node_count; ++o) {
473                                 if (unit->nodes[o] == arg) {
474                                         arg_pos = o;
475                                         break;
476                                 }
477                         }
478
479                         if (!arg_pos) { /* a new argument */
480                                 /* insert node, set costs */
481                                 unit->nodes[unit->node_count] = arg;
482                                 unit->costs[unit->node_count] = co->get_costs(irn, i);
483                                 unit->node_count++;
484                         } else { /* arg has occurred before in same phi */
485                                 /* increase costs for existing arg */
486                                 unit->costs[arg_pos] += co->get_costs(irn, i);
487                         }
488                 }
489                 unit->nodes = XREALLOC(unit->nodes, ir_node*, unit->node_count);
490                 unit->costs = XREALLOC(unit->costs, int,      unit->node_count);
491         } else if (is_Perm_Proj(irn)) {
492                 /* Proj of a perm with corresponding arg */
493                 assert(!nodes_interfere(co->cenv, irn, get_Perm_src(irn)));
494                 unit->nodes = XMALLOCN(ir_node*, 2);
495                 unit->costs = XMALLOCN(int,      2);
496                 unit->node_count = 2;
497                 unit->nodes[0] = irn;
498                 unit->nodes[1] = get_Perm_src(irn);
499                 unit->costs[1] = co->get_costs(irn, -1);
500         } else {
501                 /* Src == Tgt of a 2-addr-code instruction */
502                 if (is_2addr_code(req)) {
503                         const unsigned other = req->other_same;
504                         int            count = 0;
505                         int            i;
506
507                         for (i = 0; (1U << i) <= other; ++i) {
508                                 if (other & (1U << i)) {
509                                         ir_node *o  = get_irn_n(skip_Proj(irn), i);
510                                         if (arch_irn_is_ignore(o))
511                                                 continue;
512                                         if (nodes_interfere(co->cenv, irn, o))
513                                                 continue;
514                                         ++count;
515                                 }
516                         }
517
518                         if (count != 0) {
519                                 int k = 0;
520                                 ++count;
521                                 unit->nodes = XMALLOCN(ir_node*, count);
522                                 unit->costs = XMALLOCN(int,      count);
523                                 unit->node_count = count;
524                                 unit->nodes[k++] = irn;
525
526                                 for (i = 0; 1U << i <= other; ++i) {
527                                         if (other & (1U << i)) {
528                                                 ir_node *o  = get_irn_n(skip_Proj(irn), i);
529                                                 if (!arch_irn_is_ignore(o) &&
530                                                                 !nodes_interfere(co->cenv, irn, o)) {
531                                                         unit->nodes[k] = o;
532                                                         unit->costs[k] = co->get_costs(irn, -1);
533                                                         ++k;
534                                                 }
535                                         }
536                                 }
537                         }
538                 } else {
539                         assert(0 && "This is not an optimizable node!");
540                 }
541         }
542
543         /* Insert the new unit at a position according to its costs */
544         if (unit->node_count > 1) {
545                 int i;
546                 struct list_head *tmp;
547
548                 /* Determine the maximum costs this unit can cause: all_nodes_cost */
549                 for (i=1; i<unit->node_count; ++i) {
550                         unit->sort_key = MAX(unit->sort_key, unit->costs[i]);
551                         unit->all_nodes_costs += unit->costs[i];
552                 }
553
554                 /* Determine the minimal costs this unit will cause: min_nodes_costs */
555                 unit->min_nodes_costs += unit->all_nodes_costs - ou_max_ind_set_costs(unit);
556                 /* Insert the new ou according to its sort_key */
557                 tmp = &co->units;
558                 while (tmp->next != &co->units && list_entry_units(tmp->next)->sort_key > unit->sort_key)
559                         tmp = tmp->next;
560                 list_add(&unit->units, tmp);
561         } else {
562                 free(unit);
563         }
564 }
565
566 #ifdef QUICK_AND_DIRTY_HACK
567
568 static int compare_ous(const void *k1, const void *k2)
569 {
570         const unit_t *u1 = *((const unit_t **) k1);
571         const unit_t *u2 = *((const unit_t **) k2);
572         int i, o, u1_has_constr, u2_has_constr;
573         arch_register_req_t req;
574
575         /* Units with constraints come first */
576         u1_has_constr = 0;
577         for (i=0; i<u1->node_count; ++i) {
578                 arch_get_irn_register_req(&req, u1->nodes[i]);
579                 if (arch_register_req_is(&req, limited)) {
580                         u1_has_constr = 1;
581                         break;
582                 }
583         }
584
585         u2_has_constr = 0;
586         for (i=0; i<u2->node_count; ++i) {
587                 arch_get_irn_register_req(&req, u2->nodes[i]);
588                 if (arch_register_req_is(&req, limited)) {
589                         u2_has_constr = 1;
590                         break;
591                 }
592         }
593
594         if (u1_has_constr != u2_has_constr)
595                 return u2_has_constr - u1_has_constr;
596
597         /* Now check, whether the two units are connected */
598 #if 0
599         for (i=0; i<u1->node_count; ++i)
600                 for (o=0; o<u2->node_count; ++o)
601                         if (u1->nodes[i] == u2->nodes[o])
602                                 return 0;
603 #endif
604
605         /* After all, the sort key decides. Greater keys come first. */
606         return u2->sort_key - u1->sort_key;
607
608 }
609
610 /**
611  * Sort the ou's according to constraints and their sort_key
612  */
613 static void co_sort_units(copy_opt_t *co)
614 {
615         int i, count = 0, costs;
616         unit_t *ou, **ous;
617
618         /* get the number of ous, remove them form the list and fill the array */
619         list_for_each_entry(unit_t, ou, &co->units, units)
620                 count++;
621         ous = ALLOCAN(unit_t, count);
622
623         costs = co_get_max_copy_costs(co);
624
625         i = 0;
626         list_for_each_entry(unit_t, ou, &co->units, units)
627                 ous[i++] = ou;
628
629         INIT_LIST_HEAD(&co->units);
630
631         assert(count == i && list_empty(&co->units));
632
633         for (i=0; i<count; ++i)
634                 ir_printf("%+F\n", ous[i]->nodes[0]);
635
636         qsort(ous, count, sizeof(*ous), compare_ous);
637
638         ir_printf("\n\n");
639         for (i=0; i<count; ++i)
640                 ir_printf("%+F\n", ous[i]->nodes[0]);
641
642         /* reinsert into list in correct order */
643         for (i=0; i<count; ++i)
644                 list_add_tail(&ous[i]->units, &co->units);
645
646         assert(costs == co_get_max_copy_costs(co));
647 }
648 #endif
649
650 void co_build_ou_structure(copy_opt_t *co)
651 {
652         DBG((dbg, LEVEL_1, "\tCollecting optimization units\n"));
653         INIT_LIST_HEAD(&co->units);
654         irg_walk_graph(co->irg, co_collect_units, NULL, co);
655 #ifdef QUICK_AND_DIRTY_HACK
656         co_sort_units(co);
657 #endif
658 }
659
660 void co_free_ou_structure(copy_opt_t *co)
661 {
662         unit_t *curr, *tmp;
663         ASSERT_OU_AVAIL(co);
664         list_for_each_entry_safe(unit_t, curr, tmp, &co->units, units) {
665                 xfree(curr->nodes);
666                 xfree(curr->costs);
667                 xfree(curr);
668         }
669         co->units.next = NULL;
670 }
671
672 /* co_solve_heuristic() is implemented in becopyheur.c */
673
674 int co_get_max_copy_costs(const copy_opt_t *co)
675 {
676         int i, res = 0;
677         unit_t *curr;
678
679         ASSERT_OU_AVAIL(co);
680
681         list_for_each_entry(unit_t, curr, &co->units, units) {
682                 res += curr->inevitable_costs;
683                 for (i=1; i<curr->node_count; ++i)
684                         res += curr->costs[i];
685         }
686         return res;
687 }
688
689 int co_get_inevit_copy_costs(const copy_opt_t *co)
690 {
691         int res = 0;
692         unit_t *curr;
693
694         ASSERT_OU_AVAIL(co);
695
696         list_for_each_entry(unit_t, curr, &co->units, units)
697                 res += curr->inevitable_costs;
698         return res;
699 }
700
701 int co_get_copy_costs(const copy_opt_t *co)
702 {
703         int i, res = 0;
704         unit_t *curr;
705
706         ASSERT_OU_AVAIL(co);
707
708         list_for_each_entry(unit_t, curr, &co->units, units) {
709                 int root_col = get_irn_col(curr->nodes[0]);
710                 DBG((dbg, LEVEL_1, "  %3d costs for root %+F color %d\n", curr->inevitable_costs, curr->nodes[0], root_col));
711                 res += curr->inevitable_costs;
712                 for (i=1; i<curr->node_count; ++i) {
713                         int arg_col = get_irn_col(curr->nodes[i]);
714                         if (root_col != arg_col) {
715                                 DBG((dbg, LEVEL_1, "  %3d for arg %+F color %d\n", curr->costs[i], curr->nodes[i], arg_col));
716                                 res += curr->costs[i];
717                         }
718                 }
719         }
720         return res;
721 }
722
723 int co_get_lower_bound(const copy_opt_t *co)
724 {
725         int res = 0;
726         unit_t *curr;
727
728         ASSERT_OU_AVAIL(co);
729
730         list_for_each_entry(unit_t, curr, &co->units, units)
731                 res += curr->inevitable_costs + curr->min_nodes_costs;
732         return res;
733 }
734
735 void co_complete_stats(const copy_opt_t *co, co_complete_stats_t *stat)
736 {
737         bitset_t *seen = bitset_malloc(get_irg_last_idx(co->irg));
738         affinity_node_t *an;
739
740         memset(stat, 0, sizeof(stat[0]));
741
742         /* count affinity edges. */
743         co_gs_foreach_aff_node(co, an) {
744                 neighb_t *neigh;
745                 stat->aff_nodes += 1;
746                 bitset_set(seen, get_irn_idx(an->irn));
747                 co_gs_foreach_neighb(an, neigh) {
748                         if (!bitset_is_set(seen, get_irn_idx(neigh->irn))) {
749                                 stat->aff_edges += 1;
750                                 stat->max_costs += neigh->costs;
751
752                                 if (get_irn_col(an->irn) != get_irn_col(neigh->irn)) {
753                                         stat->costs += neigh->costs;
754                                         stat->unsatisfied_edges += 1;
755                                 }
756
757                                 if (nodes_interfere(co->cenv, an->irn, neigh->irn)) {
758                                         stat->aff_int += 1;
759                                         stat->inevit_costs += neigh->costs;
760                                 }
761
762                         }
763                 }
764         }
765
766         bitset_free(seen);
767 }
768
769 /******************************************************************************
770    _____                 _        _____ _
771   / ____|               | |      / ____| |
772  | |  __ _ __ __ _ _ __ | |__   | (___ | |_ ___  _ __ __ _  __ _  ___
773  | | |_ | '__/ _` | '_ \| '_ \   \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
774  | |__| | | | (_| | |_) | | | |  ____) | || (_) | | | (_| | (_| |  __/
775   \_____|_|  \__,_| .__/|_| |_| |_____/ \__\___/|_|  \__,_|\__, |\___|
776                   | |                                       __/ |
777                   |_|                                      |___/
778  ******************************************************************************/
779
780 static int compare_affinity_node_t(const void *k1, const void *k2, size_t size)
781 {
782         const affinity_node_t *n1 = (const affinity_node_t*)k1;
783         const affinity_node_t *n2 = (const affinity_node_t*)k2;
784         (void) size;
785
786         return (n1->irn != n2->irn);
787 }
788
789 static void add_edge(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs)
790 {
791         affinity_node_t new_node, *node;
792         neighb_t        *nbr;
793         int             allocnew = 1;
794
795         new_node.irn        = n1;
796         new_node.degree     = 0;
797         new_node.neighbours = NULL;
798         node = (affinity_node_t*)set_insert(co->nodes, &new_node, sizeof(new_node), hash_irn(new_node.irn));
799
800         for (nbr = node->neighbours; nbr; nbr = nbr->next)
801                 if (nbr->irn == n2) {
802                         allocnew = 0;
803                         break;
804                 }
805
806         /* if we did not find n2 in n1's neighbourhood insert it */
807         if (allocnew) {
808                 nbr        = OALLOC(&co->obst, neighb_t);
809                 nbr->irn   = n2;
810                 nbr->costs = 0;
811                 nbr->next  = node->neighbours;
812
813                 node->neighbours = nbr;
814                 node->degree++;
815         }
816
817         /* now nbr points to n1's neighbour-entry of n2 */
818         nbr->costs += costs;
819 }
820
821 static inline void add_edges(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs)
822 {
823         if (! be_ifg_connected(co->cenv->ifg, n1, n2)) {
824                 add_edge(co, n1, n2, costs);
825                 add_edge(co, n2, n1, costs);
826         }
827 }
828
829 static void build_graph_walker(ir_node *irn, void *env)
830 {
831         const arch_register_req_t *req;
832         copy_opt_t                *co  = (copy_opt_t*)env;
833         int pos, max;
834
835         if (get_irn_mode(irn) == mode_T)
836                 return;
837         req = arch_get_irn_register_req(irn);
838         if (req->cls != co->cls || arch_irn_is_ignore(irn))
839                 return;
840
841         if (is_Reg_Phi(irn)) { /* Phis */
842                 for (pos=0, max=get_irn_arity(irn); pos<max; ++pos) {
843                         ir_node *arg = get_irn_n(irn, pos);
844                         add_edges(co, irn, arg, co->get_costs(irn, pos));
845                 }
846         } else if (is_Perm_Proj(irn)) { /* Perms */
847                 ir_node *arg = get_Perm_src(irn);
848                 add_edges(co, irn, arg, co->get_costs(irn, -1));
849         } else { /* 2-address code */
850                 if (is_2addr_code(req)) {
851                         const unsigned other = req->other_same;
852                         int i;
853
854                         for (i = 0; 1U << i <= other; ++i) {
855                                 if (other & (1U << i)) {
856                                         ir_node *other = get_irn_n(skip_Proj(irn), i);
857                                         if (!arch_irn_is_ignore(other))
858                                                 add_edges(co, irn, other, co->get_costs(irn, -1));
859                                 }
860                         }
861                 }
862         }
863 }
864
865 void co_build_graph_structure(copy_opt_t *co)
866 {
867         obstack_init(&co->obst);
868         co->nodes = new_set(compare_affinity_node_t, 32);
869
870         irg_walk_graph(co->irg, build_graph_walker, NULL, co);
871 }
872
873 void co_free_graph_structure(copy_opt_t *co)
874 {
875         ASSERT_GS_AVAIL(co);
876
877         del_set(co->nodes);
878         obstack_free(&co->obst, NULL);
879         co->nodes = NULL;
880 }
881
882 int co_gs_is_optimizable(copy_opt_t *co, ir_node *irn)
883 {
884         affinity_node_t new_node, *n;
885
886         ASSERT_GS_AVAIL(co);
887
888         new_node.irn = irn;
889         n = (affinity_node_t*)set_find(co->nodes, &new_node, sizeof(new_node), hash_irn(new_node.irn));
890         if (n) {
891                 return (n->degree > 0);
892         } else
893                 return 0;
894 }
895
896 static int co_dump_appel_disjoint_constraints(const copy_opt_t *co, ir_node *a, ir_node *b)
897 {
898         ir_node *nodes[]  = { a, b };
899         bitset_t *constr[] = { NULL, NULL };
900         int j;
901
902         constr[0] = bitset_alloca(co->cls->n_regs);
903         constr[1] = bitset_alloca(co->cls->n_regs);
904
905         for (j = 0; j < 2; ++j) {
906                 const arch_register_req_t *req = arch_get_irn_register_req(nodes[j]);
907                 if (arch_register_req_is(req, limited))
908                         rbitset_copy_to_bitset(req->limited, constr[j]);
909                 else
910                         bitset_set_all(constr[j]);
911
912         }
913
914         return !bitset_intersect(constr[0], constr[1]);
915 }
916
917 void co_dump_appel_graph(const copy_opt_t *co, FILE *f)
918 {
919         be_ifg_t *ifg       = co->cenv->ifg;
920         int      *color_map = ALLOCAN(int, co->cls->n_regs);
921         int      *node_map  = XMALLOCN(int, get_irg_last_idx(co->irg) + 1);
922         ir_graph *irg       = co->irg;
923         be_irg_t *birg      = be_birg_from_irg(irg);
924
925         ir_node *irn;
926         nodes_iter_t it;
927         neighbours_iter_t nit;
928         int n, n_regs;
929         unsigned i;
930
931         n_regs = 0;
932         for (i = 0; i < co->cls->n_regs; ++i) {
933                 const arch_register_t *reg = &co->cls->regs[i];
934                 if (rbitset_is_set(birg->allocatable_regs, reg->global_index)) {
935                         color_map[i] = n_regs++;
936                 } else {
937                         color_map[i] = -1;
938                 }
939         }
940
941         /*
942          * n contains the first node number.
943          * the values below n are the pre-colored register nodes
944          */
945
946         n = n_regs;
947         be_ifg_foreach_node(ifg, &it, irn) {
948                 if (arch_irn_is_ignore(irn))
949                         continue;
950                 node_map[get_irn_idx(irn)] = n++;
951         }
952
953         fprintf(f, "%d %d\n", n, n_regs);
954
955         be_ifg_foreach_node(ifg, &it, irn) {
956                 if (!arch_irn_is_ignore(irn)) {
957                         int idx                        = node_map[get_irn_idx(irn)];
958                         affinity_node_t           *a   = get_affinity_info(co, irn);
959                         const arch_register_req_t *req = arch_get_irn_register_req(irn);
960                         ir_node                   *adj;
961
962                         if (arch_register_req_is(req, limited)) {
963                                 for (i = 0; i < co->cls->n_regs; ++i) {
964                                         if (!rbitset_is_set(req->limited, i) && color_map[i] >= 0)
965                                                 fprintf(f, "%d %d -1\n", color_map[i], idx);
966                                 }
967                         }
968
969                         be_ifg_foreach_neighbour(ifg, &nit, irn, adj) {
970                                 if (!arch_irn_is_ignore(adj) &&
971                                                 !co_dump_appel_disjoint_constraints(co, irn, adj)) {
972                                         int adj_idx = node_map[get_irn_idx(adj)];
973                                         if (idx < adj_idx)
974                                                 fprintf(f, "%d %d -1\n", idx, adj_idx);
975                                 }
976                         }
977
978                         if (a) {
979                                 neighb_t *n;
980
981                                 co_gs_foreach_neighb(a, n) {
982                                         if (!arch_irn_is_ignore(n->irn)) {
983                                                 int n_idx = node_map[get_irn_idx(n->irn)];
984                                                 if (idx < n_idx)
985                                                         fprintf(f, "%d %d %d\n", idx, n_idx, (int) n->costs);
986                                         }
987                                 }
988                         }
989                 }
990         }
991
992         xfree(node_map);
993 }
994
995 /*
996          ___ _____ ____   ____   ___ _____   ____                        _
997         |_ _|  ___/ ___| |  _ \ / _ \_   _| |  _ \ _   _ _ __ ___  _ __ (_)_ __   __ _
998          | || |_ | |  _  | | | | | | || |   | | | | | | | '_ ` _ \| '_ \| | '_ \ / _` |
999          | ||  _|| |_| | | |_| | |_| || |   | |_| | |_| | | | | | | |_) | | | | | (_| |
1000         |___|_|   \____| |____/ \___/ |_|   |____/ \__,_|_| |_| |_| .__/|_|_| |_|\__, |
1001                                                                   |_|            |___/
1002 */
1003
1004 static const char *get_dot_color_name(size_t col)
1005 {
1006         static const char *names[] = {
1007                 "blue",
1008                 "red",
1009                 "green",
1010                 "yellow",
1011                 "cyan",
1012                 "magenta",
1013                 "orange",
1014                 "chocolate",
1015                 "beige",
1016                 "navy",
1017                 "darkgreen",
1018                 "darkred",
1019                 "lightPink",
1020                 "chartreuse",
1021                 "lightskyblue",
1022                 "linen",
1023                 "pink",
1024                 "lightslateblue",
1025                 "mintcream",
1026                 "red",
1027                 "darkolivegreen",
1028                 "mediumblue",
1029                 "mistyrose",
1030                 "salmon",
1031                 "darkseagreen",
1032                 "mediumslateblue"
1033                 "moccasin",
1034                 "tomato",
1035                 "forestgreen",
1036                 "darkturquoise",
1037                 "palevioletred"
1038         };
1039
1040         return col < sizeof(names)/sizeof(names[0]) ? names[col] : "white";
1041 }
1042
1043 typedef struct co_ifg_dump_t {
1044         const copy_opt_t *co;
1045         unsigned flags;
1046 } co_ifg_dump_t;
1047
1048 static void ifg_dump_graph_attr(FILE *f, void *self)
1049 {
1050         (void) self;
1051         fprintf(f, "overlap=scale");
1052 }
1053
1054 static int ifg_is_dump_node(void *self, ir_node *irn)
1055 {
1056         (void)self;
1057         return !arch_irn_is_ignore(irn);
1058 }
1059
1060 static void ifg_dump_node_attr(FILE *f, void *self, ir_node *irn)
1061 {
1062         co_ifg_dump_t             *env     = (co_ifg_dump_t*)self;
1063         const arch_register_t     *reg     = arch_get_irn_register(irn);
1064         const arch_register_req_t *req     = arch_get_irn_register_req(irn);
1065         int                        limited = arch_register_req_is(req, limited);
1066
1067         if (env->flags & CO_IFG_DUMP_LABELS) {
1068                 ir_fprintf(f, "label=\"%+F", irn);
1069
1070                 if ((env->flags & CO_IFG_DUMP_CONSTR) && limited) {
1071                         bitset_t *bs = bitset_alloca(env->co->cls->n_regs);
1072                         rbitset_copy_to_bitset(req->limited, bs);
1073                         ir_fprintf(f, "\\n%B", bs);
1074                 }
1075                 ir_fprintf(f, "\" ");
1076         } else {
1077                 fprintf(f, "label=\"\" shape=point " );
1078         }
1079
1080         if (env->flags & CO_IFG_DUMP_SHAPE)
1081                 fprintf(f, "shape=%s ", limited ? "diamond" : "ellipse");
1082
1083         if (env->flags & CO_IFG_DUMP_COLORS)
1084                 fprintf(f, "style=filled color=%s ", get_dot_color_name(reg->index));
1085 }
1086
1087 static void ifg_dump_at_end(FILE *file, void *self)
1088 {
1089         co_ifg_dump_t *env = (co_ifg_dump_t*)self;
1090         affinity_node_t *a;
1091
1092         co_gs_foreach_aff_node(env->co, a) {
1093                 const arch_register_t *ar = arch_get_irn_register(a->irn);
1094                 unsigned aidx = get_irn_idx(a->irn);
1095                 neighb_t *n;
1096
1097                 co_gs_foreach_neighb(a, n) {
1098                         const arch_register_t *nr = arch_get_irn_register(n->irn);
1099                         unsigned nidx = get_irn_idx(n->irn);
1100
1101                         if (aidx < nidx) {
1102                                 const char *color = nr == ar ? "blue" : "red";
1103                                 fprintf(file, "\tn%u -- n%u [weight=0.01 ", aidx, nidx);
1104                                 if (env->flags & CO_IFG_DUMP_LABELS)
1105                                         fprintf(file, "label=\"%d\" ", n->costs);
1106                                 if (env->flags & CO_IFG_DUMP_COLORS)
1107                                         fprintf(file, "color=%s ", color);
1108                                 else
1109                                         fprintf(file, "style=dotted");
1110                                 fprintf(file, "];\n");
1111                         }
1112                 }
1113         }
1114 }
1115
1116
1117 static be_ifg_dump_dot_cb_t ifg_dot_cb = {
1118         ifg_is_dump_node,
1119         ifg_dump_graph_attr,
1120         ifg_dump_node_attr,
1121         NULL,
1122         NULL,
1123         ifg_dump_at_end
1124 };
1125
1126
1127
1128 void co_dump_ifg_dot(const copy_opt_t *co, FILE *f, unsigned flags)
1129 {
1130         co_ifg_dump_t cod;
1131
1132         cod.co    = co;
1133         cod.flags = flags;
1134         be_ifg_dump_dot(co->cenv->ifg, co->irg, f, &ifg_dot_cb, &cod);
1135 }
1136
1137 /*
1138     __  __       _         ____       _
1139    |  \/  | __ _(_)_ __   |  _ \ _ __(_)_   _____ _ __
1140    | |\/| |/ _` | | '_ \  | | | | '__| \ \ / / _ \ '__|
1141    | |  | | (_| | | | | | | |_| | |  | |\ V /  __/ |
1142    |_|  |_|\__,_|_|_| |_| |____/|_|  |_| \_/ \___|_|
1143
1144 */
1145
1146 static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char *suffix)
1147 {
1148         FILE *result;
1149         char buf[1024];
1150         size_t i, n;
1151         char *tu_name;
1152         const char *cup_name = be_get_irg_main_env(env->irg)->cup_name;
1153
1154         n = strlen(cup_name);
1155         tu_name = XMALLOCN(char, n + 1);
1156         strcpy(tu_name, cup_name);
1157         for (i = 0; i < n; ++i)
1158                 if (tu_name[i] == '.')
1159                         tu_name[i] = '_';
1160
1161
1162         ir_snprintf(buf, sizeof(buf), "%s%s_%F_%s%s", prefix, tu_name, env->irg, env->cls->name, suffix);
1163         xfree(tu_name);
1164         result = fopen(buf, "wt");
1165         if (result == NULL) {
1166                 panic("Couldn't open '%s' for writing.", buf);
1167         }
1168
1169         return result;
1170 }
1171
1172 void co_driver(be_chordal_env_t *cenv)
1173 {
1174         ir_timer_t          *timer = ir_timer_new();
1175         co_complete_stats_t before, after;
1176         copy_opt_t          *co;
1177         int                 was_optimal = 0;
1178
1179         assert(selected_copyopt);
1180
1181         /* skip copymin if algo is 'none' */
1182         if (selected_copyopt->copyopt == void_algo)
1183                 return;
1184
1185         be_liveness_assure_chk(be_get_irg_liveness(cenv->irg));
1186
1187         co = new_copy_opt(cenv, cost_func);
1188         co_build_ou_structure(co);
1189         co_build_graph_structure(co);
1190
1191         co_complete_stats(co, &before);
1192
1193         be_stat_ev_ull("co_aff_nodes",    before.aff_nodes);
1194         be_stat_ev_ull("co_aff_edges",    before.aff_edges);
1195         be_stat_ev_ull("co_max_costs",    before.max_costs);
1196         be_stat_ev_ull("co_inevit_costs", before.inevit_costs);
1197         be_stat_ev_ull("co_aff_int",      before.aff_int);
1198
1199         be_stat_ev_ull("co_init_costs",   before.costs);
1200         be_stat_ev_ull("co_init_unsat",   before.unsatisfied_edges);
1201
1202         if (dump_flags & DUMP_BEFORE) {
1203                 FILE *f = my_open(cenv, "", "-before.dot");
1204                 co_dump_ifg_dot(co, f, style_flags);
1205                 fclose(f);
1206         }
1207
1208         /* if the algo can improve results, provide an initial solution with heur1 */
1209         if (improve && selected_copyopt->can_improve_existing) {
1210                 co_complete_stats_t stats;
1211
1212                 /* produce a heuristic solution */
1213                 co_solve_heuristic(co);
1214
1215                 /* do the stats and provide the current costs */
1216                 co_complete_stats(co, &stats);
1217                 be_stat_ev_ull("co_prepare_costs", stats.costs);
1218         }
1219
1220         /* perform actual copy minimization */
1221         ir_timer_reset_and_start(timer);
1222         was_optimal = selected_copyopt->copyopt(co);
1223         ir_timer_stop(timer);
1224
1225         be_stat_ev("co_time", ir_timer_elapsed_msec(timer));
1226         be_stat_ev_ull("co_optimal", was_optimal);
1227         ir_timer_free(timer);
1228
1229         if (dump_flags & DUMP_AFTER) {
1230                 FILE *f = my_open(cenv, "", "-after.dot");
1231                 co_dump_ifg_dot(co, f, style_flags);
1232                 fclose(f);
1233         }
1234
1235         co_complete_stats(co, &after);
1236
1237         if (do_stats) {
1238                 unsigned long long optimizable_costs = after.max_costs - after.inevit_costs;
1239                 unsigned long long evitable          = after.costs     - after.inevit_costs;
1240
1241                 ir_printf("%30F ", cenv->irg);
1242                 printf("%10s %10llu%10llu%10llu", cenv->cls->name, after.max_costs, before.costs, after.inevit_costs);
1243
1244                 if (optimizable_costs > 0)
1245                         printf("%10llu %5.2f\n", after.costs, (evitable * 100.0) / optimizable_costs);
1246                 else
1247                         printf("%10llu %5s\n", after.costs, "-");
1248         }
1249
1250         /* Dump the interference graph in Appel's format. */
1251         if (dump_flags & DUMP_APPEL) {
1252                 FILE *f = my_open(cenv, "", ".apl");
1253                 fprintf(f, "# %llu %llu\n", after.costs, after.unsatisfied_edges);
1254                 co_dump_appel_graph(co, f);
1255                 fclose(f);
1256         }
1257
1258         be_stat_ev_ull("co_after_costs", after.costs);
1259         be_stat_ev_ull("co_after_unsat", after.unsatisfied_edges);
1260
1261         co_free_graph_structure(co);
1262         co_free_ou_structure(co);
1263         free_copy_opt(co);
1264 }