becopyilp: fix size_reduction code
[libfirm] / ir / be / becopyopt.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Copy minimization driver.
23  * @author      Daniel Grund
24  * @date        12.04.2005
25  *
26  * Main file for the optimization reducing the copies needed for:
27  * - Phi coalescing
28  * - Register-constrained nodes
29  * - Two-address code instructions
30  */
31 #include "config.h"
32
33 #include "execfreq.h"
34 #include "xmalloc.h"
35 #include "debug.h"
36 #include "pmap.h"
37 #include "raw_bitset.h"
38 #include "irnode.h"
39 #include "irgraph.h"
40 #include "irgwalk.h"
41 #include "irprog.h"
42 #include "irloop_t.h"
43 #include "iredges_t.h"
44 #include "irprintf_t.h"
45 #include "irtools.h"
46 #include "util.h"
47
48 #include "bemodule.h"
49 #include "bearch.h"
50 #include "benode.h"
51 #include "beutil.h"
52 #include "beifg.h"
53 #include "beintlive_t.h"
54 #include "becopyopt_t.h"
55 #include "becopystat.h"
56 #include "belive_t.h"
57 #include "beinsn_t.h"
58 #include "besched.h"
59 #include "bestatevent.h"
60 #include "beirg.h"
61 #include "error.h"
62
63 #include "lc_opts.h"
64 #include "lc_opts_enum.h"
65
66 #define DUMP_BEFORE 1
67 #define DUMP_AFTER  2
68 #define DUMP_APPEL  4
69 #define DUMP_ALL    2 * DUMP_APPEL - 1
70
71 #define COST_FUNC_FREQ     1
72 #define COST_FUNC_LOOP     2
73 #define COST_FUNC_ALL_ONE  3
74
75 static unsigned   dump_flags  = 0;
76 static unsigned   style_flags = 0;
77 static int        do_stats    = 0;
78 static cost_fct_t cost_func   = co_get_costs_exec_freq;
79 static int        improve     = 1;
80
81 static const lc_opt_enum_mask_items_t dump_items[] = {
82         { "before",  DUMP_BEFORE },
83         { "after",   DUMP_AFTER  },
84         { "appel",   DUMP_APPEL  },
85         { "all",     DUMP_ALL    },
86         { NULL,      0 }
87 };
88
89 /**
90  * Flags for dumping the IFG.
91  */
92 enum {
93         CO_IFG_DUMP_COLORS = 1 << 0, /**< Dump the graph colored. */
94         CO_IFG_DUMP_LABELS = 1 << 1, /**< Dump node/edge labels. */
95         CO_IFG_DUMP_SHAPE  = 1 << 2, /**< Give constrained nodes special shapes. */
96         CO_IFG_DUMP_CONSTR = 1 << 3, /**< Dump the node constraints in the label. */
97 };
98
99 static const lc_opt_enum_mask_items_t style_items[] = {
100         { "color",   CO_IFG_DUMP_COLORS },
101         { "labels",  CO_IFG_DUMP_LABELS },
102         { "constr",  CO_IFG_DUMP_CONSTR },
103         { "shape",   CO_IFG_DUMP_SHAPE  },
104         { "full",    2 * CO_IFG_DUMP_SHAPE - 1 },
105         { NULL,      0 }
106 };
107
108 typedef int (*opt_funcptr)(void);
109
110 static const lc_opt_enum_func_ptr_items_t cost_func_items[] = {
111         { "freq",   (opt_funcptr) co_get_costs_exec_freq },
112         { "loop",   (opt_funcptr) co_get_costs_loop_depth },
113         { "one",    (opt_funcptr) co_get_costs_all_one },
114         { NULL,     NULL }
115 };
116
117 static lc_opt_enum_mask_var_t dump_var = {
118         &dump_flags, dump_items
119 };
120
121 static lc_opt_enum_mask_var_t style_var = {
122         &style_flags, style_items
123 };
124
125 static lc_opt_enum_func_ptr_var_t cost_func_var = {
126         (opt_funcptr*) &cost_func, cost_func_items
127 };
128
129 static const lc_opt_table_entry_t options[] = {
130         LC_OPT_ENT_ENUM_FUNC_PTR ("cost",    "select a cost function",                                  &cost_func_var),
131         LC_OPT_ENT_ENUM_MASK     ("dump",    "dump ifg before or after copy optimization",              &dump_var),
132         LC_OPT_ENT_ENUM_MASK     ("style",   "dump style for ifg dumping",                              &style_var),
133         LC_OPT_ENT_BOOL          ("stats",   "dump statistics after each optimization",                 &do_stats),
134         LC_OPT_ENT_BOOL          ("improve", "run heur1 before if algo can exploit start solutions",    &improve),
135         LC_OPT_LAST
136 };
137
138 static be_module_list_entry_t *copyopts = NULL;
139 static const co_algo_info *selected_copyopt = NULL;
140
141 void be_register_copyopt(const char *name, co_algo_info *copyopt)
142 {
143         if (selected_copyopt == NULL)
144                 selected_copyopt = copyopt;
145         be_add_module_to_list(&copyopts, name, copyopt);
146 }
147
148 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copyopt)
149 void be_init_copyopt(void)
150 {
151         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
152         lc_opt_entry_t *ra_grp = lc_opt_get_grp(be_grp, "ra");
153         lc_opt_entry_t *chordal_grp = lc_opt_get_grp(ra_grp, "chordal");
154         lc_opt_entry_t *co_grp = lc_opt_get_grp(chordal_grp, "co");
155
156         lc_opt_add_table(co_grp, options);
157         be_add_module_list_opt(co_grp, "algo", "select copy optimization algo",
158                                        &copyopts, (void**) &selected_copyopt);
159 }
160
161 static int void_algo(copy_opt_t *co)
162 {
163         (void) co;
164         return 0;
165 }
166
167 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copynone)
168 void be_init_copynone(void)
169 {
170         static co_algo_info copyheur = {
171                 void_algo, 0
172         };
173
174         be_register_copyopt("none", &copyheur);
175 }
176
177 #undef QUICK_AND_DIRTY_HACK
178
179 static int nodes_interfere(const be_chordal_env_t *env, const ir_node *a, const ir_node *b)
180 {
181         if (env->ifg)
182                 return be_ifg_connected(env->ifg, a, b);
183         else {
184                 be_lv_t *lv = be_get_irg_liveness(env->irg);
185                 return be_values_interfere(lv, a, b);
186         }
187 }
188
189
190 /******************************************************************************
191     _____                           _
192    / ____|                         | |
193   | |  __  ___ _ __   ___ _ __ __ _| |
194   | | |_ |/ _ \ '_ \ / _ \ '__/ _` | |
195   | |__| |  __/ | | |  __/ | | (_| | |
196    \_____|\___|_| |_|\___|_|  \__,_|_|
197
198  ******************************************************************************/
199
200 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
201
202
203 copy_opt_t *new_copy_opt(be_chordal_env_t *chordal_env, cost_fct_t get_costs)
204 {
205         const char *s1, *s2, *s3;
206         size_t len;
207         copy_opt_t *co;
208
209         FIRM_DBG_REGISTER(dbg, "ir.be.copyopt");
210
211         co = XMALLOCZ(copy_opt_t);
212         co->cenv      = chordal_env;
213         co->irg       = chordal_env->irg;
214         co->cls       = chordal_env->cls;
215         co->get_costs = get_costs;
216
217         s1 = get_irp_name();
218         s2 = get_entity_name(get_irg_entity(co->irg));
219         s3 = chordal_env->cls->name;
220         len = strlen(s1) + strlen(s2) + strlen(s3) + 5;
221         co->name = XMALLOCN(char, len);
222         snprintf(co->name, len, "%s__%s__%s", s1, s2, s3);
223
224         return co;
225 }
226
227 void free_copy_opt(copy_opt_t *co)
228 {
229         xfree(co->name);
230         free(co);
231 }
232
233 /**
234  * Checks if a node is optimizable, viz. has something to do with coalescing
235  * @param irn  The irn to check
236  */
237 static int co_is_optimizable_root(ir_node *irn)
238 {
239         const arch_register_req_t *req;
240
241         if (arch_irn_is_ignore(irn))
242                 return 0;
243
244         if (is_Reg_Phi(irn) || is_Perm_Proj(irn))
245                 return 1;
246
247         req = arch_get_irn_register_req(irn);
248         if (is_2addr_code(req))
249                 return 1;
250
251         return 0;
252 }
253
254 int co_get_costs_loop_depth(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos)
255 {
256         int cost = 0;
257         ir_loop *loop;
258         ir_node *root_block = get_nodes_block(root);
259         (void) co;
260         (void) arg;
261
262         if (is_Phi(root)) {
263                 /* for phis the copies are placed in the corresponding pred-block */
264                 loop = get_irn_loop(get_Block_cfgpred_block(root_block, pos));
265         } else {
266                 /* a perm places the copy in the same block as it resides */
267                 loop = get_irn_loop(root_block);
268         }
269         if (loop) {
270                 int d = get_loop_depth(loop);
271                 cost = d*d;
272         }
273         return 1+cost;
274 }
275
276 int co_get_costs_exec_freq(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos)
277 {
278         int res;
279         ir_node *root_bl = get_nodes_block(root);
280         ir_node *copy_bl = is_Phi(root) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
281         ir_exec_freq *exec_freq = be_get_irg_exec_freq(co->cenv->irg);
282         (void) arg;
283         res = get_block_execfreq_ulong(exec_freq, copy_bl);
284
285         /* don't allow values smaller than one. */
286         return res < 1 ? 1 : res;
287 }
288
289
290 int co_get_costs_all_one(const copy_opt_t *co, ir_node *root, ir_node *arg, int pos)
291 {
292         (void) co;
293         (void) root;
294         (void) arg;
295         (void) pos;
296         return 1;
297 }
298
299 /******************************************************************************
300    ____        _   _    _       _ _          _____ _
301   / __ \      | | | |  | |     (_) |        / ____| |
302  | |  | |_ __ | |_| |  | |_ __  _| |_ ___  | (___ | |_ ___  _ __ __ _  __ _  ___
303  | |  | | '_ \| __| |  | | '_ \| | __/ __|  \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
304  | |__| | |_) | |_| |__| | | | | | |_\__ \  ____) | || (_) | | | (_| | (_| |  __/
305   \____/| .__/ \__|\____/|_| |_|_|\__|___/ |_____/ \__\___/|_|  \__,_|\__, |\___|
306         | |                                                            __/ |
307         |_|                                                           |___/
308  ******************************************************************************/
309
310 /**
311  * Determines a maximum weighted independent set with respect to
312  * the interference and conflict edges of all nodes in a qnode.
313  */
314 static int ou_max_ind_set_costs(unit_t *ou)
315 {
316         be_chordal_env_t *chordal_env = ou->co->cenv;
317         ir_node **safe, **unsafe;
318         int i, o, safe_count, safe_costs, unsafe_count, *unsafe_costs;
319         bitset_t *curr;
320         size_t  pos;
321         int curr_weight, best_weight = 0;
322
323         /* assign the nodes into two groups.
324          * safe: node has no interference, hence it is in every max stable set.
325          * unsafe: node has an interference
326          */
327         safe         = ALLOCAN(ir_node*, ou->node_count - 1);
328         safe_costs   = 0;
329         safe_count   = 0;
330         unsafe       = ALLOCAN(ir_node*, ou->node_count - 1);
331         unsafe_costs = ALLOCAN(int,      ou->node_count - 1);
332         unsafe_count = 0;
333         for (i=1; i<ou->node_count; ++i) {
334                 int is_safe = 1;
335                 for (o=1; o<ou->node_count; ++o) {
336                         if (i==o)
337                                 continue;
338                         if (nodes_interfere(chordal_env, ou->nodes[i], ou->nodes[o])) {
339                                 unsafe_costs[unsafe_count] = ou->costs[i];
340                                 unsafe[unsafe_count] = ou->nodes[i];
341                                 ++unsafe_count;
342                                 is_safe = 0;
343                                 break;
344                         }
345                 }
346                 if (is_safe) {
347                         safe_costs += ou->costs[i];
348                         safe[safe_count++] = ou->nodes[i];
349                 }
350         }
351
352
353         /* now compute the best set out of the unsafe nodes*/
354         if (unsafe_count > MIS_HEUR_TRIGGER) {
355                 bitset_t *best = bitset_alloca(unsafe_count);
356                 /* Heuristic: Greedy trial and error form index 0 to unsafe_count-1 */
357                 for (i=0; i<unsafe_count; ++i) {
358                         bitset_set(best, i);
359                         /* check if it is a stable set */
360                         for (o=bitset_next_set(best, 0); o!=-1 && o<i; o=bitset_next_set(best, o+1))
361                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o])) {
362                                         bitset_clear(best, i); /* clear the bit and try next one */
363                                         break;
364                                 }
365                 }
366                 /* compute the weight */
367                 bitset_foreach(best, pos)
368                         best_weight += unsafe_costs[pos];
369         } else {
370                 /* Exact Algorithm: Brute force */
371                 curr = bitset_alloca(unsafe_count);
372                 bitset_set_all(curr);
373                 while (bitset_popcount(curr) != 0) {
374                         /* check if curr is a stable set */
375                         for (i=bitset_next_set(curr, 0); i!=-1; i=bitset_next_set(curr, i+1))
376                                 for (o=bitset_next_set(curr, i+1); o!=-1; o=bitset_next_set(curr, o+1)) /* !!!!! difference to qnode_max_ind_set(): NOT (curr, i) */
377                                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o]))
378                                                         goto no_stable_set;
379
380                         /* if we arrive here, we have a stable set */
381                         /* compute the weight of the stable set*/
382                         curr_weight = 0;
383                         bitset_foreach(curr, pos)
384                                 curr_weight += unsafe_costs[pos];
385
386                         /* any better ? */
387                         if (curr_weight > best_weight) {
388                                 best_weight = curr_weight;
389                         }
390
391         no_stable_set:
392                         bitset_minus1(curr);
393                 }
394         }
395
396         return safe_costs+best_weight;
397 }
398
399 static void co_collect_units(ir_node *irn, void *env)
400 {
401         const arch_register_req_t *req;
402         copy_opt_t                *co  = (copy_opt_t*)env;
403         unit_t *unit;
404
405         if (get_irn_mode(irn) == mode_T)
406                 return;
407         req = arch_get_irn_register_req(irn);
408         if (req->cls != co->cls)
409                 return;
410         if (!co_is_optimizable_root(irn))
411                 return;
412
413         /* Init a new unit */
414         unit = XMALLOCZ(unit_t);
415         unit->co = co;
416         unit->node_count = 1;
417         INIT_LIST_HEAD(&unit->queue);
418
419         /* Phi with some/all of its arguments */
420         if (is_Reg_Phi(irn)) {
421                 int i, arity;
422
423                 /* init */
424                 arity = get_irn_arity(irn);
425                 unit->nodes = XMALLOCN(ir_node*, arity + 1);
426                 unit->costs = XMALLOCN(int,      arity + 1);
427                 unit->nodes[0] = irn;
428
429                 /* fill */
430                 for (i=0; i<arity; ++i) {
431                         int o, arg_pos;
432                         ir_node *arg = get_irn_n(irn, i);
433
434                         assert(arch_get_irn_reg_class(arg) == co->cls && "Argument not in same register class.");
435                         if (arg == irn)
436                                 continue;
437                         if (nodes_interfere(co->cenv, irn, arg)) {
438                                 unit->inevitable_costs += co->get_costs(co, irn, arg, i);
439                                 continue;
440                         }
441
442                         /* Else insert the argument of the phi to the members of this ou */
443                         DBG((dbg, LEVEL_1, "\t   Member: %+F\n", arg));
444
445                         if (arch_irn_is_ignore(arg))
446                                 continue;
447
448                         /* Check if arg has occurred at a prior position in the arg/list */
449                         arg_pos = 0;
450                         for (o=1; o<unit->node_count; ++o) {
451                                 if (unit->nodes[o] == arg) {
452                                         arg_pos = o;
453                                         break;
454                                 }
455                         }
456
457                         if (!arg_pos) { /* a new argument */
458                                 /* insert node, set costs */
459                                 unit->nodes[unit->node_count] = arg;
460                                 unit->costs[unit->node_count] = co->get_costs(co, irn, arg, i);
461                                 unit->node_count++;
462                         } else { /* arg has occurred before in same phi */
463                                 /* increase costs for existing arg */
464                                 unit->costs[arg_pos] += co->get_costs(co, irn, arg, i);
465                         }
466                 }
467                 unit->nodes = XREALLOC(unit->nodes, ir_node*, unit->node_count);
468                 unit->costs = XREALLOC(unit->costs, int,      unit->node_count);
469         } else if (is_Perm_Proj(irn)) {
470                 /* Proj of a perm with corresponding arg */
471                 assert(!nodes_interfere(co->cenv, irn, get_Perm_src(irn)));
472                 unit->nodes = XMALLOCN(ir_node*, 2);
473                 unit->costs = XMALLOCN(int,      2);
474                 unit->node_count = 2;
475                 unit->nodes[0] = irn;
476                 unit->nodes[1] = get_Perm_src(irn);
477                 unit->costs[1] = co->get_costs(co, irn, unit->nodes[1], -1);
478         } else {
479                 /* Src == Tgt of a 2-addr-code instruction */
480                 if (is_2addr_code(req)) {
481                         const unsigned other = req->other_same;
482                         int            count = 0;
483                         int            i;
484
485                         for (i = 0; (1U << i) <= other; ++i) {
486                                 if (other & (1U << i)) {
487                                         ir_node *o  = get_irn_n(skip_Proj(irn), i);
488                                         if (arch_irn_is_ignore(o))
489                                                 continue;
490                                         if (nodes_interfere(co->cenv, irn, o))
491                                                 continue;
492                                         ++count;
493                                 }
494                         }
495
496                         if (count != 0) {
497                                 int k = 0;
498                                 ++count;
499                                 unit->nodes = XMALLOCN(ir_node*, count);
500                                 unit->costs = XMALLOCN(int,      count);
501                                 unit->node_count = count;
502                                 unit->nodes[k++] = irn;
503
504                                 for (i = 0; 1U << i <= other; ++i) {
505                                         if (other & (1U << i)) {
506                                                 ir_node *o  = get_irn_n(skip_Proj(irn), i);
507                                                 if (!arch_irn_is_ignore(o) &&
508                                                                 !nodes_interfere(co->cenv, irn, o)) {
509                                                         unit->nodes[k] = o;
510                                                         unit->costs[k] = co->get_costs(co, irn, o, -1);
511                                                         ++k;
512                                                 }
513                                         }
514                                 }
515                         }
516                 } else {
517                         assert(0 && "This is not an optimizable node!");
518                 }
519         }
520
521         /* Insert the new unit at a position according to its costs */
522         if (unit->node_count > 1) {
523                 int i;
524                 struct list_head *tmp;
525
526                 /* Determine the maximum costs this unit can cause: all_nodes_cost */
527                 for (i=1; i<unit->node_count; ++i) {
528                         unit->sort_key = MAX(unit->sort_key, unit->costs[i]);
529                         unit->all_nodes_costs += unit->costs[i];
530                 }
531
532                 /* Determine the minimal costs this unit will cause: min_nodes_costs */
533                 unit->min_nodes_costs += unit->all_nodes_costs - ou_max_ind_set_costs(unit);
534                 /* Insert the new ou according to its sort_key */
535                 tmp = &co->units;
536                 while (tmp->next != &co->units && list_entry_units(tmp->next)->sort_key > unit->sort_key)
537                         tmp = tmp->next;
538                 list_add(&unit->units, tmp);
539         } else {
540                 free(unit);
541         }
542 }
543
544 #ifdef QUICK_AND_DIRTY_HACK
545
546 static int compare_ous(const void *k1, const void *k2)
547 {
548         const unit_t *u1 = *((const unit_t **) k1);
549         const unit_t *u2 = *((const unit_t **) k2);
550         int i, o, u1_has_constr, u2_has_constr;
551         arch_register_req_t req;
552
553         /* Units with constraints come first */
554         u1_has_constr = 0;
555         for (i=0; i<u1->node_count; ++i) {
556                 arch_get_irn_register_req(&req, u1->nodes[i]);
557                 if (arch_register_req_is(&req, limited)) {
558                         u1_has_constr = 1;
559                         break;
560                 }
561         }
562
563         u2_has_constr = 0;
564         for (i=0; i<u2->node_count; ++i) {
565                 arch_get_irn_register_req(&req, u2->nodes[i]);
566                 if (arch_register_req_is(&req, limited)) {
567                         u2_has_constr = 1;
568                         break;
569                 }
570         }
571
572         if (u1_has_constr != u2_has_constr)
573                 return u2_has_constr - u1_has_constr;
574
575         /* Now check, whether the two units are connected */
576 #if 0
577         for (i=0; i<u1->node_count; ++i)
578                 for (o=0; o<u2->node_count; ++o)
579                         if (u1->nodes[i] == u2->nodes[o])
580                                 return 0;
581 #endif
582
583         /* After all, the sort key decides. Greater keys come first. */
584         return u2->sort_key - u1->sort_key;
585
586 }
587
588 /**
589  * Sort the ou's according to constraints and their sort_key
590  */
591 static void co_sort_units(copy_opt_t *co)
592 {
593         int i, count = 0, costs;
594         unit_t *ou, **ous;
595
596         /* get the number of ous, remove them form the list and fill the array */
597         list_for_each_entry(unit_t, ou, &co->units, units)
598                 count++;
599         ous = ALLOCAN(unit_t, count);
600
601         costs = co_get_max_copy_costs(co);
602
603         i = 0;
604         list_for_each_entry(unit_t, ou, &co->units, units)
605                 ous[i++] = ou;
606
607         INIT_LIST_HEAD(&co->units);
608
609         assert(count == i && list_empty(&co->units));
610
611         for (i=0; i<count; ++i)
612                 ir_printf("%+F\n", ous[i]->nodes[0]);
613
614         qsort(ous, count, sizeof(*ous), compare_ous);
615
616         ir_printf("\n\n");
617         for (i=0; i<count; ++i)
618                 ir_printf("%+F\n", ous[i]->nodes[0]);
619
620         /* reinsert into list in correct order */
621         for (i=0; i<count; ++i)
622                 list_add_tail(&ous[i]->units, &co->units);
623
624         assert(costs == co_get_max_copy_costs(co));
625 }
626 #endif
627
628 void co_build_ou_structure(copy_opt_t *co)
629 {
630         DBG((dbg, LEVEL_1, "\tCollecting optimization units\n"));
631         INIT_LIST_HEAD(&co->units);
632         irg_walk_graph(co->irg, co_collect_units, NULL, co);
633 #ifdef QUICK_AND_DIRTY_HACK
634         co_sort_units(co);
635 #endif
636 }
637
638 void co_free_ou_structure(copy_opt_t *co)
639 {
640         unit_t *curr, *tmp;
641         ASSERT_OU_AVAIL(co);
642         list_for_each_entry_safe(unit_t, curr, tmp, &co->units, units) {
643                 xfree(curr->nodes);
644                 xfree(curr->costs);
645                 xfree(curr);
646         }
647         co->units.next = NULL;
648 }
649
650 /* co_solve_heuristic() is implemented in becopyheur.c */
651
652 int co_get_max_copy_costs(const copy_opt_t *co)
653 {
654         int i, res = 0;
655         unit_t *curr;
656
657         ASSERT_OU_AVAIL(co);
658
659         list_for_each_entry(unit_t, curr, &co->units, units) {
660                 res += curr->inevitable_costs;
661                 for (i=1; i<curr->node_count; ++i)
662                         res += curr->costs[i];
663         }
664         return res;
665 }
666
667 int co_get_inevit_copy_costs(const copy_opt_t *co)
668 {
669         int res = 0;
670         unit_t *curr;
671
672         ASSERT_OU_AVAIL(co);
673
674         list_for_each_entry(unit_t, curr, &co->units, units)
675                 res += curr->inevitable_costs;
676         return res;
677 }
678
679 int co_get_copy_costs(const copy_opt_t *co)
680 {
681         int i, res = 0;
682         unit_t *curr;
683
684         ASSERT_OU_AVAIL(co);
685
686         list_for_each_entry(unit_t, curr, &co->units, units) {
687                 int root_col = get_irn_col(curr->nodes[0]);
688                 DBG((dbg, LEVEL_1, "  %3d costs for root %+F color %d\n", curr->inevitable_costs, curr->nodes[0], root_col));
689                 res += curr->inevitable_costs;
690                 for (i=1; i<curr->node_count; ++i) {
691                         int arg_col = get_irn_col(curr->nodes[i]);
692                         if (root_col != arg_col) {
693                                 DBG((dbg, LEVEL_1, "  %3d for arg %+F color %d\n", curr->costs[i], curr->nodes[i], arg_col));
694                                 res += curr->costs[i];
695                         }
696                 }
697         }
698         return res;
699 }
700
701 int co_get_lower_bound(const copy_opt_t *co)
702 {
703         int res = 0;
704         unit_t *curr;
705
706         ASSERT_OU_AVAIL(co);
707
708         list_for_each_entry(unit_t, curr, &co->units, units)
709                 res += curr->inevitable_costs + curr->min_nodes_costs;
710         return res;
711 }
712
713 void co_complete_stats(const copy_opt_t *co, co_complete_stats_t *stat)
714 {
715         bitset_t *seen = bitset_malloc(get_irg_last_idx(co->irg));
716         affinity_node_t *an;
717
718         memset(stat, 0, sizeof(stat[0]));
719
720         /* count affinity edges. */
721         co_gs_foreach_aff_node(co, an) {
722                 neighb_t *neigh;
723                 stat->aff_nodes += 1;
724                 bitset_set(seen, get_irn_idx(an->irn));
725                 co_gs_foreach_neighb(an, neigh) {
726                         if (!bitset_is_set(seen, get_irn_idx(neigh->irn))) {
727                                 stat->aff_edges += 1;
728                                 stat->max_costs += neigh->costs;
729
730                                 if (get_irn_col(an->irn) != get_irn_col(neigh->irn)) {
731                                         stat->costs += neigh->costs;
732                                         stat->unsatisfied_edges += 1;
733                                 }
734
735                                 if (nodes_interfere(co->cenv, an->irn, neigh->irn)) {
736                                         stat->aff_int += 1;
737                                         stat->inevit_costs += neigh->costs;
738                                 }
739
740                         }
741                 }
742         }
743
744         bitset_free(seen);
745 }
746
747 /******************************************************************************
748    _____                 _        _____ _
749   / ____|               | |      / ____| |
750  | |  __ _ __ __ _ _ __ | |__   | (___ | |_ ___  _ __ __ _  __ _  ___
751  | | |_ | '__/ _` | '_ \| '_ \   \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
752  | |__| | | | (_| | |_) | | | |  ____) | || (_) | | | (_| | (_| |  __/
753   \_____|_|  \__,_| .__/|_| |_| |_____/ \__\___/|_|  \__,_|\__, |\___|
754                   | |                                       __/ |
755                   |_|                                      |___/
756  ******************************************************************************/
757
758 static int compare_affinity_node_t(const void *k1, const void *k2, size_t size)
759 {
760         const affinity_node_t *n1 = (const affinity_node_t*)k1;
761         const affinity_node_t *n2 = (const affinity_node_t*)k2;
762         (void) size;
763
764         return (n1->irn != n2->irn);
765 }
766
767 static void add_edge(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs)
768 {
769         affinity_node_t new_node, *node;
770         neighb_t        *nbr;
771         int             allocnew = 1;
772
773         new_node.irn        = n1;
774         new_node.degree     = 0;
775         new_node.neighbours = NULL;
776         node = (affinity_node_t*)set_insert(co->nodes, &new_node, sizeof(new_node), hash_irn(new_node.irn));
777
778         for (nbr = node->neighbours; nbr; nbr = nbr->next)
779                 if (nbr->irn == n2) {
780                         allocnew = 0;
781                         break;
782                 }
783
784         /* if we did not find n2 in n1's neighbourhood insert it */
785         if (allocnew) {
786                 nbr        = OALLOC(&co->obst, neighb_t);
787                 nbr->irn   = n2;
788                 nbr->costs = 0;
789                 nbr->next  = node->neighbours;
790
791                 node->neighbours = nbr;
792                 node->degree++;
793         }
794
795         /* now nbr points to n1's neighbour-entry of n2 */
796         nbr->costs += costs;
797 }
798
799 static inline void add_edges(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs)
800 {
801         if (! be_ifg_connected(co->cenv->ifg, n1, n2)) {
802                 add_edge(co, n1, n2, costs);
803                 add_edge(co, n2, n1, costs);
804         }
805 }
806
807 static void build_graph_walker(ir_node *irn, void *env)
808 {
809         const arch_register_req_t *req;
810         copy_opt_t                *co  = (copy_opt_t*)env;
811         int pos, max;
812
813         if (get_irn_mode(irn) == mode_T)
814                 return;
815         req = arch_get_irn_register_req(irn);
816         if (req->cls != co->cls || arch_irn_is_ignore(irn))
817                 return;
818
819         if (is_Reg_Phi(irn)) { /* Phis */
820                 for (pos=0, max=get_irn_arity(irn); pos<max; ++pos) {
821                         ir_node *arg = get_irn_n(irn, pos);
822                         add_edges(co, irn, arg, co->get_costs(co, irn, arg, pos));
823                 }
824         } else if (is_Perm_Proj(irn)) { /* Perms */
825                 ir_node *arg = get_Perm_src(irn);
826                 add_edges(co, irn, arg, co->get_costs(co, irn, arg, 0));
827         } else { /* 2-address code */
828                 if (is_2addr_code(req)) {
829                         const unsigned other = req->other_same;
830                         int i;
831
832                         for (i = 0; 1U << i <= other; ++i) {
833                                 if (other & (1U << i)) {
834                                         ir_node *other = get_irn_n(skip_Proj(irn), i);
835                                         if (!arch_irn_is_ignore(other))
836                                                 add_edges(co, irn, other, co->get_costs(co, irn, other, 0));
837                                 }
838                         }
839                 }
840         }
841 }
842
843 void co_build_graph_structure(copy_opt_t *co)
844 {
845         obstack_init(&co->obst);
846         co->nodes = new_set(compare_affinity_node_t, 32);
847
848         irg_walk_graph(co->irg, build_graph_walker, NULL, co);
849 }
850
851 void co_free_graph_structure(copy_opt_t *co)
852 {
853         ASSERT_GS_AVAIL(co);
854
855         del_set(co->nodes);
856         obstack_free(&co->obst, NULL);
857         co->nodes = NULL;
858 }
859
860 int co_gs_is_optimizable(copy_opt_t *co, ir_node *irn)
861 {
862         affinity_node_t new_node, *n;
863
864         ASSERT_GS_AVAIL(co);
865
866         new_node.irn = irn;
867         n = (affinity_node_t*)set_find(co->nodes, &new_node, sizeof(new_node), hash_irn(new_node.irn));
868         if (n) {
869                 return (n->degree > 0);
870         } else
871                 return 0;
872 }
873
874 static int co_dump_appel_disjoint_constraints(const copy_opt_t *co, ir_node *a, ir_node *b)
875 {
876         ir_node *nodes[]  = { a, b };
877         bitset_t *constr[] = { NULL, NULL };
878         int j;
879
880         constr[0] = bitset_alloca(co->cls->n_regs);
881         constr[1] = bitset_alloca(co->cls->n_regs);
882
883         for (j = 0; j < 2; ++j) {
884                 const arch_register_req_t *req = arch_get_irn_register_req(nodes[j]);
885                 if (arch_register_req_is(req, limited))
886                         rbitset_copy_to_bitset(req->limited, constr[j]);
887                 else
888                         bitset_set_all(constr[j]);
889
890         }
891
892         return !bitset_intersect(constr[0], constr[1]);
893 }
894
895 void co_dump_appel_graph(const copy_opt_t *co, FILE *f)
896 {
897         be_ifg_t *ifg       = co->cenv->ifg;
898         int      *color_map = ALLOCAN(int, co->cls->n_regs);
899         int      *node_map  = XMALLOCN(int, get_irg_last_idx(co->irg) + 1);
900         ir_graph *irg       = co->irg;
901         be_irg_t *birg      = be_birg_from_irg(irg);
902
903         ir_node *irn;
904         nodes_iter_t it;
905         neighbours_iter_t nit;
906         int n, n_regs;
907         unsigned i;
908
909         n_regs = 0;
910         for (i = 0; i < co->cls->n_regs; ++i) {
911                 const arch_register_t *reg = &co->cls->regs[i];
912                 if (rbitset_is_set(birg->allocatable_regs, reg->global_index)) {
913                         color_map[i] = n_regs++;
914                 } else {
915                         color_map[i] = -1;
916                 }
917         }
918
919         /*
920          * n contains the first node number.
921          * the values below n are the pre-colored register nodes
922          */
923
924         n = n_regs;
925         be_ifg_foreach_node(ifg, &it, irn) {
926                 if (arch_irn_is_ignore(irn))
927                         continue;
928                 node_map[get_irn_idx(irn)] = n++;
929         }
930
931         fprintf(f, "%d %d\n", n, n_regs);
932
933         be_ifg_foreach_node(ifg, &it, irn) {
934                 if (!arch_irn_is_ignore(irn)) {
935                         int idx                        = node_map[get_irn_idx(irn)];
936                         affinity_node_t           *a   = get_affinity_info(co, irn);
937                         const arch_register_req_t *req = arch_get_irn_register_req(irn);
938                         ir_node                   *adj;
939
940                         if (arch_register_req_is(req, limited)) {
941                                 for (i = 0; i < co->cls->n_regs; ++i) {
942                                         if (!rbitset_is_set(req->limited, i) && color_map[i] >= 0)
943                                                 fprintf(f, "%d %d -1\n", color_map[i], idx);
944                                 }
945                         }
946
947                         be_ifg_foreach_neighbour(ifg, &nit, irn, adj) {
948                                 if (!arch_irn_is_ignore(adj) &&
949                                                 !co_dump_appel_disjoint_constraints(co, irn, adj)) {
950                                         int adj_idx = node_map[get_irn_idx(adj)];
951                                         if (idx < adj_idx)
952                                                 fprintf(f, "%d %d -1\n", idx, adj_idx);
953                                 }
954                         }
955
956                         if (a) {
957                                 neighb_t *n;
958
959                                 co_gs_foreach_neighb(a, n) {
960                                         if (!arch_irn_is_ignore(n->irn)) {
961                                                 int n_idx = node_map[get_irn_idx(n->irn)];
962                                                 if (idx < n_idx)
963                                                         fprintf(f, "%d %d %d\n", idx, n_idx, (int) n->costs);
964                                         }
965                                 }
966                         }
967                 }
968         }
969
970         xfree(node_map);
971 }
972
973 /*
974          ___ _____ ____   ____   ___ _____   ____                        _
975         |_ _|  ___/ ___| |  _ \ / _ \_   _| |  _ \ _   _ _ __ ___  _ __ (_)_ __   __ _
976          | || |_ | |  _  | | | | | | || |   | | | | | | | '_ ` _ \| '_ \| | '_ \ / _` |
977          | ||  _|| |_| | | |_| | |_| || |   | |_| | |_| | | | | | | |_) | | | | | (_| |
978         |___|_|   \____| |____/ \___/ |_|   |____/ \__,_|_| |_| |_| .__/|_|_| |_|\__, |
979                                                                   |_|            |___/
980 */
981
982 static const char *get_dot_color_name(size_t col)
983 {
984         static const char *names[] = {
985                 "blue",
986                 "red",
987                 "green",
988                 "yellow",
989                 "cyan",
990                 "magenta",
991                 "orange",
992                 "chocolate",
993                 "beige",
994                 "navy",
995                 "darkgreen",
996                 "darkred",
997                 "lightPink",
998                 "chartreuse",
999                 "lightskyblue",
1000                 "linen",
1001                 "pink",
1002                 "lightslateblue",
1003                 "mintcream",
1004                 "red",
1005                 "darkolivegreen",
1006                 "mediumblue",
1007                 "mistyrose",
1008                 "salmon",
1009                 "darkseagreen",
1010                 "mediumslateblue"
1011                 "moccasin",
1012                 "tomato",
1013                 "forestgreen",
1014                 "darkturquoise",
1015                 "palevioletred"
1016         };
1017
1018         return col < sizeof(names)/sizeof(names[0]) ? names[col] : "white";
1019 }
1020
1021 typedef struct co_ifg_dump_t {
1022         const copy_opt_t *co;
1023         unsigned flags;
1024 } co_ifg_dump_t;
1025
1026 static void ifg_dump_graph_attr(FILE *f, void *self)
1027 {
1028         (void) self;
1029         fprintf(f, "overlap=scale");
1030 }
1031
1032 static int ifg_is_dump_node(void *self, ir_node *irn)
1033 {
1034         (void)self;
1035         return !arch_irn_is_ignore(irn);
1036 }
1037
1038 static void ifg_dump_node_attr(FILE *f, void *self, ir_node *irn)
1039 {
1040         co_ifg_dump_t             *env     = (co_ifg_dump_t*)self;
1041         const arch_register_t     *reg     = arch_get_irn_register(irn);
1042         const arch_register_req_t *req     = arch_get_irn_register_req(irn);
1043         int                        limited = arch_register_req_is(req, limited);
1044
1045         if (env->flags & CO_IFG_DUMP_LABELS) {
1046                 ir_fprintf(f, "label=\"%+F", irn);
1047
1048                 if ((env->flags & CO_IFG_DUMP_CONSTR) && limited) {
1049                         bitset_t *bs = bitset_alloca(env->co->cls->n_regs);
1050                         rbitset_copy_to_bitset(req->limited, bs);
1051                         ir_fprintf(f, "\\n%B", bs);
1052                 }
1053                 ir_fprintf(f, "\" ");
1054         } else {
1055                 fprintf(f, "label=\"\" shape=point " );
1056         }
1057
1058         if (env->flags & CO_IFG_DUMP_SHAPE)
1059                 fprintf(f, "shape=%s ", limited ? "diamond" : "ellipse");
1060
1061         if (env->flags & CO_IFG_DUMP_COLORS)
1062                 fprintf(f, "style=filled color=%s ", get_dot_color_name(reg->index));
1063 }
1064
1065 static void ifg_dump_at_end(FILE *file, void *self)
1066 {
1067         co_ifg_dump_t *env = (co_ifg_dump_t*)self;
1068         affinity_node_t *a;
1069
1070         co_gs_foreach_aff_node(env->co, a) {
1071                 const arch_register_t *ar = arch_get_irn_register(a->irn);
1072                 unsigned aidx = get_irn_idx(a->irn);
1073                 neighb_t *n;
1074
1075                 co_gs_foreach_neighb(a, n) {
1076                         const arch_register_t *nr = arch_get_irn_register(n->irn);
1077                         unsigned nidx = get_irn_idx(n->irn);
1078
1079                         if (aidx < nidx) {
1080                                 const char *color = nr == ar ? "blue" : "red";
1081                                 fprintf(file, "\tn%u -- n%u [weight=0.01 ", aidx, nidx);
1082                                 if (env->flags & CO_IFG_DUMP_LABELS)
1083                                         fprintf(file, "label=\"%d\" ", n->costs);
1084                                 if (env->flags & CO_IFG_DUMP_COLORS)
1085                                         fprintf(file, "color=%s ", color);
1086                                 else
1087                                         fprintf(file, "style=dotted");
1088                                 fprintf(file, "];\n");
1089                         }
1090                 }
1091         }
1092 }
1093
1094
1095 static be_ifg_dump_dot_cb_t ifg_dot_cb = {
1096         ifg_is_dump_node,
1097         ifg_dump_graph_attr,
1098         ifg_dump_node_attr,
1099         NULL,
1100         NULL,
1101         ifg_dump_at_end
1102 };
1103
1104
1105
1106 void co_dump_ifg_dot(const copy_opt_t *co, FILE *f, unsigned flags)
1107 {
1108         co_ifg_dump_t cod;
1109
1110         cod.co    = co;
1111         cod.flags = flags;
1112         be_ifg_dump_dot(co->cenv->ifg, co->irg, f, &ifg_dot_cb, &cod);
1113 }
1114
1115 /*
1116     __  __       _         ____       _
1117    |  \/  | __ _(_)_ __   |  _ \ _ __(_)_   _____ _ __
1118    | |\/| |/ _` | | '_ \  | | | | '__| \ \ / / _ \ '__|
1119    | |  | | (_| | | | | | | |_| | |  | |\ V /  __/ |
1120    |_|  |_|\__,_|_|_| |_| |____/|_|  |_| \_/ \___|_|
1121
1122 */
1123
1124 static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char *suffix)
1125 {
1126         FILE *result;
1127         char buf[1024];
1128         size_t i, n;
1129         char *tu_name;
1130         const char *cup_name = be_get_irg_main_env(env->irg)->cup_name;
1131
1132         n = strlen(cup_name);
1133         tu_name = XMALLOCN(char, n + 1);
1134         strcpy(tu_name, cup_name);
1135         for (i = 0; i < n; ++i)
1136                 if (tu_name[i] == '.')
1137                         tu_name[i] = '_';
1138
1139
1140         ir_snprintf(buf, sizeof(buf), "%s%s_%F_%s%s", prefix, tu_name, env->irg, env->cls->name, suffix);
1141         xfree(tu_name);
1142         result = fopen(buf, "wt");
1143         if (result == NULL) {
1144                 panic("Couldn't open '%s' for writing.", buf);
1145         }
1146
1147         return result;
1148 }
1149
1150 void co_driver(be_chordal_env_t *cenv)
1151 {
1152         ir_timer_t          *timer = ir_timer_new();
1153         co_complete_stats_t before, after;
1154         copy_opt_t          *co;
1155         int                 was_optimal = 0;
1156
1157         assert(selected_copyopt);
1158
1159         /* skip copymin if algo is 'none' */
1160         if (selected_copyopt->copyopt == void_algo)
1161                 return;
1162
1163         be_liveness_assure_chk(be_get_irg_liveness(cenv->irg));
1164
1165         co = new_copy_opt(cenv, cost_func);
1166         co_build_ou_structure(co);
1167         co_build_graph_structure(co);
1168
1169         co_complete_stats(co, &before);
1170
1171         be_stat_ev_ull("co_aff_nodes",    before.aff_nodes);
1172         be_stat_ev_ull("co_aff_edges",    before.aff_edges);
1173         be_stat_ev_ull("co_max_costs",    before.max_costs);
1174         be_stat_ev_ull("co_inevit_costs", before.inevit_costs);
1175         be_stat_ev_ull("co_aff_int",      before.aff_int);
1176
1177         be_stat_ev_ull("co_init_costs",   before.costs);
1178         be_stat_ev_ull("co_init_unsat",   before.unsatisfied_edges);
1179
1180         if (dump_flags & DUMP_BEFORE) {
1181                 FILE *f = my_open(cenv, "", "-before.dot");
1182                 co_dump_ifg_dot(co, f, style_flags);
1183                 fclose(f);
1184         }
1185
1186         /* if the algo can improve results, provide an initial solution with heur1 */
1187         if (improve && selected_copyopt->can_improve_existing) {
1188                 co_complete_stats_t stats;
1189
1190                 /* produce a heuristic solution */
1191                 co_solve_heuristic(co);
1192
1193                 /* do the stats and provide the current costs */
1194                 co_complete_stats(co, &stats);
1195                 be_stat_ev_ull("co_prepare_costs", stats.costs);
1196         }
1197
1198         /* perform actual copy minimization */
1199         ir_timer_reset_and_start(timer);
1200         was_optimal = selected_copyopt->copyopt(co);
1201         ir_timer_stop(timer);
1202
1203         be_stat_ev("co_time", ir_timer_elapsed_msec(timer));
1204         be_stat_ev_ull("co_optimal", was_optimal);
1205         ir_timer_free(timer);
1206
1207         if (dump_flags & DUMP_AFTER) {
1208                 FILE *f = my_open(cenv, "", "-after.dot");
1209                 co_dump_ifg_dot(co, f, style_flags);
1210                 fclose(f);
1211         }
1212
1213         co_complete_stats(co, &after);
1214
1215         if (do_stats) {
1216                 unsigned long long optimizable_costs = after.max_costs - after.inevit_costs;
1217                 unsigned long long evitable          = after.costs     - after.inevit_costs;
1218
1219                 ir_printf("%30F ", cenv->irg);
1220                 printf("%10s %10llu%10llu%10llu", cenv->cls->name, after.max_costs, before.costs, after.inevit_costs);
1221
1222                 if (optimizable_costs > 0)
1223                         printf("%10llu %5.2f\n", after.costs, (evitable * 100.0) / optimizable_costs);
1224                 else
1225                         printf("%10llu %5s\n", after.costs, "-");
1226         }
1227
1228         /* Dump the interference graph in Appel's format. */
1229         if (dump_flags & DUMP_APPEL) {
1230                 FILE *f = my_open(cenv, "", ".apl");
1231                 fprintf(f, "# %llu %llu\n", after.costs, after.unsatisfied_edges);
1232                 co_dump_appel_graph(co, f);
1233                 fclose(f);
1234         }
1235
1236         be_stat_ev_ull("co_after_costs", after.costs);
1237         be_stat_ev_ull("co_after_unsat", after.unsatisfied_edges);
1238
1239         co_free_graph_structure(co);
1240         co_free_ou_structure(co);
1241         free_copy_opt(co);
1242 }