55fe5145ae8eab733a3c2938ca001bb7f8011f57
[libfirm] / ir / be / becopyopt.c
1 /**
2  * Author:      Daniel Grund
3  * Date:                12.04.2005
4  * Copyright:   (c) Universitaet Karlsruhe
5  * Licence:     This file protected by GPL -  GNU GENERAL PUBLIC LICENSE.
6  */
7 #ifdef HAVE_CONFIG_H
8 #include "config.h"
9 #endif
10 #ifdef HAVE_ALLOCA_H
11 #include <alloca.h>
12 #endif
13 #ifdef HAVE_MALLOC_H
14 #include <malloc.h>
15 #endif
16
17 #include "execfreq.h"
18 #include "xmalloc.h"
19 #include "debug.h"
20 #include "pmap.h"
21 #include "irgraph.h"
22 #include "irgwalk.h"
23 #include "irprog.h"
24 #include "irloop_t.h"
25 #include "iredges_t.h"
26 #include "phiclass.h"
27 #include "irbitset.h"
28 #include "irphase_t.h"
29 #include "irprintf_t.h"
30
31
32 #include "bearch.h"
33 #include "benode_t.h"
34 #include "beutil.h"
35 #include "beifg_t.h"
36 #include "becopyopt_t.h"
37 #include "becopystat.h"
38 #include "belive_t.h"
39 #include "beinsn_t.h"
40 #include "besched_t.h"
41
42 #ifdef WITH_LIBCORE
43
44 /* Insert additional options registration functions here. */
45 extern void be_co2_register_options(lc_opt_entry_t *grp);
46 extern void be_co3_register_options(lc_opt_entry_t *grp);
47
48 void co_register_options(lc_opt_entry_t *grp)
49 {
50         be_co2_register_options(grp);
51         be_co3_register_options(grp);
52 }
53 #endif
54
55
56 #undef QUICK_AND_DIRTY_HACK
57
58 /******************************************************************************
59     _____                           _
60    / ____|                         | |
61   | |  __  ___ _ __   ___ _ __ __ _| |
62   | | |_ |/ _ \ '_ \ / _ \ '__/ _` | |
63   | |__| |  __/ | | |  __/ | | (_| | |
64    \_____|\___|_| |_|\___|_|  \__,_|_|
65
66  ******************************************************************************/
67
68 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
69
70 void be_copy_opt_init(void) {
71 }
72
73 copy_opt_t *new_copy_opt(be_chordal_env_t *chordal_env, cost_fct_t get_costs)
74 {
75         const char *s1, *s2, *s3;
76         int len;
77         copy_opt_t *co;
78
79         FIRM_DBG_REGISTER(dbg, "ir.be.copyopt");
80
81         co = xcalloc(1, sizeof(*co));
82         co->cenv      = chordal_env;
83         co->aenv      = chordal_env->birg->main_env->arch_env;
84         co->irg       = chordal_env->irg;
85         co->cls       = chordal_env->cls;
86         co->get_costs = get_costs;
87
88         s1 = get_irp_prog_name();
89         s2 = get_entity_name(get_irg_entity(co->irg));
90         s3 = chordal_env->cls->name;
91         len = strlen(s1) + strlen(s2) + strlen(s3) + 5;
92         co->name = xmalloc(len);
93         snprintf(co->name, len, "%s__%s__%s", s1, s2, s3);
94
95         return co;
96 }
97
98 void free_copy_opt(copy_opt_t *co) {
99         xfree(co->name);
100         free(co);
101 }
102
103 int co_is_optimizable_root(const copy_opt_t *co, ir_node *irn) {
104         arch_register_req_t req;
105         const arch_register_t *reg;
106
107         if (arch_irn_is(co->aenv, irn, ignore))
108                 return 0;
109
110         reg = arch_get_irn_register(co->aenv, irn);
111         if (arch_register_type_is(reg, ignore))
112                 return 0;
113
114         if (is_Reg_Phi(irn) || is_Perm_Proj(co->aenv, irn) || is_2addr_code(co->aenv, irn, &req))
115                 return 1;
116
117         return 0;
118 }
119
120 int co_is_optimizable_arg(const copy_opt_t *co, ir_node *irn) {
121         const ir_edge_t *edge;
122         const arch_register_t *reg;
123
124         assert(0 && "Is buggy and obsolete. Do not use");
125
126         if (arch_irn_is(co->aenv, irn, ignore))
127                 return 0;
128
129         reg = arch_get_irn_register(co->aenv, irn);
130         if (arch_register_type_is(reg, ignore))
131                 return 0;
132
133         foreach_out_edge(irn, edge) {
134                 ir_node *n = edge->src;
135
136                 if (!nodes_interfere(co->cenv, irn, n) || irn == n) {
137                         arch_register_req_t req;
138                         arch_get_register_req(co->aenv, &req, n, -1);
139
140                         if(is_Reg_Phi(n) ||
141                            is_Perm(co->aenv, n) ||
142                            (arch_register_req_is(&req, should_be_same) && req.other_same == irn)
143                           )
144                                 return 1;
145                 }
146         }
147
148         return 0;
149 }
150
151 int co_get_costs_loop_depth(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
152         int cost = 0;
153         ir_loop *loop;
154         ir_node *root_block = get_nodes_block(root);
155
156         if (is_Phi(root)) {
157                 /* for phis the copies are placed in the corresponding pred-block */
158                 loop = get_irn_loop(get_Block_cfgpred_block(root_block, pos));
159         } else {
160                 /* a perm places the copy in the same block as it resides */
161                 loop = get_irn_loop(root_block);
162         }
163         if (loop) {
164                 int d = get_loop_depth(loop);
165                 cost = d*d;
166         }
167         return cost+1;
168 }
169
170 int co_get_costs_exec_freq(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
171         ir_node *root_bl = get_nodes_block(root);
172         ir_node *copy_bl = is_Phi(root) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
173         unsigned long freq = get_block_execfreq_ulong(co->cenv->exec_freq, copy_bl);
174         return freq > 0 ? (int) freq : 1;
175 }
176
177
178 int co_get_costs_all_one(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
179         return 1;
180 }
181
182 /******************************************************************************
183    ____        _   _    _       _ _          _____ _
184   / __ \      | | | |  | |     (_) |        / ____| |
185  | |  | |_ __ | |_| |  | |_ __  _| |_ ___  | (___ | |_ ___  _ __ __ _  __ _  ___
186  | |  | | '_ \| __| |  | | '_ \| | __/ __|  \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
187  | |__| | |_) | |_| |__| | | | | | |_\__ \  ____) | || (_) | | | (_| | (_| |  __/
188   \____/| .__/ \__|\____/|_| |_|_|\__|___/ |_____/ \__\___/|_|  \__,_|\__, |\___|
189         | |                                                            __/ |
190         |_|                                                           |___/
191  ******************************************************************************/
192
193 /**
194  * Determines a maximum weighted independent set with respect to
195  * the interference and conflict edges of all nodes in a qnode.
196  */
197 static int ou_max_ind_set_costs(unit_t *ou) {
198         be_chordal_env_t *chordal_env = ou->co->cenv;
199         ir_node **safe, **unsafe;
200         int i, o, safe_count, safe_costs, unsafe_count, *unsafe_costs;
201         bitset_t *curr;
202         int max, pos, curr_weight, best_weight = 0;
203
204         /* assign the nodes into two groups.
205          * safe: node has no interference, hence it is in every max stable set.
206          * unsafe: node has an interference
207          */
208         safe = alloca((ou->node_count-1) * sizeof(*safe));
209         safe_costs = 0;
210         safe_count = 0;
211         unsafe = alloca((ou->node_count-1) * sizeof(*unsafe));
212         unsafe_costs = alloca((ou->node_count-1) * sizeof(*unsafe_costs));
213         unsafe_count = 0;
214         for(i=1; i<ou->node_count; ++i) {
215                 int is_safe = 1;
216                 for(o=1; o<ou->node_count; ++o) {
217                         if (i==o)
218                                 continue;
219                         if (nodes_interfere(chordal_env, ou->nodes[i], ou->nodes[o])) {
220                                 unsafe_costs[unsafe_count] = ou->costs[i];
221                                 unsafe[unsafe_count] = ou->nodes[i];
222                                 ++unsafe_count;
223                                 is_safe = 0;
224                                 break;
225                         }
226                 }
227                 if (is_safe) {
228                         safe_costs += ou->costs[i];
229                         safe[safe_count++] = ou->nodes[i];
230                 }
231         }
232
233
234         /* now compute the best set out of the unsafe nodes*/
235         if (unsafe_count > MIS_HEUR_TRIGGER) {
236                 bitset_t *best = bitset_alloca(unsafe_count);
237                 /* Heuristik: Greedy trial and error form index 0 to unsafe_count-1 */
238                 for (i=0; i<unsafe_count; ++i) {
239                         bitset_set(best, i);
240                         /* check if it is a stable set */
241                         for (o=bitset_next_set(best, 0); o!=-1 && o<i; o=bitset_next_set(best, o+1))
242                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o])) {
243                                         bitset_clear(best, i); /* clear the bit and try next one */
244                                         break;
245                                 }
246                 }
247                 /* compute the weight */
248                 bitset_foreach(best, pos)
249                         best_weight += unsafe_costs[pos];
250         } else {
251                 /* Exact Algorithm: Brute force */
252                 curr = bitset_alloca(unsafe_count);
253                 bitset_set_all(curr);
254                 while ((max = bitset_popcnt(curr)) != 0) {
255                         /* check if curr is a stable set */
256                         for (i=bitset_next_set(curr, 0); i!=-1; i=bitset_next_set(curr, i+1))
257                                 for (o=bitset_next_set(curr, i+1); o!=-1; o=bitset_next_set(curr, o+1)) /* !!!!! difference to qnode_max_ind_set(): NOT (curr, i) */
258                                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o]))
259                                                         goto no_stable_set;
260
261                         /* if we arrive here, we have a stable set */
262                         /* compute the weigth of the stable set*/
263                         curr_weight = 0;
264                         bitset_foreach(curr, pos)
265                                 curr_weight += unsafe_costs[pos];
266
267                         /* any better ? */
268                         if (curr_weight > best_weight) {
269                                 best_weight = curr_weight;
270                         }
271
272         no_stable_set:
273                         bitset_minus1(curr);
274                 }
275         }
276
277         return safe_costs+best_weight;
278 }
279
280 static void co_collect_units(ir_node *irn, void *env) {
281         copy_opt_t *co = env;
282         unit_t *unit;
283         arch_register_req_t req;
284
285         if (!is_curr_reg_class(co, irn))
286                 return;
287         if (!co_is_optimizable_root(co, irn))
288                 return;
289
290         /* Init a new unit */
291         unit = xcalloc(1, sizeof(*unit));
292         unit->co = co;
293         unit->node_count = 1;
294         INIT_LIST_HEAD(&unit->queue);
295
296         /* Phi with some/all of its arguments */
297         if (is_Reg_Phi(irn)) {
298                 int i, arity;
299
300                 /* init */
301                 arity = get_irn_arity(irn);
302                 unit->nodes = xmalloc((arity+1) * sizeof(*unit->nodes));
303                 unit->costs = xmalloc((arity+1) * sizeof(*unit->costs));
304                 unit->nodes[0] = irn;
305
306                 /* fill */
307                 for (i=0; i<arity; ++i) {
308                         int o, arg_pos;
309                         ir_node *arg = get_irn_n(irn, i);
310
311                         assert(is_curr_reg_class(co, arg) && "Argument not in same register class.");
312                         if (arg == irn)
313                                 continue;
314                         if (nodes_interfere(co->cenv, irn, arg)) {
315                                 unit->inevitable_costs += co->get_costs(co, irn, arg, i);
316                                 continue;
317                         }
318
319                         /* Else insert the argument of the phi to the members of this ou */
320                         DBG((dbg, LEVEL_1, "\t   Member: %+F\n", arg));
321
322                         /* Check if arg has occurred at a prior position in the arg/list */
323                         arg_pos = 0;
324                         for (o=0; o<unit->node_count; ++o)
325                                 if (unit->nodes[o] == arg) {
326                                         arg_pos = o;
327                                         break;
328                                 }
329
330                         if (!arg_pos) { /* a new argument */
331                                 /* insert node, set costs */
332                                 unit->nodes[unit->node_count] = arg;
333                                 unit->costs[unit->node_count] = co->get_costs(co, irn, arg, i);
334                                 unit->node_count++;
335                         } else { /* arg has occured before in same phi */
336                                 /* increase costs for existing arg */
337                                 unit->costs[arg_pos] += co->get_costs(co, irn, arg, i);
338                         }
339                 }
340                 unit->nodes = xrealloc(unit->nodes, unit->node_count * sizeof(*unit->nodes));
341                 unit->costs = xrealloc(unit->costs, unit->node_count * sizeof(*unit->costs));
342         } else
343
344         /* Proj of a perm with corresponding arg */
345         if (is_Perm_Proj(co->aenv, irn)) {
346                 assert(!nodes_interfere(co->cenv, irn, get_Perm_src(irn)));
347                 unit->nodes = xmalloc(2 * sizeof(*unit->nodes));
348                 unit->costs = xmalloc(2 * sizeof(*unit->costs));
349                 unit->node_count = 2;
350                 unit->nodes[0] = irn;
351                 unit->nodes[1] = get_Perm_src(irn);
352                 unit->costs[1] = co->get_costs(co, irn, unit->nodes[1], -1);
353         } else
354
355         /* Src == Tgt of a 2-addr-code instruction */
356         if (is_2addr_code(co->aenv, irn, &req)) {
357                 ir_node *other = req.other_same;
358                 if (!nodes_interfere(co->cenv, irn, other)) {
359                         unit->nodes = xmalloc(2 * sizeof(*unit->nodes));
360                         unit->costs = xmalloc(2 * sizeof(*unit->costs));
361                         unit->node_count = 2;
362                         unit->nodes[0] = irn;
363                         unit->nodes[1] = other;
364                         unit->costs[1] = co->get_costs(co, irn, other, -1);
365                 }
366         } else
367                 assert(0 && "This is not an optimizable node!");
368
369         /* Insert the new unit at a position according to its costs */
370         if (unit->node_count > 1) {
371                 int i;
372                 struct list_head *tmp;
373
374                 /* Determine the maximum costs this unit can cause: all_nodes_cost */
375                 for(i=1; i<unit->node_count; ++i) {
376                         unit->sort_key = MAX(unit->sort_key, unit->costs[i]);
377                         unit->all_nodes_costs += unit->costs[i];
378                 }
379
380                 /* Determine the minimal costs this unit will cause: min_nodes_costs */
381                 unit->min_nodes_costs += unit->all_nodes_costs - ou_max_ind_set_costs(unit);
382                 /* Insert the new ou according to its sort_key */
383                 tmp = &co->units;
384                 while (tmp->next != &co->units && list_entry_units(tmp->next)->sort_key > unit->sort_key)
385                         tmp = tmp->next;
386                 list_add(&unit->units, tmp);
387         } else {
388                 free(unit);
389         }
390 }
391
392 #ifdef QUICK_AND_DIRTY_HACK
393
394 static int compare_ous(const void *k1, const void *k2) {
395         const unit_t *u1 = *((const unit_t **) k1);
396         const unit_t *u2 = *((const unit_t **) k2);
397         int i, o, u1_has_constr, u2_has_constr;
398         arch_register_req_t req;
399         const arch_env_t *aenv = u1->co->aenv;
400
401         /* Units with constraints come first */
402         u1_has_constr = 0;
403         for (i=0; i<u1->node_count; ++i) {
404                 arch_get_register_req(aenv, &req, u1->nodes[i], -1);
405                 if (arch_register_req_is(&req, limited)) {
406                         u1_has_constr = 1;
407                         break;
408                 }
409         }
410
411         u2_has_constr = 0;
412         for (i=0; i<u2->node_count; ++i) {
413                 arch_get_register_req(aenv, &req, u2->nodes[i], -1);
414                 if (arch_register_req_is(&req, limited)) {
415                         u2_has_constr = 1;
416                         break;
417                 }
418         }
419
420         if (u1_has_constr != u2_has_constr)
421                 return u2_has_constr - u1_has_constr;
422
423         /* Now check, whether the two units are connected */
424 #if 0
425         for (i=0; i<u1->node_count; ++i)
426                 for (o=0; o<u2->node_count; ++o)
427                         if (u1->nodes[i] == u2->nodes[o])
428                                 return 0;
429 #endif
430
431         /* After all, the sort key decides. Greater keys come first. */
432         return u2->sort_key - u1->sort_key;
433
434 }
435
436 /**
437  * Sort the ou's according to constraints and their sort_key
438  */
439 static void co_sort_units(copy_opt_t *co) {
440         int i, count = 0, costs;
441         unit_t *ou, **ous;
442
443         /* get the number of ous, remove them form the list and fill the array */
444         list_for_each_entry(unit_t, ou, &co->units, units)
445                 count++;
446         ous = alloca(count * sizeof(*ous));
447
448         costs = co_get_max_copy_costs(co);
449
450         i = 0;
451         list_for_each_entry(unit_t, ou, &co->units, units)
452                 ous[i++] = ou;
453
454         INIT_LIST_HEAD(&co->units);
455
456         assert(count == i && list_empty(&co->units));
457
458         for (i=0; i<count; ++i)
459                 ir_printf("%+F\n", ous[i]->nodes[0]);
460
461         qsort(ous, count, sizeof(*ous), compare_ous);
462
463         ir_printf("\n\n");
464         for (i=0; i<count; ++i)
465                 ir_printf("%+F\n", ous[i]->nodes[0]);
466
467         /* reinsert into list in correct order */
468         for (i=0; i<count; ++i)
469                 list_add_tail(&ous[i]->units, &co->units);
470
471         assert(costs == co_get_max_copy_costs(co));
472 }
473 #endif
474
475 void co_build_ou_structure(copy_opt_t *co) {
476         DBG((dbg, LEVEL_1, "\tCollecting optimization units\n"));
477         INIT_LIST_HEAD(&co->units);
478         irg_walk_graph(co->irg, co_collect_units, NULL, co);
479 #ifdef QUICK_AND_DIRTY_HACK
480         co_sort_units(co);
481 #endif
482 }
483
484 void co_free_ou_structure(copy_opt_t *co) {
485         unit_t *curr, *tmp;
486         ASSERT_OU_AVAIL(co);
487         list_for_each_entry_safe(unit_t, curr, tmp, &co->units, units) {
488                 xfree(curr->nodes);
489                 xfree(curr->costs);
490                 xfree(curr);
491         }
492         co->units.next = NULL;
493 }
494
495 /* co_solve_heuristic() is implemented in becopyheur.c */
496
497 int co_get_max_copy_costs(const copy_opt_t *co) {
498         int i, res = 0;
499         unit_t *curr;
500
501         ASSERT_OU_AVAIL(co);
502
503         list_for_each_entry(unit_t, curr, &co->units, units) {
504                 res += curr->inevitable_costs;
505                 for (i=1; i<curr->node_count; ++i)
506                         res += curr->costs[i];
507         }
508         return res;
509 }
510
511 int co_get_inevit_copy_costs(const copy_opt_t *co) {
512         int res = 0;
513         unit_t *curr;
514
515         ASSERT_OU_AVAIL(co);
516
517         list_for_each_entry(unit_t, curr, &co->units, units)
518                 res += curr->inevitable_costs;
519         return res;
520 }
521
522 int co_get_copy_costs(const copy_opt_t *co) {
523         int i, res = 0;
524         unit_t *curr;
525
526         ASSERT_OU_AVAIL(co);
527
528         list_for_each_entry(unit_t, curr, &co->units, units) {
529                 int root_col = get_irn_col(co, curr->nodes[0]);
530                 DBG((dbg, LEVEL_1, "  %3d costs for root %+F color %d\n", curr->inevitable_costs, curr->nodes[0], root_col));
531                 res += curr->inevitable_costs;
532                 for (i=1; i<curr->node_count; ++i) {
533                         int arg_col = get_irn_col(co, curr->nodes[i]);
534                         if (root_col != arg_col) {
535                                 DBG((dbg, LEVEL_1, "  %3d for arg %+F color %d\n", curr->costs[i], curr->nodes[i], arg_col));
536                                 res += curr->costs[i];
537                         }
538                 }
539         }
540         return res;
541 }
542
543 int co_get_lower_bound(const copy_opt_t *co) {
544         int res = 0;
545         unit_t *curr;
546
547         ASSERT_OU_AVAIL(co);
548
549         list_for_each_entry(unit_t, curr, &co->units, units)
550                 res += curr->inevitable_costs + curr->min_nodes_costs;
551         return res;
552 }
553
554 /******************************************************************************
555    _____                 _        _____ _
556   / ____|               | |      / ____| |
557  | |  __ _ __ __ _ _ __ | |__   | (___ | |_ ___  _ __ __ _  __ _  ___
558  | | |_ | '__/ _` | '_ \| '_ \   \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
559  | |__| | | | (_| | |_) | | | |  ____) | || (_) | | | (_| | (_| |  __/
560   \_____|_|  \__,_| .__/|_| |_| |_____/ \__\___/|_|  \__,_|\__, |\___|
561                   | |                                       __/ |
562                   |_|                                      |___/
563  ******************************************************************************/
564
565 static int compare_affinity_node_t(const void *k1, const void *k2, size_t size) {
566         const affinity_node_t *n1 = k1;
567         const affinity_node_t *n2 = k2;
568
569         return (n1->irn != n2->irn);
570 }
571
572 static void add_edge(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
573         affinity_node_t new_node, *node;
574         neighb_t new_nbr, *nbr;
575         int allocnew;
576
577         new_node.irn        = n1;
578         new_node.degree     = 0;
579         new_node.neighbours = NULL;
580         node = set_insert(co->nodes, &new_node, sizeof(new_node), HASH_PTR(new_node.irn));
581
582         allocnew = 1;
583         for (nbr = node->neighbours; nbr; nbr = nbr->next)
584                 if (nbr->irn == n2) {
585                         allocnew = 0;
586                         break;
587                 }
588
589         /* if we did not find n2 in n1's neighbourhood insert it */
590         if (allocnew) {
591                 obstack_grow(&co->obst, &new_nbr, sizeof(new_nbr));
592                 nbr = obstack_finish(&co->obst);
593                 nbr->irn   = n2;
594                 nbr->costs = 0;
595                 nbr->next  = node->neighbours;
596                 node->neighbours = nbr;
597                 node->degree++;
598         }
599
600         /* now nbr points to n1's neighbour-entry of n2 */
601         nbr->costs += costs;
602 }
603
604 static INLINE void add_edges(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
605         if (! be_ifg_connected(co->cenv->ifg, n1, n2)) {
606                 add_edge(co, n1, n2, costs);
607                 add_edge(co, n2, n1, costs);
608         }
609 }
610
611 static void build_graph_walker(ir_node *irn, void *env) {
612         copy_opt_t *co = env;
613         int pos, max;
614         arch_register_req_t req;
615         const arch_register_t *reg;
616
617         if (!is_curr_reg_class(co, irn) || arch_irn_is(co->aenv, irn, ignore))
618                 return;
619
620         reg = arch_get_irn_register(co->aenv, irn);
621         if (arch_register_type_is(reg, ignore))
622                 return;
623
624         /* Phis */
625         if (is_Reg_Phi(irn))
626                 for (pos=0, max=get_irn_arity(irn); pos<max; ++pos) {
627                         ir_node *arg = get_irn_n(irn, pos);
628                         add_edges(co, irn, arg, co->get_costs(co, irn, arg, pos));
629                 }
630
631         /* Perms */
632         else if (is_Perm_Proj(co->aenv, irn)) {
633                 ir_node *arg = get_Perm_src(irn);
634                 add_edges(co, irn, arg, co->get_costs(co, irn, arg, 0));
635         }
636
637         /* 2-address code */
638         else if (is_2addr_code(co->aenv, irn, &req))
639                 add_edges(co, irn, req.other_same, co->get_costs(co, irn, req.other_same, 0));
640 }
641
642 void co_build_graph_structure(copy_opt_t *co) {
643         obstack_init(&co->obst);
644         co->nodes = new_set(compare_affinity_node_t, 32);
645
646         irg_walk_graph(co->irg, build_graph_walker, NULL, co);
647 }
648
649 void co_free_graph_structure(copy_opt_t *co) {
650         ASSERT_GS_AVAIL(co);
651
652         del_set(co->nodes);
653         obstack_free(&co->obst, NULL);
654         co->nodes = NULL;
655 }
656
657 /* co_solve_ilp1() co_solve_ilp2() are implemented in becopyilpX.c */
658
659 int co_gs_is_optimizable(copy_opt_t *co, ir_node *irn) {
660         affinity_node_t new_node, *n;
661
662         ASSERT_GS_AVAIL(co);
663
664         new_node.irn = irn;
665         n = set_find(co->nodes, &new_node, sizeof(new_node), HASH_PTR(new_node.irn));
666         if (n) {
667                 return (n->degree > 0);
668         } else
669                 return 0;
670 }
671
672 void co_dump_appel_graph(const copy_opt_t *co, FILE *f)
673 {
674         be_ifg_t *ifg  = co->cenv->ifg;
675         int *color_map = alloca(co->cls->n_regs * sizeof(color_map[0]));
676         bitset_t *adm  = bitset_alloca(co->cls->n_regs);
677
678         ir_node *irn;
679         void *it, *nit;
680         int i, n, n_regs;
681
682         n_regs = 0;
683         for(i = 0; i < co->cls->n_regs; ++i) {
684                 const arch_register_t *reg = &co->cls->regs[i];
685                 color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_regs++;
686         }
687
688         /*
689          * n contains the first node number.
690          * the values below n are the pre-colored register nodes
691          */
692
693         it  = be_ifg_nodes_iter_alloca(ifg);
694         nit = be_ifg_neighbours_iter_alloca(ifg);
695
696         n = n_regs;
697         be_ifg_foreach_node(ifg, it, irn) {
698                 if(!arch_irn_is(co->aenv, irn, ignore))
699                         set_irn_link(irn, INT_TO_PTR(n++));
700         }
701
702         fprintf(f, "%d %d\n", n, n_regs);
703
704         be_ifg_foreach_node(ifg, it, irn) {
705                 if(!arch_irn_is(co->aenv, irn, ignore)) {
706                         int idx            = PTR_TO_INT(get_irn_link(irn));
707                         affinity_node_t *a = get_affinity_info(co, irn);
708
709                         arch_register_req_t req;
710                         ir_node *adj;
711
712                         arch_get_register_req(co->aenv, &req, irn, BE_OUT_POS(0));
713                         if(arch_register_req_is(&req, limited)) {
714                                 bitset_clear_all(adm);
715                                 req.limited(req.limited_env, adm);
716                                 for(i = 0; i < co->cls->n_regs; ++i)
717                                         if(!bitset_is_set(adm, i) && color_map[i] >= 0)
718                                                 fprintf(f, "%d %d -1\n", color_map[i], idx);
719
720                         }
721
722
723                         be_ifg_foreach_neighbour(ifg, nit, irn, adj) {
724                                 if(!arch_irn_is(co->aenv, adj, ignore)) {
725                                         int adj_idx = PTR_TO_INT(get_irn_link(adj));
726                                         if(idx < adj_idx)
727                                                 fprintf(f, "%d %d -1\n", idx, adj_idx);
728                                 }
729                         }
730
731                         if(a) {
732                                 neighb_t *n;
733
734                                 co_gs_foreach_neighb(a, n) {
735                                         if(!arch_irn_is(co->aenv, n->irn, ignore)) {
736                                                 int n_idx = PTR_TO_INT(get_irn_link(n->irn));
737                                                 if(idx < n_idx)
738                                                         fprintf(f, "%d %d %d\n", idx, n_idx, n->costs);
739                                         }
740                                 }
741                         }
742                 }
743         }
744 }
745
746 typedef struct _appel_clique_walker_t {
747         phase_t ph;
748         const copy_opt_t *co;
749         int curr_nr;
750         int node_count;
751         FILE *f;
752         int dumb;
753         int *color_map;
754         struct obstack obst;
755 } appel_clique_walker_t;
756
757 typedef struct _appel_block_info_t {
758         int *live_end_nr;
759         int *live_in_nr;
760         int *phi_nr;
761         ir_node **live_end;
762         ir_node **live_in;
763         ir_node **phi;
764         int n_live_end;
765         int n_live_in;
766         int n_phi;
767 } appel_block_info_t;
768
769 static int appel_aff_weight(const appel_clique_walker_t *env, ir_node *bl)
770 {
771 #if 0
772         double freq = get_block_execfreq(env->co->cenv->execfreq, bl);
773         int res = (int) freq;
774         return res == 0 ? 1 : res;
775 #else
776         ir_loop *loop = get_irn_loop(bl);
777         if(loop) {
778                 int d = get_loop_depth(loop);
779                 return 1 + d * d;
780         }
781         return 1;
782 #endif
783 }
784
785 static void *appel_clique_walker_irn_init(phase_t *phase, ir_node *irn, void *old)
786 {
787         appel_block_info_t *res = NULL;
788
789         if(is_Block(irn)) {
790                 appel_clique_walker_t *d = (void *) phase;
791                 res = phase_alloc(phase, sizeof(res[0]));
792                 res->phi_nr      = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end_nr));
793                 res->live_end_nr = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end_nr));
794                 res->live_in_nr  = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in_nr));
795                 res->live_end    = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end));
796                 res->live_in     = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in));
797                 res->phi         = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in));
798         }
799
800         return res;
801 }
802
803 typedef struct _insn_list_t {
804         be_insn_t *insn;
805         struct list_head list;
806 } insn_list_t;
807
808 static int appel_get_live_end_nr(appel_clique_walker_t *env, ir_node *bl, ir_node *irn)
809 {
810         appel_block_info_t *bli = phase_get_irn_data(&env->ph, bl);
811         int i;
812
813         for(i = 0; i < bli->n_live_end; ++i)
814                 if(bli->live_end[i] == irn)
815                         return bli->live_end_nr[i];
816
817         return -1;
818 }
819
820 static int appel_dump_clique(appel_clique_walker_t *env, pset *live, ir_node *bl, int curr_nr, int start_nr)
821 {
822         ir_node **live_arr = alloca(env->co->cls->n_regs * sizeof(live_arr[0]));
823         ir_node *irn;
824         int n_live;
825         int j;
826
827         n_live = 0;
828         foreach_pset(live, irn)
829                 live_arr[n_live++] = irn;
830
831         /* dump the live after clique */
832         if(!env->dumb) {
833                 for(j = 0; j < n_live; ++j) {
834                         int k;
835
836                         for(k = j + 1; k < n_live; ++k) {
837                                 fprintf(env->f, "%d %d -1 ", curr_nr + j, curr_nr + k);
838                         }
839                         fprintf(env->f, "\n");
840                 }
841         }
842
843         /* dump the affinities */
844         for(j = 0; !env->dumb && j < n_live; ++j) {
845                 ir_node *irn = live_arr[j];
846                 int old_nr = PTR_TO_INT(get_irn_link(irn));
847
848                 /* if the node was already live in the last insn dump the affinity */
849                 if(old_nr > start_nr) {
850                         int weight = appel_aff_weight(env, bl);
851                         fprintf(env->f, "%d %d %d\n", old_nr, curr_nr + j, weight);
852                 }
853         }
854
855         /* set the current numbers into the link field. */
856         for(j = 0; j < n_live; ++j) {
857                 ir_node *irn = live_arr[j];
858                 set_irn_link(irn, INT_TO_PTR(curr_nr + j));
859         }
860
861         return curr_nr + n_live;
862 }
863
864 static void appel_walker(ir_node *bl, void *data)
865 {
866         appel_clique_walker_t *env = data;
867         appel_block_info_t *bli    = phase_get_or_set_irn_data(&env->ph, bl);
868         struct obstack *obst       = &env->obst;
869         void *base                 = obstack_base(obst);
870         pset *live                 = pset_new_ptr_default();
871
872         int n_insns  = 0;
873         int n_nodes  = 0;
874         int start_nr = env->curr_nr;
875         int curr_nr  = start_nr;
876
877         be_insn_env_t insn_env;
878         int i, j;
879         ir_node *irn;
880         be_insn_t **insns;
881
882         insn_env.aenv = env->co->aenv;
883         insn_env.cls  = env->co->cls;
884         insn_env.obst = obst;
885         insn_env.ignore_colors = env->co->cenv->ignore_colors;
886
887         /* Guess how many insns will be in this block. */
888         sched_foreach(bl, irn)
889                 n_nodes++;
890
891         bli->n_phi = 0;
892         insns = malloc(n_nodes * sizeof(insns[0]));
893
894         /* Put all insns in an array. */
895         irn = sched_first(bl);
896         while(!sched_is_end(irn)) {
897                 be_insn_t *insn;
898                 insn = be_scan_insn(&insn_env, irn);
899                 insns[n_insns++] = insn;
900                 irn = insn->next_insn;
901         }
902
903         DBG((env->co->cenv->dbg, LEVEL_2, "%+F\n", bl));
904         be_liveness_end_of_block(env->co->cenv->lv, env->co->aenv, env->co->cls, bl, live);
905
906         /* Generate the bad and ugly. */
907         for(i = n_insns - 1; i >= 0; --i) {
908                 be_insn_t *insn = insns[i];
909
910                 /* The first live set has to be saved in the block border set. */
911                 if(i == n_insns - 1) {
912                         j = 0;
913                         foreach_pset(live, irn) {
914                                 bli->live_end[j]    = irn;
915                                 bli->live_end_nr[j] = curr_nr + j;
916                                 ++j;
917                         }
918                         bli->n_live_end = j;
919                 }
920
921                 if(!env->dumb) {
922                         for(j = 0; j < insn->use_start; ++j) {
923                                 ir_node *op   = insn->ops[j].carrier;
924                                 bitset_t *adm = insn->ops[j].regs;
925                                 int k;
926                                 int nr;
927
928                                 if(!insn->ops[j].has_constraints)
929                                         continue;
930
931                                 nr = 0;
932                                 foreach_pset(live, irn) {
933                                         if(irn == op) {
934                                                 pset_break(live);
935                                                 break;
936                                         }
937                                         ++nr;
938                                 }
939
940                                 assert(nr < pset_count(live));
941
942                                 for(k = 0; k < env->co->cls->n_regs; ++k) {
943                                         int mapped_col = env->color_map[k];
944                                         if(mapped_col >= 0 && !bitset_is_set(adm, k) && !bitset_is_set(env->co->cenv->ignore_colors, k))
945                                                 fprintf(env->f, "%d %d -1\n", curr_nr + nr, mapped_col);
946                                 }
947                         }
948                 }
949
950                 /* dump the clique and update the stuff. */
951                 curr_nr = appel_dump_clique(env, live, bl, curr_nr, start_nr);
952
953                 /* remove all defs. */
954                 for(j = 0; j < insn->use_start; ++j)
955                         pset_remove_ptr(live, insn->ops[j].carrier);
956
957                 if(is_Phi(insn->irn) && arch_irn_consider_in_reg_alloc(env->co->aenv, env->co->cls, insn->irn)) {
958                         bli->phi[bli->n_phi]    = insn->irn;
959                         bli->phi_nr[bli->n_phi] = PTR_TO_INT(get_irn_link(insn->irn));
960                         bli->n_phi++;
961                 }
962
963                 /* add all uses */
964                 else
965                         for(j = insn->use_start; j < insn->n_ops; ++j)
966                                 pset_insert_ptr(live, insn->ops[j].carrier);
967         }
968
969         /* print the start clique. */
970         curr_nr = appel_dump_clique(env, live, bl, curr_nr, start_nr);
971
972         i = 0;
973         foreach_pset(live, irn) {
974                 bli->live_in[i]    = irn;
975                 bli->live_in_nr[i] = PTR_TO_INT(get_irn_link(irn));
976                 ++i;
977         }
978         bli->n_live_in = i;
979
980         del_pset(live);
981         free(insns);
982         obstack_free(obst, base);
983         env->curr_nr = curr_nr;
984 }
985
986 static void appel_inter_block_aff(ir_node *bl, void *data)
987 {
988         appel_clique_walker_t *env = data;
989         appel_block_info_t *bli    = phase_get_irn_data(&env->ph, bl);
990
991         int i, j, n;
992
993         for(i = 0; i < bli->n_live_in; ++i) {
994                 ir_node *irn = bli->live_in[i];
995
996                 for(j = 0, n = get_Block_n_cfgpreds(bl); j < n; ++j) {
997                         ir_node *pred  = get_Block_cfgpred_block(bl, j);
998                         appel_block_info_t *pred_bli = phase_get_irn_data(&env->ph, pred);
999
1000                         int nr = appel_get_live_end_nr(env, pred, irn);
1001                         assert(nr >= 0);
1002                         fprintf(env->f, "%d %d 1\n", bli->live_in_nr[i], nr);
1003                 }
1004         }
1005
1006         for(i = 0; i < bli->n_phi; ++i) {
1007                 ir_node *irn = bli->phi[i];
1008
1009                 for(j = 0, n = get_Block_n_cfgpreds(bl); j < n; ++j) {
1010                         ir_node *pred  = get_Block_cfgpred_block(bl, j);
1011                         appel_block_info_t *pred_bli = phase_get_irn_data(&env->ph, pred);
1012                         ir_node *op = get_irn_n(irn, j);
1013
1014                         int nr = appel_get_live_end_nr(env, pred, op);
1015                         assert(nr >= 0);
1016                         fprintf(env->f, "%d %d 1\n", bli->phi_nr[i], nr);
1017                 }
1018         }
1019
1020 }
1021
1022 void co_dump_appel_graph_cliques(const copy_opt_t *co, FILE *f)
1023 {
1024         int i;
1025         int n_colors;
1026         appel_clique_walker_t env;
1027         bitset_t *adm = bitset_alloca(co->cls->n_regs);
1028
1029         be_liveness_recompute(co->cenv->lv);
1030         obstack_init(&env.obst);
1031         phase_init(&env.ph, "appel_clique_dumper", co->irg, PHASE_DEFAULT_GROWTH, appel_clique_walker_irn_init);
1032         env.curr_nr = co->cls->n_regs;
1033         env.co = co;
1034         env.f = f;
1035
1036         bitset_copy(adm, co->cenv->ignore_colors);
1037         bitset_flip_all(adm);
1038
1039         /* Make color map. */
1040         env.color_map = alloca(co->cls->n_regs * sizeof(env.color_map[0]));
1041         for(i = 0, n_colors = 0; i < co->cls->n_regs; ++i) {
1042                 const arch_register_t *reg = &co->cls->regs[i];
1043                 env.color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_colors++;
1044         }
1045
1046         env.dumb = 1;
1047         env.curr_nr = n_colors;
1048         irg_block_walk_graph(co->irg, firm_clear_link, NULL, NULL);
1049         irg_block_walk_graph(co->irg, appel_walker, NULL, &env);
1050
1051         fprintf(f, "%d %d\n", env.curr_nr, n_colors);
1052
1053         /* make the first k nodes interfere */
1054         for(i = 0; i < n_colors; ++i) {
1055                 int j;
1056                 for(j = i + 1; j < n_colors; ++j)
1057                         fprintf(f, "%d %d -1 ", i, j);
1058                 fprintf(f, "\n");
1059         }
1060
1061         env.dumb = 0;
1062         env.curr_nr = n_colors;
1063         irg_block_walk_graph(co->irg, firm_clear_link, NULL, NULL);
1064         irg_block_walk_graph(co->irg, appel_walker, NULL, &env);
1065         irg_block_walk_graph(co->irg, appel_inter_block_aff, NULL, &env);
1066         obstack_free(&env.obst, NULL);
1067 }
1068
1069 /*
1070 ___ _____ ____   ____   ___ _____   ____                        _
1071 |_ _|  ___/ ___| |  _ \ / _ \_   _| |  _ \ _   _ _ __ ___  _ __ (_)_ __   __ _
1072 | || |_ | |  _  | | | | | | || |   | | | | | | | '_ ` _ \| '_ \| | '_ \ / _` |
1073 | ||  _|| |_| | | |_| | |_| || |   | |_| | |_| | | | | | | |_) | | | | | (_| |
1074 |___|_|   \____| |____/ \___/ |_|   |____/ \__,_|_| |_| |_| .__/|_|_| |_|\__, |
1075 |_|            |___/
1076 */
1077
1078 static const char *get_dot_color_name(int col)
1079 {
1080         static const char *names[] = {
1081                 "blue",
1082                 "red",
1083                 "green",
1084                 "yellow",
1085                 "cyan",
1086                 "magenta",
1087                 "orange",
1088                 "chocolate",
1089                 "beige",
1090                 "navy",
1091                 "darkgreen",
1092                 "darkred",
1093                 "lightPink",
1094                 "chartreuse",
1095                 "lightskyblue",
1096                 "linen",
1097                 "pink",
1098                 "lightslateblue",
1099                 "mintcream",
1100                 "red",
1101                 "darkolivegreen",
1102                 "mediumblue",
1103                 "mistyrose",
1104                 "salmon",
1105                 "darkseagreen",
1106                 "mediumslateblue"
1107                 "moccasin",
1108                 "tomato",
1109                 "forestgreen",
1110                 "darkturquoise",
1111                 "palevioletred"
1112         };
1113
1114         return col < sizeof(names)/sizeof(names[0]) ? names[col] : "white";
1115 }
1116
1117 typedef struct _co_ifg_dump_t {
1118         const copy_opt_t *co;
1119         unsigned flags;
1120 } co_ifg_dump_t;
1121
1122 static const char *get_dot_shape_name(co_ifg_dump_t *cod, ir_node *irn)
1123 {
1124         arch_register_req_t req;
1125
1126         arch_get_register_req(cod->co->aenv, &req, irn, BE_OUT_POS(0));
1127         if(arch_register_req_is(&req, limited))
1128                 return "diamond";
1129
1130         return "ellipse";
1131 }
1132
1133 static void ifg_dump_graph_attr(FILE *f, void *self)
1134 {
1135         fprintf(f, "overlay=false");
1136 }
1137
1138 static int ifg_is_dump_node(void *self, ir_node *irn)
1139 {
1140         co_ifg_dump_t *cod = self;
1141         return !arch_irn_is(cod->co->aenv, irn, ignore);
1142 }
1143
1144 static void ifg_dump_node_attr(FILE *f, void *self, ir_node *irn)
1145 {
1146         co_ifg_dump_t *env         = self;
1147         const arch_register_t *reg = arch_get_irn_register(env->co->aenv, irn);
1148
1149         ir_fprintf(f, "label=\"%+F\" style=filled color=%s shape=%s", irn, get_dot_color_name(reg->index), get_dot_shape_name(env, irn));
1150 }
1151
1152 static void ifg_dump_at_end(FILE *file, void *self)
1153 {
1154         co_ifg_dump_t *env = self;
1155         affinity_node_t *a;
1156
1157         co_gs_foreach_aff_node(env->co, a) {
1158                 const arch_register_t *ar = arch_get_irn_register(env->co->aenv, a->irn);
1159                 unsigned aidx = get_irn_idx(a->irn);
1160                 neighb_t *n;
1161
1162                 co_gs_foreach_neighb(a, n) {
1163                         const arch_register_t *nr = arch_get_irn_register(env->co->aenv, n->irn);
1164                         int nidx = get_irn_idx(n->irn);
1165
1166                         if(aidx < nidx) {
1167                                 const char *color = nr == ar ? "blue" : "red";
1168                                 fprintf(file, "\tn%d -- n%d [label=\"%d\" style=dashed color=%s];\n", aidx, nidx, n->costs, color);
1169                         }
1170                 }
1171         }
1172 }
1173
1174
1175 static be_ifg_dump_dot_cb_t ifg_dot_cb = {
1176         ifg_is_dump_node,
1177         ifg_dump_graph_attr,
1178         ifg_dump_node_attr,
1179         NULL,
1180         NULL,
1181         ifg_dump_at_end
1182 };
1183
1184
1185
1186 void co_dump_ifg_dot(const copy_opt_t *co, FILE *f, unsigned flags)
1187 {
1188         co_ifg_dump_t cod;
1189
1190         cod.co    = co;
1191         cod.flags = flags;
1192         be_ifg_dump_dot(co->cenv->ifg, co->irg, f, &ifg_dot_cb, &cod);
1193 }
1194
1195
1196 void co_solve_park_moon(copy_opt_t *opt)
1197 {
1198
1199 }