cleanup besched header
[libfirm] / ir / be / beschedrss.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Implementation of a register saturating list scheduler.
23  * @author      Christian Wuerdig
24  * @date        29.08.2006
25  * @version     $Id$
26  *
27  * Implementation of a register saturating list scheduler
28  * as described in: Sid-Ahmed-Ali Touati
29  * Register Saturation in Superscalar and VLIW Codes
30  */
31 #include "config.h"
32
33 #include "beschedrss.h"
34
35 #include <limits.h>
36
37 #include "obst.h"
38 #include "debug.h"
39
40 #include "irgraph_t.h"
41 #include "irnode_t.h"
42 #include "iredges_t.h"
43 #include "ircons_t.h"
44 #include "irphase_t.h"
45 #include "irgwalk.h"
46 #include "irdump.h"
47 #include "irtools.h"
48 #include "irbitset.h"
49 #include "irprintf.h"
50 #include "irnodeset.h"
51 #include "bipartite.h"
52 #include "hungarian.h"
53 #include "plist.h"
54 #include "array_t.h"
55
56 #include "heights.h"
57
58 #include "beabi.h"
59 #include "bemodule.h"
60 #include "benode.h"
61 #include "besched.h"
62 #include "beirg.h"
63 #include "belive.h"
64
65 #include "lc_opts.h"
66 #include "lc_opts_enum.h"
67
68
69 #define ARR_LEN_SAFE(arr) ((arr) != NULL ? ARR_LEN((arr)) : 0)
70
71 #define HASH_RSS_EDGE(edge) ((get_irn_node_nr((edge)->src) << 16) | (get_irn_node_nr((edge)->tgt) & 0xFFFF))
72 #define BSEARCH_IRN_ARR(val, arr) \
73         bsearch(&(val), (arr), ARR_LEN_SAFE((arr)), sizeof((arr)[0]), cmp_irn_idx)
74
75 #define BLOCK_IDX_MAP(rss, irn) bsearch_for_index(get_irn_idx((irn)), (rss)->idx_map, ARR_LEN_SAFE((rss)->idx_map), 1)
76
77 /* the rss options */
78 typedef struct rss_opts_t {
79         int dump_flags;
80 } rss_opts_t;
81
82 /* Represents a child with associated costs */
83 typedef struct child {
84         ir_node *irn;
85         float   cost;
86 } child_t;
87
88 /* We need edges for several purposes. */
89 typedef struct rss_edge {
90         ir_node *src;
91         ir_node *tgt;
92         void    *next;
93 } rss_edge_t;
94
95 /* Represents a connected bipartite component. */
96 typedef struct cbc {
97         ir_nodeset_t parents;       /**< = S  a set of value producers */
98         ir_nodeset_t children;      /**< = T  a set of value consumers */
99         pset    *kill_edges;    /**< = E  a set of edges (t in T, s in S) such as each s in S gets killed by at least one t in T */
100         int     nr;             /**< a deterministic index for set insertion (used as hash) */
101 } cbc_t;
102
103 /* Represents a disjoint value DAG. */
104 typedef struct dvg {
105         ir_nodeset_t nodes;
106         pset    *edges;
107 } dvg_t;
108
109 /* Represents a chain of nodes. */
110 typedef struct chain {
111         plist_t *elements;   /**< List of chain elements */
112         int     nr;          /**< a deterministic index for set insertion (used as hash) */
113 } chain_t;
114
115 typedef struct rss_irn {
116         plist_t  *consumer_list;    /**< List of consumers */
117         const ir_node **consumer;   /**< Sorted consumer array (needed for faster access) */
118
119         plist_t  *parent_list;      /**< List of parents */
120         plist_t  *pkiller_list;     /**< List of potential killers */
121
122         plist_t  *descendant_list;  /**< List of descendants */
123         const ir_node **descendants;      /**< Sorted descendant array (needed for faster access) */
124
125         const ir_node  *killer;     /**< The selected unique killer */
126         const ir_node  *irn;        /**< The corresponding firm node to this rss_irn */
127
128         chain_t  *chain;            /**< The chain, this node is associated to */
129
130         unsigned desc_walk;         /**< visited flag for collecting descendants */
131         unsigned kill_count;        /**< number of nodes killed by this one */
132
133         unsigned live_out : 1;      /**< irn has consumers outside of it's block */
134         unsigned visited  : 1;      /**< visited flag for bipartite decomposition */
135         unsigned havecons : 1;      /**< visited flag collect consumer */
136         unsigned handled  : 1;      /**< flag indicating whether or not the list structures have been build */
137         unsigned dumped   : 1;      /**< flag indication whether or not this node was dumped */
138 } rss_irn_t;
139
140 /* Represents a serialization edge with associated costs. */
141 typedef struct serialization {
142         rss_irn_t  *u;       /* the top node */
143         rss_irn_t  *v;       /* the node about to be serialized after u */
144         rss_edge_t *edge;    /* the edge selected for the serialization */
145         int        omega1;   /* estimated: available regs - RS reduction */
146         int        omega2;   /* increase in critical path length */
147         int        new_killer;
148 } serialization_t;
149
150 typedef struct rss {
151         ir_phase          ph;              /**< Phase to hold some data */
152         ir_heights_t     *h;              /**< The current height object */
153         ir_graph         *irg;            /**< The irg to preprocess */
154         plist_t          *nodes;          /**< The list of interesting nodes */
155         const arch_env_t *arch_env;       /**< The architecture environment */
156         be_abi_irg_t     *abi;            /**< The abi for this irg */
157         pset             *cbc_set;        /**< A set of connected bipartite components */
158         ir_node          *block;          /**< The current block in progress. */
159         int              *idx_map;        /**< Mapping irn indices to per block indices */
160         unsigned         max_height;      /**< maximum height in the current block */
161         rss_opts_t       *opts;           /**< The options */
162         be_lv_t          *liveness;       /**< The liveness information for this irg */
163         ir_nodeset_t      live_block;     /**< Values alive at end of block */
164         const arch_register_class_t *cls; /**< The current register class */
165         DEBUG_ONLY(firm_dbg_module_t *dbg);
166 } rss_t;
167
168 static inline rss_irn_t *get_rss_irn(rss_t *env, const ir_node *node)
169 {
170         return (rss_irn_t*)phase_get_or_set_irn_data(&env->ph, node);
171 }
172
173 /**
174  * We need some special nodes:
175  * a source and a sink for all live-in and live-out values of a block
176  */
177 enum {
178         iro_rss_Source,
179         iro_rss_Sink,
180         iro_rss_last
181 };
182
183 /** The opcode of the rss_Source node once allocated. */
184 static ir_op *op_rss_Source;
185 /** The opcode of the rss_Sink node once allocated. */
186 static ir_op *op_rss_Sink;
187
188 /** The rss_Source node of the current graph. */
189 static ir_node *_source = NULL;
190 /** The rss_Sink node of the current graph. */
191 static ir_node *_sink   = NULL;
192
193 #define is_Source(irn) ((irn) == _source)
194 #define is_Sink(irn)   ((irn) == _sink)
195
196 enum {
197         RSS_DUMP_NONE  = 0,
198         RSS_DUMP_CBC   = 1 << 0,
199         RSS_DUMP_PKG   = 1 << 1,
200         RSS_DUMP_KILL  = 1 << 2,
201         RSS_DUMP_DVG   = 1 << 3,
202         RSS_DUMP_MAXAC = 1 << 4,
203         RSS_DUMP_ALL   = (RSS_DUMP_MAXAC << 1) - 1,
204 };
205
206 static rss_opts_t rss_options = {
207         RSS_DUMP_NONE,
208 };
209
210 static const lc_opt_enum_int_items_t dump_items[] = {
211         { "none",  RSS_DUMP_NONE  },
212         { "cbc",   RSS_DUMP_CBC   },
213         { "pkg",   RSS_DUMP_PKG   },
214         { "kill",  RSS_DUMP_KILL  },
215         { "dvg",   RSS_DUMP_DVG   },
216         { "maxac", RSS_DUMP_MAXAC },
217         { "all",   RSS_DUMP_ALL   },
218         { NULL, 0 }
219 };
220
221 static lc_opt_enum_int_var_t dump_var = {
222         &rss_options.dump_flags, dump_items
223 };
224
225 static const lc_opt_table_entry_t rss_option_table[] = {
226         LC_OPT_ENT_ENUM_MASK("dump", "dump phases", &dump_var),
227         LC_OPT_LAST
228 };
229
230 /******************************************************************************
231  *  _          _                    __                  _   _
232  * | |        | |                  / _|                | | (_)
233  * | |__   ___| |_ __   ___ _ __  | |_ _   _ _ __   ___| |_ _  ___  _ __  ___
234  * | '_ \ / _ \ | '_ \ / _ \ '__| |  _| | | | '_ \ / __| __| |/ _ \| '_ \/ __|
235  * | | | |  __/ | |_) |  __/ |    | | | |_| | | | | (__| |_| | (_) | | | \__ \
236  * |_| |_|\___|_| .__/ \___|_|    |_|  \__,_|_| |_|\___|\__|_|\___/|_| |_|___/
237  *            | |
238  *            |_|
239  ******************************************************************************/
240
241 /**
242  * Acquire opcodes if needed and create source and sink nodes.
243  */
244 static void init_rss_special_nodes(ir_graph *irg)
245 {
246         ir_node *block;
247
248         if (op_rss_Source == NULL) {
249                 int iro_rss_base = get_next_ir_opcodes(iro_rss_last);
250                 op_rss_Source = new_ir_op(iro_rss_base + iro_rss_Source, "rss_Source", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
251                 op_rss_Sink   = new_ir_op(iro_rss_base + iro_rss_Sink,   "rss_Sink",   op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
252         }
253         block   = get_irg_start_block(irg);
254         _source = new_ir_node(NULL, irg, block, op_rss_Source, mode_ANY, 0, NULL);
255         _sink   = new_ir_node(NULL, irg, block, op_rss_Sink, mode_ANY, 0, NULL);
256 }
257
258 static int cmp_int(const void *a, const void *b)
259 {
260         const int *i1 = (const int*)a;
261         const int *i2 = (const int*)b;
262
263         return QSORT_CMP(*i1, *i2);
264 }
265
266 static int cmp_child_costs(const void *a, const void *b)
267 {
268         const child_t *c1 = (const child_t*)a;
269         const child_t *c2 = (const child_t*)b;
270
271         return QSORT_CMP(c1->cost, c2->cost);
272 }
273
274 static int cmp_irn_idx(const void *a, const void *b)
275 {
276         const ir_node *n1 = *(ir_node **)a;
277         const ir_node *n2 = *(ir_node **)b;
278
279         return QSORT_CMP(get_irn_idx(n1), get_irn_idx(n2));
280 }
281
282 static int cmp_rss_edges(const void *a, const void *b)
283 {
284         const rss_edge_t *e1 = (const rss_edge_t*)a;
285         const rss_edge_t *e2 = (const rss_edge_t*)b;
286
287         return (e1->src != e2->src) || (e1->tgt != e2->tgt);
288 }
289
290 static int bsearch_for_index(int key, int *arr, size_t len, int force)
291 {
292         int left = 0;
293         int right = len;
294
295         while (right >= left) {
296                 int idx = (left + right) / 2;
297
298                 if (key < arr[idx])
299                         right = idx - 1;
300                 else if (key > arr[idx])
301                         left = idx + 1;
302                 else
303                         return idx;
304         }
305
306         if (force)
307                 assert(0 && "Something is wrong, key not found.");
308         return -1;
309 }
310
311 static const ir_node **build_sorted_array_from_list(plist_t *irn_list, struct obstack *obst)
312 {
313         plist_element_t *el;
314         int             i   = 0;
315         int             len = plist_count(irn_list);
316         const ir_node   **arr = (const ir_node**)NEW_ARR_D(ir_node*, obst, len);
317
318         /* copy the list into the array */
319         foreach_plist(irn_list, el) {
320                 arr[i++] = (ir_node*)plist_element_get_value(el);
321         }
322
323         /* sort the array by node index */
324         /* HACK cast for MSVC */
325         qsort((void*)arr, len, sizeof(arr[0]), cmp_irn_idx);
326
327         return arr;
328 }
329
330 /*****************************************************
331  *      _      _                       _
332  *     | |    | |                     (_)
333  *   __| | ___| |__  _   _  __ _  __ _ _ _ __   __ _
334  *  / _` |/ _ \ '_ \| | | |/ _` |/ _` | | '_ \ / _` |
335  * | (_| |  __/ |_) | |_| | (_| | (_| | | | | | (_| |
336  *  \__,_|\___|_.__/ \__,_|\__, |\__, |_|_| |_|\__, |
337  *                          __/ | __/ |         __/ |
338  *                         |___/ |___/         |___/
339  *
340  *****************************************************/
341
342 #ifdef DEBUG_libfirm
343 static void dump_nodeset(ir_nodeset_t *ns, const char *prefix)
344 {
345         ir_nodeset_iterator_t iter;
346         ir_node *irn;
347
348         ir_nodeset_iterator_init(&iter, ns);
349         while ( (irn = ir_nodeset_iterator_next(&iter)) != NULL ) {
350                 ir_fprintf(stderr, "%s%+F\n", prefix, irn);
351         }
352 }
353 #endif
354
355 static void build_file_name(rss_t *rss, const char *suffix, size_t suf_len, char *buf, size_t len)
356 {
357         const char *irg_name;
358
359         memset(buf, 0, len);
360         irg_name = get_entity_name(get_irg_entity(rss->irg));
361         snprintf(buf, len - suf_len, "%s-%s-block-%ld",
362                 irg_name, arch_register_class_name(rss->cls), get_irn_node_nr(rss->block));
363         strcat(buf, suffix);
364 }
365
366 /* Dumps all collected bipartite components of current irg as vcg. */
367 static void debug_vcg_dump_bipartite(rss_t *rss)
368 {
369         cbc_t *cbc;
370         FILE  *f;
371         char  file_name[256];
372         static const char suffix[] = "-RSS-CBC.vcg";
373
374         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
375         f = fopen(file_name, "w");
376
377         ir_fprintf(f, "graph: { title: \"connected bipartite component graph of %+F\"\n", rss->irg);
378         fprintf(f, "display_edge_labels: no\n");
379         fprintf(f, "layoutalgorithm: mindepth\n");
380         fprintf(f, "manhattan_edges: yes\n\n");
381
382         foreach_pset(rss->cbc_set, cbc_t*, cbc) {
383                 ir_nodeset_iterator_t iter;
384                 ir_node    *n;
385                 rss_edge_t *ke;
386
387                 fprintf(f, "graph: { titel: \"cbc %d\" label: \"cbc %d\" status:clustered color:yellow\n", cbc->nr, cbc->nr);
388                 foreach_ir_nodeset(&cbc->parents, n, iter) {
389                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
390                 }
391                 foreach_ir_nodeset(&cbc->children, n, iter) {
392                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
393                 }
394                 foreach_pset(cbc->kill_edges, rss_edge_t*, ke) {
395                         ir_fprintf(f, "edge: { sourcename: \"n%d_%d\" targetname: \"n%d_%d\" }\n",
396                                 get_irn_node_nr(ke->src), cbc->nr, get_irn_node_nr(ke->tgt), cbc->nr);
397                 }
398                 fprintf(f, "}\n\n");
399         }
400         fprintf(f, "}\n");
401         fclose(f);
402 }
403
404 /* Dump the computed killing function as vcg. */
405 static void debug_vcg_dump_kill(rss_t *rss)
406 {
407         FILE            *f;
408         char            file_name[256];
409         plist_element_t *el;
410         static const char suffix[] = "-RSS-KILL.vcg";
411
412         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
413         f = fopen(file_name, "w");
414
415         ir_fprintf(f, "graph: { title: \"computed kill graph of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
416         fprintf(f, "display_edge_labels: no\n");
417         fprintf(f, "layoutalgorithm: mindepth\n");
418         fprintf(f, "manhattan_edges: yes\n\n");
419
420         {
421                 /* reset dump_flag */
422                 ir_node *irn;
423                 foreach_phase_irn(&rss->ph, irn) {
424                         rss_irn_t *node = get_rss_irn(rss, irn);
425                         node->dumped = 0;
426                 }
427         }
428
429         /* dump all nodes and their killers */
430         foreach_plist(rss->nodes, el) {
431                 ir_node   *irn     = (ir_node  *)plist_element_get_value(el);
432                 rss_irn_t *rirn    = get_rss_irn(rss, irn);
433                 rss_irn_t *pk_rirn = get_rss_irn(rss, rirn->killer);
434
435                 if (! rirn->dumped) {
436                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
437                         rirn->dumped = 1;
438                 }
439
440                 if (! pk_rirn->dumped) {
441                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(rirn->killer), rirn->killer);
442                         pk_rirn->dumped = 1;
443                 }
444
445                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
446                         get_irn_node_nr(rirn->killer), get_irn_node_nr(irn));
447         }
448
449         fprintf(f, "}\n");
450         fclose(f);
451 }
452
453 /* Dumps the potential killing DAG (PKG) as vcg. */
454 static void debug_vcg_dump_pkg(rss_t *rss, ir_nodeset_t *max_ac, int iteration)
455 {
456         FILE              *f;
457         char              file_name[256];
458         char              suffix[32];
459         static const char suffix1[] = "-RSS-PKG.vcg";
460         static const char suffix2[] = "-RSS-PKG-MAXAC.vcg";
461         plist_element_t   *el;
462
463         if (! max_ac) {
464                 snprintf(suffix, sizeof(suffix), "%s", suffix1);
465         }
466         else {
467                 snprintf(suffix, sizeof(suffix), "-%02d%s", iteration, suffix2);
468         }
469
470         build_file_name(rss, suffix, strlen(suffix) + 1, file_name, sizeof(file_name));
471         f = fopen(file_name, "w");
472
473         ir_fprintf(f, "graph: { title: \"potential killing DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
474         fprintf(f, "display_edge_labels: no\n");
475         fprintf(f, "layoutalgorithm: mindepth\n");
476         fprintf(f, "manhattan_edges: yes\n\n");
477
478         {
479                 /* reset dump_flag */
480                 ir_node *irn;
481                 foreach_phase_irn(&rss->ph, irn) {
482                         rss_irn_t *node = get_rss_irn(rss, irn);
483                         node->dumped = 0;
484                 }
485         }
486
487         foreach_plist(rss->nodes, el) {
488                 ir_node    *irn  = (ir_node   *)plist_element_get_value(el);
489                 rss_irn_t  *rirn = get_rss_irn(rss, irn);
490                 const char *c1   = "";
491                 plist_element_t *k_el;
492
493                 /* dump selected saturating values in yellow */
494                 if (max_ac && ir_nodeset_contains(max_ac, irn))
495                         c1 = "color:yellow";
496
497                 if (! rirn->dumped) {
498                         if (rirn->chain)
499                                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(irn), irn, rirn->chain->nr, c1);
500                         else
501                                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(irn), irn, c1);
502                         rirn->dumped = 1;
503                 }
504
505                 foreach_plist(rirn->pkiller_list, k_el) {
506                         ir_node    *pkiller = (ir_node   *)plist_element_get_value(k_el);
507                         rss_irn_t  *pk_rirn = get_rss_irn(rss, pkiller);
508                         const char *c2      = "";
509
510                         if (max_ac && ir_nodeset_contains(max_ac, pkiller))
511                                 c2 = "color:yellow";
512
513                         if (! pk_rirn->dumped) {
514                                 if (pk_rirn->chain)
515                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(pkiller), pkiller, pk_rirn->chain->nr, c2);
516                                 else
517                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(pkiller), pkiller, c2);
518                                 pk_rirn->dumped = 1;
519                         }
520                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
521                                 get_irn_node_nr(pkiller), get_irn_node_nr(irn));
522                 }
523         }
524         fprintf(f, "}\n");
525         fclose(f);
526 }
527
528 /* Dumps the disjoint value DAG (DVG) as vcg. */
529 static void debug_vcg_dump_dvg(rss_t *rss, dvg_t *dvg)
530 {
531         static const char suffix[] = "-RSS-DVG.vcg";
532         FILE       *f;
533         char       file_name[256];
534         ir_nodeset_iterator_t iter;
535         ir_node    *irn;
536         rss_edge_t *edge;
537
538         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
539         f = fopen(file_name, "w");
540
541         ir_fprintf(f, "graph: { title: \"disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
542         fprintf(f, "display_edge_labels: no\n");
543         fprintf(f, "layoutalgorithm: mindepth\n");
544         fprintf(f, "manhattan_edges: yes\n\n");
545
546         /* dump all nodes */
547         foreach_ir_nodeset(&dvg->nodes, irn, iter) {
548                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
549         }
550
551         /* dump all edges */
552         foreach_pset(dvg->edges, rss_edge_t*, edge) {
553                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
554                         get_irn_node_nr(edge->src), get_irn_node_nr(edge->tgt));
555         }
556
557         fprintf(f, "}\n");
558         fclose(f);
559 }
560
561 #if 0
562 /* Dumps the PKG(DVG). */
563 static void debug_vcg_dump_dvg_pkiller(rss_t *rss, dvg_t *dvg)
564 {
565         static const char suffix[] = "-RSS-DVG-PKG.vcg";
566         FILE       *f;
567         char       file_name[256];
568         ir_nodeset_iterator_t iter;
569         ir_node    *irn;
570
571         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
572         f = fopen(file_name, "w");
573
574         ir_fprintf(f, "graph: { title: \"PKG of disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
575         fprintf(f, "display_edge_labels: no\n");
576         fprintf(f, "layoutalgorithm: mindepth\n");
577         fprintf(f, "manhattan_edges: yes\n\n");
578
579         /* dump all nodes */
580         foreach_ir_nodeset(&dvg->nodes, irn, iter) {
581                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
582         }
583
584         /* dump all edges */
585         foreach_ir_nodeset(&dvg->nodes, irn, iter) {
586                 rss_irn_t       *node = get_rss_irn(rss, irn);
587                 plist_element_t *el;
588
589                 foreach_plist(node->dvg_pkiller_list, el) {
590                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
591                                 get_irn_node_nr(plist_element_get_value(el)), get_irn_node_nr(irn));
592                 }
593         }
594
595         fprintf(f, "}\n");
596         fclose(f);
597 }
598 #endif /* if 0 */
599
600 /**
601  * In case there is no rss information for irn, initialize it.
602  */
603 static void *init_rss_irn(ir_phase *ph, const ir_node *irn)
604 {
605         rss_irn_t *res = (rss_irn_t*)phase_alloc(ph, sizeof(res[0]));
606
607         res->descendant_list = plist_obstack_new(phase_obst(ph));
608         res->descendants     = NULL;
609
610         res->consumer_list   = plist_obstack_new(phase_obst(ph));
611         res->consumer        = NULL;
612
613         res->pkiller_list    = plist_obstack_new(phase_obst(ph));
614
615         res->parent_list     = plist_obstack_new(phase_obst(ph));
616
617         res->killer          = NULL;
618         res->irn             = irn;
619         res->chain           = NULL;
620
621         res->kill_count      = 0;
622         res->desc_walk       = 0;
623         res->live_out        = 0;
624         res->visited         = 0;
625         res->handled         = 0;
626         res->dumped          = 0;
627         res->havecons        = 0;
628
629         return res;
630 }
631
632 /**
633  * Collect all nodes data dependent on current node.
634  */
635 static void collect_descendants(rss_t *rss, rss_irn_t *rirn, ir_node *irn, int *got_sink, unsigned cur_desc_walk)
636 {
637         const ir_edge_t *edge;
638         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
639         ir_node         *block    = rss->block;
640         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
641         int             i;
642
643         if (cur_node->desc_walk >= cur_desc_walk)
644                 return;
645         cur_node->desc_walk = cur_desc_walk;
646
647         /* Stop at Barriers */
648         if (be_is_Barrier(irn))
649                 return;
650
651         /* loop over normal and dependency edges */
652         for (i = 0; i < 2; ++i) {
653                 foreach_out_edge_kind(irn, edge, ekind[i]) {
654                         ir_node *user = get_edge_src_irn(edge);
655
656                         /* skip ignore nodes as they do not really contribute to register pressure */
657                         if (arch_irn_is_ignore(user))
658                                 continue;
659
660                         /*
661                                 (a) mode_X means Jump            -> out_edge
662                                 (b) Phi as user of a normal node -> out-edge
663                         */
664                         if (get_irn_mode(user) == mode_X || is_Phi(user)) {
665                                 if (! *got_sink)
666                                         goto force_sink;
667                                 else
668                                         continue;
669                         }
670
671                         if (is_Proj(user)) {
672                                 //if (arch_get_irn_reg_class_out(user) == rss->cls)
673                                         collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
674                         }
675                         else {
676                                 /* check if user lives in block and is not a control flow node */
677                                 if (get_nodes_block(user) == block) {
678                                         if (! plist_has_value(rirn->descendant_list, user)) {
679                                                 plist_insert_back(rirn->descendant_list, user);
680                                                 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", user));
681                                         }
682                                         collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
683                                 }
684                                 else if (! *got_sink) {
685 force_sink:
686                                         /* user lives out of block: add sink as descendant if not already done */
687                                         plist_insert_back(rirn->descendant_list, _sink);
688                                         *got_sink = 1;
689                                         DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", _sink));
690                                 }
691                         }
692                 }
693         }
694 }
695
696 /**
697  * Handles a single consumer.
698  */
699 static void collect_single_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *consumer, int *got_sink)
700 {
701         ir_node *block = rss->block;
702
703         assert(! is_Proj(consumer) && "Cannot handle Projs");
704
705         if (! is_Phi(consumer) && ! is_Block(consumer) && get_nodes_block(consumer) == block) {
706                 if (!arch_irn_is_ignore(consumer) &&
707                                 !plist_has_value(rss_irn->consumer_list, consumer)) {
708                         plist_insert_back(rss_irn->consumer_list, consumer);
709                         DBG((rss->dbg, LEVEL_2, "\t\tconsumer %+F\n", consumer));
710                 }
711         }
712         else {
713                 rss_irn->live_out = 1;
714                 DBG((rss->dbg, LEVEL_2, "\t\tlive out %+F", consumer));
715                 if (! *got_sink) {
716                         plist_insert_back(rss_irn->consumer_list, _sink);
717                         *got_sink = 1;
718                         DB((rss->dbg, LEVEL_2, ", %+F added instead", _sink));
719                 }
720                 DB((rss->dbg, LEVEL_2, "\n"));
721         }
722 }
723
724 /**
725  * Collect all nodes consuming the value(s) produced by current node.
726  */
727 static void collect_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *irn, int *got_sink)
728 {
729         const ir_edge_t *edge;
730         int             i;
731         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
732         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
733
734         if (cur_node->havecons)
735                 return;
736         cur_node->havecons = 1;
737
738         for (i = 0; i < 2; ++i) {
739                 foreach_out_edge_kind(irn, edge, ekind[i]) {
740                         ir_node *consumer = get_edge_src_irn(edge);
741
742                         if (is_Proj(consumer)) {
743                                 //if (arch_get_irn_reg_class_out(consumer) == rss->cls)
744                                         collect_consumer(rss, rss_irn, consumer, got_sink);
745                         }
746                         else
747                                 collect_single_consumer(rss, rss_irn, consumer, got_sink);
748                 }
749         }
750 }
751
752 /**
753  * Collects all consumer and descendant of a irn.
754  */
755 static void collect_node_info(rss_t *rss, ir_node *irn)
756 {
757         static unsigned cur_desc_walk = 0;
758         rss_irn_t       *rss_irn      = get_rss_irn(rss, irn);
759         int             got_sink;
760
761         assert(! is_Proj(irn) && "Cannot handle Projs.");
762
763         /* check if node info is already available */
764         if (rss_irn->handled)
765                 return;
766
767         DBG((rss->dbg, LEVEL_1, "\tcomputing consumers of %+F:\n", irn));
768
769         /* collect all consumer */
770         got_sink = 0;
771         collect_consumer(rss, rss_irn, irn, &got_sink);
772
773         /* build sorted consumer array */
774         rss_irn->consumer = build_sorted_array_from_list(rss_irn->consumer_list, phase_obst(&rss->ph));
775
776         DBG((rss->dbg, LEVEL_1, "\tcompute descendants of %+F:\n", irn));
777
778         /* collect descendants */
779         got_sink = 0;
780         collect_descendants(rss, rss_irn, irn, &got_sink, ++cur_desc_walk);
781
782         /* build sorted descendant array */
783         rss_irn->descendants = build_sorted_array_from_list(rss_irn->descendant_list, phase_obst(&rss->ph));
784
785         rss_irn->handled = 1;
786 }
787
788 /**
789  * Checks if v is a potential killer of u.
790  * v is in pkill(u) iff descendants(v) cut consumer(u) is v
791  *
792  * @param rss   The rss object
793  * @param v      The node to check for killer
794  * @param u      The potentially killed value
795  * @return 1 if v is in pkill(u), 0 otherwise
796  */
797 static int is_potential_killer(rss_t *rss, rss_irn_t *v, rss_irn_t *u)
798 {
799         plist_t *list;
800         const ir_node **arr;
801         plist_element_t *el;
802         (void) rss;
803
804         /* as we loop over the list: loop over the shorter one */
805         if (plist_count(v->descendant_list) > plist_count(u->consumer_list)) {
806                 list = u->consumer_list;
807                 arr  = v->descendants;
808         }
809         else {
810                 list = v->descendant_list;
811                 arr  = u->consumer;
812         }
813
814         /* for each list element: try to find element in array */
815         foreach_plist(list, el) {
816                 ir_node *irn   = (ir_node*)plist_element_get_value(el);
817                 ir_node *match = (ir_node*)BSEARCH_IRN_ARR(irn, arr);
818
819                 if (match && match != irn)
820                         return 0;
821         }
822
823         return 1;
824 }
825
826 /**
827  * Update descendants, consumer and pkiller list for the given irn.
828  */
829 static void update_node_info(rss_t *rss, ir_node *irn, ir_node *pk_irn)
830 {
831         rss_irn_t *node    = get_rss_irn(rss, irn);
832         rss_irn_t *pkiller = get_rss_irn(rss, pk_irn);
833
834         /* add consumer and rebuild the consumer array */
835         if (! plist_has_value(node->consumer_list, pk_irn)) {
836                 plist_insert_back(node->consumer_list, pk_irn);
837                 node->consumer = build_sorted_array_from_list(node->consumer_list, phase_obst(&rss->ph));
838         }
839
840         /* add pk_irn as descendant, add it's descendants to irn's and rebuild array */
841         if (! plist_has_value(node->descendant_list, pk_irn)) {
842                 plist_element_t *el;
843
844                 plist_insert_back(node->descendant_list, pk_irn);
845
846                 foreach_plist(pkiller->descendant_list, el) {
847                         ir_node *desc = (ir_node*)plist_element_get_value(el);
848
849                         if (! plist_has_value(node->descendant_list, desc))
850                                 plist_insert_back(node->descendant_list, desc);
851                 }
852
853                 node->descendants = build_sorted_array_from_list(node->descendant_list, phase_obst(&rss->ph));
854         }
855
856 }
857
858 /**
859  * Compute the potential killing set PK.
860  */
861 static void compute_pkill_set(rss_t *rss)
862 {
863         plist_element_t *u_el, *v_el;
864
865         foreach_plist(rss->nodes, u_el) {
866                 ir_node   *u_irn = (ir_node*)plist_element_get_value(u_el);
867                 rss_irn_t *u     = get_rss_irn(rss, u_irn);
868
869                 DBG((rss->dbg, LEVEL_1, "\tcomputing potential killers of %+F:\n", u_irn));
870
871                 /* check each consumer if it is a potential killer  */
872                 foreach_plist(u->consumer_list, v_el) {
873                         ir_node   *v_irn = (ir_node*)plist_element_get_value(v_el);
874                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
875
876                         /* check, if v is a potential killer of u */
877                         if (is_potential_killer(rss, v, u)) {
878                                 if (! plist_has_value(u->pkiller_list, v_irn))
879                                         plist_insert_back(u->pkiller_list, v_irn);
880                                 v->kill_count++;
881                                 DBG((rss->dbg, LEVEL_2, "\t\tpotential killer %+F\n", v_irn));
882                         }
883                 }
884
885                 u->killer = _sink;
886         }
887
888         if (rss->opts->dump_flags & RSS_DUMP_PKG)
889                 debug_vcg_dump_pkg(rss, NULL, 0);
890 }
891
892 /**
893  * Build set of killing edges (from values to their potential killers)
894  */
895 static void build_kill_edges(rss_t *rss, pset *epk)
896 {
897         plist_element_t *el, *k_el;
898
899         foreach_plist(rss->nodes, el) {
900                 ir_node    *irn = (ir_node*)plist_element_get_value(el);
901                 rss_irn_t *rirn = get_rss_irn(rss, irn);
902
903                 DBG((rss->dbg, LEVEL_3, "\t\tbuilding kill edges for %+F:\n", irn));
904
905                 foreach_plist(rirn->pkiller_list, k_el) {
906                         ir_node    *pkiller = (ir_node*)plist_element_get_value(k_el);
907                         rss_edge_t *ke      = OALLOC(phase_obst(&rss->ph), rss_edge_t);
908
909                         ke->src  = irn;
910                         ke->tgt  = pkiller;
911                         ke->next = NULL;
912
913                         DBG((rss->dbg, LEVEL_3, "\t\t\ttarget %+F\n", pkiller));
914
915                         pset_insert(epk, ke, HASH_RSS_EDGE(ke));
916                 }
917         }
918 }
919
920 #ifdef DEBUG_libfirm
921 /* print the given cbc for debugging purpose */
922 static void debug_print_cbc(firm_dbg_module_t *mod, cbc_t *cbc)
923 {
924         ir_nodeset_iterator_t iter;
925         ir_node    *n;
926         rss_edge_t *ke;
927
928         DBG((mod, LEVEL_3, "\t\tS = set of parents:\n"));
929         foreach_ir_nodeset(&cbc->parents, n, iter) {
930                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
931         }
932         DBG((mod, LEVEL_3, "\t\tT = set of children:\n"));
933         foreach_ir_nodeset(&cbc->children, n, iter) {
934                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
935         }
936         DBG((mod, LEVEL_3, "\t\tE = Edges from producers to consumers\n"));
937         foreach_pset(cbc->kill_edges, rss_edge_t*, ke) {
938                 DBG((mod, LEVEL_3, "\t\t\t%+F -> %+F\n", ke->src, ke->tgt));
939         }
940 }
941 #endif
942
943 /**
944  * Construct the bipartite decomposition.
945  * Sid-Ahmed-Ali Touati, Phd Thesis
946  * Register Pressure in Instruction Level Parallelism, p. 71
947  */
948 static void compute_bipartite_decomposition(rss_t *rss)
949 {
950         pset *epk    = new_pset(cmp_rss_edges, 10);
951         int  cur_num = 0;
952
953         plist_element_t *el;
954
955         DBG((rss->dbg, LEVEL_1, "\tcomputing bipartite decomposition:\n"));
956
957         build_kill_edges(rss, epk);
958
959         foreach_plist(rss->nodes, el) {
960                 ir_node   *u_irn   = (ir_node*)plist_element_get_value(el);
961                 rss_irn_t *u       = get_rss_irn(rss, u_irn);
962                 int       p_change = 1;
963                 int       c_change = 1;
964                 int       vrfy_ok  = 1;
965
966                 cbc_t           *cbc;
967                 plist_element_t *el2;
968                 rss_edge_t      *k_edge;
969                 rss_edge_t      *kedge_root = NULL;
970                 ir_node         *t_irn, *s_irn;
971                 ir_nodeset_iterator_t iter;
972
973                 if (u->visited || u_irn == _sink)
974                         continue;
975
976                 DBG((rss->dbg, LEVEL_2, "\t\t%+F choosen:\n", u_irn));
977
978                 cbc     = OALLOC(phase_obst(&rss->ph), cbc_t);
979                 cbc->nr = cur_num++;
980
981                 /* initialize S_cb */
982                 ir_nodeset_init_size(&cbc->parents, 5);
983                 ir_nodeset_insert(&cbc->parents, u_irn);
984                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (init)\n", u_irn));
985
986                 /* E_cb = empty */
987                 cbc->kill_edges = new_pset(cmp_rss_edges, 5);
988
989                 /* each parent gets killed by at least one value from children */
990
991                 /* T_cb = PK_successors(u) */
992                 ir_nodeset_init_size(&cbc->children, 5);
993                 foreach_plist(u->pkiller_list, el2) {
994                         ir_nodeset_insert(&cbc->children, (ir_node*)plist_element_get_value(el2));
995                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (init)\n", plist_element_get_value(el2)));
996                 }
997
998                 /*
999                         Now: insert the parents of all children into the parent set
1000                         and insert the children of all parents into the children set
1001                         until the sets don't change any more
1002                 */
1003                 while (p_change || c_change) {
1004                         ir_nodeset_iterator_t iter;
1005                         p_change = c_change = 0;
1006
1007                         /* accumulate parents */
1008                         foreach_ir_nodeset(&cbc->children, t_irn, iter) {
1009                                 foreach_pset(epk, rss_edge_t*, k_edge) {
1010                                         ir_node *val = k_edge->src;
1011
1012                                         if (k_edge->tgt == t_irn && ! ir_nodeset_contains(&cbc->parents, val)) {
1013                                                 ir_nodeset_insert(&cbc->parents, val);
1014                                                 p_change = 1;
1015                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (killed by %+F)\n", val, t_irn));
1016                                         }
1017                                 }
1018                         }
1019
1020                         /* accumulate children */
1021                         foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
1022                                 foreach_pset(epk, rss_edge_t*, k_edge) {
1023                                         ir_node *val = k_edge->tgt;
1024
1025                                         if (k_edge->src == s_irn  && ! ir_nodeset_contains(&cbc->children, val)) {
1026                                                 ir_nodeset_insert(&cbc->children, val);
1027                                                 c_change = 1;
1028                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (kills %+F)\n", val, s_irn));
1029                                         }
1030                                 }
1031                         }
1032                 }
1033
1034                 /* mark all parent values as visited */
1035                 foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
1036                         rss_irn_t *s = get_rss_irn(rss, s_irn);
1037                         s->visited = 1;
1038                         /* assure bipartite property */
1039 #if 0
1040                         if (ir_nodeset_contains(&cbc->children, s_irn)) {
1041                                 ir_nodeset_remove(&cbc->children, s_irn);
1042                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F removed from to children\n", s_irn));
1043                         }
1044 #endif
1045                 }
1046
1047                 /* update edges */
1048                 foreach_pset(epk, rss_edge_t*, k_edge) {
1049                         if (ir_nodeset_contains(&cbc->parents, k_edge->src) &&
1050                             ir_nodeset_contains(&cbc->children, k_edge->tgt)) {
1051                                 pset_insert(cbc->kill_edges, k_edge, HASH_RSS_EDGE(k_edge));
1052                                 /*
1053                                         Link all k_edges which are about to be removed together.
1054                                         Beware: pset_remove kills the iterator.
1055                                 */
1056                                 k_edge->next = kedge_root;
1057                                 kedge_root   = k_edge;
1058                         }
1059                 }
1060
1061                 /* remove all linked k_edges */
1062                 for (; kedge_root; kedge_root = (rss_edge_t*)kedge_root->next) {
1063                         pset_remove(epk, kedge_root, HASH_RSS_EDGE(kedge_root));
1064                 }
1065
1066                 /* verify the cbc */
1067                 foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
1068                         int is_killed = 0;
1069
1070                         foreach_pset(cbc->kill_edges, rss_edge_t*, k_edge) {
1071                                 if (k_edge->src == s_irn) {
1072                                         is_killed = 1;
1073                                         pset_break(cbc->kill_edges);
1074                                         break;
1075                                 }
1076                         }
1077
1078                         if (! is_killed) {
1079                                 vrfy_ok = 0;
1080                                 ir_fprintf(stderr, "Warning: parent %+F is not killed in current cbc\n", s_irn);
1081                         }
1082                 }
1083                 assert(vrfy_ok && "Verification of CBC failed");
1084
1085                 /* add the connected bipartite component */
1086                 if (ir_nodeset_size(&cbc->parents) > 0 &&
1087                     ir_nodeset_size(&cbc->children) > 0 &&
1088                         pset_count(cbc->kill_edges) > 0)
1089                         pset_insert(rss->cbc_set, cbc, (unsigned)cbc->nr);
1090                 DBG((rss->dbg, LEVEL_2, "\tbipartite component %d inserted:\n", cbc->nr));
1091                 DEBUG_ONLY(
1092                         if (firm_dbg_get_mask(rss->dbg) & LEVEL_2)
1093                                 debug_print_cbc(rss->dbg, cbc);
1094                 );
1095         }
1096
1097         if (rss->opts->dump_flags & RSS_DUMP_CBC)
1098                 debug_vcg_dump_bipartite(rss);
1099
1100         del_pset(epk);
1101 }
1102
1103 /**
1104  * Select the child with the maximum cost.
1105  */
1106 static child_t *select_child_max_cost(rss_t *rss, ir_nodeset_t *x, ir_nodeset_t *y, child_t *t, cbc_t *cbc)
1107 {
1108         ir_node *child;
1109         ir_nodeset_iterator_t iter;
1110         float   max_cost = -1.0f;
1111
1112         DBG((rss->dbg, LEVEL_2, "\t\tcomputing children costs:\n"));
1113
1114         foreach_ir_nodeset(&cbc->children, child, iter) {
1115                 rss_irn_t  *r_child             = get_rss_irn(rss, child);
1116                 int         num_unkilled_parents = 0;
1117                 int         num_descendants;
1118                 rss_edge_t *k_edge;
1119                 float       cost;
1120
1121                 /* get the number of unkilled parents */
1122                 foreach_pset(cbc->kill_edges, rss_edge_t*, k_edge) {
1123                         if (k_edge->tgt == child && ir_nodeset_contains(x, k_edge->src))
1124                                 ++num_unkilled_parents;
1125                 }
1126
1127                 cost = (float)num_unkilled_parents;
1128
1129                 num_descendants = plist_count(r_child->descendant_list) + ir_nodeset_size(y);
1130
1131                 if (num_descendants > 0)
1132                         cost /= (float)num_descendants;
1133
1134                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F, #desc %d, cost %.3f\n", child, num_descendants, cost));
1135
1136                 if (cost > max_cost) {
1137                         t->irn   = child;
1138                         t->cost  = cost;
1139                         max_cost = cost;
1140                 }
1141         }
1142
1143         return t;
1144 }
1145
1146 /**
1147  * Remove all parents from x which are killed by t_irn.
1148  */
1149 static void remove_covered_parents(rss_t *rss, ir_nodeset_t *x, ir_node *t_irn, cbc_t *cbc)
1150 {
1151         rss_irn_t  *t = get_rss_irn(rss, t_irn);
1152         rss_edge_t *k_edge;
1153
1154         DBG((rss->dbg, LEVEL_2, "\t\tremoving parents covered by %+F:\n", t_irn));
1155
1156         foreach_pset(cbc->kill_edges, rss_edge_t*, k_edge) {
1157                 if (k_edge->tgt == t_irn && ir_nodeset_contains(x, k_edge->src)) {
1158                         ir_nodeset_remove(x, k_edge->src);
1159                         plist_insert_back(t->parent_list, k_edge->src);
1160                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", k_edge->src));
1161                 }
1162         }
1163 }
1164
1165 static void update_cumulated_descendent_values(rss_t *rss, ir_nodeset_t *y, ir_node *t_irn)
1166 {
1167         rss_irn_t *t = get_rss_irn(rss, t_irn);
1168         plist_element_t *el;
1169
1170         DBG((rss->dbg, LEVEL_2, "\t\tupdating cumulated descendant value of %+F:\n", t_irn));
1171
1172         foreach_plist(t->descendant_list, el) {
1173                 ir_nodeset_insert(y, (ir_node*)plist_element_get_value(el));
1174                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", plist_element_get_value(el)));
1175         }
1176 }
1177
1178 /**
1179  * Greedy-k: a heuristics for the MMA problem
1180  */
1181 static void compute_killing_function(rss_t *rss)
1182 {
1183         cbc_t *cbc;
1184         struct obstack obst;
1185
1186         obstack_init(&obst);
1187
1188         rss->cbc_set = pset_new_ptr(5);
1189         compute_bipartite_decomposition(rss);
1190
1191         /* for all bipartite components do: */
1192         foreach_pset(rss->cbc_set, cbc_t*, cbc) {
1193                 ir_node *p;
1194                 ir_nodeset_t x;
1195                 ir_nodeset_t y;
1196                 ir_nodeset_iterator_t iter;
1197                 child_t **sks    = NEW_ARR_F(child_t *, 20);
1198                 size_t  cur_len  =  0;
1199                 size_t  cur_size = 20;
1200                 size_t  i;
1201
1202                 ir_nodeset_init_size(&x, 10);
1203                 ir_nodeset_init_size(&y, 10);
1204
1205                 DBG((rss->dbg, LEVEL_1, "\tcomputing SKS for cbc %d:\n", cbc->nr));
1206                 DBG((rss->dbg, LEVEL_2, "\t\tinitializing parents X:\n"));
1207
1208                 /* X = S_cb (all parents are initially uncovered) */
1209                 foreach_ir_nodeset(&cbc->parents, p, iter) {
1210                         ir_nodeset_insert(&x, p);
1211                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", p));
1212                 }
1213
1214                 /* while X not empty */
1215                 while (ir_nodeset_size(&x) > 0) {
1216                         child_t *t = OALLOCZ(&obst, child_t);
1217
1218                         t = select_child_max_cost(rss, &x, &y, t, cbc);
1219
1220                         if (cur_len >= cur_size) {
1221                                 cur_size *= 2;
1222                                 ARR_EXTO(child_t *, sks, cur_size);
1223                         }
1224
1225                         DBG((rss->dbg, LEVEL_2, "\t\tinsert child %+F (%.3f) into SKS at pos %d\n", t->irn, t->cost, cur_len));
1226
1227                         sks[cur_len++] = t;
1228                         remove_covered_parents(rss, &x, t->irn, cbc);
1229                         update_cumulated_descendent_values(rss, &y, t->irn);
1230                 }
1231
1232                 ARR_SHRINKLEN(sks, cur_len);
1233
1234                 /* sort SKS in increasing cost order */
1235                 qsort(sks, cur_len, sizeof(sks[0]), cmp_child_costs);
1236
1237                 DBG((rss->dbg, LEVEL_2, "\tprocessing SKS for cbc %d:\n", cbc->nr));
1238
1239                 /* build killing function */
1240                 for (i = cur_len; i != 0;) { /* loop over sks in decreasing cost order */
1241                         child_t         *t = sks[--i];
1242                         rss_irn_t       *rt = get_rss_irn(rss, t->irn);
1243                         plist_element_t *p_el;
1244
1245                         DBG((rss->dbg, LEVEL_3, "\t\t\tkiller %+F (%.3f):\n", t->irn, t->cost));
1246
1247                         /* kill all unkilled parents of t */
1248                         foreach_plist(rt->parent_list, p_el) {
1249                                 ir_node    *par = (ir_node*)plist_element_get_value(p_el);
1250                                 rss_irn_t *rpar = get_rss_irn(rss, par);
1251
1252                                 if (is_Sink(rpar->killer)) {
1253                                         rpar->killer = t->irn;
1254                                         DBG((rss->dbg, LEVEL_2, "\t\tkill %+F\n", rpar->irn));
1255                                 }
1256                                 else {
1257                                         DBG((rss->dbg, LEVEL_3, "\t\t\tkeeping %+F as killer for %+F\n", rpar->killer, rpar->irn));
1258                                 }
1259                         }
1260                 }
1261
1262                 ir_nodeset_destroy(&x);
1263                 ir_nodeset_destroy(&y);
1264                 DEL_ARR_F(sks);
1265         }
1266
1267         if (rss->opts->dump_flags & RSS_DUMP_KILL)
1268                 debug_vcg_dump_kill(rss);
1269
1270         del_pset(rss->cbc_set);
1271         obstack_free(&obst, NULL);
1272 }
1273
1274 /**
1275  * Adds the edge src -> tgt to the dvg. Checks if reverse edge is already there (asserts).
1276  */
1277 static inline void add_dvg_edge(rss_t *rss, dvg_t *dvg, const ir_node *src, const ir_node *tgt, int have_source)
1278 {
1279         rss_edge_t *dvg_edge;
1280         rss_edge_t key;
1281
1282         if (! have_source)
1283                 ir_nodeset_insert(&dvg->nodes, (ir_node *) src);
1284         else
1285                 assert(ir_nodeset_contains(&dvg->nodes, src) && "Wrong assumption");
1286
1287         ir_nodeset_insert(&dvg->nodes, (ir_node *) tgt);
1288
1289         key.src = (ir_node *) tgt;
1290         key.tgt = (ir_node *) src;
1291         assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1292
1293         key.src = (ir_node *) src;
1294         key.tgt = (ir_node *) tgt;
1295         if (NULL != pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1296                 /* add the edge to the DVG */
1297                 dvg_edge = OALLOC(phase_obst(&rss->ph), rss_edge_t);
1298
1299                 dvg_edge->src  = (ir_node *) src;
1300                 dvg_edge->tgt  = (ir_node *) tgt;
1301                 dvg_edge->next = NULL;
1302
1303                 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", src, tgt));
1304                 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1305         }
1306 }
1307
1308 /**
1309  * Computes the disjoint value DAG (DVG).
1310  * BEWARE: It is not made explicitly clear in the Touati paper,
1311  *         but the DVG is meant to be build from the KILLING DAG
1312  */
1313 static void compute_dvg(rss_t *rss, dvg_t *dvg)
1314 {
1315         plist_element_t *el;
1316
1317         DBG((rss->dbg, LEVEL_1, "\tcomputing DVG:\n"));
1318
1319         foreach_plist(rss->nodes, el) {
1320                 ir_node    *u_irn    = (ir_node*)plist_element_get_value(el);
1321                 rss_irn_t  *u        = get_rss_irn(rss, u_irn);
1322                 rss_irn_t  *u_killer = get_rss_irn(rss, u->killer);
1323                 int        i;
1324
1325                 /* TODO: omit nodes only having sink as pkiller and killing no one */
1326
1327                 /* add an edge to killer */
1328                 add_dvg_edge(rss, dvg, u_irn, u->killer, 0);
1329
1330                 if (is_Sink(u->killer))
1331                         continue;
1332
1333                 /* We add an edge to every killer, from where we could be reached. */
1334                 for (i = ARR_LEN_SAFE(u_killer->descendants) - 1; i >= 0; --i) {
1335                         add_dvg_edge(rss, dvg, u_irn, u_killer->descendants[i], 1);
1336                 }
1337
1338 #if 0
1339
1340                 foreach_plist(rss->nodes, el2) {
1341                         ir_node *v_irn = plist_element_get_value(el2);
1342
1343                         /*
1344                                 There is an edge (u, v) in the DVG iff v is a descendant of the killer(u).
1345                         */
1346                         if (BSEARCH_IRN_ARR(v_irn, u_kill->descendants)) {
1347                                 rss_edge_t *dvg_edge = OALLOC(phase_obst(&rss->ph), rss_edge_t);
1348                                 rss_edge_t key;
1349
1350                                 /* insert the user into the DVG and append it to the user list of u */
1351                                 ir_nodeset_insert(&dvg->nodes, v_irn);
1352                                 if (! plist_has_value(u->dvg_user_list, v_irn))
1353                                         plist_insert_back(u->dvg_user_list, v_irn);
1354
1355                                 dvg_edge->src  = u_irn;
1356                                 dvg_edge->tgt  = v_irn;
1357                                 dvg_edge->next = NULL;
1358
1359                                 key.src = v_irn;
1360                                 key.tgt = u_irn;
1361
1362                                 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1363
1364                                 /* add the edge to the DVG */
1365                                 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", u_irn, v_irn));
1366                                 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1367                         }
1368                 }
1369 #endif /* if 0 */
1370         }
1371
1372         if (rss->opts->dump_flags & RSS_DUMP_DVG)
1373                 debug_vcg_dump_dvg(rss, dvg);
1374 }
1375
1376 /**
1377  * Updates the dvg structure when a serialization edge from src -> tgt is added.
1378  */
1379 static void update_dvg(rss_t *rss, dvg_t *dvg, rss_irn_t *src, rss_irn_t *tgt)
1380 {
1381         int i, j, idx;
1382         rss_edge_t *edge;
1383         rss_edge_t **arr = ALLOCAN(rss_edge_t*, pset_count(dvg->edges));
1384
1385         /*
1386                 Add an edge from serialization target to serialization src:
1387                 src cannot be alive with target
1388         */
1389         add_dvg_edge(rss, dvg, tgt->irn, src->irn, 0);
1390
1391         /* Add edges to src's descendants as well, they also getting serialized. */
1392         for (i = ARR_LEN_SAFE(src->descendants) - 1; i >= 0; --i) {
1393                 add_dvg_edge(rss, dvg, tgt->irn, src->descendants[i], 1);
1394         }
1395
1396         /* We also have to add edges from targets predecessors in dvg */
1397         idx = 0;
1398         /* We cannot insert elements into set over which we iterate ... */
1399         foreach_pset(dvg->edges, rss_edge_t*, edge) {
1400                 if (edge->tgt == tgt->irn) {
1401                         arr[idx++] = edge;
1402                 }
1403         }
1404
1405         for (i = 0; i < idx; ++i) {
1406                 edge = arr[i];
1407                 add_dvg_edge(rss, dvg, edge->src, src->irn, 1);
1408                 for (j = ARR_LEN_SAFE(src->descendants) - 1; j >= 0; --j) {
1409                         add_dvg_edge(rss, dvg, edge->src, src->descendants[j], 1);
1410                 }
1411         }
1412 }
1413
1414 #if 0
1415 /**
1416  * Accumulate all descendants for root into list.
1417  */
1418 static void accumulate_dvg_descendant_values(rss_t *rss, rss_irn_t *root, plist_t *list)
1419 {
1420         if (plist_count(root->dvg_user_list) > 0) {
1421                 plist_element_t *el;
1422
1423                 foreach_plist(root->dvg_user_list, el) {
1424                         ir_node   *v_irn = plist_element_get_value(el);
1425                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
1426
1427                         /* add v as descendant */
1428                         if (! plist_has_value(list, v_irn)) {
1429                                 plist_insert_back(list, v_irn);
1430                                 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG descendant %+F\n", v_irn));
1431                         }
1432
1433                         /* accumulate v's descendants */
1434                         accumulate_dvg_descendant_values(rss, v, list);
1435                 }
1436         }
1437 }
1438
1439 /**
1440  * Builds the list of potential killers for each node
1441  * in the given DVG.
1442  * Needs the descendant list for all user as sorted array.
1443  */
1444 static void build_dvg_pkiller_list(rss_t *rss, dvg_t *dvg)
1445 {
1446         ir_nodeset_iterator_t iter;
1447         ir_node *irn;
1448
1449         foreach_nodeset(&dvg->nodes, irn, iter) {
1450                 rss_irn_t       *node = get_rss_irn(rss, irn);
1451                 plist_element_t *el, *el2;
1452
1453                 DBG((rss->dbg, DEBUG_DVG, "\t\tbuilding pkiller list for %+F\n", irn));
1454
1455                 /* check each user */
1456                 foreach_plist(node->dvg_user_list, el) {
1457                         ir_node *u_irn = plist_element_get_value(el);
1458
1459                         /* is the current user u_irn not a descendant of any other user -> pkiller */
1460                         foreach_plist(node->dvg_user_list, el2) {
1461                                 ir_node   *v_irn = plist_element_get_value(el2);
1462                                 rss_irn_t *v     = get_rss_irn(rss, v_irn);
1463
1464                                 if (el != el2                             &&
1465                                         ! BSEARCH_IRN_ARR(u_irn, v->dvg_desc) &&
1466                                         ! plist_has_value(node->dvg_pkiller_list, u_irn))
1467                                 {
1468                                         plist_insert_back(node->dvg_pkiller_list, u_irn);
1469                                         DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG pkiller %+F\n", u_irn));
1470                                 }
1471                         }
1472                 }
1473
1474                 node->dvg_pkiller = build_sorted_array_from_list(node->dvg_pkiller_list, phase_obst(&rss->ph));
1475         }
1476
1477         DEBUG_ONLY(
1478                 if (firm_dbg_get_mask(rss->dbg) & DEBUG_DVG)
1479                         debug_vcg_dump_dvg_pkiller(rss, dvg);
1480         );
1481 }
1482
1483 #endif /* if 0 */
1484
1485 /**
1486  * Compute the maximal antichain of the current DVG.
1487  * This is a reimplementation of the MAXIMAL_ANTI_CHAIN function
1488  * from the DDG library 1.1 (DAG.cpp).
1489  */
1490 static ir_nodeset_t *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration)
1491 {
1492         unsigned    n               = ir_nodeset_size(&dvg->nodes);
1493         unsigned    *assignment     = ALLOCAN(unsigned, n);
1494         unsigned    *assignment_rev = ALLOCAN(unsigned, n);
1495         int         *idx_map        = ALLOCAN(int, n);
1496         hungarian_problem_t *bp;
1497         ir_nodeset_t *values, *temp;
1498         ir_nodeset_iterator_t iter;
1499         ir_node     *u_irn;
1500         unsigned    i, j;
1501         unsigned    cost;
1502         int         cur_chain, res;
1503         rss_edge_t  *dvg_edge;
1504
1505 #define MAP_IDX(irn) bsearch_for_index(get_irn_idx(irn), idx_map,  n,  1)
1506
1507         if (pset_count(dvg->edges) == 0)
1508                 return NULL;
1509
1510         bp = hungarian_new(n, n, HUNGARIAN_MATCH_NORMAL);
1511
1512         /*
1513                 At first, we build an index map for the nodes in the DVG,
1514                 because we cannot use the irn idx therefore as the resulting
1515                 bipartite data structure would have around 1.2GB.
1516                 So we limit the size to the number of nodes we have in the DVG
1517                 and build a sorted index map for their irn indices.
1518         */
1519         i = 0;
1520         foreach_ir_nodeset(&dvg->nodes, u_irn, iter) {
1521                 idx_map[i++] = get_irn_idx(u_irn);
1522         }
1523         qsort(idx_map, n, sizeof(idx_map[0]), cmp_int);
1524
1525         foreach_pset(dvg->edges, rss_edge_t*, dvg_edge) {
1526                 int idx_u = MAP_IDX(dvg_edge->src);
1527                 int idx_v = MAP_IDX(dvg_edge->tgt);
1528
1529                 /* add the entry to the bipartite data structure */
1530                 hungarian_add(bp, idx_u, idx_v, 1);
1531                 DBG((rss->dbg, LEVEL_3, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1532                         idx_u, dvg_edge->src, idx_v, dvg_edge->tgt));
1533         }
1534 #if 0
1535         /*
1536                 Add a bipartite entry for each pair of nodes (u, v), where exists a
1537                 path in the DVG from u to v, ie. connect all descendants(v) to v.
1538                 desc_dvg(v) = accumulated descendants of all z in dvg_user_list(v)
1539         */
1540         foreach_ir_nodeset(&dvg->nodes, u_irn, iter) {
1541                 rss_irn_t *u        = get_rss_irn(rss, u_irn);
1542                 int       idx_u_irn = MAP_IDX(u_irn);
1543
1544                 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG descendants of %+F:\n", u_irn));
1545
1546                 //plist_clear(u->dvg_desc_list);
1547                 //accumulate_dvg_descendant_values(rss, u, u->dvg_desc_list);
1548
1549                 /*
1550                         FIXME: The array is build on the phase obstack and we cannot free the data.
1551                         So the array get as many times allocated as this function is called.
1552                 */
1553
1554                 /* build the sorted array for faster searches */
1555                 //u->dvg_desc = build_sorted_array_from_list(u->dvg_desc_list, phase_obst(&rss->ph));
1556
1557                 DBG((rss->dbg, DEBUG_MAX_AC, "\t\tadding bipartite entries of %+F:\n", u_irn));
1558
1559                 /* add the bipartite entries for all descendants */
1560                 for (i = ARR_LEN_SAFE(u->dvg_desc) - 1; i >= 0; --i) {
1561                         rss_irn_t *desc    = get_rss_irn(rss, u->dvg_desc[i]);
1562                         int       idx_desc = MAP_IDX(u->dvg_desc[i]);
1563
1564                         /* add the entry to the bipartite data structure */
1565                         hungarian_add(bp, idx_u_irn, idx_desc, 1);
1566                         DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1567                                 idx_u_irn, u_irn, idx_desc, u->dvg_desc[i]));
1568
1569                         need_matching = 1;
1570                 }
1571         }
1572 #endif
1573
1574         /* We want maximum cardinality matching */
1575         hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);
1576
1577 #if 0
1578         DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG pkiller:\n"));
1579         /* beware: the following function needs the dvg_desc array */
1580         build_dvg_pkiller_list(rss, dvg);
1581 #endif
1582
1583         DBG((rss->dbg, LEVEL_2, "\t\tcomputing bipartite matching\n"));
1584         /*
1585                 The maximum cardinality bipartite matching gives us the minimal
1586                 chain partition, which corresponds to the maximum anti chains.
1587         */
1588         res = hungarian_solve(bp, assignment, &cost, 1);
1589         assert(res == 0 && "Bipartite matching failed!");
1590
1591         hungarian_free(bp);
1592         memset(assignment_rev, -1, n * sizeof(assignment_rev[0]));
1593
1594         /* build the reverse assignment, ie. foreach i -> j, add j -> i */
1595         for (i = 0; i < n; ++i) {
1596                 if (assignment[i] != (unsigned)-1) {
1597                         unsigned j = assignment[i];
1598                         assignment_rev[j] = i;
1599                 }
1600         }
1601
1602         DBG((rss->dbg, LEVEL_2, "\t\tgot assignment with cost %u\n", cost));
1603         DBG((rss->dbg, LEVEL_3, "\t\t\tassignment   ---   reverse assignment\n"));
1604         for (i = 0; i < n; ++i) {
1605                 DBG((rss->dbg, LEVEL_3, "\t\t\t%3u -> %3u         %3u -> %3u\n", i, assignment[i], i, assignment_rev[i]));
1606         }
1607
1608         values    = XMALLOC(ir_nodeset_t);
1609         ir_nodeset_init_size(values, 10);
1610         cur_chain = 0;
1611         /* Construction of the minimal chain partition */
1612         for (j = 0; j < n; ++j) {
1613                 /* check nodes, which did not occur as target */
1614                 if (assignment_rev[j] == (unsigned)-1) {
1615                         int       xj      = idx_map[j];
1616                         ir_node   *xj_irn = get_idx_irn(rss->irg, xj);
1617                         rss_irn_t *xj_rss = get_rss_irn(rss, xj_irn);
1618                         chain_t   *c      = OALLOC(phase_obst(&rss->ph), chain_t);
1619                         unsigned  source;
1620
1621                         /* there was no source for j -> we have a source of a new chain */
1622                         ir_nodeset_insert(values, xj_irn);
1623
1624                         c->elements = plist_obstack_new(phase_obst(&rss->ph));
1625                         c->nr       = cur_chain++;
1626                         plist_insert_back(c->elements, xj_irn);
1627
1628                         xj_rss->chain = c;
1629
1630                         DBG((rss->dbg, LEVEL_2, "\t\tstarting chain %d:\n", c->nr));
1631                         DBG((rss->dbg, LEVEL_2, "\t\t\t%+F (%u)", xj_irn, j));
1632
1633                         /* follow chain, having j as source */
1634                         source = j;
1635                         while (assignment[source] != (unsigned)-1) {
1636                                 int       target  = assignment[source];
1637                                 int       irn_idx = idx_map[target];
1638                                 ir_node   *irn    = get_idx_irn(rss->irg, irn_idx);
1639                                 rss_irn_t *node   = get_rss_irn(rss, irn);
1640
1641                                 plist_insert_back(c->elements, irn);
1642                                 node->chain = c;
1643
1644                                 DB((rss->dbg, LEVEL_2, " -> %+F (%d)", irn, target));
1645
1646                                 /* new source = last target */
1647                                 source = target;
1648                         }
1649
1650                         DB((rss->dbg, LEVEL_2, "\n"));
1651                 }
1652         }
1653
1654         /*
1655                 Computing the maximal antichain: Select an element from each
1656                 chain such, such it is parallel with the others.
1657         */
1658         DBG((rss->dbg, LEVEL_1, "\t\tcomputing set of saturation values (MAX AC)\n"));
1659         DBG((rss->dbg, LEVEL_3, "\t\tstarting with:\n"));
1660
1661         DEBUG_ONLY(
1662                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_3)
1663                         dump_nodeset(values, "\t\t\t");
1664         )
1665
1666         temp = NULL;
1667         do {
1668                 /*
1669                         We need an explicit array for the values as
1670                         we cannot iterate multiple times over the same
1671                         set at the same time. :-(((((
1672                         TODO Matze: now we can, so rewrite this...
1673                 */
1674                 unsigned n         = ir_nodeset_size(values);
1675                 unsigned i         = 0;
1676                 ir_node **val_arr = NEW_ARR_F(ir_node *, n);
1677
1678                 foreach_ir_nodeset(values, u_irn, iter)
1679                         val_arr[i++] = u_irn;
1680
1681                 if (temp) {
1682                         ir_nodeset_destroy(temp);
1683                         free(temp);
1684                 }
1685
1686                 temp = XMALLOC(ir_nodeset_t);
1687                 ir_nodeset_init_size(temp, 10);
1688
1689                 /* Select all nodes from current value set, having another node in the set as descendant. */
1690                 for (i = 0; i < n; ++i) {
1691                         rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1692
1693                         for (j = 0; j < n; ++j) {
1694                                 if (i != j) {
1695                                         rss_edge_t key;
1696
1697                                         key.src = val_arr[i];
1698                                         key.tgt = val_arr[j];
1699
1700                                         if (pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1701                                                 /* v[j] is descendant of u -> remove u and break */
1702                                                 ir_nodeset_insert(temp, (ir_node *) u->irn);
1703                                                 ir_nodeset_remove(values, u->irn);
1704
1705                                                 DBG((rss->dbg, LEVEL_3, "\t\t\tremoving %+F from values, adding it to temp\n", u->irn));
1706
1707                                                 break;
1708                                         }
1709                                 }
1710                         }
1711                 }
1712
1713                 /* Try to insert the chain predecessor of all selected u's */
1714                 foreach_ir_nodeset(temp, u_irn, iter) {
1715                         rss_irn_t       *u  = get_rss_irn(rss, u_irn);
1716                         chain_t         *c  = u->chain;
1717                         plist_element_t *el = plist_find_value(c->elements, u_irn);
1718
1719                         assert(el && "Missing element in chain!");
1720
1721                         /* If u has predecessor in chain: insert the predecessor */
1722                         if (el == plist_element_get_prev(el)) {
1723                                 ir_nodeset_insert(values, (ir_node*)plist_element_get_value(el));
1724                                 DBG((rss->dbg, LEVEL_3, "\t\t\tadding %+F to values\n", plist_element_get_value(el)));
1725                         }
1726                 }
1727
1728
1729                 DEL_ARR_F(val_arr);
1730         } while (ir_nodeset_size(temp) > 0);
1731
1732         DBG((rss->dbg, LEVEL_2, "\t\tfinal set:\n"));
1733         DEBUG_ONLY(
1734                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2) {
1735                         dump_nodeset(values, "\t\t\t");
1736                 }
1737         );
1738
1739         if (rss->opts->dump_flags & RSS_DUMP_MAXAC)
1740                 debug_vcg_dump_pkg(rss, values, iteration);
1741
1742         if (temp != NULL) {
1743                 ir_nodeset_destroy(temp);
1744                 free(temp);
1745         }
1746
1747         return values;
1748
1749 #undef MAP_IDX
1750 }
1751
1752 /**
1753  * Computes the best serialization between two nodes of sat_vals.
1754  */
1755 static serialization_t *compute_best_admissible_serialization(rss_t *rss, ir_nodeset_t *sat_vals, serialization_t *ser, int num_regs)
1756 {
1757         int        n                    = ir_nodeset_size(sat_vals);
1758         int        n_idx                = ARR_LEN_SAFE(rss->idx_map);
1759         int        i                    = 0;
1760         ir_node    **val_arr            = ALLOCAN(ir_node*, n);
1761         bitset_t   *bs_sv               = bitset_alloca(n_idx);
1762         bitset_t   *bs_vdesc            = bitset_alloca(n_idx);
1763         bitset_t   *bs_tmp              = bitset_alloca(n_idx);
1764         bitset_t   *bs_ukilldesc        = bitset_alloca(n_idx);
1765         int        best_benefit         = INT_MAX;
1766         int        best_omega2          = INT_MAX;
1767         int        best_benefit_omega20 = INT_MAX;
1768         int        has_omega1           = 0;
1769         int        j, k;
1770         ir_node    *irn;
1771         ir_nodeset_iterator_t iter;
1772         rss_edge_t min_benefit_edge = {NULL, NULL, NULL};
1773         rss_edge_t min_omega20_edge = {NULL, NULL, NULL};
1774         rss_irn_t  *ser_u_omega1 = NULL, *ser_v_omega1 = NULL;
1775         rss_irn_t  *ser_u_omega20 = NULL, *ser_v_omega20 = NULL;
1776
1777         DBG((rss->dbg, LEVEL_1, "\tcomputing admissible serializations:\n"));
1778
1779         /*
1780                 We need an explicit array for the values as
1781                 we cannot iterate multiple times over the same
1782                 set at the same time. :-(((((
1783         */
1784
1785         foreach_ir_nodeset(sat_vals, irn, iter) {
1786                 val_arr[i++] = irn;
1787                 bitset_set(bs_sv, BLOCK_IDX_MAP(rss, irn));
1788         }
1789
1790         /*
1791                 We build all admissible serializations and remember the best found so far.
1792                 For u in sat_vals:
1793                  For v in sat_val:
1794                    if v in pkiller(u): add edge from v to all other pkiller(u)
1795                    else: for all vv in pkiller(u): add edge from v to vv if there exists no path from vv to v
1796         */
1797
1798 /*
1799         A node is unserializable if:
1800         - it has only one killer and this one is Sink
1801     - it kills no other values
1802         In this case there is no serialization which could
1803         reduce the registerpressure
1804 */
1805 #define IS_UNSERIALIZABLE_NODE(rss_node)                  \
1806         (                                                     \
1807                 (                                                 \
1808                         (plist_count(rss_node->pkiller_list) == 1) && \
1809                         is_Sink(rss_node->killer)                  && \
1810                         (rss_node->kill_count                == 0)    \
1811                 )                            ||                   \
1812                 be_is_Barrier(rss_node->irn) ||                   \
1813                 be_is_Keep(rss_node->irn)                         \
1814         )
1815
1816         /* for all u in sat_vals */
1817         for (i = 0; i < n; ++i) {
1818                 rss_irn_t       *u = get_rss_irn(rss, val_arr[i]);
1819                 plist_element_t *el;
1820
1821                 /* ignore nodes where serialization does not help */
1822                 if (IS_UNSERIALIZABLE_NODE(u)) {
1823                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", u->irn));
1824                         continue;
1825                 }
1826
1827                 /* accumulate all descendants of all pkiller(u) */
1828                 bitset_clear_all(bs_ukilldesc);
1829                 foreach_plist(u->pkiller_list, el) {
1830                         ir_node   *irn  = (ir_node*)plist_element_get_value(el);
1831                         rss_irn_t *node = get_rss_irn(rss, irn);
1832
1833                         if (! is_Sink(irn))
1834                                 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, irn));
1835                         else
1836                                 continue;
1837
1838                         for (k = ARR_LEN_SAFE(node->descendants) - 1; k >= 0; --k) {
1839                                 if (! is_Sink(node->descendants[k]))
1840                                         bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, node->descendants[k]));
1841                         }
1842                 }
1843
1844                 /* for all v in sat_vals */
1845                 for (j = 0; j < n; ++j) {
1846                         ir_node   *v_irn   = val_arr[j];
1847                         rss_irn_t *v       = get_rss_irn(rss, v_irn);
1848                         unsigned  v_height = get_irn_height(rss->h, v_irn);
1849                         int       omega1, omega2, is_pkiller;
1850
1851                         /* v cannot be serialized with itself
1852                          * ignore nodes where serialization does not help */
1853                         if (i == j || IS_UNSERIALIZABLE_NODE(v)) {
1854 #ifdef DEBUG_libfirm
1855                                 if (i != j)
1856                                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", v->irn));
1857 #endif
1858                                 continue;
1859                         }
1860
1861                         /* get descendants of v */
1862                         bitset_clear_all(bs_vdesc);
1863                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v_irn));
1864                         for (k = ARR_LEN_SAFE(v->descendants) - 1; k >= 0; --k) {
1865                                 if (! is_Sink(v->descendants[k]))
1866                                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v->descendants[k]));
1867                         }
1868
1869                         /* if v is in pkiller(u) */
1870                         is_pkiller = plist_has_value(u->pkiller_list, v_irn);
1871
1872                         /* for all vv in pkiller(u) */
1873                         foreach_plist(u->pkiller_list, el) {
1874                                 ir_node *vv_irn  = (ir_node*)plist_element_get_value(el);
1875                                 int     add_edge;
1876
1877                                 if (is_Sink(vv_irn) || is_cfop(vv_irn))
1878                                         continue;
1879
1880                                 if (is_pkiller)
1881                                         add_edge = vv_irn != v_irn && skip_Proj(vv_irn) != skip_Proj(v_irn);
1882                                 else
1883                                         add_edge = ! heights_reachable_in_block(rss->h, skip_Proj(vv_irn), skip_Proj(v_irn));
1884
1885                                 /*
1886                                         As we add an edge from vv -> v, we have to make sure,
1887                                         that there exists no path from v to vv.
1888                                 */
1889
1890                                 if (add_edge) {
1891                                         unsigned vv_height = get_irn_height(rss->h, vv_irn);
1892                                         unsigned critical_path_cost;
1893                                         unsigned mu1, mu2;
1894
1895                                         /*
1896                                                 mu1 = | descendants(v) cut sat_vals |
1897                                                 the number of saturating values which cannot
1898                                                 be simultaneously alive with u
1899                                         */
1900                                         bitset_copy(bs_tmp, bs_vdesc);
1901                                         bitset_and(bs_tmp, bs_sv);
1902                                         mu1 = bitset_popcount(bs_tmp);
1903
1904                                         /*
1905                                                 mu2 = | accum_desc_all_pkiller(u) without descendants(v) |
1906                                         */
1907                                         if (is_pkiller) {
1908                                                 bitset_copy(bs_tmp, bs_ukilldesc);
1909                                                 bitset_andnot(bs_tmp, bs_vdesc);
1910                                                 mu2 = bitset_popcount(bs_tmp);
1911                                         }
1912                                         else {
1913                                                 mu2 = 0;
1914                                         }
1915
1916                                         /* omega1 = mu1 - mu2 */
1917                                         omega1 = mu1 - mu2;
1918
1919                                         if (omega1 != 0)
1920                                                 has_omega1 = 1;
1921
1922                                         /* omega2 = increase of critical path */
1923                                         critical_path_cost =
1924                                                 v_height                        /* longest path from v to sink */
1925                                                 + rss->max_height - vv_height   /* longest path from source to vv */
1926                                                 + 1;                            /* edge */
1927
1928                                         /*
1929                                                 If critical_path_cost > max_height -> the new edge
1930                                                 would increase the longest critical path by the difference.
1931                                         */
1932                                         omega2 = critical_path_cost > rss->max_height ? critical_path_cost - rss->max_height : 0;
1933
1934                                         /* this keeps track of the edge with the best benefit */
1935                                         if (omega1 >= num_regs - n && omega1 < best_benefit) {
1936                                                 min_benefit_edge.src = v_irn;
1937                                                 min_benefit_edge.tgt = vv_irn;
1938
1939                                                 ser_u_omega1 = u;
1940                                                 ser_v_omega1 = v;
1941
1942                                                 best_benefit    = omega1;
1943                                                 ser->new_killer = is_pkiller;
1944                                         }
1945
1946                                         /* this keeps track of the edge with the best omega1 costs where omega2 == 0 */
1947                                         if (omega2 == 0 && omega1 >= num_regs - n && omega1 < best_benefit_omega20) {
1948                                                 min_omega20_edge.src = v_irn;
1949                                                 min_omega20_edge.tgt = vv_irn;
1950
1951                                                 ser_u_omega20 = u;
1952                                                 ser_v_omega20 = v;
1953
1954                                                 best_benefit_omega20 = omega1;
1955                                                 ser->new_killer      = is_pkiller;
1956                                         }
1957
1958                                         best_omega2 = MIN(best_omega2, omega2);
1959
1960                                         DBG((rss->dbg, LEVEL_2, "\t\tfound %+F -> %+F (w1 %d, w2 %d)\n",
1961                                                 v_irn, vv_irn, omega1, omega2));
1962                                 } /* if add_edge */
1963                         } /* for all vv in pkiller(u) */
1964                 } /* for all v in sat_vals */
1965         } /* for all u in sat_vals */
1966
1967         if (! has_omega1)
1968                 return NULL;
1969
1970         if (best_omega2 == 0) {
1971                 ser->u         = ser_u_omega20;
1972                 ser->v         = ser_v_omega20;
1973                 ser->edge->src = min_omega20_edge.src;
1974                 ser->edge->tgt = min_omega20_edge.tgt;
1975                 ser->omega1    = best_benefit_omega20;
1976                 ser->omega2    = best_omega2;
1977         }
1978         else {
1979                 ser->u         = ser_u_omega1;
1980                 ser->v         = ser_v_omega1;
1981                 ser->edge->src = min_benefit_edge.src;
1982                 ser->edge->tgt = min_benefit_edge.tgt;
1983                 ser->omega1    = best_benefit;
1984                 ser->omega2    = best_omega2;
1985         }
1986
1987         return ser;
1988
1989 #undef IS_UNSERIALIZABLE_NODE
1990 }
1991
1992 /**
1993  * Perform the value serialization heuristic and add all
1994  * computed serialization edges as dependencies to the irg.
1995  */
1996 static void perform_value_serialization_heuristic(rss_t *rss)
1997 {
1998         unsigned available_regs, iteration;
1999         dvg_t    dvg;
2000         ir_nodeset_t *sat_vals;
2001         pset *ser_set = new_pset(cmp_rss_edges, 20);
2002
2003         available_regs = be_get_n_allocatable_regs(rss->irg, rss->cls);
2004
2005         DBG((rss->dbg, LEVEL_1, "\n\t#available regs: %d\n\n", available_regs));
2006
2007         /*
2008                 At first we need to compute the disjoint value DAG (DVG = {V, E_dv}).
2009                 V    = set of all nodes we are currently interested in
2010                 E_dv = there is an edge from u to v iff v is a descendant of killer(u), forall u, v in V
2011         */
2012         ir_nodeset_init_size(&dvg.nodes, plist_count(rss->nodes));
2013         dvg.edges = new_pset(cmp_rss_edges, plist_count(rss->nodes) * 5);
2014         compute_dvg(rss, &dvg);
2015
2016         /*
2017                 Then we perform the heuristic serialization algorithm
2018                 on the DVG which gives us all necessary serialization
2019                 edges.
2020         */
2021         DBG((rss->dbg, LEVEL_1, "\tcomputing maximal antichain:\n"));
2022         iteration = 0;
2023         sat_vals  = compute_maximal_antichain(rss, &dvg, iteration++);
2024         while (sat_vals && (ir_nodeset_size(sat_vals) > available_regs)) {
2025                 serialization_t *ser, best_ser;
2026                 rss_edge_t      *edge = OALLOC(phase_obst(&rss->ph), rss_edge_t);
2027                 ir_node         *dep_src, *dep_tgt;
2028
2029                 best_ser.edge = edge;
2030                 ser = compute_best_admissible_serialization(rss, sat_vals, &best_ser, available_regs);
2031
2032                 DBG((rss->dbg, LEVEL_1, "\tcurrent register saturation %d, target %d\n", ir_nodeset_size(sat_vals), available_regs));
2033
2034                 if (! ser) {
2035                         DBG((rss->dbg, LEVEL_1, "\tno RS improving serialization found, breaking at iteration %d\n", iteration));
2036                         break;
2037                 }
2038
2039                 /* Insert the serialization as dependency edge into the irg. */
2040                 DBG((rss->dbg, LEVEL_1, "\tserializing %+F -> %+F with edge %+F -> %+F and cost %d, %d\n",
2041                         ser->u->irn, ser->v->irn, ser->edge->src, ser->edge->tgt, ser->omega1, ser->omega2));
2042
2043                 if (pset_find(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge)))
2044                         ir_printf("WARNING: serialization %+F -> %+F computed twice!\n", ser->edge->src, ser->edge->tgt);
2045
2046
2047                 pset_insert(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge));
2048
2049                 /* update the dvg */
2050                 update_dvg(rss, &dvg, ser->v, ser->u);
2051                 update_dvg(rss, &dvg, get_rss_irn(rss, ser->edge->src), get_rss_irn(rss, ser->edge->tgt));
2052                 if (sat_vals != NULL) {
2053                         ir_nodeset_destroy(sat_vals);
2054                         free(sat_vals);
2055                 }
2056
2057                 dep_src = skip_Proj(ser->edge->src);
2058                 dep_tgt = ser->edge->tgt;
2059                 add_irn_dep(dep_src, dep_tgt);
2060
2061                 /* Update descendants, consumer and pkillers of the target */
2062                 update_node_info(rss, ser->edge->tgt, ser->edge->src);
2063
2064                 /* TODO: try to find a cheaper way for updating height information */
2065                 rss->max_height = heights_recompute_block(rss->h, rss->block);
2066
2067                 /* Recompute the antichain for next serialization */
2068                 DBG((rss->dbg, LEVEL_1, "\tre-computing maximal antichain:\n"));
2069                 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
2070         }
2071
2072         ir_nodeset_destroy(&dvg.nodes);
2073         del_pset(dvg.edges);
2074 }
2075
2076 /**
2077  * Do initial calculations for a block.
2078  */
2079 static void process_block(ir_node *block, void *env)
2080 {
2081         rss_t *rss = (rss_t*)env;
2082         int   i, n;
2083         const ir_edge_t *edge;
2084
2085         phase_init(&rss->ph, rss->irg, init_rss_irn);
2086
2087         DBG((rss->dbg, LEVEL_1, "preprocessing block %+F\n", block));
2088         rss->block = block;
2089
2090         /* build an index map for all nodes in the current block */
2091         i            = 0;
2092         n            = get_irn_n_edges(block);
2093         NEW_ARR_A(int, rss->idx_map, n);
2094         foreach_out_edge(block, edge) {
2095                 ir_node *irn      = get_edge_src_irn(edge);
2096                 rss->idx_map[i++] = get_irn_idx(irn);
2097         }
2098         qsort(rss->idx_map, n, sizeof(rss->idx_map[0]), cmp_int);
2099         rss->max_height = heights_recompute_block(rss->h, block);
2100
2101         /* loop over all register classes */
2102         for (i = rss->arch_env->n_register_classes - 1; i >= 0; --i) {
2103                 const arch_register_class_t *cls = &rss->arch_env->register_classes[i];
2104
2105                 rss->cls = cls;
2106                 DBG((rss->dbg, LEVEL_1, "register class %s\n", arch_register_class_name(cls)));
2107
2108                 /* Get all live value at end of Block having current register class */
2109                 ir_nodeset_init(&rss->live_block);
2110                 be_liveness_end_of_block(rss->liveness, rss->cls, rss->block, &rss->live_block);
2111
2112                 /* reset the list of interesting nodes */
2113                 plist_clear(rss->nodes);
2114                 plist_insert_back(rss->nodes, _sink);
2115
2116                 /* collect all nodes having a certain register class */
2117                 foreach_out_edge(block, edge) {
2118                         ir_node *irn  = get_edge_src_irn(edge);
2119                         ir_mode *mode = get_irn_mode(irn);
2120
2121                         /*
2122                                 We skip:
2123                                 - mode_T nodes (the projs are asked)
2124                                 - mode_X nodes (control flow nodes are always scheduled last)
2125                                 - Keeps (they are always immediately scheduled)
2126                                 - Phi (same as Keep)
2127                         */
2128                         if (mode == mode_T || mode == mode_X || is_Phi(irn))
2129                                 continue;
2130
2131                         /*
2132                                 In case of a proj, we skip
2133                                 - Barrier (they are a Barrier :)
2134                                 - Start
2135                                 - the Proj itself, as it's scheduled always with it's super node
2136                         */
2137                         if (is_Proj(irn)) {
2138                                 ir_node *pred = get_Proj_pred(irn);
2139                                 if (be_is_Barrier(pred) || be_is_Start(pred))
2140                                         continue;
2141                         }
2142
2143                         /* calculate the descendants and consumer for each node in the block */
2144                         collect_node_info(rss, skip_Proj(irn));
2145
2146                         if (be_is_Keep(irn))
2147                                 continue;
2148
2149                         if (!arch_irn_is_ignore(irn) &&
2150                                         arch_get_irn_reg_class_out(irn) == cls) {
2151                                 plist_insert_back(rss->nodes, skip_Proj(irn));
2152                         }
2153                         //}
2154                 }
2155
2156                 /* compute the potential killing set PK(G) */
2157                 compute_pkill_set(rss);
2158
2159                 /* compute the killing function k* */
2160                 compute_killing_function(rss);
2161
2162                 /*
2163                         Compute the heuristic value serialization and
2164                         add the necessary dependencies to the irg.
2165                 */
2166                 perform_value_serialization_heuristic(rss);
2167
2168                 ir_nodeset_destroy(&rss->live_block);
2169         }
2170
2171         phase_deinit(&rss->ph);
2172 }
2173
2174 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_schedrss);
2175 void be_init_schedrss(void)
2176 {
2177         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
2178         lc_opt_entry_t *sched_grp = lc_opt_get_grp(be_grp, "sched");
2179         lc_opt_entry_t *rss_grp = lc_opt_get_grp(sched_grp, "rss");
2180
2181         lc_opt_add_table(rss_grp, rss_option_table);
2182 }
2183
2184 /**
2185  * Preprocess the irg for scheduling.
2186  */
2187 void rss_schedule_preparation(ir_graph *irg)
2188 {
2189         rss_t rss;
2190
2191         FIRM_DBG_REGISTER(rss.dbg, "firm.be.sched.rss");
2192
2193         //firm_dbg_set_mask(rss.dbg, LEVEL_1 | LEVEL_2 | LEVEL_3);
2194
2195         init_rss_special_nodes(irg);
2196
2197         rss.irg      = irg;
2198         rss.arch_env = be_get_irg_arch_env(irg);
2199         rss.abi      = be_get_irg_abi(irg);
2200         rss.h        = heights_new(irg);
2201         rss.nodes    = plist_new();
2202         rss.opts     = &rss_options;
2203         rss.liveness = be_liveness(irg);
2204         be_liveness_assure_sets(rss.liveness);
2205         irg_block_walk_graph(irg, NULL, process_block, &rss);
2206         heights_free(rss.h);
2207         plist_free(rss.nodes);
2208         be_liveness_free(rss.liveness);
2209
2210         if (be_get_irg_options(irg)->dump_flags & DUMP_SCHED)
2211                 dump_ir_graph(irg, "rss");
2212 }