make firm (mostly) -Wmissing-prototypes clean
[libfirm] / ir / be / beschedrss.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Implementation of a register saturating list scheduler.
23  * @author      Christian Wuerdig
24  * @date        29.08.2006
25  * @version     $Id$
26  *
27  * Implementation of a register saturating list scheduler
28  * as described in: Sid-Ahmed-Ali Touati
29  * Register Saturation in Superscalar and VLIW Codes
30  */
31 #include "config.h"
32
33 #include "beschedrss.h"
34
35 #include <limits.h>
36
37 #include "obst.h"
38 #include "debug.h"
39
40 #include "irgraph_t.h"
41 #include "irnode_t.h"
42 #include "iredges_t.h"
43 #include "ircons_t.h"
44 #include "irphase_t.h"
45 #include "irgwalk.h"
46 #include "irdump.h"
47 #include "irtools.h"
48 #include "irbitset.h"
49 #include "irprintf.h"
50 #include "irnodeset.h"
51 #include "bipartite.h"
52 #include "hungarian.h"
53 #include "plist.h"
54 #include "array_t.h"
55
56 #include "height.h"
57
58 #include "beabi.h"
59 #include "bemodule.h"
60 #include "benode.h"
61 #include "besched.h"
62 #include "beirg.h"
63 #include "belive.h"
64
65 #include "lc_opts.h"
66 #include "lc_opts_enum.h"
67
68
69 #define ARR_LEN_SAFE(arr) ((arr) != NULL ? ARR_LEN((arr)) : 0)
70
71 #define HASH_RSS_EDGE(edge) ((get_irn_node_nr((edge)->src) << 16) | (get_irn_node_nr((edge)->tgt) & 0xFFFF))
72 #define BSEARCH_IRN_ARR(val, arr) \
73         bsearch(&(val), (arr), ARR_LEN_SAFE((arr)), sizeof((arr)[0]), cmp_irn_idx)
74
75 #define BLOCK_IDX_MAP(rss, irn) bsearch_for_index(get_irn_idx((irn)), (rss)->idx_map, ARR_LEN_SAFE((rss)->idx_map), 1)
76
77 /* the rss options */
78 typedef struct _rss_opts_t {
79         int dump_flags;
80 } rss_opts_t;
81
82 /* Represents a child with associated costs */
83 typedef struct _child {
84         ir_node *irn;
85         float   cost;
86 } child_t;
87
88 /* We need edges for several purposes. */
89 typedef struct _rss_edge {
90         ir_node *src;
91         ir_node *tgt;
92         void    *next;
93 } rss_edge_t;
94
95 /* Represents a connected bipartite component. */
96 typedef struct _cbc {
97         ir_nodeset_t parents;       /**< = S  a set of value producers */
98         ir_nodeset_t children;      /**< = T  a set of value consumers */
99         pset    *kill_edges;    /**< = E  a set of edges (t in T, s in S) such as each s in S gets killed by at least one t in T */
100         int     nr;             /**< a deterministic index for set insertion (used as hash) */
101 } cbc_t;
102
103 /* Represents a disjoint value DAG. */
104 typedef struct _dvg {
105         ir_nodeset_t nodes;
106         pset    *edges;
107 } dvg_t;
108
109 /* Represents a chain of nodes. */
110 typedef struct _chain {
111         plist_t *elements;   /**< List of chain elements */
112         int     nr;          /**< a deterministic index for set insertion (used as hash) */
113 } chain_t;
114
115 typedef struct _rss_irn {
116         plist_t  *consumer_list;    /**< List of consumers */
117         const ir_node **consumer;   /**< Sorted consumer array (needed for faster access) */
118
119         plist_t  *parent_list;      /**< List of parents */
120         plist_t  *pkiller_list;     /**< List of potential killers */
121
122         plist_t  *descendant_list;  /**< List of descendants */
123         const ir_node **descendants;      /**< Sorted descendant array (needed for faster access) */
124
125         const ir_node  *killer;     /**< The selected unique killer */
126         const ir_node  *irn;        /**< The corresponding firm node to this rss_irn */
127
128         chain_t  *chain;            /**< The chain, this node is associated to */
129
130         unsigned desc_walk;         /**< visited flag for collecting descendants */
131         unsigned kill_count;        /**< number of nodes killed by this one */
132
133         unsigned live_out : 1;      /**< irn has consumers outside of it's block */
134         unsigned visited  : 1;      /**< visited flag for bipartite decomposition */
135         unsigned havecons : 1;      /**< visited flag collect consumer */
136         unsigned handled  : 1;      /**< flag indicating whether or not the list structures have been build */
137         unsigned dumped   : 1;      /**< flag indication whether or not this node was dumped */
138 } rss_irn_t;
139
140 /* Represents a serialization edge with associated costs. */
141 typedef struct _serialization {
142         rss_irn_t  *u;       /* the top node */
143         rss_irn_t  *v;       /* the node about to be serialized after u */
144         rss_edge_t *edge;    /* the edge selected for the serialization */
145         int        omega1;   /* estimated: available regs - RS reduction */
146         int        omega2;   /* increase in critical path length */
147         int        new_killer;
148 } serialization_t;
149
150 typedef struct _rss {
151         ir_phase          ph;              /**< Phase to hold some data */
152         heights_t        *h;              /**< The current height object */
153         ir_graph         *irg;            /**< The irg to preprocess */
154         plist_t          *nodes;          /**< The list of interesting nodes */
155         const arch_env_t *arch_env;       /**< The architecture environment */
156         be_abi_irg_t     *abi;            /**< The abi for this irg */
157         pset             *cbc_set;        /**< A set of connected bipartite components */
158         ir_node          *block;          /**< The current block in progress. */
159         int              *idx_map;        /**< Mapping irn indices to per block indices */
160         unsigned         max_height;      /**< maximum height in the current block */
161         rss_opts_t       *opts;           /**< The options */
162         be_lv_t          *liveness;       /**< The liveness information for this irg */
163         ir_nodeset_t      live_block;     /**< Values alive at end of block */
164         const arch_register_class_t *cls; /**< The current register class */
165         DEBUG_ONLY(firm_dbg_module_t *dbg);
166 } rss_t;
167
168 #define get_rss_irn(rss, irn)  (phase_get_or_set_irn_data(&rss->ph, irn))
169
170 /**
171  * We need some special nodes:
172  * a source and a sink for all live-in and live-out values of a block
173  */
174 enum {
175         iro_rss_Source,
176         iro_rss_Sink,
177         iro_rss_last
178 };
179
180 /** The opcode of the rss_Source node once allocated. */
181 static ir_op *op_rss_Source;
182 /** The opcode of the rss_Sink node once allocated. */
183 static ir_op *op_rss_Sink;
184
185 /** The rss_Source node of the current graph. */
186 static ir_node *_source = NULL;
187 /** The rss_Sink node of the current graph. */
188 static ir_node *_sink   = NULL;
189
190 #define is_Source(irn) ((irn) == _source)
191 #define is_Sink(irn)   ((irn) == _sink)
192
193 enum {
194         RSS_DUMP_NONE  = 0,
195         RSS_DUMP_CBC   = 1 << 0,
196         RSS_DUMP_PKG   = 1 << 1,
197         RSS_DUMP_KILL  = 1 << 2,
198         RSS_DUMP_DVG   = 1 << 3,
199         RSS_DUMP_MAXAC = 1 << 4,
200         RSS_DUMP_ALL   = (RSS_DUMP_MAXAC << 1) - 1,
201 };
202
203 static rss_opts_t rss_options = {
204         RSS_DUMP_NONE,
205 };
206
207 static const lc_opt_enum_int_items_t dump_items[] = {
208         { "none",  RSS_DUMP_NONE  },
209         { "cbc",   RSS_DUMP_CBC   },
210         { "pkg",   RSS_DUMP_PKG   },
211         { "kill",  RSS_DUMP_KILL  },
212         { "dvg",   RSS_DUMP_DVG   },
213         { "maxac", RSS_DUMP_MAXAC },
214         { "all",   RSS_DUMP_ALL   },
215         { NULL, 0 }
216 };
217
218 static lc_opt_enum_int_var_t dump_var = {
219         &rss_options.dump_flags, dump_items
220 };
221
222 static const lc_opt_table_entry_t rss_option_table[] = {
223         LC_OPT_ENT_ENUM_MASK("dump", "dump phases", &dump_var),
224         LC_OPT_LAST
225 };
226
227 /******************************************************************************
228  *  _          _                    __                  _   _
229  * | |        | |                  / _|                | | (_)
230  * | |__   ___| |_ __   ___ _ __  | |_ _   _ _ __   ___| |_ _  ___  _ __  ___
231  * | '_ \ / _ \ | '_ \ / _ \ '__| |  _| | | | '_ \ / __| __| |/ _ \| '_ \/ __|
232  * | | | |  __/ | |_) |  __/ |    | | | |_| | | | | (__| |_| | (_) | | | \__ \
233  * |_| |_|\___|_| .__/ \___|_|    |_|  \__,_|_| |_|\___|\__|_|\___/|_| |_|___/
234  *            | |
235  *            |_|
236  ******************************************************************************/
237
238 /**
239  * Acquire opcodes if needed and create source and sink nodes.
240  */
241 static void init_rss_special_nodes(ir_graph *irg)
242 {
243         ir_node *block;
244
245         if (op_rss_Source == NULL) {
246                 int iro_rss_base = get_next_ir_opcodes(iro_rss_last);
247                 op_rss_Source = new_ir_op(iro_rss_base + iro_rss_Source, "rss_Source", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
248                 op_rss_Sink   = new_ir_op(iro_rss_base + iro_rss_Sink,   "rss_Sink",   op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
249         }
250         block   = get_irg_start_block(irg);
251         _source = new_ir_node(NULL, irg, block, op_rss_Source, mode_ANY, 0, NULL);
252         _sink   = new_ir_node(NULL, irg, block, op_rss_Sink, mode_ANY, 0, NULL);
253 }
254
255 static int cmp_int(const void *a, const void *b)
256 {
257         const int *i1 = a;
258         const int *i2 = b;
259
260         return QSORT_CMP(*i1, *i2);
261 }
262
263 static int cmp_child_costs(const void *a, const void *b)
264 {
265         const child_t *c1 = a;
266         const child_t *c2 = b;
267
268         return QSORT_CMP(c1->cost, c2->cost);
269 }
270
271 static int cmp_irn_idx(const void *a, const void *b)
272 {
273         const ir_node *n1 = *(ir_node **)a;
274         const ir_node *n2 = *(ir_node **)b;
275
276         return QSORT_CMP(get_irn_idx(n1), get_irn_idx(n2));
277 }
278
279 static int cmp_rss_edges(const void *a, const void *b)
280 {
281         const rss_edge_t *e1 = a;
282         const rss_edge_t *e2 = b;
283
284         return (e1->src != e2->src) || (e1->tgt != e2->tgt);
285 }
286
287 static int bsearch_for_index(int key, int *arr, size_t len, int force)
288 {
289         int left = 0;
290         int right = len;
291
292         while (right >= left) {
293                 int idx = (left + right) / 2;
294
295                 if (key < arr[idx])
296                         right = idx - 1;
297                 else if (key > arr[idx])
298                         left = idx + 1;
299                 else
300                         return idx;
301         }
302
303         if (force)
304                 assert(0 && "Something is wrong, key not found.");
305         return -1;
306 }
307
308 static const ir_node **build_sorted_array_from_list(plist_t *irn_list, struct obstack *obst)
309 {
310         plist_element_t *el;
311         int             i   = 0;
312         int             len = plist_count(irn_list);
313         const ir_node   **arr = (const ir_node**)NEW_ARR_D(ir_node*, obst, len);
314
315         /* copy the list into the array */
316         foreach_plist(irn_list, el) {
317                 arr[i++] = plist_element_get_value(el);
318         }
319
320         /* sort the array by node index */
321         /* HACK cast for MSVC */
322         qsort((void*)arr, len, sizeof(arr[0]), cmp_irn_idx);
323
324         return arr;
325 }
326
327 /*****************************************************
328  *      _      _                       _
329  *     | |    | |                     (_)
330  *   __| | ___| |__  _   _  __ _  __ _ _ _ __   __ _
331  *  / _` |/ _ \ '_ \| | | |/ _` |/ _` | | '_ \ / _` |
332  * | (_| |  __/ |_) | |_| | (_| | (_| | | | | | (_| |
333  *  \__,_|\___|_.__/ \__,_|\__, |\__, |_|_| |_|\__, |
334  *                          __/ | __/ |         __/ |
335  *                         |___/ |___/         |___/
336  *
337  *****************************************************/
338
339 #ifdef DEBUG_libfirm
340 static void dump_nodeset(ir_nodeset_t *ns, const char *prefix)
341 {
342         ir_nodeset_iterator_t iter;
343         ir_node *irn;
344
345         ir_nodeset_iterator_init(&iter, ns);
346         while ( (irn = ir_nodeset_iterator_next(&iter)) != NULL ) {
347                 ir_fprintf(stderr, "%s%+F\n", prefix, irn);
348         }
349 }
350 #endif
351
352 static void build_file_name(rss_t *rss, const char *suffix, size_t suf_len, char *buf, size_t len)
353 {
354         const char *irg_name;
355
356         memset(buf, 0, len);
357         irg_name = get_entity_name(get_irg_entity(rss->irg));
358         snprintf(buf, len - suf_len, "%s-%s-block-%ld",
359                 irg_name, arch_register_class_name(rss->cls), get_irn_node_nr(rss->block));
360         strcat(buf, suffix);
361 }
362
363 /* Dumps all collected bipartite components of current irg as vcg. */
364 static void debug_vcg_dump_bipartite(rss_t *rss)
365 {
366         cbc_t *cbc;
367         FILE  *f;
368         char  file_name[256];
369         static const char suffix[] = "-RSS-CBC.vcg";
370
371         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
372         f = fopen(file_name, "w");
373
374         ir_fprintf(f, "graph: { title: \"connected bipartite component graph of %+F\"\n", rss->irg);
375         fprintf(f, "display_edge_labels: no\n");
376         fprintf(f, "layoutalgorithm: mindepth\n");
377         fprintf(f, "manhattan_edges: yes\n\n");
378
379         foreach_pset(rss->cbc_set, cbc) {
380                 ir_nodeset_iterator_t iter;
381                 ir_node    *n;
382                 rss_edge_t *ke;
383
384                 fprintf(f, "graph: { titel: \"cbc %d\" label: \"cbc %d\" status:clustered color:yellow\n", cbc->nr, cbc->nr);
385                 foreach_ir_nodeset(&cbc->parents, n, iter) {
386                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
387                 }
388                 foreach_ir_nodeset(&cbc->children, n, iter) {
389                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
390                 }
391                 foreach_pset(cbc->kill_edges, ke) {
392                         ir_fprintf(f, "edge: { sourcename: \"n%d_%d\" targetname: \"n%d_%d\" }\n",
393                                 get_irn_node_nr(ke->src), cbc->nr, get_irn_node_nr(ke->tgt), cbc->nr);
394                 }
395                 fprintf(f, "}\n\n");
396         }
397         fprintf(f, "}\n");
398         fclose(f);
399 }
400
401 /* Dump the computed killing function as vcg. */
402 static void debug_vcg_dump_kill(rss_t *rss)
403 {
404         FILE            *f;
405         char            file_name[256];
406         plist_element_t *el;
407         static const char suffix[] = "-RSS-KILL.vcg";
408
409         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
410         f = fopen(file_name, "w");
411
412         ir_fprintf(f, "graph: { title: \"computed kill graph of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
413         fprintf(f, "display_edge_labels: no\n");
414         fprintf(f, "layoutalgorithm: mindepth\n");
415         fprintf(f, "manhattan_edges: yes\n\n");
416
417         {
418                 /* reset dump_flag */
419                 ir_node *irn;
420                 foreach_phase_irn(&rss->ph, irn) {
421                         rss_irn_t *node = get_rss_irn(rss, irn);
422                         node->dumped = 0;
423                 }
424         }
425
426         /* dump all nodes and their killers */
427         foreach_plist(rss->nodes, el) {
428                 ir_node   *irn     = plist_element_get_value(el);
429                 rss_irn_t *rirn    = get_rss_irn(rss, irn);
430                 rss_irn_t *pk_rirn = get_rss_irn(rss, rirn->killer);
431
432                 if (! rirn->dumped) {
433                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
434                         rirn->dumped = 1;
435                 }
436
437                 if (! pk_rirn->dumped) {
438                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(rirn->killer), rirn->killer);
439                         pk_rirn->dumped = 1;
440                 }
441
442                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
443                         get_irn_node_nr(rirn->killer), get_irn_node_nr(irn));
444         }
445
446         fprintf(f, "}\n");
447         fclose(f);
448 }
449
450 /* Dumps the potential killing DAG (PKG) as vcg. */
451 static void debug_vcg_dump_pkg(rss_t *rss, ir_nodeset_t *max_ac, int iteration)
452 {
453         FILE              *f;
454         char              file_name[256];
455         char              suffix[32];
456         static const char suffix1[] = "-RSS-PKG.vcg";
457         static const char suffix2[] = "-RSS-PKG-MAXAC.vcg";
458         plist_element_t   *el;
459
460         if (! max_ac) {
461                 snprintf(suffix, sizeof(suffix), "%s", suffix1);
462         }
463         else {
464                 snprintf(suffix, sizeof(suffix), "-%02d%s", iteration, suffix2);
465         }
466
467         build_file_name(rss, suffix, strlen(suffix) + 1, file_name, sizeof(file_name));
468         f = fopen(file_name, "w");
469
470         ir_fprintf(f, "graph: { title: \"potential killing DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
471         fprintf(f, "display_edge_labels: no\n");
472         fprintf(f, "layoutalgorithm: mindepth\n");
473         fprintf(f, "manhattan_edges: yes\n\n");
474
475         {
476                 /* reset dump_flag */
477                 ir_node *irn;
478                 foreach_phase_irn(&rss->ph, irn) {
479                         rss_irn_t *node = get_rss_irn(rss, irn);
480                         node->dumped = 0;
481                 }
482         }
483
484         foreach_plist(rss->nodes, el) {
485                 ir_node   *irn  = plist_element_get_value(el);
486                 rss_irn_t *rirn = get_rss_irn(rss, irn);
487                 char      *c1   = "";
488                 plist_element_t *k_el;
489
490                 /* dump selected saturating values in yellow */
491                 if (max_ac && ir_nodeset_contains(max_ac, irn))
492                         c1 = "color:yellow";
493
494                 if (! rirn->dumped) {
495                         if (rirn->chain)
496                                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(irn), irn, rirn->chain->nr, c1);
497                         else
498                                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(irn), irn, c1);
499                         rirn->dumped = 1;
500                 }
501
502                 foreach_plist(rirn->pkiller_list, k_el) {
503                         ir_node   *pkiller = plist_element_get_value(k_el);
504                         rss_irn_t *pk_rirn = get_rss_irn(rss, pkiller);
505                         char      *c2      = "";
506
507                         if (max_ac && ir_nodeset_contains(max_ac, pkiller))
508                                 c2 = "color:yellow";
509
510                         if (! pk_rirn->dumped) {
511                                 if (pk_rirn->chain)
512                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(pkiller), pkiller, pk_rirn->chain->nr, c2);
513                                 else
514                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(pkiller), pkiller, c2);
515                                 pk_rirn->dumped = 1;
516                         }
517                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
518                                 get_irn_node_nr(pkiller), get_irn_node_nr(irn));
519                 }
520         }
521         fprintf(f, "}\n");
522         fclose(f);
523 }
524
525 /* Dumps the disjoint value DAG (DVG) as vcg. */
526 static void debug_vcg_dump_dvg(rss_t *rss, dvg_t *dvg)
527 {
528         static const char suffix[] = "-RSS-DVG.vcg";
529         FILE       *f;
530         char       file_name[256];
531         ir_nodeset_iterator_t iter;
532         ir_node    *irn;
533         rss_edge_t *edge;
534
535         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
536         f = fopen(file_name, "w");
537
538         ir_fprintf(f, "graph: { title: \"disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
539         fprintf(f, "display_edge_labels: no\n");
540         fprintf(f, "layoutalgorithm: mindepth\n");
541         fprintf(f, "manhattan_edges: yes\n\n");
542
543         /* dump all nodes */
544         foreach_ir_nodeset(&dvg->nodes, irn, iter) {
545                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
546         }
547
548         /* dump all edges */
549         foreach_pset(dvg->edges, edge) {
550                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
551                         get_irn_node_nr(edge->src), get_irn_node_nr(edge->tgt));
552         }
553
554         fprintf(f, "}\n");
555         fclose(f);
556 }
557
558 #if 0
559 /* Dumps the PKG(DVG). */
560 static void debug_vcg_dump_dvg_pkiller(rss_t *rss, dvg_t *dvg)
561 {
562         static const char suffix[] = "-RSS-DVG-PKG.vcg";
563         FILE       *f;
564         char       file_name[256];
565         ir_nodeset_iterator_t iter;
566         ir_node    *irn;
567
568         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
569         f = fopen(file_name, "w");
570
571         ir_fprintf(f, "graph: { title: \"PKG of disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
572         fprintf(f, "display_edge_labels: no\n");
573         fprintf(f, "layoutalgorithm: mindepth\n");
574         fprintf(f, "manhattan_edges: yes\n\n");
575
576         /* dump all nodes */
577         foreach_ir_nodeset(&dvg->nodes, irn, iter) {
578                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
579         }
580
581         /* dump all edges */
582         foreach_ir_nodeset(&dvg->nodes, irn, iter) {
583                 rss_irn_t       *node = get_rss_irn(rss, irn);
584                 plist_element_t *el;
585
586                 foreach_plist(node->dvg_pkiller_list, el) {
587                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
588                                 get_irn_node_nr(plist_element_get_value(el)), get_irn_node_nr(irn));
589                 }
590         }
591
592         fprintf(f, "}\n");
593         fclose(f);
594 }
595 #endif /* if 0 */
596
597 /**
598  * In case there is no rss information for irn, initialize it.
599  */
600 static void *init_rss_irn(ir_phase *ph, const ir_node *irn, void *old)
601 {
602         rss_irn_t *res = old ? old : phase_alloc(ph, sizeof(res[0]));
603
604         res->descendant_list = plist_obstack_new(phase_obst(ph));
605         res->descendants     = NULL;
606
607         res->consumer_list   = plist_obstack_new(phase_obst(ph));
608         res->consumer        = NULL;
609
610         res->pkiller_list    = plist_obstack_new(phase_obst(ph));
611
612         res->parent_list     = plist_obstack_new(phase_obst(ph));
613
614         res->killer          = NULL;
615         res->irn             = irn;
616         res->chain           = NULL;
617
618         res->kill_count      = 0;
619         res->desc_walk       = 0;
620         res->live_out        = 0;
621         res->visited         = 0;
622         res->handled         = 0;
623         res->dumped          = 0;
624         res->havecons        = 0;
625
626         return res;
627 }
628
629 /**
630  * Collect all nodes data dependent on current node.
631  */
632 static void collect_descendants(rss_t *rss, rss_irn_t *rirn, ir_node *irn, int *got_sink, unsigned cur_desc_walk)
633 {
634         const ir_edge_t *edge;
635         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
636         ir_node         *block    = rss->block;
637         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
638         int             i;
639
640         if (cur_node->desc_walk >= cur_desc_walk)
641                 return;
642         cur_node->desc_walk = cur_desc_walk;
643
644         /* Stop at Barriers */
645         if (be_is_Barrier(irn))
646                 return;
647
648         /* loop over normal and dependency edges */
649         for (i = 0; i < 2; ++i) {
650                 foreach_out_edge_kind(irn, edge, ekind[i]) {
651                         ir_node *user = get_edge_src_irn(edge);
652
653                         /* skip ignore nodes as they do not really contribute to register pressure */
654                         if (arch_irn_is_ignore(user))
655                                 continue;
656
657                         /*
658                                 (a) mode_X means Jump            -> out_edge
659                                 (b) Phi as user of a normal node -> out-edge
660                         */
661                         if (get_irn_mode(user) == mode_X || is_Phi(user)) {
662                                 if (! *got_sink)
663                                         goto force_sink;
664                                 else
665                                         continue;
666                         }
667
668                         if (is_Proj(user)) {
669                                 //if (arch_get_irn_reg_class_out(user) == rss->cls)
670                                         collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
671                         }
672                         else {
673                                 /* check if user lives in block and is not a control flow node */
674                                 if (get_nodes_block(user) == block) {
675                                         if (! plist_has_value(rirn->descendant_list, user)) {
676                                                 plist_insert_back(rirn->descendant_list, user);
677                                                 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", user));
678                                         }
679                                         collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
680                                 }
681                                 else if (! *got_sink) {
682 force_sink:
683                                         /* user lives out of block: add sink as descendant if not already done */
684                                         plist_insert_back(rirn->descendant_list, _sink);
685                                         *got_sink = 1;
686                                         DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", _sink));
687                                 }
688                         }
689                 }
690         }
691 }
692
693 /**
694  * Handles a single consumer.
695  */
696 static void collect_single_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *consumer, int *got_sink)
697 {
698         ir_node *block = rss->block;
699
700         assert(! is_Proj(consumer) && "Cannot handle Projs");
701
702         if (! is_Phi(consumer) && ! is_Block(consumer) && get_nodes_block(consumer) == block) {
703                 if (!arch_irn_is_ignore(consumer) &&
704                                 !plist_has_value(rss_irn->consumer_list, consumer)) {
705                         plist_insert_back(rss_irn->consumer_list, consumer);
706                         DBG((rss->dbg, LEVEL_2, "\t\tconsumer %+F\n", consumer));
707                 }
708         }
709         else {
710                 rss_irn->live_out = 1;
711                 DBG((rss->dbg, LEVEL_2, "\t\tlive out %+F", consumer));
712                 if (! *got_sink) {
713                         plist_insert_back(rss_irn->consumer_list, _sink);
714                         *got_sink = 1;
715                         DB((rss->dbg, LEVEL_2, ", %+F added instead", _sink));
716                 }
717                 DB((rss->dbg, LEVEL_2, "\n"));
718         }
719 }
720
721 /**
722  * Collect all nodes consuming the value(s) produced by current node.
723  */
724 static void collect_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *irn, int *got_sink)
725 {
726         const ir_edge_t *edge;
727         int             i;
728         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
729         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
730
731         if (cur_node->havecons)
732                 return;
733         cur_node->havecons = 1;
734
735         for (i = 0; i < 2; ++i) {
736                 foreach_out_edge_kind(irn, edge, ekind[i]) {
737                         ir_node *consumer = get_edge_src_irn(edge);
738
739                         if (is_Proj(consumer)) {
740                                 //if (arch_get_irn_reg_class_out(consumer) == rss->cls)
741                                         collect_consumer(rss, rss_irn, consumer, got_sink);
742                         }
743                         else
744                                 collect_single_consumer(rss, rss_irn, consumer, got_sink);
745                 }
746         }
747 }
748
749 /**
750  * Collects all consumer and descendant of a irn.
751  */
752 static void collect_node_info(rss_t *rss, ir_node *irn)
753 {
754         static unsigned cur_desc_walk = 0;
755         rss_irn_t       *rss_irn      = get_rss_irn(rss, irn);
756         int             got_sink;
757
758         assert(! is_Proj(irn) && "Cannot handle Projs.");
759
760         /* check if node info is already available */
761         if (rss_irn->handled)
762                 return;
763                 //phase_reinit_single_irn_data(&rss->ph, irn);
764
765         DBG((rss->dbg, LEVEL_1, "\tcomputing consumers of %+F:\n", irn));
766
767         /* collect all consumer */
768         got_sink = 0;
769         collect_consumer(rss, rss_irn, irn, &got_sink);
770
771         /* build sorted consumer array */
772         rss_irn->consumer = build_sorted_array_from_list(rss_irn->consumer_list, phase_obst(&rss->ph));
773
774         DBG((rss->dbg, LEVEL_1, "\tcompute descendants of %+F:\n", irn));
775
776         /* collect descendants */
777         got_sink = 0;
778         collect_descendants(rss, rss_irn, irn, &got_sink, ++cur_desc_walk);
779
780         /* build sorted descendant array */
781         rss_irn->descendants = build_sorted_array_from_list(rss_irn->descendant_list, phase_obst(&rss->ph));
782
783         rss_irn->handled = 1;
784 }
785
786 /**
787  * Checks if v is a potential killer of u.
788  * v is in pkill(u) iff descendants(v) cut consumer(u) is v
789  *
790  * @param rss   The rss object
791  * @param v      The node to check for killer
792  * @param u      The potentially killed value
793  * @return 1 if v is in pkill(u), 0 otherwise
794  */
795 static int is_potential_killer(rss_t *rss, rss_irn_t *v, rss_irn_t *u)
796 {
797         plist_t *list;
798         const ir_node **arr;
799         plist_element_t *el;
800         (void) rss;
801
802         assert(is_Sink(v->irn) || ((plist_count(v->descendant_list) > 0 && v->descendants) || 1));
803         assert(is_Sink(u->irn) || ((plist_count(u->consumer_list)   > 0 && u->consumer)    || 1));
804
805         /* as we loop over the list: loop over the shorter one */
806         if (plist_count(v->descendant_list) > plist_count(u->consumer_list)) {
807                 list = u->consumer_list;
808                 arr  = v->descendants;
809         }
810         else {
811                 list = v->descendant_list;
812                 arr  = u->consumer;
813         }
814
815         /* for each list element: try to find element in array */
816         foreach_plist(list, el) {
817                 ir_node *irn   = plist_element_get_value(el);
818                 ir_node *match = BSEARCH_IRN_ARR(irn, arr);
819
820                 if (match && match != irn)
821                         return 0;
822         }
823
824         return 1;
825 }
826
827 /**
828  * Update descendants, consumer and pkiller list for the given irn.
829  */
830 static void update_node_info(rss_t *rss, ir_node *irn, ir_node *pk_irn)
831 {
832         rss_irn_t *node    = get_rss_irn(rss, irn);
833         rss_irn_t *pkiller = get_rss_irn(rss, pk_irn);
834
835         /* add consumer and rebuild the consumer array */
836         if (! plist_has_value(node->consumer_list, pk_irn)) {
837                 plist_insert_back(node->consumer_list, pk_irn);
838                 node->consumer = build_sorted_array_from_list(node->consumer_list, phase_obst(&rss->ph));
839         }
840
841         /* add pk_irn as descendant, add it's descendants to irn's and rebuild array */
842         if (! plist_has_value(node->descendant_list, pk_irn)) {
843                 plist_element_t *el;
844
845                 plist_insert_back(node->descendant_list, pk_irn);
846
847                 foreach_plist(pkiller->descendant_list, el) {
848                         ir_node *desc = plist_element_get_value(el);
849
850                         if (! plist_has_value(node->descendant_list, desc))
851                                 plist_insert_back(node->descendant_list, desc);
852                 }
853
854                 node->descendants = build_sorted_array_from_list(node->descendant_list, phase_obst(&rss->ph));
855         }
856
857 }
858
859 /**
860  * Compute the potential killing set PK.
861  */
862 static void compute_pkill_set(rss_t *rss)
863 {
864         plist_element_t *u_el, *v_el;
865
866         foreach_plist(rss->nodes, u_el) {
867                 ir_node   *u_irn = plist_element_get_value(u_el);
868                 rss_irn_t *u     = get_rss_irn(rss, u_irn);
869
870                 DBG((rss->dbg, LEVEL_1, "\tcomputing potential killers of %+F:\n", u_irn));
871
872                 /* check each consumer if it is a potential killer  */
873                 foreach_plist(u->consumer_list, v_el) {
874                         ir_node   *v_irn = plist_element_get_value(v_el);
875                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
876
877                         /* check, if v is a potential killer of u */
878                         if (is_potential_killer(rss, v, u)) {
879                                 if (! plist_has_value(u->pkiller_list, v_irn))
880                                         plist_insert_back(u->pkiller_list, v_irn);
881                                 v->kill_count++;
882                                 DBG((rss->dbg, LEVEL_2, "\t\tpotential killer %+F\n", v_irn));
883                         }
884                 }
885
886                 u->killer = _sink;
887         }
888
889         if (rss->opts->dump_flags & RSS_DUMP_PKG)
890                 debug_vcg_dump_pkg(rss, NULL, 0);
891 }
892
893 /**
894  * Build set of killing edges (from values to their potential killers)
895  */
896 static void build_kill_edges(rss_t *rss, pset *epk)
897 {
898         plist_element_t *el, *k_el;
899
900         foreach_plist(rss->nodes, el) {
901                 ir_node    *irn  = plist_element_get_value(el);
902                 rss_irn_t *rirn = get_rss_irn(rss, irn);
903
904                 DBG((rss->dbg, LEVEL_3, "\t\tbuilding kill edges for %+F:\n", irn));
905
906                 foreach_plist(rirn->pkiller_list, k_el) {
907                         ir_node    *pkiller = plist_element_get_value(k_el);
908                         rss_edge_t *ke      = OALLOC(phase_obst(&rss->ph), rss_edge_t);
909
910                         ke->src  = irn;
911                         ke->tgt  = pkiller;
912                         ke->next = NULL;
913
914                         DBG((rss->dbg, LEVEL_3, "\t\t\ttarget %+F\n", pkiller));
915
916                         pset_insert(epk, ke, HASH_RSS_EDGE(ke));
917                 }
918         }
919 }
920
921 #ifdef DEBUG_libfirm
922 /* print the given cbc for debugging purpose */
923 static void debug_print_cbc(firm_dbg_module_t *mod, cbc_t *cbc)
924 {
925         ir_nodeset_iterator_t iter;
926         ir_node    *n;
927         rss_edge_t *ke;
928
929         DBG((mod, LEVEL_3, "\t\tS = set of parents:\n"));
930         foreach_ir_nodeset(&cbc->parents, n, iter) {
931                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
932         }
933         DBG((mod, LEVEL_3, "\t\tT = set of children:\n"));
934         foreach_ir_nodeset(&cbc->children, n, iter) {
935                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
936         }
937         DBG((mod, LEVEL_3, "\t\tE = Edges from producers to consumers\n"));
938         foreach_pset(cbc->kill_edges, ke) {
939                 DBG((mod, LEVEL_3, "\t\t\t%+F -> %+F\n", ke->src, ke->tgt));
940         }
941 }
942 #endif
943
944 /**
945  * Construct the bipartite decomposition.
946  * Sid-Ahmed-Ali Touati, Phd Thesis
947  * Register Pressure in Instruction Level Parallelism, p. 71
948  */
949 static void compute_bipartite_decomposition(rss_t *rss)
950 {
951         pset *epk    = new_pset(cmp_rss_edges, 10);
952         int  cur_num = 0;
953
954         plist_element_t *el;
955
956         DBG((rss->dbg, LEVEL_1, "\tcomputing bipartite decomposition:\n"));
957
958         build_kill_edges(rss, epk);
959
960         foreach_plist(rss->nodes, el) {
961                 ir_node   *u_irn   = plist_element_get_value(el);
962                 rss_irn_t *u       = get_rss_irn(rss, u_irn);
963                 int       p_change = 1;
964                 int       c_change = 1;
965                 int       vrfy_ok  = 1;
966
967                 cbc_t           *cbc;
968                 plist_element_t *el2;
969                 rss_edge_t      *k_edge;
970                 rss_edge_t      *kedge_root = NULL;
971                 ir_node         *t_irn, *s_irn;
972                 ir_nodeset_iterator_t iter;
973
974                 if (u->visited || u_irn == _sink)
975                         continue;
976
977                 DBG((rss->dbg, LEVEL_2, "\t\t%+F choosen:\n", u_irn));
978
979                 cbc     = OALLOC(phase_obst(&rss->ph), cbc_t);
980                 cbc->nr = cur_num++;
981
982                 /* initialize S_cb */
983                 ir_nodeset_init_size(&cbc->parents, 5);
984                 ir_nodeset_insert(&cbc->parents, u_irn);
985                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (init)\n", u_irn));
986
987                 /* E_cb = empty */
988                 cbc->kill_edges = new_pset(cmp_rss_edges, 5);
989
990                 /* each parent gets killed by at least one value from children */
991
992                 /* T_cb = PK_successors(u) */
993                 ir_nodeset_init_size(&cbc->children, 5);
994                 foreach_plist(u->pkiller_list, el2) {
995                         ir_nodeset_insert(&cbc->children, plist_element_get_value(el2));
996                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (init)\n", plist_element_get_value(el2)));
997                 }
998
999                 /*
1000                         Now: insert the parents of all children into the parent set
1001                         and insert the children of all parents into the children set
1002                         until the sets don't change any more
1003                 */
1004                 while (p_change || c_change) {
1005                         ir_nodeset_iterator_t iter;
1006                         p_change = c_change = 0;
1007
1008                         /* accumulate parents */
1009                         foreach_ir_nodeset(&cbc->children, t_irn, iter) {
1010                                 foreach_pset(epk, k_edge) {
1011                                         ir_node *val = k_edge->src;
1012
1013                                         if (k_edge->tgt == t_irn && ! ir_nodeset_contains(&cbc->parents, val)) {
1014                                                 ir_nodeset_insert(&cbc->parents, val);
1015                                                 p_change = 1;
1016                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (killed by %+F)\n", val, t_irn));
1017                                         }
1018                                 }
1019                         }
1020
1021                         /* accumulate children */
1022                         foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
1023                                 foreach_pset(epk, k_edge) {
1024                                         ir_node *val = k_edge->tgt;
1025
1026                                         if (k_edge->src == s_irn  && ! ir_nodeset_contains(&cbc->children, val)) {
1027                                                 ir_nodeset_insert(&cbc->children, val);
1028                                                 c_change = 1;
1029                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (kills %+F)\n", val, s_irn));
1030                                         }
1031                                 }
1032                         }
1033                 }
1034
1035                 /* mark all parent values as visited */
1036                 foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
1037                         rss_irn_t *s = get_rss_irn(rss, s_irn);
1038                         s->visited = 1;
1039                         /* assure bipartite property */
1040 #if 0
1041                         if (ir_nodeset_contains(&cbc->children, s_irn)) {
1042                                 ir_nodeset_remove(&cbc->children, s_irn);
1043                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F removed from to children\n", s_irn));
1044                         }
1045 #endif
1046                 }
1047
1048                 /* update edges */
1049                 foreach_pset(epk, k_edge) {
1050                         if (ir_nodeset_contains(&cbc->parents, k_edge->src) &&
1051                             ir_nodeset_contains(&cbc->children, k_edge->tgt)) {
1052                                 pset_insert(cbc->kill_edges, k_edge, HASH_RSS_EDGE(k_edge));
1053                                 /*
1054                                         Link all k_edges which are about to be removed together.
1055                                         Beware: pset_remove kills the iterator.
1056                                 */
1057                                 k_edge->next = kedge_root;
1058                                 kedge_root   = k_edge;
1059                         }
1060                 }
1061
1062                 /* remove all linked k_edges */
1063                 for (; kedge_root; kedge_root = kedge_root->next) {
1064                         pset_remove(epk, kedge_root, HASH_RSS_EDGE(kedge_root));
1065                 }
1066
1067                 /* verify the cbc */
1068                 foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
1069                         int is_killed = 0;
1070
1071                         foreach_pset(cbc->kill_edges, k_edge) {
1072                                 if (k_edge->src == s_irn) {
1073                                         is_killed = 1;
1074                                         pset_break(cbc->kill_edges);
1075                                         break;
1076                                 }
1077                         }
1078
1079                         if (! is_killed) {
1080                                 vrfy_ok = 0;
1081                                 ir_fprintf(stderr, "Warning: parent %+F is not killed in current cbc\n", s_irn);
1082                         }
1083                 }
1084                 assert(vrfy_ok && "Verification of CBC failed");
1085
1086                 /* add the connected bipartite component */
1087                 if (ir_nodeset_size(&cbc->parents) > 0 &&
1088                     ir_nodeset_size(&cbc->children) > 0 &&
1089                         pset_count(cbc->kill_edges) > 0)
1090                         pset_insert(rss->cbc_set, cbc, (unsigned)cbc->nr);
1091                 DBG((rss->dbg, LEVEL_2, "\tbipartite component %d inserted:\n", cbc->nr));
1092                 DEBUG_ONLY(
1093                         if (firm_dbg_get_mask(rss->dbg) & LEVEL_2)
1094                                 debug_print_cbc(rss->dbg, cbc);
1095                 );
1096         }
1097
1098         if (rss->opts->dump_flags & RSS_DUMP_CBC)
1099                 debug_vcg_dump_bipartite(rss);
1100
1101         del_pset(epk);
1102 }
1103
1104 /**
1105  * Select the child with the maximum cost.
1106  */
1107 static child_t *select_child_max_cost(rss_t *rss, ir_nodeset_t *x, ir_nodeset_t *y, child_t *t, cbc_t *cbc)
1108 {
1109         ir_node *child;
1110         ir_nodeset_iterator_t iter;
1111         float   max_cost = -1.0f;
1112
1113         DBG((rss->dbg, LEVEL_2, "\t\tcomputing children costs:\n"));
1114
1115         foreach_ir_nodeset(&cbc->children, child, iter) {
1116                 rss_irn_t  *r_child             = get_rss_irn(rss, child);
1117                 int         num_unkilled_parents = 0;
1118                 int         num_descendants;
1119                 rss_edge_t *k_edge;
1120                 float       cost;
1121
1122                 /* get the number of unkilled parents */
1123                 foreach_pset(cbc->kill_edges, k_edge) {
1124                         if (k_edge->tgt == child && ir_nodeset_contains(x, k_edge->src))
1125                                 ++num_unkilled_parents;
1126                 }
1127
1128                 cost = (float)num_unkilled_parents;
1129
1130                 num_descendants = plist_count(r_child->descendant_list) + ir_nodeset_size(y);
1131
1132                 if (num_descendants > 0)
1133                         cost /= (float)num_descendants;
1134
1135                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F, #desc %d, cost %.3f\n", child, num_descendants, cost));
1136
1137                 if (cost > max_cost) {
1138                         t->irn   = child;
1139                         t->cost  = cost;
1140                         max_cost = cost;
1141                 }
1142         }
1143
1144         return t;
1145 }
1146
1147 /**
1148  * Remove all parents from x which are killed by t_irn.
1149  */
1150 static void remove_covered_parents(rss_t *rss, ir_nodeset_t *x, ir_node *t_irn, cbc_t *cbc)
1151 {
1152         rss_irn_t  *t = get_rss_irn(rss, t_irn);
1153         rss_edge_t *k_edge;
1154
1155         DBG((rss->dbg, LEVEL_2, "\t\tremoving parents covered by %+F:\n", t_irn));
1156
1157         foreach_pset(cbc->kill_edges, k_edge) {
1158                 if (k_edge->tgt == t_irn && ir_nodeset_contains(x, k_edge->src)) {
1159                         ir_nodeset_remove(x, k_edge->src);
1160                         plist_insert_back(t->parent_list, k_edge->src);
1161                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", k_edge->src));
1162                 }
1163         }
1164 }
1165
1166 static void update_cumulated_descendent_values(rss_t *rss, ir_nodeset_t *y, ir_node *t_irn)
1167 {
1168         rss_irn_t *t = get_rss_irn(rss, t_irn);
1169         plist_element_t *el;
1170
1171         DBG((rss->dbg, LEVEL_2, "\t\tupdating cumulated descendant value of %+F:\n", t_irn));
1172
1173         foreach_plist(t->descendant_list, el) {
1174                 ir_nodeset_insert(y, plist_element_get_value(el));
1175                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", plist_element_get_value(el)));
1176         }
1177 }
1178
1179 /**
1180  * Greedy-k: a heuristics for the MMA problem
1181  */
1182 static void compute_killing_function(rss_t *rss)
1183 {
1184         cbc_t *cbc;
1185         struct obstack obst;
1186
1187         obstack_init(&obst);
1188
1189         rss->cbc_set = pset_new_ptr(5);
1190         compute_bipartite_decomposition(rss);
1191
1192         /* for all bipartite components do: */
1193         foreach_pset(rss->cbc_set, cbc) {
1194                 ir_node *p;
1195                 ir_nodeset_t x;
1196                 ir_nodeset_t y;
1197                 ir_nodeset_iterator_t iter;
1198                 child_t **sks    = NEW_ARR_F(child_t *, 20);
1199                 int     cur_len  = 0;
1200                 int     cur_size = 20;
1201                 int     i;
1202
1203                 ir_nodeset_init_size(&x, 10);
1204                 ir_nodeset_init_size(&y, 10);
1205
1206                 DBG((rss->dbg, LEVEL_1, "\tcomputing SKS for cbc %d:\n", cbc->nr));
1207                 DBG((rss->dbg, LEVEL_2, "\t\tinitializing parents X:\n"));
1208
1209                 /* X = S_cb (all parents are initially uncovered) */
1210                 foreach_ir_nodeset(&cbc->parents, p, iter) {
1211                         ir_nodeset_insert(&x, p);
1212                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", p));
1213                 }
1214
1215                 /* while X not empty */
1216                 while (ir_nodeset_size(&x) > 0) {
1217                         child_t *t = OALLOCZ(&obst, child_t);
1218
1219                         t = select_child_max_cost(rss, &x, &y, t, cbc);
1220
1221                         if (cur_len >= cur_size) {
1222                                 ARR_EXTO(child_t *, sks, cur_size * 2);
1223                                 cur_size *= 2;
1224                         }
1225
1226                         DBG((rss->dbg, LEVEL_2, "\t\tinsert child %+F (%.3f) into SKS at pos %d\n", t->irn, t->cost, cur_len));
1227
1228                         sks[cur_len++] = t;
1229                         remove_covered_parents(rss, &x, t->irn, cbc);
1230                         update_cumulated_descendent_values(rss, &y, t->irn);
1231                 }
1232
1233                 ARR_SHRINKLEN(sks, cur_len);
1234
1235                 /* sort SKS in increasing cost order */
1236                 qsort(sks, cur_len, sizeof(sks[0]), cmp_child_costs);
1237
1238                 DBG((rss->dbg, LEVEL_2, "\tprocessing SKS for cbc %d:\n", cbc->nr));
1239
1240                 /* build killing function */
1241                 for (i = cur_len - 1; i >= 0; --i) { /* loop over sks in decreasing cost order */
1242                         child_t         *t = sks[i];
1243                         rss_irn_t       *rt = get_rss_irn(rss, t->irn);
1244                         plist_element_t *p_el;
1245
1246                         DBG((rss->dbg, LEVEL_3, "\t\t\tkiller %+F (%.3f):\n", t->irn, t->cost));
1247
1248                         /* kill all unkilled parents of t */
1249                         foreach_plist(rt->parent_list, p_el) {
1250                                 ir_node    *par = plist_element_get_value(p_el);
1251                                 rss_irn_t *rpar = get_rss_irn(rss, par);
1252
1253                                 if (is_Sink(rpar->killer)) {
1254                                         rpar->killer = t->irn;
1255                                         DBG((rss->dbg, LEVEL_2, "\t\tkill %+F\n", rpar->irn));
1256                                 }
1257                                 else {
1258                                         DBG((rss->dbg, LEVEL_3, "\t\t\tkeeping %+F as killer for %+F\n", rpar->killer, rpar->irn));
1259                                 }
1260                         }
1261                 }
1262
1263                 ir_nodeset_destroy(&x);
1264                 ir_nodeset_destroy(&y);
1265                 DEL_ARR_F(sks);
1266         }
1267
1268         if (rss->opts->dump_flags & RSS_DUMP_KILL)
1269                 debug_vcg_dump_kill(rss);
1270
1271         del_pset(rss->cbc_set);
1272         obstack_free(&obst, NULL);
1273 }
1274
1275 /**
1276  * Adds the edge src -> tgt to the dvg. Checks if reverse edge is already there (asserts).
1277  */
1278 static inline void add_dvg_edge(rss_t *rss, dvg_t *dvg, const ir_node *src, const ir_node *tgt, int have_source)
1279 {
1280         rss_edge_t *dvg_edge;
1281         rss_edge_t key;
1282
1283         if (! have_source)
1284                 ir_nodeset_insert(&dvg->nodes, (ir_node *) src);
1285         else
1286                 assert(ir_nodeset_contains(&dvg->nodes, src) && "Wrong assumption");
1287
1288         ir_nodeset_insert(&dvg->nodes, (ir_node *) tgt);
1289
1290         key.src = (ir_node *) tgt;
1291         key.tgt = (ir_node *) src;
1292         assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1293
1294         key.src = (ir_node *) src;
1295         key.tgt = (ir_node *) tgt;
1296         if (NULL != pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1297                 /* add the edge to the DVG */
1298                 dvg_edge = OALLOC(phase_obst(&rss->ph), rss_edge_t);
1299
1300                 dvg_edge->src  = (ir_node *) src;
1301                 dvg_edge->tgt  = (ir_node *) tgt;
1302                 dvg_edge->next = NULL;
1303
1304                 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", src, tgt));
1305                 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1306         }
1307 }
1308
1309 /**
1310  * Computes the disjoint value DAG (DVG).
1311  * BEWARE: It is not made explicitly clear in the Touati paper,
1312  *         but the DVG is meant to be build from the KILLING DAG
1313  */
1314 static void compute_dvg(rss_t *rss, dvg_t *dvg)
1315 {
1316         plist_element_t *el;
1317
1318         DBG((rss->dbg, LEVEL_1, "\tcomputing DVG:\n"));
1319
1320         foreach_plist(rss->nodes, el) {
1321                 ir_node    *u_irn    = plist_element_get_value(el);
1322                 rss_irn_t  *u        = get_rss_irn(rss, u_irn);
1323                 rss_irn_t  *u_killer = get_rss_irn(rss, u->killer);
1324                 int        i;
1325
1326                 /* TODO: omit nodes only having sink as pkiller and killing no one */
1327
1328                 /* add an edge to killer */
1329                 add_dvg_edge(rss, dvg, u_irn, u->killer, 0);
1330
1331                 if (is_Sink(u->killer))
1332                         continue;
1333
1334                 /* We add an edge to every killer, from where we could be reached. */
1335                 for (i = ARR_LEN_SAFE(u_killer->descendants) - 1; i >= 0; --i) {
1336                         add_dvg_edge(rss, dvg, u_irn, u_killer->descendants[i], 1);
1337                 }
1338
1339 #if 0
1340
1341                 foreach_plist(rss->nodes, el2) {
1342                         ir_node *v_irn = plist_element_get_value(el2);
1343
1344                         /*
1345                                 There is an edge (u, v) in the DVG iff v is a descendant of the killer(u).
1346                         */
1347                         if (BSEARCH_IRN_ARR(v_irn, u_kill->descendants)) {
1348                                 rss_edge_t *dvg_edge = OALLOC(phase_obst(&rss->ph), rss_edge_t);
1349                                 rss_edge_t key;
1350
1351                                 /* insert the user into the DVG and append it to the user list of u */
1352                                 ir_nodeset_insert(&dvg->nodes, v_irn);
1353                                 if (! plist_has_value(u->dvg_user_list, v_irn))
1354                                         plist_insert_back(u->dvg_user_list, v_irn);
1355
1356                                 dvg_edge->src  = u_irn;
1357                                 dvg_edge->tgt  = v_irn;
1358                                 dvg_edge->next = NULL;
1359
1360                                 key.src = v_irn;
1361                                 key.tgt = u_irn;
1362
1363                                 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1364
1365                                 /* add the edge to the DVG */
1366                                 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", u_irn, v_irn));
1367                                 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1368                         }
1369                 }
1370 #endif /* if 0 */
1371         }
1372
1373         if (rss->opts->dump_flags & RSS_DUMP_DVG)
1374                 debug_vcg_dump_dvg(rss, dvg);
1375 }
1376
1377 /**
1378  * Updates the dvg structure when a serialization edge from src -> tgt is added.
1379  */
1380 static void update_dvg(rss_t *rss, dvg_t *dvg, rss_irn_t *src, rss_irn_t *tgt)
1381 {
1382         int i, j, idx;
1383         rss_edge_t *edge;
1384         rss_edge_t **arr = ALLOCAN(rss_edge_t*, pset_count(dvg->edges));
1385
1386         /*
1387                 Add an edge from serialization target to serialization src:
1388                 src cannot be alive with target
1389         */
1390         add_dvg_edge(rss, dvg, tgt->irn, src->irn, 0);
1391
1392         /* Add edges to src's descendants as well, they also getting serialized. */
1393         for (i = ARR_LEN_SAFE(src->descendants) - 1; i >= 0; --i) {
1394                 add_dvg_edge(rss, dvg, tgt->irn, src->descendants[i], 1);
1395         }
1396
1397         /* We also have to add edges from targets predecessors in dvg */
1398         idx = 0;
1399         /* We cannot insert elements into set over which we iterate ... */
1400         foreach_pset(dvg->edges, edge) {
1401                 if (edge->tgt == tgt->irn) {
1402                         arr[idx++] = edge;
1403                 }
1404         }
1405
1406         for (i = 0; i < idx; ++i) {
1407                 edge = arr[i];
1408                 add_dvg_edge(rss, dvg, edge->src, src->irn, 1);
1409                 for (j = ARR_LEN_SAFE(src->descendants) - 1; j >= 0; --j) {
1410                         add_dvg_edge(rss, dvg, edge->src, src->descendants[j], 1);
1411                 }
1412         }
1413 }
1414
1415 #if 0
1416 /**
1417  * Accumulate all descendants for root into list.
1418  */
1419 static void accumulate_dvg_descendant_values(rss_t *rss, rss_irn_t *root, plist_t *list)
1420 {
1421         if (plist_count(root->dvg_user_list) > 0) {
1422                 plist_element_t *el;
1423
1424                 foreach_plist(root->dvg_user_list, el) {
1425                         ir_node   *v_irn = plist_element_get_value(el);
1426                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
1427
1428                         /* add v as descendant */
1429                         if (! plist_has_value(list, v_irn)) {
1430                                 plist_insert_back(list, v_irn);
1431                                 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG descendant %+F\n", v_irn));
1432                         }
1433
1434                         /* accumulate v's descendants */
1435                         accumulate_dvg_descendant_values(rss, v, list);
1436                 }
1437         }
1438 }
1439
1440 /**
1441  * Builds the list of potential killers for each node
1442  * in the given DVG.
1443  * Needs the descendant list for all user as sorted array.
1444  */
1445 static void build_dvg_pkiller_list(rss_t *rss, dvg_t *dvg)
1446 {
1447         ir_nodeset_iterator_t iter;
1448         ir_node *irn;
1449
1450         foreach_nodeset(&dvg->nodes, irn, iter) {
1451                 rss_irn_t       *node = get_rss_irn(rss, irn);
1452                 plist_element_t *el, *el2;
1453
1454                 DBG((rss->dbg, DEBUG_DVG, "\t\tbuilding pkiller list for %+F\n", irn));
1455
1456                 /* check each user */
1457                 foreach_plist(node->dvg_user_list, el) {
1458                         ir_node *u_irn = plist_element_get_value(el);
1459
1460                         /* is the current user u_irn not a descendant of any other user -> pkiller */
1461                         foreach_plist(node->dvg_user_list, el2) {
1462                                 ir_node   *v_irn = plist_element_get_value(el2);
1463                                 rss_irn_t *v     = get_rss_irn(rss, v_irn);
1464
1465                                 if (el != el2                             &&
1466                                         ! BSEARCH_IRN_ARR(u_irn, v->dvg_desc) &&
1467                                         ! plist_has_value(node->dvg_pkiller_list, u_irn))
1468                                 {
1469                                         plist_insert_back(node->dvg_pkiller_list, u_irn);
1470                                         DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG pkiller %+F\n", u_irn));
1471                                 }
1472                         }
1473                 }
1474
1475                 node->dvg_pkiller = build_sorted_array_from_list(node->dvg_pkiller_list, phase_obst(&rss->ph));
1476         }
1477
1478         DEBUG_ONLY(
1479                 if (firm_dbg_get_mask(rss->dbg) & DEBUG_DVG)
1480                         debug_vcg_dump_dvg_pkiller(rss, dvg);
1481         );
1482 }
1483
1484 #endif /* if 0 */
1485
1486 /**
1487  * Compute the maximal antichain of the current DVG.
1488  * This is a reimplementation of the MAXIMAL_ANTI_CHAIN function
1489  * from the DDG library 1.1 (DAG.cpp).
1490  */
1491 static ir_nodeset_t *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration)
1492 {
1493         int         n               = ir_nodeset_size(&dvg->nodes);
1494         int         *assignment     = ALLOCAN(int, n);
1495         int         *assignment_rev = ALLOCAN(int, n);
1496         int         *idx_map        = ALLOCAN(int, n);
1497         hungarian_problem_t *bp;
1498         ir_nodeset_t *values, *temp;
1499         ir_nodeset_iterator_t iter;
1500         ir_node     *u_irn;
1501         int         i, j, cost, cur_chain, res;
1502         rss_edge_t  *dvg_edge;
1503
1504 #define MAP_IDX(irn) bsearch_for_index(get_irn_idx(irn), idx_map,  n,  1)
1505
1506         if (pset_count(dvg->edges) == 0)
1507                 return NULL;
1508
1509         bp = hungarian_new(n, n, HUNGARIAN_MATCH_NORMAL);
1510
1511         /*
1512                 At first, we build an index map for the nodes in the DVG,
1513                 because we cannot use the irn idx therefore as the resulting
1514                 bipartite data structure would have around 1.2GB.
1515                 So we limit the size to the number of nodes we have in the DVG
1516                 and build a sorted index map for their irn indices.
1517         */
1518         i = 0;
1519         foreach_ir_nodeset(&dvg->nodes, u_irn, iter) {
1520                 idx_map[i++] = get_irn_idx(u_irn);
1521         }
1522         qsort(idx_map, n, sizeof(idx_map[0]), cmp_int);
1523
1524         foreach_pset(dvg->edges, dvg_edge) {
1525                 int idx_u = MAP_IDX(dvg_edge->src);
1526                 int idx_v = MAP_IDX(dvg_edge->tgt);
1527
1528                 /* add the entry to the bipartite data structure */
1529                 hungarian_add(bp, idx_u, idx_v, 1);
1530                 DBG((rss->dbg, LEVEL_3, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1531                         idx_u, dvg_edge->src, idx_v, dvg_edge->tgt));
1532         }
1533 #if 0
1534         /*
1535                 Add a bipartite entry for each pair of nodes (u, v), where exists a
1536                 path in the DVG from u to v, ie. connect all descendants(v) to v.
1537                 desc_dvg(v) = accumulated descendants of all z in dvg_user_list(v)
1538         */
1539         foreach_ir_nodeset(&dvg->nodes, u_irn, iter) {
1540                 rss_irn_t *u        = get_rss_irn(rss, u_irn);
1541                 int       idx_u_irn = MAP_IDX(u_irn);
1542
1543                 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG descendants of %+F:\n", u_irn));
1544
1545                 //plist_clear(u->dvg_desc_list);
1546                 //accumulate_dvg_descendant_values(rss, u, u->dvg_desc_list);
1547
1548                 /*
1549                         FIXME: The array is build on the phase obstack and we cannot free the data.
1550                         So the array get as many times allocated as this function is called.
1551                 */
1552
1553                 /* build the sorted array for faster searches */
1554                 //u->dvg_desc = build_sorted_array_from_list(u->dvg_desc_list, phase_obst(&rss->ph));
1555
1556                 DBG((rss->dbg, DEBUG_MAX_AC, "\t\tadding bipartite entries of %+F:\n", u_irn));
1557
1558                 /* add the bipartite entries for all descendants */
1559                 for (i = ARR_LEN_SAFE(u->dvg_desc) - 1; i >= 0; --i) {
1560                         rss_irn_t *desc    = get_rss_irn(rss, u->dvg_desc[i]);
1561                         int       idx_desc = MAP_IDX(u->dvg_desc[i]);
1562
1563                         /* add the entry to the bipartite data structure */
1564                         hungarian_add(bp, idx_u_irn, idx_desc, 1);
1565                         DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1566                                 idx_u_irn, u_irn, idx_desc, u->dvg_desc[i]));
1567
1568                         need_matching = 1;
1569                 }
1570         }
1571 #endif
1572
1573         /* We want maximum cardinality matching */
1574         hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);
1575
1576 #if 0
1577         DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG pkiller:\n"));
1578         /* beware: the following function needs the dvg_desc array */
1579         build_dvg_pkiller_list(rss, dvg);
1580 #endif
1581
1582         DBG((rss->dbg, LEVEL_2, "\t\tcomputing bipartite matching\n"));
1583         /*
1584                 The maximum cardinality bipartite matching gives us the minimal
1585                 chain partition, which corresponds to the maximum anti chains.
1586         */
1587         res = hungarian_solve(bp, assignment, &cost, 1);
1588         assert(res == 0 && "Bipartite matching failed!");
1589
1590         hungarian_free(bp);
1591         memset(assignment_rev, -1, n * sizeof(assignment_rev[0]));
1592
1593         /* build the reverse assignment, ie. foreach i -> j, add j -> i */
1594         for (i = 0; i < n; ++i) {
1595                 if (assignment[i] >= 0) {
1596                         int j = assignment[i];
1597                         assignment_rev[j] = i;
1598                 }
1599         }
1600
1601         DBG((rss->dbg, LEVEL_2, "\t\tgot assignment with cost %d\n", cost));
1602         DBG((rss->dbg, LEVEL_3, "\t\t\tassignment   ---   reverse assignment\n", cost));
1603         for (i = 0; i < n; ++i) {
1604                 DBG((rss->dbg, LEVEL_3, "\t\t\t%3d -> %3d         %3d -> %3d\n", i, assignment[i], i, assignment_rev[i]));
1605         }
1606
1607         values    = XMALLOC(ir_nodeset_t);
1608         ir_nodeset_init_size(values, 10);
1609         cur_chain = 0;
1610         /* Construction of the minimal chain partition */
1611         for (j = 0; j < n; ++j) {
1612                 /* check nodes, which did not occur as target */
1613                 if (assignment_rev[j] == -1) {
1614                         int       xj      = idx_map[j];
1615                         ir_node   *xj_irn = get_idx_irn(rss->irg, xj);
1616                         rss_irn_t *xj_rss = get_rss_irn(rss, xj_irn);
1617                         chain_t   *c      = OALLOC(phase_obst(&rss->ph), chain_t);
1618                         int       source;
1619
1620                         /* there was no source for j -> we have a source of a new chain */
1621                         ir_nodeset_insert(values, xj_irn);
1622
1623                         c->elements = plist_obstack_new(phase_obst(&rss->ph));
1624                         c->nr       = cur_chain++;
1625                         plist_insert_back(c->elements, xj_irn);
1626
1627                         xj_rss->chain = c;
1628
1629                         DBG((rss->dbg, LEVEL_2, "\t\tstarting chain %d:\n", c->nr));
1630                         DBG((rss->dbg, LEVEL_2, "\t\t\t%+F (%d)", xj_irn, j));
1631
1632                         /* follow chain, having j as source */
1633                         source = j;
1634                         while (assignment[source] >= 0) {
1635                                 int       target  = assignment[source];
1636                                 int       irn_idx = idx_map[target];
1637                                 ir_node   *irn    = get_idx_irn(rss->irg, irn_idx);
1638                                 rss_irn_t *node   = get_rss_irn(rss, irn);
1639
1640                                 plist_insert_back(c->elements, irn);
1641                                 node->chain = c;
1642
1643                                 DB((rss->dbg, LEVEL_2, " -> %+F (%d)", irn, target));
1644
1645                                 /* new source = last target */
1646                                 source = target;
1647                         }
1648
1649                         DB((rss->dbg, LEVEL_2, "\n"));
1650                 }
1651         }
1652
1653         /*
1654                 Computing the maximal antichain: Select an element from each
1655                 chain such, such it is parallel with the others.
1656         */
1657         DBG((rss->dbg, LEVEL_1, "\t\tcomputing set of saturation values (MAX AC)\n"));
1658         DBG((rss->dbg, LEVEL_3, "\t\tstarting with:\n"));
1659
1660         DEBUG_ONLY(
1661                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_3)
1662                         dump_nodeset(values, "\t\t\t");
1663         )
1664
1665         temp = NULL;
1666         do {
1667                 /*
1668                         We need an explicit array for the values as
1669                         we cannot iterate multiple times over the same
1670                         set at the same time. :-(((((
1671                         TODO Matze: now we can, so rewrite this...
1672                 */
1673                 int     n         = ir_nodeset_size(values);
1674                 int     i         = 0;
1675                 ir_node **val_arr = NEW_ARR_F(ir_node *, n);
1676
1677                 foreach_ir_nodeset(values, u_irn, iter)
1678                         val_arr[i++] = u_irn;
1679
1680                 if (temp) {
1681                         ir_nodeset_destroy(temp);
1682                         free(temp);
1683                 }
1684
1685                 temp = XMALLOC(ir_nodeset_t);
1686                 ir_nodeset_init_size(temp, 10);
1687
1688                 /* Select all nodes from current value set, having another node in the set as descendant. */
1689                 for (i = 0; i < n; ++i) {
1690                         rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1691
1692                         for (j = 0; j < n; ++j) {
1693                                 if (i != j) {
1694                                         rss_edge_t key;
1695
1696                                         key.src = val_arr[i];
1697                                         key.tgt = val_arr[j];
1698
1699                                         if (pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1700                                                 /* v[j] is descendant of u -> remove u and break */
1701                                                 ir_nodeset_insert(temp, (ir_node *) u->irn);
1702                                                 ir_nodeset_remove(values, u->irn);
1703
1704                                                 DBG((rss->dbg, LEVEL_3, "\t\t\tremoving %+F from values, adding it to temp\n", u->irn));
1705
1706                                                 break;
1707                                         }
1708                                 }
1709                         }
1710                 }
1711
1712                 /* Try to insert the chain predecessor of all selected u's */
1713                 foreach_ir_nodeset(temp, u_irn, iter) {
1714                         rss_irn_t       *u  = get_rss_irn(rss, u_irn);
1715                         chain_t         *c  = u->chain;
1716                         plist_element_t *el = plist_find_value(c->elements, u_irn);
1717
1718                         assert(el && "Missing element in chain!");
1719
1720                         /* If u has predecessor in chain: insert the predecessor */
1721                         if (el == plist_element_get_prev(el)) {
1722                                 ir_nodeset_insert(values, plist_element_get_value(el));
1723                                 DBG((rss->dbg, LEVEL_3, "\t\t\tadding %+F to values\n", plist_element_get_value(el)));
1724                         }
1725                 }
1726
1727
1728                 DEL_ARR_F(val_arr);
1729         } while (ir_nodeset_size(temp) > 0);
1730
1731         DBG((rss->dbg, LEVEL_2, "\t\tfinal set:\n"));
1732         DEBUG_ONLY(
1733                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2) {
1734                         dump_nodeset(values, "\t\t\t");
1735                 }
1736         );
1737
1738         if (rss->opts->dump_flags & RSS_DUMP_MAXAC)
1739                 debug_vcg_dump_pkg(rss, values, iteration);
1740
1741         if (temp != NULL) {
1742                 ir_nodeset_destroy(temp);
1743                 free(temp);
1744         }
1745
1746         return values;
1747
1748 #undef MAP_IDX
1749 }
1750
1751 /**
1752  * Computes the best serialization between two nodes of sat_vals.
1753  */
1754 static serialization_t *compute_best_admissible_serialization(rss_t *rss, ir_nodeset_t *sat_vals, serialization_t *ser, int num_regs)
1755 {
1756         int        n                    = ir_nodeset_size(sat_vals);
1757         int        n_idx                = ARR_LEN_SAFE(rss->idx_map);
1758         int        i                    = 0;
1759         ir_node    **val_arr            = ALLOCAN(ir_node*, n);
1760         bitset_t   *bs_sv               = bitset_alloca(n_idx);
1761         bitset_t   *bs_vdesc            = bitset_alloca(n_idx);
1762         bitset_t   *bs_tmp              = bitset_alloca(n_idx);
1763         bitset_t   *bs_ukilldesc        = bitset_alloca(n_idx);
1764         int        best_benefit         = INT_MAX;
1765         int        best_omega2          = INT_MAX;
1766         int        best_benefit_omega20 = INT_MAX;
1767         int        has_omega1           = 0;
1768         int        j, k;
1769         ir_node    *irn;
1770         ir_nodeset_iterator_t iter;
1771         rss_edge_t min_benefit_edge = {NULL, NULL, NULL};
1772         rss_edge_t min_omega20_edge = {NULL, NULL, NULL};
1773         rss_irn_t  *ser_u_omega1 = NULL, *ser_v_omega1 = NULL;
1774         rss_irn_t  *ser_u_omega20 = NULL, *ser_v_omega20 = NULL;
1775
1776         DBG((rss->dbg, LEVEL_1, "\tcomputing admissible serializations:\n"));
1777
1778         /*
1779                 We need an explicit array for the values as
1780                 we cannot iterate multiple times over the same
1781                 set at the same time. :-(((((
1782         */
1783
1784         foreach_ir_nodeset(sat_vals, irn, iter) {
1785                 val_arr[i++] = irn;
1786                 bitset_set(bs_sv, BLOCK_IDX_MAP(rss, irn));
1787         }
1788
1789         /*
1790                 We build all admissible serializations and remember the best found so far.
1791                 For u in sat_vals:
1792                  For v in sat_val:
1793                    if v in pkiller(u): add edge from v to all other pkiller(u)
1794                    else: for all vv in pkiller(u): add edge from v to vv if there exists no path from vv to v
1795         */
1796
1797 /*
1798         A node is unserializable if:
1799         - it has only one killer and this one is Sink
1800     - it kills no other values
1801         In this case there is no serialization which could
1802         reduce the registerpressure
1803 */
1804 #define IS_UNSERIALIZABLE_NODE(rss_node)                  \
1805         (                                                     \
1806                 (                                                 \
1807                         (plist_count(rss_node->pkiller_list) == 1) && \
1808                         is_Sink(rss_node->killer)                  && \
1809                         (rss_node->kill_count                == 0)    \
1810                 )                            ||                   \
1811                 be_is_Barrier(rss_node->irn) ||                   \
1812                 be_is_Keep(rss_node->irn)                         \
1813         )
1814
1815         /* for all u in sat_vals */
1816         for (i = 0; i < n; ++i) {
1817                 rss_irn_t       *u = get_rss_irn(rss, val_arr[i]);
1818                 plist_element_t *el;
1819
1820                 /* ignore nodes where serialization does not help */
1821                 if (IS_UNSERIALIZABLE_NODE(u)) {
1822                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", u->irn));
1823                         continue;
1824                 }
1825
1826                 /* accumulate all descendants of all pkiller(u) */
1827                 bitset_clear_all(bs_ukilldesc);
1828                 foreach_plist(u->pkiller_list, el) {
1829                         ir_node   *irn  = plist_element_get_value(el);
1830                         rss_irn_t *node = get_rss_irn(rss, irn);
1831
1832                         if (! is_Sink(irn))
1833                                 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, irn));
1834                         else
1835                                 continue;
1836
1837                         for (k = ARR_LEN_SAFE(node->descendants) - 1; k >= 0; --k) {
1838                                 if (! is_Sink(node->descendants[k]))
1839                                         bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, node->descendants[k]));
1840                         }
1841                 }
1842
1843                 /* for all v in sat_vals */
1844                 for (j = 0; j < n; ++j) {
1845                         ir_node   *v_irn   = val_arr[j];
1846                         rss_irn_t *v       = get_rss_irn(rss, v_irn);
1847                         unsigned  v_height = get_irn_height(rss->h, v_irn);
1848                         int       omega1, omega2, is_pkiller;
1849
1850                         /* v cannot be serialized with itself
1851                          * ignore nodes where serialization does not help */
1852                         if (i == j || IS_UNSERIALIZABLE_NODE(v)) {
1853 #ifdef DEBUG_libfirm
1854                                 if (i != j)
1855                                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", v->irn));
1856 #endif
1857                                 continue;
1858                         }
1859
1860                         /* get descendants of v */
1861                         bitset_clear_all(bs_vdesc);
1862                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v_irn));
1863                         for (k = ARR_LEN_SAFE(v->descendants) - 1; k >= 0; --k) {
1864                                 if (! is_Sink(v->descendants[k]))
1865                                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v->descendants[k]));
1866                         }
1867
1868                         /* if v is in pkiller(u) */
1869                         is_pkiller = plist_has_value(u->pkiller_list, v_irn);
1870
1871                         /* for all vv in pkiller(u) */
1872                         foreach_plist(u->pkiller_list, el) {
1873                                 ir_node *vv_irn  = plist_element_get_value(el);
1874                                 int     add_edge;
1875
1876                                 if (is_Sink(vv_irn) || is_cfop(vv_irn))
1877                                         continue;
1878
1879                                 if (is_pkiller)
1880                                         add_edge = vv_irn != v_irn && skip_Proj(vv_irn) != skip_Proj(v_irn);
1881                                 else
1882                                         add_edge = ! heights_reachable_in_block(rss->h, skip_Proj(vv_irn), skip_Proj(v_irn));
1883
1884                                 /*
1885                                         As we add an edge from vv -> v, we have to make sure,
1886                                         that there exists no path from v to vv.
1887                                 */
1888
1889                                 if (add_edge) {
1890                                         unsigned vv_height = get_irn_height(rss->h, vv_irn);
1891                                         unsigned critical_path_cost;
1892                                         unsigned mu1, mu2;
1893
1894                                         /*
1895                                                 mu1 = | descendants(v) cut sat_vals |
1896                                                 the number of saturating values which cannot
1897                                                 be simultaneously alive with u
1898                                         */
1899                                         bitset_copy(bs_tmp, bs_vdesc);
1900                                         mu1 = bitset_popcnt(bitset_and(bs_tmp, bs_sv));
1901
1902                                         /*
1903                                                 mu2 = | accum_desc_all_pkiller(u) without descendants(v) |
1904                                         */
1905                                         if (is_pkiller) {
1906                                                 bitset_copy(bs_tmp, bs_ukilldesc);
1907                                                 mu2 = bitset_popcnt(bitset_andnot(bs_tmp, bs_vdesc));
1908                                         }
1909                                         else {
1910                                                 mu2 = 0;
1911                                         }
1912
1913                                         /* omega1 = mu1 - mu2 */
1914                                         omega1 = mu1 - mu2;
1915
1916                                         if (omega1 != 0)
1917                                                 has_omega1 = 1;
1918
1919                                         /* omega2 = increase of critical path */
1920                                         critical_path_cost =
1921                                                 v_height                        /* longest path from v to sink */
1922                                                 + rss->max_height - vv_height   /* longest path from source to vv */
1923                                                 + 1;                            /* edge */
1924
1925                                         /*
1926                                                 If critical_path_cost > max_height -> the new edge
1927                                                 would increase the longest critical path by the difference.
1928                                         */
1929                                         omega2 = critical_path_cost > rss->max_height ? critical_path_cost - rss->max_height : 0;
1930
1931                                         /* this keeps track of the edge with the best benefit */
1932                                         if (omega1 >= num_regs - n && omega1 < best_benefit) {
1933                                                 min_benefit_edge.src = v_irn;
1934                                                 min_benefit_edge.tgt = vv_irn;
1935
1936                                                 ser_u_omega1 = u;
1937                                                 ser_v_omega1 = v;
1938
1939                                                 best_benefit    = omega1;
1940                                                 ser->new_killer = is_pkiller;
1941                                         }
1942
1943                                         /* this keeps track of the edge with the best omega1 costs where omega2 == 0 */
1944                                         if (omega2 == 0 && omega1 >= num_regs - n && omega1 < best_benefit_omega20) {
1945                                                 min_omega20_edge.src = v_irn;
1946                                                 min_omega20_edge.tgt = vv_irn;
1947
1948                                                 ser_u_omega20 = u;
1949                                                 ser_v_omega20 = v;
1950
1951                                                 best_benefit_omega20 = omega1;
1952                                                 ser->new_killer      = is_pkiller;
1953                                         }
1954
1955                                         best_omega2 = MIN(best_omega2, omega2);
1956
1957                                         DBG((rss->dbg, LEVEL_2, "\t\tfound %+F -> %+F (w1 %d, w2 %d)\n",
1958                                                 v_irn, vv_irn, omega1, omega2));
1959                                 } /* if add_edge */
1960                         } /* for all vv in pkiller(u) */
1961                 } /* for all v in sat_vals */
1962         } /* for all u in sat_vals */
1963
1964         if (! has_omega1)
1965                 return NULL;
1966
1967         if (best_omega2 == 0) {
1968                 ser->u         = ser_u_omega20;
1969                 ser->v         = ser_v_omega20;
1970                 ser->edge->src = min_omega20_edge.src;
1971                 ser->edge->tgt = min_omega20_edge.tgt;
1972                 ser->omega1    = best_benefit_omega20;
1973                 ser->omega2    = best_omega2;
1974         }
1975         else {
1976                 ser->u         = ser_u_omega1;
1977                 ser->v         = ser_v_omega1;
1978                 ser->edge->src = min_benefit_edge.src;
1979                 ser->edge->tgt = min_benefit_edge.tgt;
1980                 ser->omega1    = best_benefit;
1981                 ser->omega2    = best_omega2;
1982         }
1983
1984         return ser;
1985
1986 #undef IS_UNSERIALIZABLE_NODE
1987 }
1988
1989 /**
1990  * Perform the value serialization heuristic and add all
1991  * computed serialization edges as dependencies to the irg.
1992  */
1993 static void perform_value_serialization_heuristic(rss_t *rss)
1994 {
1995         bitset_t *arch_nonign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1996         bitset_t *abi_ign_bs     = bitset_alloca(arch_register_class_n_regs(rss->cls));
1997         unsigned available_regs, iteration;
1998         dvg_t    dvg;
1999         ir_nodeset_t *sat_vals;
2000         pset *ser_set = new_pset(cmp_rss_edges, 20);
2001
2002         /* available_regs = R = |arch_non_ignore_regs cut ~abi_ignore_regs| */
2003         arch_put_non_ignore_regs(rss->cls, arch_nonign_bs);
2004         be_abi_put_ignore_regs(rss->abi, rss->cls, abi_ign_bs);
2005         bitset_andnot(arch_nonign_bs, abi_ign_bs);
2006         available_regs  = bitset_popcnt(arch_nonign_bs);
2007         //num_live = pset_count(rss->live_block);
2008         //available_regs -= num_live < available_regs ? num_live : 0;
2009
2010         DBG((rss->dbg, LEVEL_1, "\n\t#available regs: %d\n\n", available_regs));
2011
2012         /*
2013                 At first we need to compute the disjoint value DAG (DVG = {V, E_dv}).
2014                 V    = set of all nodes we are currently interested in
2015                 E_dv = there is an edge from u to v iff v is a descendant of killer(u), forall u, v in V
2016         */
2017         ir_nodeset_init_size(&dvg.nodes, plist_count(rss->nodes));
2018         dvg.edges = new_pset(cmp_rss_edges, plist_count(rss->nodes) * 5);
2019         compute_dvg(rss, &dvg);
2020
2021         /*
2022                 Then we perform the heuristic serialization algorithm
2023                 on the DVG which gives us all necessary serialization
2024                 edges.
2025         */
2026         DBG((rss->dbg, LEVEL_1, "\tcomputing maximal antichain:\n"));
2027         iteration = 0;
2028         sat_vals  = compute_maximal_antichain(rss, &dvg, iteration++);
2029         while (sat_vals && (ir_nodeset_size(sat_vals) > available_regs)) {
2030                 serialization_t *ser, best_ser;
2031                 rss_edge_t      *edge = OALLOC(phase_obst(&rss->ph), rss_edge_t);
2032                 ir_node         *dep_src, *dep_tgt;
2033
2034                 best_ser.edge = edge;
2035                 ser = compute_best_admissible_serialization(rss, sat_vals, &best_ser, available_regs);
2036
2037                 DBG((rss->dbg, LEVEL_1, "\tcurrent register saturation %d, target %d\n", ir_nodeset_size(sat_vals), available_regs));
2038
2039                 if (! ser) {
2040                         DBG((rss->dbg, LEVEL_1, "\tno RS improving serialization found, breaking at iteration %d\n", iteration));
2041                         break;
2042                 }
2043
2044                 /* Insert the serialization as dependency edge into the irg. */
2045                 DBG((rss->dbg, LEVEL_1, "\tserializing %+F -> %+F with edge %+F -> %+F and cost %d, %d\n",
2046                         ser->u->irn, ser->v->irn, ser->edge->src, ser->edge->tgt, ser->omega1, ser->omega2));
2047
2048                 if (pset_find(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge)))
2049                         ir_printf("WARNING: serialization %+F -> %+F computed twice!\n", ser->edge->src, ser->edge->tgt);
2050
2051
2052                 pset_insert(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge));
2053
2054                 /* update the dvg */
2055                 update_dvg(rss, &dvg, ser->v, ser->u);
2056                 update_dvg(rss, &dvg, get_rss_irn(rss, ser->edge->src), get_rss_irn(rss, ser->edge->tgt));
2057                 if (sat_vals != NULL) {
2058                         ir_nodeset_destroy(sat_vals);
2059                         free(sat_vals);
2060                 }
2061
2062                 dep_src = skip_Proj(ser->edge->src);
2063                 dep_tgt = ser->edge->tgt;
2064                 add_irn_dep(dep_src, dep_tgt);
2065
2066                 /* Update descendants, consumer and pkillers of the target */
2067                 update_node_info(rss, ser->edge->tgt, ser->edge->src);
2068
2069                 /* TODO: try to find a cheaper way for updating height information */
2070                 rss->max_height = heights_recompute_block(rss->h, rss->block);
2071
2072                 /* Recompute the antichain for next serialization */
2073                 DBG((rss->dbg, LEVEL_1, "\tre-computing maximal antichain:\n"));
2074                 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
2075         }
2076
2077         ir_nodeset_destroy(&dvg.nodes);
2078         del_pset(dvg.edges);
2079 }
2080
2081 /**
2082  * Do initial calculations for a block.
2083  */
2084 static void process_block(ir_node *block, void *env)
2085 {
2086         rss_t *rss = env;
2087         int   i, n;
2088         const ir_edge_t *edge;
2089
2090         phase_init(&rss->ph, "rss block preprocessor", rss->irg, PHASE_DEFAULT_GROWTH, init_rss_irn, NULL);
2091
2092         DBG((rss->dbg, LEVEL_1, "preprocessing block %+F\n", block));
2093         rss->block = block;
2094
2095         /* build an index map for all nodes in the current block */
2096         i            = 0;
2097         n            = get_irn_n_edges(block);
2098         NEW_ARR_A(int *, rss->idx_map, n);
2099         foreach_out_edge(block, edge) {
2100                 ir_node *irn      = get_edge_src_irn(edge);
2101                 rss->idx_map[i++] = get_irn_idx(irn);
2102         }
2103         qsort(rss->idx_map, n, sizeof(rss->idx_map[0]), cmp_int);
2104         rss->max_height = heights_recompute_block(rss->h, block);
2105
2106         /* loop over all register classes */
2107         for (i = arch_env_get_n_reg_class(rss->arch_env) - 1; i >= 0; --i) {
2108                 const arch_register_class_t *cls = arch_env_get_reg_class(rss->arch_env, i);
2109
2110                 rss->cls = cls;
2111                 DBG((rss->dbg, LEVEL_1, "register class %s\n", arch_register_class_name(cls)));
2112
2113                 /* Get all live value at end of Block having current register class */
2114                 ir_nodeset_init(&rss->live_block);
2115                 be_liveness_end_of_block(rss->liveness, rss->cls, rss->block, &rss->live_block);
2116
2117                 /* reset the list of interesting nodes */
2118                 plist_clear(rss->nodes);
2119                 plist_insert_back(rss->nodes, _sink);
2120
2121                 /* collect all nodes having a certain register class */
2122                 foreach_out_edge(block, edge) {
2123                         ir_node *irn  = get_edge_src_irn(edge);
2124                         ir_mode *mode = get_irn_mode(irn);
2125
2126                         /*
2127                                 We skip:
2128                                 - mode_T nodes (the projs are asked)
2129                                 - mode_X nodes (control flow nodes are always scheduled last)
2130                                 - Keeps (they are always immediately scheduled)
2131                                 - Phi (same as Keep)
2132                         */
2133                         if (mode == mode_T || mode == mode_X || is_Phi(irn))
2134                                 continue;
2135
2136                         /*
2137                                 In case of a proj, we skip
2138                                 - Barrier (they are a Barrier :)
2139                                 - Start
2140                                 - the Proj itself, as it's scheduled always with it's super node
2141                         */
2142                         if (is_Proj(irn)) {
2143                                 ir_node *pred = get_Proj_pred(irn);
2144                                 if (be_is_Barrier(pred) || be_is_Start(pred))
2145                                         continue;
2146                         }
2147
2148                         /* calculate the descendants and consumer for each node in the block */
2149                         collect_node_info(rss, skip_Proj(irn));
2150
2151                         if (be_is_Keep(irn))
2152                                 continue;
2153
2154                         if (!arch_irn_is_ignore(irn) &&
2155                                         arch_get_irn_reg_class_out(irn) == cls) {
2156                                 plist_insert_back(rss->nodes, skip_Proj(irn));
2157                         }
2158                         //}
2159                 }
2160
2161                 /* compute the potential killing set PK(G) */
2162                 compute_pkill_set(rss);
2163
2164                 /* compute the killing function k* */
2165                 compute_killing_function(rss);
2166
2167                 /*
2168                         Compute the heuristic value serialization and
2169                         add the necessary dependencies to the irg.
2170                 */
2171                 perform_value_serialization_heuristic(rss);
2172
2173                 ir_nodeset_destroy(&rss->live_block);
2174         }
2175
2176         phase_free(&rss->ph);
2177 }
2178
2179 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_schedrss);
2180 void be_init_schedrss(void)
2181 {
2182         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
2183         lc_opt_entry_t *sched_grp = lc_opt_get_grp(be_grp, "sched");
2184         lc_opt_entry_t *rss_grp = lc_opt_get_grp(sched_grp, "rss");
2185
2186         lc_opt_add_table(rss_grp, rss_option_table);
2187 }
2188
2189 /**
2190  * Preprocess the irg for scheduling.
2191  */
2192 void rss_schedule_preparation(be_irg_t *birg)
2193 {
2194         ir_graph *irg = be_get_birg_irg(birg);
2195         rss_t rss;
2196
2197         FIRM_DBG_REGISTER(rss.dbg, "firm.be.sched.rss");
2198
2199         //firm_dbg_set_mask(rss.dbg, LEVEL_1 | LEVEL_2 | LEVEL_3);
2200
2201         init_rss_special_nodes(irg);
2202
2203         rss.irg      = irg;
2204         rss.arch_env = be_get_birg_arch_env(birg);
2205         rss.abi      = birg->abi;
2206         rss.h        = heights_new(irg);
2207         rss.nodes    = plist_new();
2208         rss.opts     = &rss_options;
2209         rss.liveness = be_liveness(irg);
2210         be_liveness_assure_sets(rss.liveness);
2211         irg_block_walk_graph(irg, NULL, process_block, &rss);
2212         heights_free(rss.h);
2213         plist_free(rss.nodes);
2214         be_liveness_free(rss.liveness);
2215
2216         if (birg->main_env->options->dump_flags & DUMP_SCHED)
2217                 be_dump(rss.irg, "-rss", dump_ir_block_graph);
2218 }