support for custom attribute types on nodes
[libfirm] / ir / be / beschedrss.c
1 /*
2  * Copyright (C) 1995-2007 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Implementation of a register saturating list scheduler.
23  * @author      Christian Wuerdig
24  * @date        29.08.2006
25  * @version     $Id$
26  *
27  * Implementation of a register saturating list scheduler
28  * as described in: Sid-Ahmed-Ali Touati
29  * Register Saturation in Superscalar and VLIW Codes
30  */
31 #ifdef HAVE_CONFIG_H
32 #include "config.h"
33 #endif
34
35 #include <limits.h>
36
37 #include "obst.h"
38 #include "debug.h"
39
40 #include "irgraph_t.h"
41 #include "irnode_t.h"
42 #include "iredges_t.h"
43 #include "ircons_t.h"
44 #include "irphase_t.h"
45 #include "irgwalk.h"
46 #include "irdump.h"
47 #include "irtools.h"
48 #include "irbitset.h"
49 #include "irprintf.h"
50 #include "bipartite.h"
51 #include "hungarian.h"
52 #include "plist.h"
53
54 #include "height.h"
55
56 #include "beabi.h"
57 #include "bemodule.h"
58 #include "benode_t.h"
59 #include "besched_t.h"
60 #include "beirg_t.h"
61
62 #include <libcore/lc_opts.h>
63 #include <libcore/lc_opts_enum.h>
64
65
66 #define ARR_LEN_SAFE(arr) ((arr) != NULL ? ARR_LEN((arr)) : 0)
67
68 #define HASH_RSS_EDGE(edge) ((get_irn_node_nr((edge)->src) << 16) | (get_irn_node_nr((edge)->tgt) & 0xFFFF))
69 #define BSEARCH_IRN_ARR(val, arr) \
70         bsearch(&(val), (arr), ARR_LEN_SAFE((arr)), sizeof((arr)[0]), cmp_irn_idx)
71
72 #define BLOCK_IDX_MAP(rss, irn) bsearch_for_index(get_irn_idx((irn)), (rss)->idx_map, ARR_LEN_SAFE((rss)->idx_map), 1)
73
74 /* the rss options */
75 typedef struct _rss_opts_t {
76         int dump_flags;
77 } rss_opts_t;
78
79 /* Represents a child with associated costs */
80 typedef struct _child {
81         ir_node *irn;
82         float   cost;
83 } child_t;
84
85 /* We need edges for several purposes. */
86 typedef struct _rss_edge {
87         ir_node *src;
88         ir_node *tgt;
89         void    *next;
90 } rss_edge_t;
91
92 /* Represents a connected bipartite component. */
93 typedef struct _cbc {
94         ir_nodeset_t parents;       /**< = S  a set of value producers */
95         ir_nodeset_t children;      /**< = T  a set of value consumers */
96         pset    *kill_edges;    /**< = E  a set of edges (t in T, s in S) such as each s in S gets killed by at least one t in T */
97         int     nr;             /**< a deterministic index for set insertion (used as hash) */
98 } cbc_t;
99
100 /* Represents a disjoint value DAG. */
101 typedef struct _dvg {
102         ir_nodeset_t nodes;
103         pset    *edges;
104 } dvg_t;
105
106 /* Represents a chain of nodes. */
107 typedef struct _chain {
108         plist_t *elements;   /**< List of chain elements */
109         int     nr;          /**< a deterministic index for set insertion (used as hash) */
110 } chain_t;
111
112 typedef struct _rss_irn {
113         plist_t  *consumer_list;    /**< List of consumers */
114         ir_node **consumer;         /**< Sorted consumer array (needed for faster access) */
115
116         plist_t  *parent_list;      /**< List of parents */
117         plist_t  *pkiller_list;     /**< List of potential killers */
118
119         plist_t  *descendant_list;  /**< List of descendants */
120         ir_node **descendants;      /**< Sorted descendant array (needed for faster access) */
121
122         ir_node  *killer;           /**< The selected unique killer */
123         ir_node  *irn;              /**< The corresponding firm node to this rss_irn */
124
125         chain_t  *chain;            /**< The chain, this node is associated to */
126
127         unsigned desc_walk;         /**< visited flag for collecting descendants */
128         unsigned kill_count;        /**< number of nodes killed by this one */
129
130         unsigned live_out : 1;      /**< irn has consumers outside of it's block */
131         unsigned visited  : 1;      /**< visited flag for bipartite decomposition */
132         unsigned havecons : 1;      /**< visited flag collect consumer */
133         unsigned handled  : 1;      /**< flag indicating whether or not the list structures have been build */
134         unsigned dumped   : 1;      /**< flag indication whether or not this node was dumped */
135 } rss_irn_t;
136
137 /* Represents a serialization edge with associated costs. */
138 typedef struct _serialization {
139         rss_irn_t  *u;       /* the top node */
140         rss_irn_t  *v;       /* the node about to be serialized after u */
141         rss_edge_t *edge;    /* the edge selected for the serialization */
142         int        omega1;   /* estimated: available regs - RS reduction */
143         int        omega2;   /* increase in critical path length */
144         int        new_killer;
145 } serialization_t;
146
147 typedef struct _rss {
148         ir_phase          ph;              /**< Phase to hold some data */
149         heights_t        *h;              /**< The current height object */
150         ir_graph         *irg;            /**< The irg to preprocess */
151         plist_t          *nodes;          /**< The list of interesting nodes */
152         const arch_env_t *arch_env;       /**< The architecture environment */
153         be_abi_irg_t     *abi;            /**< The abi for this irg */
154         pset             *cbc_set;        /**< A set of connected bipartite components */
155         ir_node          *block;          /**< The current block in progress. */
156         int              *idx_map;        /**< Mapping irn indices to per block indices */
157         unsigned         max_height;      /**< maximum height in the current block */
158         rss_opts_t       *opts;           /**< The options */
159         be_lv_t          *liveness;       /**< The liveness information for this irg */
160         pset             *live_block;     /**< Values alive at end of block */
161         const arch_register_class_t *cls; /**< The current register class */
162         DEBUG_ONLY(firm_dbg_module_t *dbg);
163 } rss_t;
164
165 #define get_rss_irn(rss, irn)  (phase_get_or_set_irn_data(&rss->ph, irn))
166
167 /**
168  * We need some special nodes:
169  * a source and a sink for all live-in and live-out values of a block
170  */
171 enum {
172         iro_rss_Source,
173         iro_rss_Sink,
174         iro_rss_last
175 };
176
177 /** The opcode of the rss_Source node once allocated. */
178 static ir_op *op_rss_Source;
179 /** The opcode of the rss_Sink node once allocated. */
180 static ir_op *op_rss_Sink;
181
182 /** The rss_Source node of the current graph. */
183 static ir_node *_source = NULL;
184 /** The rss_Sink node of the current graph. */
185 static ir_node *_sink   = NULL;
186
187 #define is_Source(irn) ((irn) == _source)
188 #define is_Sink(irn)   ((irn) == _sink)
189
190 enum {
191         RSS_DUMP_NONE  = 0,
192         RSS_DUMP_CBC   = 1 << 0,
193         RSS_DUMP_PKG   = 1 << 1,
194         RSS_DUMP_KILL  = 1 << 2,
195         RSS_DUMP_DVG   = 1 << 3,
196         RSS_DUMP_MAXAC = 1 << 4,
197         RSS_DUMP_ALL   = (RSS_DUMP_MAXAC << 1) - 1,
198 };
199
200 static rss_opts_t rss_options = {
201         RSS_DUMP_NONE,
202 };
203
204 static const lc_opt_enum_int_items_t dump_items[] = {
205         { "none",  RSS_DUMP_NONE  },
206         { "cbc",   RSS_DUMP_CBC   },
207         { "pkg",   RSS_DUMP_PKG   },
208         { "kill",  RSS_DUMP_KILL  },
209         { "dvg",   RSS_DUMP_DVG   },
210         { "maxac", RSS_DUMP_MAXAC },
211         { "all",   RSS_DUMP_ALL   },
212         { NULL, 0 }
213 };
214
215 static lc_opt_enum_int_var_t dump_var = {
216         &rss_options.dump_flags, dump_items
217 };
218
219 static const lc_opt_table_entry_t rss_option_table[] = {
220         LC_OPT_ENT_ENUM_MASK("dump", "dump phases", &dump_var),
221         { NULL }
222 };
223
224 /******************************************************************************
225  *  _          _                    __                  _   _
226  * | |        | |                  / _|                | | (_)
227  * | |__   ___| |_ __   ___ _ __  | |_ _   _ _ __   ___| |_ _  ___  _ __  ___
228  * | '_ \ / _ \ | '_ \ / _ \ '__| |  _| | | | '_ \ / __| __| |/ _ \| '_ \/ __|
229  * | | | |  __/ | |_) |  __/ |    | | | |_| | | | | (__| |_| | (_) | | | \__ \
230  * |_| |_|\___|_| .__/ \___|_|    |_|  \__,_|_| |_|\___|\__|_|\___/|_| |_|___/
231  *            | |
232  *            |_|
233  ******************************************************************************/
234
235 /**
236  * Acquire opcodes if needed and create source and sink nodes.
237  */
238 static void init_rss_special_nodes(ir_graph *irg) {
239         ir_node *block;
240
241         if (op_rss_Source == NULL) {
242                 int iro_rss_base = get_next_ir_opcodes(iro_rss_last);
243                 op_rss_Source = new_ir_op(iro_rss_base + iro_rss_Source, "rss_Source", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
244                 op_rss_Sink   = new_ir_op(iro_rss_base + iro_rss_Sink,   "rss_Sink",   op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
245         }
246         block   = get_irg_start_block(irg);
247         _source = new_ir_node(NULL, irg, block, op_rss_Source, mode_ANY, 0, NULL);
248         _sink   = new_ir_node(NULL, irg, block, op_rss_Sink, mode_ANY, 0, NULL);
249 }
250
251 static int cmp_int(const void *a, const void *b) {
252         const int *i1 = a;
253         const int *i2 = b;
254
255         return QSORT_CMP(*i1, *i2);
256 }
257
258 static int cmp_child_costs(const void *a, const void *b) {
259         const child_t *c1 = a;
260         const child_t *c2 = b;
261
262         return QSORT_CMP(c1->cost, c2->cost);
263 }
264
265 static int cmp_irn_idx(const void *a, const void *b) {
266         const ir_node *n1 = *(ir_node **)a;
267         const ir_node *n2 = *(ir_node **)b;
268
269         return QSORT_CMP(get_irn_idx(n1), get_irn_idx(n2));
270 }
271
272 static int cmp_rss_edges(const void *a, const void *b) {
273         const rss_edge_t *e1 = a;
274         const rss_edge_t *e2 = b;
275
276         return (e1->src != e2->src) || (e1->tgt != e2->tgt);
277 }
278
279 static int bsearch_for_index(int key, int *arr, size_t len, int force) {
280         int left = 0;
281         int right = len;
282
283         while (right >= left) {
284                 int idx = (left + right) / 2;
285
286                 if (key < arr[idx])
287                         right = idx - 1;
288                 else if (key > arr[idx])
289                         left = idx + 1;
290                 else
291                         return idx;
292         }
293
294         if (force)
295                 assert(0 && "Something is wrong, key not found.");
296         return -1;
297 }
298
299 static ir_node **build_sorted_array_from_list(plist_t *irn_list, struct obstack *obst) {
300         plist_element_t *el;
301         int     i     = 0;
302         int     len   = plist_count(irn_list);
303         ir_node **arr = NEW_ARR_D(ir_node *, obst, len);
304
305         /* copy the list into the array */
306         foreach_plist(irn_list, el) {
307                 arr[i++] = plist_element_get_value(el);
308         }
309
310         /* sort the array by node index */
311         qsort(arr, len, sizeof(arr[0]), cmp_irn_idx);
312
313         return arr;
314 }
315
316 /*****************************************************
317  *      _      _                       _
318  *     | |    | |                     (_)
319  *   __| | ___| |__  _   _  __ _  __ _ _ _ __   __ _
320  *  / _` |/ _ \ '_ \| | | |/ _` |/ _` | | '_ \ / _` |
321  * | (_| |  __/ |_) | |_| | (_| | (_| | | | | | (_| |
322  *  \__,_|\___|_.__/ \__,_|\__, |\__, |_|_| |_|\__, |
323  *                          __/ | __/ |         __/ |
324  *                         |___/ |___/         |___/
325  *
326  *****************************************************/
327
328 #ifdef DEBUG_libfirm
329 static void dump_nodeset(ir_nodeset_t *ns, const char *prefix) {
330         ir_nodeset_iterator_t iter;
331         ir_node *irn;
332
333         ir_nodeset_iterator_init(&iter, ns);
334         while( (irn = ir_nodeset_iterator_next(&iter)) != NULL ) {
335                 ir_fprintf(stderr, "%s%+F\n", prefix, irn);
336         }
337 }
338 #endif
339
340 static void build_file_name(rss_t *rss, const char *suffix, size_t suf_len, char *buf, size_t len) {
341         const char *irg_name;
342
343         memset(buf, 0, len);
344         irg_name = get_entity_name(get_irg_entity(rss->irg));
345         snprintf(buf, len - suf_len, "%s-%s-block-%ld",
346                 irg_name, arch_register_class_name(rss->cls), get_irn_node_nr(rss->block));
347         strcat(buf, suffix);
348 }
349
350 /* Dumps all collected bipartite components of current irg as vcg. */
351 static void debug_vcg_dump_bipartite(rss_t *rss) {
352         cbc_t *cbc;
353         FILE  *f;
354         char  file_name[256];
355         static const char suffix[] = "-RSS-CBC.vcg";
356
357         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
358         f = fopen(file_name, "w");
359
360         ir_fprintf(f, "graph: { title: \"connected bipartite component graph of %+F\"\n", rss->irg);
361         fprintf(f, "display_edge_labels: no\n");
362         fprintf(f, "layoutalgorithm: mindepth\n");
363         fprintf(f, "manhattan_edges: yes\n\n");
364
365         foreach_pset(rss->cbc_set, cbc) {
366                 ir_nodeset_iterator_t iter;
367                 ir_node    *n;
368                 rss_edge_t *ke;
369
370                 fprintf(f, "graph: { titel: \"cbc %d\" label: \"cbc %d\" status:clustered color:yellow\n", cbc->nr, cbc->nr);
371                 foreach_ir_nodeset(&cbc->parents, n, iter) {
372                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
373                 }
374                 foreach_ir_nodeset(&cbc->children, n, iter) {
375                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
376                 }
377                 foreach_pset(cbc->kill_edges, ke) {
378                         ir_fprintf(f, "edge: { sourcename: \"n%d_%d\" targetname: \"n%d_%d\" }\n",
379                                 get_irn_node_nr(ke->src), cbc->nr, get_irn_node_nr(ke->tgt), cbc->nr);
380                 }
381                 fprintf(f, "}\n\n");
382         }
383         fprintf(f, "}\n");
384         fclose(f);
385 }
386
387 /* Dump the computed killing function as vcg. */
388 static void debug_vcg_dump_kill(rss_t *rss) {
389         FILE            *f;
390         char            file_name[256];
391         plist_element_t *el;
392         static const char suffix[] = "-RSS-KILL.vcg";
393
394         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
395         f = fopen(file_name, "w");
396
397         ir_fprintf(f, "graph: { title: \"computed kill graph of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
398         fprintf(f, "display_edge_labels: no\n");
399         fprintf(f, "layoutalgorithm: mindepth\n");
400         fprintf(f, "manhattan_edges: yes\n\n");
401
402         {
403                 /* reset dump_flag */
404                 ir_node *irn;
405                 foreach_phase_irn(&rss->ph, irn) {
406                         rss_irn_t *node = get_rss_irn(rss, irn);
407                         node->dumped = 0;
408                 }
409         }
410
411         /* dump all nodes and their killers */
412         foreach_plist(rss->nodes, el) {
413                 ir_node   *irn     = plist_element_get_value(el);
414                 rss_irn_t *rirn    = get_rss_irn(rss, irn);
415                 rss_irn_t *pk_rirn = get_rss_irn(rss, rirn->killer);
416
417                 if (! rirn->dumped) {
418                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
419                         rirn->dumped = 1;
420                 }
421
422                 if (! pk_rirn->dumped) {
423                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(rirn->killer), rirn->killer);
424                         pk_rirn->dumped = 1;
425                 }
426
427                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
428                         get_irn_node_nr(rirn->killer), get_irn_node_nr(irn));
429         }
430
431         fprintf(f, "}\n");
432         fclose(f);
433 }
434
435 /* Dumps the potential killing DAG (PKG) as vcg. */
436 static void debug_vcg_dump_pkg(rss_t *rss, ir_nodeset_t *max_ac, int iteration) {
437         FILE              *f;
438         char              file_name[256];
439         char              *suffix   = alloca(32);
440         static const char suffix1[] = "-RSS-PKG.vcg";
441         static const char suffix2[] = "-RSS-PKG-MAXAC.vcg";
442         plist_element_t   *el;
443
444         if (! max_ac) {
445                 snprintf(suffix, 32, "%s", suffix1);
446         }
447         else {
448                 snprintf(suffix, 32, "-%02d%s", iteration, suffix2);
449         }
450
451         build_file_name(rss, suffix, strlen(suffix) + 1, file_name, sizeof(file_name));
452         f = fopen(file_name, "w");
453
454         ir_fprintf(f, "graph: { title: \"potential killing DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
455         fprintf(f, "display_edge_labels: no\n");
456         fprintf(f, "layoutalgorithm: mindepth\n");
457         fprintf(f, "manhattan_edges: yes\n\n");
458
459         {
460                 /* reset dump_flag */
461                 ir_node *irn;
462                 foreach_phase_irn(&rss->ph, irn) {
463                         rss_irn_t *node = get_rss_irn(rss, irn);
464                         node->dumped = 0;
465                 }
466         }
467
468         foreach_plist(rss->nodes, el) {
469                 ir_node   *irn  = plist_element_get_value(el);
470                 rss_irn_t *rirn = get_rss_irn(rss, irn);
471                 char      *c1   = "";
472                 plist_element_t *k_el;
473
474                 /* dump selected saturating values in yellow */
475                 if (max_ac && ir_nodeset_contains(max_ac, irn))
476                         c1 = "color:yellow";
477
478                 if (! rirn->dumped) {
479                         if (rirn->chain)
480                                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(irn), irn, rirn->chain->nr, c1);
481                         else
482                                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(irn), irn, c1);
483                         rirn->dumped = 1;
484                 }
485
486                 foreach_plist(rirn->pkiller_list, k_el) {
487                         ir_node   *pkiller = plist_element_get_value(k_el);
488                         rss_irn_t *pk_rirn = get_rss_irn(rss, pkiller);
489                         char      *c2      = "";
490
491                         if (max_ac && ir_nodeset_contains(max_ac, pkiller))
492                                 c2 = "color:yellow";
493
494                         if (! pk_rirn->dumped) {
495                                 if (pk_rirn->chain)
496                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(pkiller), pkiller, pk_rirn->chain->nr, c2);
497                                 else
498                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(pkiller), pkiller, c2);
499                                 pk_rirn->dumped = 1;
500                         }
501                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
502                                 get_irn_node_nr(pkiller), get_irn_node_nr(irn));
503                 }
504         }
505         fprintf(f, "}\n");
506         fclose(f);
507 }
508
509 /* Dumps the disjoint value DAG (DVG) as vcg. */
510 static void debug_vcg_dump_dvg(rss_t *rss, dvg_t *dvg) {
511         static const char suffix[] = "-RSS-DVG.vcg";
512         FILE       *f;
513         char       file_name[256];
514         ir_nodeset_iterator_t iter;
515         ir_node    *irn;
516         rss_edge_t *edge;
517
518         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
519         f = fopen(file_name, "w");
520
521         ir_fprintf(f, "graph: { title: \"disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
522         fprintf(f, "display_edge_labels: no\n");
523         fprintf(f, "layoutalgorithm: mindepth\n");
524         fprintf(f, "manhattan_edges: yes\n\n");
525
526         /* dump all nodes */
527         foreach_ir_nodeset(&dvg->nodes, irn, iter) {
528                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
529         }
530
531         /* dump all edges */
532         foreach_pset(dvg->edges, edge) {
533                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
534                         get_irn_node_nr(edge->src), get_irn_node_nr(edge->tgt));
535         }
536
537         fprintf(f, "}\n");
538         fclose(f);
539 }
540
541 #if 0
542 /* Dumps the PKG(DVG). */
543 static void debug_vcg_dump_dvg_pkiller(rss_t *rss, dvg_t *dvg) {
544         static const char suffix[] = "-RSS-DVG-PKG.vcg";
545         FILE       *f;
546         char       file_name[256];
547         ir_nodeset_iterator_t iter;
548         ir_node    *irn;
549
550         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
551         f = fopen(file_name, "w");
552
553         ir_fprintf(f, "graph: { title: \"PKG of disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
554         fprintf(f, "display_edge_labels: no\n");
555         fprintf(f, "layoutalgorithm: mindepth\n");
556         fprintf(f, "manhattan_edges: yes\n\n");
557
558         /* dump all nodes */
559         foreach_ir_nodeset(&dvg->nodes, irn, iter) {
560                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
561         }
562
563         /* dump all edges */
564         foreach_ir_nodeset(&dvg->nodes, irn, iter) {
565                 rss_irn_t       *node = get_rss_irn(rss, irn);
566                 plist_element_t *el;
567
568                 foreach_plist(node->dvg_pkiller_list, el) {
569                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
570                                 get_irn_node_nr(plist_element_get_value(el)), get_irn_node_nr(irn));
571                 }
572         }
573
574         fprintf(f, "}\n");
575         fclose(f);
576 }
577 #endif /* if 0 */
578
579 /**
580  * In case there is no rss information for irn, initialize it.
581  */
582 static void *init_rss_irn(ir_phase *ph, ir_node *irn, void *old) {
583         rss_irn_t *res = old ? old : phase_alloc(ph, sizeof(res[0]));
584
585         res->descendant_list = plist_obstack_new(phase_obst(ph));
586         res->descendants     = NULL;
587
588         res->consumer_list   = plist_obstack_new(phase_obst(ph));
589         res->consumer        = NULL;
590
591         res->pkiller_list    = plist_obstack_new(phase_obst(ph));
592
593         res->parent_list     = plist_obstack_new(phase_obst(ph));
594
595         res->killer          = NULL;
596         res->irn             = irn;
597         res->chain           = NULL;
598
599         res->kill_count      = 0;
600         res->desc_walk       = 0;
601         res->live_out        = 0;
602         res->visited         = 0;
603         res->handled         = 0;
604         res->dumped          = 0;
605         res->havecons        = 0;
606
607         return res;
608 }
609
610 /**
611  * Collect all nodes data dependent on current node.
612  */
613 static void collect_descendants(rss_t *rss, rss_irn_t *rirn, ir_node *irn, int *got_sink, unsigned cur_desc_walk) {
614         const ir_edge_t *edge;
615         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
616         ir_node         *block    = rss->block;
617         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
618         int             i;
619
620         if (cur_node->desc_walk >= cur_desc_walk)
621                 return;
622         cur_node->desc_walk = cur_desc_walk;
623
624         /* Stop at Barriers */
625         if (be_is_Barrier(irn))
626                 return;
627
628         /* loop over normal and dependency edges */
629         for (i = 0; i < 2; ++i) {
630                 foreach_out_edge_kind(irn, edge, ekind[i]) {
631                         ir_node *user = get_edge_src_irn(edge);
632
633                         /* skip ignore nodes as they do not really contribute to register pressure */
634                         if (arch_irn_is(rss->arch_env, user, ignore))
635                                 continue;
636
637                         /*
638                                 (a) mode_X means Jump            -> out_edge
639                                 (b) Phi as user of a normal node -> out-edge
640                         */
641                         if (get_irn_mode(user) == mode_X || is_Phi(user)) {
642                                 if (! *got_sink)
643                                         goto force_sink;
644                                 else
645                                         continue;
646                         }
647
648                         if (is_Proj(user)) {
649
650                                 //if (arch_get_irn_reg_class(rss->arch_env, user, -1) == rss->cls)
651                                         collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
652                         }
653                         else {
654                                 /* check if user lives in block and is not a control flow node */
655                                 if (get_nodes_block(user) == block) {
656                                         if (! plist_has_value(rirn->descendant_list, user)) {
657                                                 plist_insert_back(rirn->descendant_list, user);
658                                                 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", user));
659                                         }
660                                         collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
661                                 }
662                                 else if (! *got_sink) {
663 force_sink:
664                                         /* user lives out of block: add sink as descendant if not already done */
665                                         plist_insert_back(rirn->descendant_list, _sink);
666                                         *got_sink = 1;
667                                         DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", _sink));
668                                 }
669                         }
670                 }
671         }
672 }
673
674 /**
675  * Handles a single consumer.
676  */
677 static void collect_single_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *consumer, int *got_sink) {
678         ir_node *block = rss->block;
679
680         assert(! is_Proj(consumer) && "Cannot handle Projs");
681
682         if (! is_Phi(consumer) && ! is_Block(consumer) && get_nodes_block(consumer) == block) {
683                 if (! arch_irn_is(rss->arch_env, consumer, ignore) && ! plist_has_value(rss_irn->consumer_list, consumer)) {
684                         plist_insert_back(rss_irn->consumer_list, consumer);
685                         DBG((rss->dbg, LEVEL_2, "\t\tconsumer %+F\n", consumer));
686                 }
687         }
688         else {
689                 rss_irn->live_out = 1;
690                 DBG((rss->dbg, LEVEL_2, "\t\tlive out %+F", consumer));
691                 if (! *got_sink) {
692                         plist_insert_back(rss_irn->consumer_list, _sink);
693                         *got_sink = 1;
694                         DB((rss->dbg, LEVEL_2, ", %+F added instead", _sink));
695                 }
696                 DB((rss->dbg, LEVEL_2, "\n"));
697         }
698 }
699
700 /**
701  * Collect all nodes consuming the value(s) produced by current node.
702  */
703 static void collect_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *irn, int *got_sink) {
704         const ir_edge_t *edge;
705         int             i;
706         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
707         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
708
709         if (cur_node->havecons)
710                 return;
711         cur_node->havecons = 1;
712
713         for (i = 0; i < 2; ++i) {
714                 foreach_out_edge_kind(irn, edge, ekind[i]) {
715                         ir_node *consumer = get_edge_src_irn(edge);
716
717                         if (is_Proj(consumer)) {
718                                 //if (arch_get_irn_reg_class(rss->arch_env, consumer, -1) == rss->cls)
719                                         collect_consumer(rss, rss_irn, consumer, got_sink);
720                         }
721                         else
722                                 collect_single_consumer(rss, rss_irn, consumer, got_sink);
723                 }
724         }
725 }
726
727 /**
728  * Collects all consumer and descendant of a irn.
729  */
730 static void collect_node_info(rss_t *rss, ir_node *irn) {
731         static unsigned cur_desc_walk = 0;
732         rss_irn_t       *rss_irn      = get_rss_irn(rss, irn);
733         int             got_sink;
734
735         assert(! is_Proj(irn) && "Cannot handle Projs.");
736
737         /* check if node info is already available */
738         if (rss_irn->handled)
739                 return;
740                 //phase_reinit_single_irn_data(&rss->ph, irn);
741
742         DBG((rss->dbg, LEVEL_1, "\tcomputing consumers of %+F:\n", irn));
743
744         /* collect all consumer */
745         got_sink = 0;
746         collect_consumer(rss, rss_irn, irn, &got_sink);
747
748         /* build sorted consumer array */
749         rss_irn->consumer = build_sorted_array_from_list(rss_irn->consumer_list, phase_obst(&rss->ph));
750
751         DBG((rss->dbg, LEVEL_1, "\tcompute descendants of %+F:\n", irn));
752
753         /* collect descendants */
754         got_sink = 0;
755         collect_descendants(rss, rss_irn, irn, &got_sink, ++cur_desc_walk);
756
757         /* build sorted descendant array */
758         rss_irn->descendants = build_sorted_array_from_list(rss_irn->descendant_list, phase_obst(&rss->ph));
759
760         rss_irn->handled = 1;
761 }
762
763 /**
764  * Checks if v is a potential killer of u.
765  * v is in pkill(u) iff descendants(v) cut consumer(u) is v
766  *
767  * @param rss   The rss object
768  * @param v      The node to check for killer
769  * @param u      The potentially killed value
770  * @return 1 if v is in pkill(u), 0 otherwise
771  */
772 static int is_potential_killer(rss_t *rss, rss_irn_t *v, rss_irn_t *u) {
773         plist_t *list;
774         ir_node **arr;
775         plist_element_t *el;
776
777         assert(is_Sink(v->irn) || ((plist_count(v->descendant_list) > 0 && v->descendants) || 1));
778         assert(is_Sink(u->irn) || ((plist_count(u->consumer_list)   > 0 && u->consumer)    || 1));
779
780         /* as we loop over the list: loop over the shorter one */
781         if (plist_count(v->descendant_list) > plist_count(u->consumer_list)) {
782                 list = u->consumer_list;
783                 arr  = v->descendants;
784         }
785         else {
786                 list = v->descendant_list;
787                 arr  = u->consumer;
788         }
789
790         /* for each list element: try to find element in array */
791         foreach_plist(list, el) {
792                 ir_node *irn   = plist_element_get_value(el);
793                 ir_node *match = BSEARCH_IRN_ARR(irn, arr);
794
795                 if (match && match != irn)
796                         return 0;
797         }
798
799         return 1;
800 }
801
802 /**
803  * Update descendants, consumer and pkiller list for the given irn.
804  */
805 static void update_node_info(rss_t *rss, ir_node *irn, ir_node *pk_irn) {
806         rss_irn_t *node    = get_rss_irn(rss, irn);
807         rss_irn_t *pkiller = get_rss_irn(rss, pk_irn);
808
809         /* add consumer and rebuild the consumer array */
810         if (! plist_has_value(node->consumer_list, pk_irn)) {
811                 plist_insert_back(node->consumer_list, pk_irn);
812                 node->consumer = build_sorted_array_from_list(node->consumer_list, phase_obst(&rss->ph));
813         }
814
815         /* add pk_irn as descendant, add it's descendants to irn's and rebuild array */
816         if (! plist_has_value(node->descendant_list, pk_irn)) {
817                 plist_element_t *el;
818
819                 plist_insert_back(node->descendant_list, pk_irn);
820
821                 foreach_plist(pkiller->descendant_list, el) {
822                         ir_node *desc = plist_element_get_value(el);
823
824                         if (! plist_has_value(node->descendant_list, desc))
825                                 plist_insert_back(node->descendant_list, desc);
826                 }
827
828                 node->descendants = build_sorted_array_from_list(node->descendant_list, phase_obst(&rss->ph));
829         }
830
831 }
832
833 /**
834  * Compute the potential killing set PK.
835  */
836 static void compute_pkill_set(rss_t *rss) {
837         plist_element_t *u_el, *v_el;
838
839         foreach_plist(rss->nodes, u_el) {
840                 ir_node   *u_irn = plist_element_get_value(u_el);
841                 rss_irn_t *u     = get_rss_irn(rss, u_irn);
842
843                 DBG((rss->dbg, LEVEL_1, "\tcomputing potential killers of %+F:\n", u_irn));
844
845                 /* check each consumer if it is a potential killer  */
846                 foreach_plist(u->consumer_list, v_el) {
847                         ir_node   *v_irn = plist_element_get_value(v_el);
848                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
849
850                         /* check, if v is a potential killer of u */
851                         if (is_potential_killer(rss, v, u)) {
852                                 if (! plist_has_value(u->pkiller_list, v_irn))
853                                         plist_insert_back(u->pkiller_list, v_irn);
854                                 v->kill_count++;
855                                 DBG((rss->dbg, LEVEL_2, "\t\tpotential killer %+F\n", v_irn));
856                         }
857                 }
858
859                 u->killer = _sink;
860         }
861
862         if (rss->opts->dump_flags & RSS_DUMP_PKG)
863                 debug_vcg_dump_pkg(rss, NULL, 0);
864 }
865
866 /**
867  * Build set of killing edges (from values to their potential killers)
868  */
869 static void build_kill_edges(rss_t *rss, pset *epk) {
870         plist_element_t *el, *k_el;
871
872         foreach_plist(rss->nodes, el) {
873                 ir_node    *irn  = plist_element_get_value(el);
874                 rss_irn_t *rirn = get_rss_irn(rss, irn);
875
876                 DBG((rss->dbg, LEVEL_3, "\t\tbuilding kill edges for %+F:\n", irn));
877
878                 foreach_plist(rirn->pkiller_list, k_el) {
879                         ir_node    *pkiller = plist_element_get_value(k_el);
880                         rss_edge_t *ke      = obstack_alloc(phase_obst(&rss->ph), sizeof(*ke));
881
882                         ke->src  = irn;
883                         ke->tgt  = pkiller;
884                         ke->next = NULL;
885
886                         DBG((rss->dbg, LEVEL_3, "\t\t\ttarget %+F\n", pkiller));
887
888                         pset_insert(epk, ke, HASH_RSS_EDGE(ke));
889                 }
890         }
891 }
892
893 #ifdef DEBUG_libfirm
894 /* print the given cbc for debugging purpose */
895 static void debug_print_cbc(firm_dbg_module_t *mod, cbc_t *cbc) {
896         ir_nodeset_iterator_t iter;
897         ir_node    *n;
898         rss_edge_t *ke;
899
900         DBG((mod, LEVEL_3, "\t\tS = set of parents:\n"));
901         foreach_ir_nodeset(&cbc->parents, n, iter) {
902                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
903         }
904         DBG((mod, LEVEL_3, "\t\tT = set of children:\n"));
905         foreach_ir_nodeset(&cbc->children, n, iter) {
906                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
907         }
908         DBG((mod, LEVEL_3, "\t\tE = Edges from producers to consumers\n"));
909         foreach_pset(cbc->kill_edges, ke) {
910                 DBG((mod, LEVEL_3, "\t\t\t%+F -> %+F\n", ke->src, ke->tgt));
911         }
912 }
913 #endif
914
915 /**
916  * Construct the bipartite decomposition.
917  * Sid-Ahmed-Ali Touati, Phd Thesis
918  * Register Pressure in Instruction Level Parallelism, p. 71
919  */
920 static void compute_bipartite_decomposition(rss_t *rss) {
921         pset *epk    = new_pset(cmp_rss_edges, 10);
922         int  cur_num = 0;
923
924         plist_element_t *el;
925
926         DBG((rss->dbg, LEVEL_1, "\tcomputing bipartite decomposition:\n"));
927
928         build_kill_edges(rss, epk);
929
930         foreach_plist(rss->nodes, el) {
931                 ir_node   *u_irn   = plist_element_get_value(el);
932                 rss_irn_t *u       = get_rss_irn(rss, u_irn);
933                 int       p_change = 1;
934                 int       c_change = 1;
935                 int       vrfy_ok  = 1;
936
937                 cbc_t           *cbc;
938                 plist_element_t *el2;
939                 rss_edge_t      *k_edge;
940                 rss_edge_t      *kedge_root = NULL;
941                 ir_node         *t_irn, *s_irn;
942                 ir_nodeset_iterator_t iter;
943
944                 if (u->visited || u_irn == _sink)
945                         continue;
946
947                 DBG((rss->dbg, LEVEL_2, "\t\t%+F choosen:\n", u_irn));
948
949                 cbc     = obstack_alloc(phase_obst(&rss->ph), sizeof(*cbc));
950                 cbc->nr = cur_num++;
951
952                 /* initialize S_cb */
953                 ir_nodeset_init_size(&cbc->parents, 5);
954                 ir_nodeset_insert(&cbc->parents, u_irn);
955                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (init)\n", u_irn));
956
957                 /* E_cb = empty */
958                 cbc->kill_edges = new_pset(cmp_rss_edges, 5);
959
960                 /* each parent gets killed by at least one value from children */
961
962                 /* T_cb = PK_successors(u) */
963                 ir_nodeset_init_size(&cbc->children, 5);
964                 foreach_plist(u->pkiller_list, el2) {
965                         ir_nodeset_insert(&cbc->children, plist_element_get_value(el2));
966                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (init)\n", plist_element_get_value(el2)));
967                 }
968
969                 /*
970                         Now: insert the parents of all children into the parent set
971                         and insert the children of all parents into the children set
972                         until the sets don't change any more
973                 */
974                 while (p_change || c_change) {
975                         ir_nodeset_iterator_t iter;
976                         p_change = c_change = 0;
977
978                         /* accumulate parents */
979                         foreach_ir_nodeset(&cbc->children, t_irn, iter) {
980                                 foreach_pset(epk, k_edge) {
981                                         ir_node *val = k_edge->src;
982
983                                         if (k_edge->tgt == t_irn && ! ir_nodeset_contains(&cbc->parents, val)) {
984                                                 ir_nodeset_insert(&cbc->parents, val);
985                                                 p_change = 1;
986                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (killed by %+F)\n", val, t_irn));
987                                         }
988                                 }
989                         }
990
991                         /* accumulate children */
992                         foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
993                                 foreach_pset(epk, k_edge) {
994                                         ir_node *val = k_edge->tgt;
995
996                                         if (k_edge->src == s_irn  && ! ir_nodeset_contains(&cbc->children, val)) {
997                                                 ir_nodeset_insert(&cbc->children, val);
998                                                 c_change = 1;
999                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (kills %+F)\n", val, s_irn));
1000                                         }
1001                                 }
1002                         }
1003                 }
1004
1005                 /* mark all parent values as visited */
1006                 foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
1007                         rss_irn_t *s = get_rss_irn(rss, s_irn);
1008                         s->visited = 1;
1009                         /* assure bipartite property */
1010 #if 0
1011                         if (ir_nodeset_contains(&cbc->children, s_irn)) {
1012                                 ir_nodeset_remove(&cbc->children, s_irn);
1013                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F removed from to children\n", s_irn));
1014                         }
1015 #endif
1016                 }
1017
1018                 /* update edges */
1019                 foreach_pset(epk, k_edge) {
1020                         if (ir_nodeset_contains(&cbc->parents, k_edge->src) &&
1021                             ir_nodeset_contains(&cbc->children, k_edge->tgt)) {
1022                                 pset_insert(cbc->kill_edges, k_edge, HASH_RSS_EDGE(k_edge));
1023                                 /*
1024                                         Link all k_edges which are about to be removed together.
1025                                         Beware: pset_remove kills the iterator.
1026                                 */
1027                                 k_edge->next = kedge_root;
1028                                 kedge_root   = k_edge;
1029                         }
1030                 }
1031
1032                 /* remove all linked k_edges */
1033                 for (; kedge_root; kedge_root = kedge_root->next) {
1034                         pset_remove(epk, kedge_root, HASH_RSS_EDGE(kedge_root));
1035                 }
1036
1037                 /* verify the cbc */
1038                 foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
1039                         int is_killed = 0;
1040
1041                         foreach_pset(cbc->kill_edges, k_edge) {
1042                                 if (k_edge->src == s_irn) {
1043                                         is_killed = 1;
1044                                         pset_break(cbc->kill_edges);
1045                                         break;
1046                                 }
1047                         }
1048
1049                         if (! is_killed) {
1050                                 vrfy_ok = 0;
1051                                 ir_fprintf(stderr, "Warning: parent %+F is not killed in current cbc\n", s_irn);
1052                         }
1053                 }
1054                 assert(vrfy_ok && "Verification of CBC failed");
1055
1056                 /* add the connected bipartite component */
1057                 if (ir_nodeset_size(&cbc->parents) > 0 &&
1058                     ir_nodeset_size(&cbc->children) > 0 &&
1059                         pset_count(cbc->kill_edges) > 0)
1060                         pset_insert(rss->cbc_set, cbc, (unsigned)cbc->nr);
1061                 DBG((rss->dbg, LEVEL_2, "\tbipartite component %d inserted:\n", cbc->nr));
1062                 DEBUG_ONLY(
1063                         if (firm_dbg_get_mask(rss->dbg) & LEVEL_2)
1064                                 debug_print_cbc(rss->dbg, cbc);
1065                 );
1066         }
1067
1068         if (rss->opts->dump_flags & RSS_DUMP_CBC)
1069                 debug_vcg_dump_bipartite(rss);
1070
1071         del_pset(epk);
1072 }
1073
1074 /**
1075  * Select the child with the maximum cost.
1076  */
1077 static child_t *select_child_max_cost(rss_t *rss, ir_nodeset_t *x, ir_nodeset_t *y, child_t *t, cbc_t *cbc) {
1078         ir_node *child;
1079         ir_nodeset_iterator_t iter;
1080         float   max_cost = -1.0f;
1081
1082         DBG((rss->dbg, LEVEL_2, "\t\tcomputing children costs:\n"));
1083
1084         foreach_ir_nodeset(&cbc->children, child, iter) {
1085                 rss_irn_t  *r_child             = get_rss_irn(rss, child);
1086                 int         num_unkilled_parents = 0;
1087                 int         num_descendants;
1088                 rss_edge_t *k_edge;
1089                 float       cost;
1090
1091                 /* get the number of unkilled parents */
1092                 foreach_pset(cbc->kill_edges, k_edge) {
1093                         if (k_edge->tgt == child && ir_nodeset_contains(x, k_edge->src))
1094                                 ++num_unkilled_parents;
1095                 }
1096
1097                 cost = (float)num_unkilled_parents;
1098
1099                 num_descendants = plist_count(r_child->descendant_list) + ir_nodeset_size(y);
1100
1101                 if (num_descendants > 0)
1102                         cost /= (float)num_descendants;
1103
1104                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F, #desc %d, cost %.3f\n", child, num_descendants, cost));
1105
1106                 if (cost > max_cost) {
1107                         t->irn   = child;
1108                         t->cost  = cost;
1109                         max_cost = cost;
1110                 }
1111         }
1112
1113         return t;
1114 }
1115
1116 /**
1117  * Remove all parents from x which are killed by t_irn.
1118  */
1119 static void remove_covered_parents(rss_t *rss, ir_nodeset_t *x, ir_node *t_irn, cbc_t *cbc) {
1120         rss_irn_t  *t = get_rss_irn(rss, t_irn);
1121         rss_edge_t *k_edge;
1122
1123         DBG((rss->dbg, LEVEL_2, "\t\tremoving parents covered by %+F:\n", t_irn));
1124
1125         foreach_pset(cbc->kill_edges, k_edge) {
1126                 if (k_edge->tgt == t_irn && ir_nodeset_contains(x, k_edge->src)) {
1127                         ir_nodeset_remove(x, k_edge->src);
1128                         plist_insert_back(t->parent_list, k_edge->src);
1129                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", k_edge->src));
1130                 }
1131         }
1132 }
1133
1134 static void update_cumulated_descendent_values(rss_t *rss, ir_nodeset_t *y, ir_node *t_irn) {
1135         rss_irn_t *t = get_rss_irn(rss, t_irn);
1136         plist_element_t *el;
1137
1138         DBG((rss->dbg, LEVEL_2, "\t\tupdating cumulated descendant value of %+F:\n", t_irn));
1139
1140         foreach_plist(t->descendant_list, el) {
1141                 ir_nodeset_insert(y, plist_element_get_value(el));
1142                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", plist_element_get_value(el)));
1143         }
1144 }
1145
1146 /**
1147  * Greedy-k: a heuristics for the MMA problem
1148  */
1149 static void compute_killing_function(rss_t *rss) {
1150         cbc_t *cbc;
1151         struct obstack obst;
1152
1153         obstack_init(&obst);
1154
1155         rss->cbc_set = pset_new_ptr(5);
1156         compute_bipartite_decomposition(rss);
1157
1158         /* for all bipartite components do: */
1159         foreach_pset(rss->cbc_set, cbc) {
1160                 ir_node *p;
1161                 ir_nodeset_t x;
1162                 ir_nodeset_t y;
1163                 ir_nodeset_iterator_t iter;
1164                 child_t **sks    = NEW_ARR_F(child_t *, 20);
1165                 int     cur_len  = 0;
1166                 int     cur_size = 20;
1167                 int     i;
1168
1169                 ir_nodeset_init_size(&x, 10);
1170                 ir_nodeset_init_size(&y, 10);
1171
1172                 DBG((rss->dbg, LEVEL_1, "\tcomputing SKS for cbc %d:\n", cbc->nr));
1173                 DBG((rss->dbg, LEVEL_2, "\t\tinitializing parents X:\n"));
1174
1175                 /* X = S_cb (all parents are initially uncovered) */
1176                 foreach_ir_nodeset(&cbc->parents, p, iter) {
1177                         ir_nodeset_insert(&x, p);
1178                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", p));
1179                 }
1180
1181                 /* while X not empty */
1182                 while (ir_nodeset_size(&x) > 0) {
1183                         child_t *t = obstack_alloc(&obst, sizeof(*t));
1184                         memset(t, 0, sizeof(*t));
1185
1186                         t = select_child_max_cost(rss, &x, &y, t, cbc);
1187
1188                         if (cur_len >= cur_size) {
1189                                 ARR_EXTO(child_t *, sks, cur_size * 2);
1190                                 cur_size *= 2;
1191                         }
1192
1193                         DBG((rss->dbg, LEVEL_2, "\t\tinsert child %+F (%.3f) into SKS at pos %d\n", t->irn, t->cost, cur_len));
1194
1195                         sks[cur_len++] = t;
1196                         remove_covered_parents(rss, &x, t->irn, cbc);
1197                         update_cumulated_descendent_values(rss, &y, t->irn);
1198                 }
1199
1200                 ARR_SHRINKLEN(sks, cur_len);
1201
1202                 /* sort SKS in increasing cost order */
1203                 qsort(sks, cur_len, sizeof(sks[0]), cmp_child_costs);
1204
1205                 DBG((rss->dbg, LEVEL_2, "\tprocessing SKS for cbc %d:\n", cbc->nr));
1206
1207                 /* build killing function */
1208                 for (i = cur_len - 1; i >= 0; --i) { /* loop over sks in decreasing cost order */
1209                         child_t         *t = sks[i];
1210                         rss_irn_t       *rt = get_rss_irn(rss, t->irn);
1211                         plist_element_t *p_el;
1212
1213                         DBG((rss->dbg, LEVEL_3, "\t\t\tkiller %+F (%.3f):\n", t->irn, t->cost));
1214
1215                         /* kill all unkilled parents of t */
1216                         foreach_plist(rt->parent_list, p_el) {
1217                                 ir_node    *par = plist_element_get_value(p_el);
1218                                 rss_irn_t *rpar = get_rss_irn(rss, par);
1219
1220                                 if (is_Sink(rpar->killer)) {
1221                                         rpar->killer = t->irn;
1222                                         DBG((rss->dbg, LEVEL_2, "\t\tkill %+F\n", rpar->irn));
1223                                 }
1224                                 else {
1225                                         DBG((rss->dbg, LEVEL_3, "\t\t\tkeeping %+F as killer for %+F\n", rpar->killer, rpar->irn));
1226                                 }
1227                         }
1228                 }
1229
1230                 ir_nodeset_destroy(&x);
1231                 ir_nodeset_destroy(&y);
1232                 DEL_ARR_F(sks);
1233         }
1234
1235         if (rss->opts->dump_flags & RSS_DUMP_KILL)
1236                 debug_vcg_dump_kill(rss);
1237
1238         del_pset(rss->cbc_set);
1239         obstack_free(&obst, NULL);
1240 }
1241
1242 /**
1243  * Adds the edge src -> tgt to the dvg. Checks if reverse edge is already there (asserts).
1244  */
1245 static INLINE void add_dvg_edge(rss_t *rss, dvg_t *dvg, ir_node *src, ir_node *tgt, int have_source) {
1246         rss_edge_t *dvg_edge;
1247         rss_edge_t key;
1248
1249         if (! have_source)
1250                 ir_nodeset_insert(&dvg->nodes, src);
1251         else
1252                 assert(ir_nodeset_contains(&dvg->nodes, src) && "Wrong assumption");
1253
1254         ir_nodeset_insert(&dvg->nodes, tgt);
1255
1256         key.src = tgt;
1257         key.tgt = src;
1258         assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1259
1260         key.src = src;
1261         key.tgt = tgt;
1262         if (NULL != pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1263                 /* add the edge to the DVG */
1264                 dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1265
1266                 dvg_edge->src  = src;
1267                 dvg_edge->tgt  = tgt;
1268                 dvg_edge->next = NULL;
1269
1270                 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", src, tgt));
1271                 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1272         }
1273 }
1274
1275 /**
1276  * Computes the disjoint value DAG (DVG).
1277  * BEWARE: It is not made explicitly clear in the Touati paper,
1278  *         but the DVG is meant to be build from the KILLING DAG
1279  */
1280 static void compute_dvg(rss_t *rss, dvg_t *dvg) {
1281         plist_element_t *el;
1282
1283         DBG((rss->dbg, LEVEL_1, "\tcomputing DVG:\n"));
1284
1285         foreach_plist(rss->nodes, el) {
1286                 ir_node    *u_irn    = plist_element_get_value(el);
1287                 rss_irn_t  *u        = get_rss_irn(rss, u_irn);
1288                 rss_irn_t  *u_killer = get_rss_irn(rss, u->killer);
1289                 int        i;
1290
1291                 /* TODO: omit nodes only having sink as pkiller and killing no one */
1292
1293                 /* add an edge to killer */
1294                 add_dvg_edge(rss, dvg, u_irn, u->killer, 0);
1295
1296                 if (is_Sink(u->killer))
1297                         continue;
1298
1299                 /* We add an edge to every killer, from where we could be reached. */
1300                 for (i = ARR_LEN_SAFE(u_killer->descendants) - 1; i >= 0; --i) {
1301                         add_dvg_edge(rss, dvg, u_irn, u_killer->descendants[i], 1);
1302                 }
1303
1304 #if 0
1305
1306                 foreach_plist(rss->nodes, el2) {
1307                         ir_node *v_irn = plist_element_get_value(el2);
1308
1309                         /*
1310                                 There is an edge (u, v) in the DVG iff v is a descendant of the killer(u).
1311                         */
1312                         if (BSEARCH_IRN_ARR(v_irn, u_kill->descendants)) {
1313                                 rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1314                                 rss_edge_t key;
1315
1316                                 /* insert the user into the DVG and append it to the user list of u */
1317                                 ir_nodeset_insert(&dvg->nodes, v_irn);
1318                                 if (! plist_has_value(u->dvg_user_list, v_irn))
1319                                         plist_insert_back(u->dvg_user_list, v_irn);
1320
1321                                 dvg_edge->src  = u_irn;
1322                                 dvg_edge->tgt  = v_irn;
1323                                 dvg_edge->next = NULL;
1324
1325                                 key.src = v_irn;
1326                                 key.tgt = u_irn;
1327
1328                                 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1329
1330                                 /* add the edge to the DVG */
1331                                 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", u_irn, v_irn));
1332                                 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1333                         }
1334                 }
1335 #endif /* if 0 */
1336         }
1337
1338         if (rss->opts->dump_flags & RSS_DUMP_DVG)
1339                 debug_vcg_dump_dvg(rss, dvg);
1340 }
1341
1342 /**
1343  * Updates the dvg structure when a serialization edge from src -> tgt is added.
1344  */
1345 static void update_dvg(rss_t *rss, dvg_t *dvg, rss_irn_t *src, rss_irn_t *tgt) {
1346         int i, j, idx;
1347         rss_edge_t *edge;
1348         rss_edge_t **arr = alloca(pset_count(dvg->edges) * sizeof(arr[0]));
1349
1350         /*
1351                 Add an edge from serialization target to serialization src:
1352                 src cannot be alive with target
1353         */
1354         add_dvg_edge(rss, dvg, tgt->irn, src->irn, 0);
1355
1356         /* Add edges to src's descendants as well, they also getting serialized. */
1357         for (i = ARR_LEN_SAFE(src->descendants) - 1; i >= 0; --i) {
1358                 add_dvg_edge(rss, dvg, tgt->irn, src->descendants[i], 1);
1359         }
1360
1361         /* We also have to add edges from targets predecessors in dvg */
1362         idx = 0;
1363         /* We cannot insert elements into set over which we iterate ... */
1364         foreach_pset(dvg->edges, edge) {
1365                 if (edge->tgt == tgt->irn) {
1366                         arr[idx++] = edge;
1367                 }
1368         }
1369
1370         for (i = 0; i < idx; ++i) {
1371                 edge = arr[i];
1372                 add_dvg_edge(rss, dvg, edge->src, src->irn, 1);
1373                 for (j = ARR_LEN_SAFE(src->descendants) - 1; j >= 0; --j) {
1374                         add_dvg_edge(rss, dvg, edge->src, src->descendants[j], 1);
1375                 }
1376         }
1377 }
1378
1379 #if 0
1380 /**
1381  * Accumulate all descendants for root into list.
1382  */
1383 static void accumulate_dvg_descendant_values(rss_t *rss, rss_irn_t *root, plist_t *list) {
1384         if (plist_count(root->dvg_user_list) > 0) {
1385                 plist_element_t *el;
1386
1387                 foreach_plist(root->dvg_user_list, el) {
1388                         ir_node   *v_irn = plist_element_get_value(el);
1389                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
1390
1391                         /* add v as descendant */
1392                         if (! plist_has_value(list, v_irn)) {
1393                                 plist_insert_back(list, v_irn);
1394                                 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG descendant %+F\n", v_irn));
1395                         }
1396
1397                         /* accumulate v's descendants */
1398                         accumulate_dvg_descendant_values(rss, v, list);
1399                 }
1400         }
1401 }
1402
1403 /**
1404  * Builds the list of potential killers for each node
1405  * in the given DVG.
1406  * Needs the descendant list for all user as sorted array.
1407  */
1408 static void build_dvg_pkiller_list(rss_t *rss, dvg_t *dvg) {
1409         ir_nodeset_iterator_t iter;
1410         ir_node *irn;
1411
1412         foreach_nodeset(&dvg->nodes, irn, iter) {
1413                 rss_irn_t       *node = get_rss_irn(rss, irn);
1414                 plist_element_t *el, *el2;
1415
1416                 DBG((rss->dbg, DEBUG_DVG, "\t\tbuilding pkiller list for %+F\n", irn));
1417
1418                 /* check each user */
1419                 foreach_plist(node->dvg_user_list, el) {
1420                         ir_node *u_irn = plist_element_get_value(el);
1421
1422                         /* is the current user u_irn not a descendant of any other user -> pkiller */
1423                         foreach_plist(node->dvg_user_list, el2) {
1424                                 ir_node   *v_irn = plist_element_get_value(el2);
1425                                 rss_irn_t *v     = get_rss_irn(rss, v_irn);
1426
1427                                 if (el != el2                             &&
1428                                         ! BSEARCH_IRN_ARR(u_irn, v->dvg_desc) &&
1429                                         ! plist_has_value(node->dvg_pkiller_list, u_irn))
1430                                 {
1431                                         plist_insert_back(node->dvg_pkiller_list, u_irn);
1432                                         DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG pkiller %+F\n", u_irn));
1433                                 }
1434                         }
1435                 }
1436
1437                 node->dvg_pkiller = build_sorted_array_from_list(node->dvg_pkiller_list, phase_obst(&rss->ph));
1438         }
1439
1440         DEBUG_ONLY(
1441                 if (firm_dbg_get_mask(rss->dbg) & DEBUG_DVG)
1442                         debug_vcg_dump_dvg_pkiller(rss, dvg);
1443         );
1444 }
1445
1446 #endif /* if 0 */
1447
1448 /**
1449  * Compute the maximal antichain of the current DVG.
1450  * This is a reimplementation of the MAXIMAL_ANTI_CHAIN function
1451  * from the DDG library 1.1 (DAG.cpp).
1452  */
1453 static ir_nodeset_t *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration) {
1454         int         n               = ir_nodeset_size(&dvg->nodes);
1455         int         *assignment     = alloca(n * sizeof(assignment[0]));
1456         int         *assignment_rev = alloca(n * sizeof(assignment_rev[0]));
1457         int         *idx_map        = alloca(n * sizeof(idx_map[0]));
1458         hungarian_problem_t *bp;
1459         ir_nodeset_t *values, *temp;
1460         ir_nodeset_iterator_t iter;
1461         ir_node     *u_irn;
1462         int         i, j, cost, cur_chain, res;
1463         rss_edge_t  *dvg_edge;
1464
1465 #define MAP_IDX(irn) bsearch_for_index(get_irn_idx(irn), idx_map,  n,  1)
1466
1467         if (pset_count(dvg->edges) == 0)
1468                 return NULL;
1469
1470         bp = hungarian_new(n, n, 1, HUNGARIAN_MATCH_NORMAL);
1471
1472         /*
1473                 At first, we build an index map for the nodes in the DVG,
1474                 because we cannot use the irn idx therefore as the resulting
1475                 bipartite data structure would have around 1.2GB.
1476                 So we limit the size to the number of nodes we have in the DVG
1477                 and build a sorted index map for their irn indices.
1478         */
1479         i = 0;
1480         foreach_ir_nodeset(&dvg->nodes, u_irn, iter) {
1481                 idx_map[i++] = get_irn_idx(u_irn);
1482         }
1483         qsort(idx_map, n, sizeof(idx_map[0]), cmp_int);
1484
1485         foreach_pset(dvg->edges, dvg_edge) {
1486                 int idx_u = MAP_IDX(dvg_edge->src);
1487                 int idx_v = MAP_IDX(dvg_edge->tgt);
1488
1489                 /* add the entry to the bipartite data structure */
1490                 hungarian_add(bp, idx_u, idx_v, 1);
1491                 DBG((rss->dbg, LEVEL_3, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1492                         idx_u, dvg_edge->src, idx_v, dvg_edge->tgt));
1493         }
1494 #if 0
1495         /*
1496                 Add a bipartite entry for each pair of nodes (u, v), where exists a
1497                 path in the DVG from u to v, ie. connect all descendants(v) to v.
1498                 desc_dvg(v) = accumulated descendants of all z in dvg_user_list(v)
1499         */
1500         foreach_ir_nodeset(&dvg->nodes, u_irn, iter) {
1501                 rss_irn_t *u        = get_rss_irn(rss, u_irn);
1502                 int       idx_u_irn = MAP_IDX(u_irn);
1503
1504                 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG descendants of %+F:\n", u_irn));
1505
1506                 //plist_clear(u->dvg_desc_list);
1507                 //accumulate_dvg_descendant_values(rss, u, u->dvg_desc_list);
1508
1509                 /*
1510                         FIXME: The array is build on the phase obstack and we cannot free the data.
1511                         So the array get as many times allocated as this function is called.
1512                 */
1513
1514                 /* build the sorted array for faster searches */
1515                 //u->dvg_desc = build_sorted_array_from_list(u->dvg_desc_list, phase_obst(&rss->ph));
1516
1517                 DBG((rss->dbg, DEBUG_MAX_AC, "\t\tadding bipartite entries of %+F:\n", u_irn));
1518
1519                 /* add the bipartite entries for all descendants */
1520                 for (i = ARR_LEN_SAFE(u->dvg_desc) - 1; i >= 0; --i) {
1521                         rss_irn_t *desc    = get_rss_irn(rss, u->dvg_desc[i]);
1522                         int       idx_desc = MAP_IDX(u->dvg_desc[i]);
1523
1524                         /* add the entry to the bipartite data structure */
1525                         hungarian_add(bp, idx_u_irn, idx_desc, 1);
1526                         DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1527                                 idx_u_irn, u_irn, idx_desc, u->dvg_desc[i]));
1528
1529                         need_matching = 1;
1530                 }
1531         }
1532 #endif
1533
1534         /* We want maximum cardinality matching */
1535         hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);
1536
1537 #if 0
1538         DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG pkiller:\n"));
1539         /* beware: the following function needs the dvg_desc array */
1540         build_dvg_pkiller_list(rss, dvg);
1541 #endif
1542
1543         DBG((rss->dbg, LEVEL_2, "\t\tcomputing bipartite matching\n"));
1544         /*
1545                 The maximum cardinality bipartite matching gives us the minimal
1546                 chain partition, which corresponds to the maximum anti chains.
1547         */
1548         res = hungarian_solve(bp, assignment, &cost, 1);
1549         assert(res == 0 && "Bipartite matching failed!");
1550
1551         hungarian_free(bp);
1552         memset(assignment_rev, -1, n * sizeof(assignment_rev[0]));
1553
1554         /* build the reverse assignment, ie. foreach i -> j, add j -> i */
1555         for (i = 0; i < n; ++i) {
1556                 if (assignment[i] >= 0) {
1557                         int j = assignment[i];
1558                         assignment_rev[j] = i;
1559                 }
1560         }
1561
1562         DBG((rss->dbg, LEVEL_2, "\t\tgot assignment with cost %d\n", cost));
1563         DBG((rss->dbg, LEVEL_3, "\t\t\tassignment   ---   reverse assignment\n", cost));
1564         for (i = 0; i < n; ++i) {
1565                 DBG((rss->dbg, LEVEL_3, "\t\t\t%3d -> %3d         %3d -> %3d\n", i, assignment[i], i, assignment_rev[i]));
1566         }
1567
1568         values    = xmalloc(sizeof(values[0]));
1569         ir_nodeset_init_size(values, 10);
1570         cur_chain = 0;
1571         /* Construction of the minimal chain partition */
1572         for (j = 0; j < n; ++j) {
1573                 /* check nodes, which did not occur as target */
1574                 if (assignment_rev[j] == -1) {
1575                         int       xj      = idx_map[j];
1576                         ir_node   *xj_irn = get_idx_irn(rss->irg, xj);
1577                         rss_irn_t *xj_rss = get_rss_irn(rss, xj_irn);
1578                         chain_t   *c      = obstack_alloc(phase_obst(&rss->ph), sizeof(*c));
1579                         int       source;
1580
1581                         /* there was no source for j -> we have a source of a new chain */
1582                         ir_nodeset_insert(values, xj_irn);
1583
1584                         c->elements = plist_obstack_new(phase_obst(&rss->ph));
1585                         c->nr       = cur_chain++;
1586                         plist_insert_back(c->elements, xj_irn);
1587
1588                         xj_rss->chain = c;
1589
1590                         DBG((rss->dbg, LEVEL_2, "\t\tstarting chain %d:\n", c->nr));
1591                         DBG((rss->dbg, LEVEL_2, "\t\t\t%+F (%d)", xj_irn, j));
1592
1593                         /* follow chain, having j as source */
1594                         source = j;
1595                         while (assignment[source] >= 0) {
1596                                 int       target  = assignment[source];
1597                                 int       irn_idx = idx_map[target];
1598                                 ir_node   *irn    = get_idx_irn(rss->irg, irn_idx);
1599                                 rss_irn_t *node   = get_rss_irn(rss, irn);
1600
1601                                 plist_insert_back(c->elements, irn);
1602                                 node->chain = c;
1603
1604                                 DB((rss->dbg, LEVEL_2, " -> %+F (%d)", irn, target));
1605
1606                                 /* new source = last target */
1607                                 source = target;
1608                         }
1609
1610                         DB((rss->dbg, LEVEL_2, "\n"));
1611                 }
1612         }
1613
1614         /*
1615                 Computing the maximal antichain: Select an element from each
1616                 chain such, such it is parallel with the others.
1617         */
1618         DBG((rss->dbg, LEVEL_1, "\t\tcomputing set of saturation values (MAX AC)\n"));
1619         DBG((rss->dbg, LEVEL_3, "\t\tstarting with:\n"));
1620
1621         DEBUG_ONLY(
1622                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_3)
1623                         dump_nodeset(values, "\t\t\t");
1624         )
1625
1626         temp = NULL;
1627         do {
1628                 /*
1629                         We need an explicit array for the values as
1630                         we cannot iterate multiple times over the same
1631                         set at the same time. :-(((((
1632                         TODO Matze: now we can, so rewrite this...
1633                 */
1634                 int     n         = ir_nodeset_size(values);
1635                 int     i         = 0;
1636                 ir_node **val_arr = NEW_ARR_F(ir_node *, n);
1637
1638                 foreach_ir_nodeset(values, u_irn, iter)
1639                         val_arr[i++] = u_irn;
1640
1641                 if (temp) {
1642                         ir_nodeset_destroy(temp);
1643                         free(temp);
1644                 }
1645
1646                 temp = xmalloc(sizeof(temp[0]));
1647                 ir_nodeset_init_size(temp, 10);
1648
1649                 /* Select all nodes from current value set, having another node in the set as descendant. */
1650                 for (i = 0; i < n; ++i) {
1651                         rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1652
1653                         for (j = 0; j < n; ++j) {
1654                                 if (i != j) {
1655                                         rss_edge_t key;
1656
1657                                         key.src = val_arr[i];
1658                                         key.tgt = val_arr[j];
1659
1660                                         if (pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1661                                                 /* v[j] is descendant of u -> remove u and break */
1662                                                 ir_nodeset_insert(temp, u->irn);
1663                                                 ir_nodeset_remove(values, u->irn);
1664
1665                                                 DBG((rss->dbg, LEVEL_3, "\t\t\tremoving %+F from values, adding it to temp\n", u->irn));
1666
1667                                                 break;
1668                                         }
1669                                 }
1670                         }
1671                 }
1672
1673                 /* Try to insert the chain predecessor of all selected u's */
1674                 foreach_ir_nodeset(temp, u_irn, iter) {
1675                         rss_irn_t       *u  = get_rss_irn(rss, u_irn);
1676                         chain_t         *c  = u->chain;
1677                         plist_element_t *el = plist_find_value(c->elements, u_irn);
1678
1679                         assert(el && "Missing element in chain!");
1680
1681                         /* If u has predecessor in chain: insert the predecessor */
1682                         if (el == plist_element_get_prev(el)) {
1683                                 ir_nodeset_insert(values, plist_element_get_value(el));
1684                                 DBG((rss->dbg, LEVEL_3, "\t\t\tadding %+F to values\n", plist_element_get_value(el)));
1685                         }
1686                 }
1687
1688
1689                 DEL_ARR_F(val_arr);
1690         } while (ir_nodeset_size(temp) > 0);
1691
1692         DBG((rss->dbg, LEVEL_2, "\t\tfinal set:\n"));
1693         DEBUG_ONLY(
1694                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2) {
1695                         dump_nodeset(values, "\t\t\t");
1696                 }
1697         );
1698
1699         if (rss->opts->dump_flags & RSS_DUMP_MAXAC)
1700                 debug_vcg_dump_pkg(rss, values, iteration);
1701
1702         if(temp != NULL) {
1703                 ir_nodeset_destroy(temp);
1704                 free(temp);
1705         }
1706
1707         return values;
1708
1709 #undef MAP_IDX
1710 }
1711
1712 /**
1713  * Computes the best serialization between two nodes of sat_vals.
1714  */
1715 static serialization_t *compute_best_admissible_serialization(rss_t *rss, ir_nodeset_t *sat_vals, serialization_t *ser, int num_regs) {
1716         int        n                    = ir_nodeset_size(sat_vals);
1717         int        n_idx                = ARR_LEN_SAFE(rss->idx_map);
1718         int        i                    = 0;
1719         ir_node    **val_arr            = alloca(n * sizeof(val_arr[0]));
1720         bitset_t   *bs_sv               = bitset_alloca(n_idx);
1721         bitset_t   *bs_vdesc            = bitset_alloca(n_idx);
1722         bitset_t   *bs_tmp              = bitset_alloca(n_idx);
1723         bitset_t   *bs_ukilldesc        = bitset_alloca(n_idx);
1724         int        best_benefit         = INT_MAX;
1725         int        best_omega2          = INT_MAX;
1726         int        best_benefit_omega20 = INT_MAX;
1727         int        has_omega1           = 0;
1728         int        j, k;
1729         ir_node    *irn;
1730         ir_nodeset_iterator_t iter;
1731         rss_edge_t min_benefit_edge;
1732         rss_edge_t min_omega20_edge;
1733         rss_irn_t  *ser_u_omega1 = NULL, *ser_v_omega1 = NULL;
1734         rss_irn_t  *ser_u_omega20 = NULL, *ser_v_omega20 = NULL;
1735
1736         DBG((rss->dbg, LEVEL_1, "\tcomputing admissible serializations:\n"));
1737
1738         /*
1739                 We need an explicit array for the values as
1740                 we cannot iterate multiple times over the same
1741                 set at the same time. :-(((((
1742         */
1743
1744         foreach_ir_nodeset(sat_vals, irn, iter) {
1745                 val_arr[i++] = irn;
1746                 bitset_set(bs_sv, BLOCK_IDX_MAP(rss, irn));
1747         }
1748
1749         /*
1750                 We build all admissible serializations and remember the best found so far.
1751                 For u in sat_vals:
1752                  For v in sat_val:
1753                    if v in pkiller(u): add edge from v to all other pkiller(u)
1754                    else: for all vv in pkiller(u): add edge from v to vv if there exists no path from vv to v
1755         */
1756
1757 /*
1758         A node is unserializable if:
1759         - it has only one killer and this one is Sink
1760     - it kills no other values
1761         In this case there is no serialization which could
1762         reduce the registerpressure
1763 */
1764 #define IS_UNSERIALIZABLE_NODE(rss_node)                  \
1765         (                                                     \
1766                 (                                                 \
1767                         (plist_count(rss_node->pkiller_list) == 1) && \
1768                         is_Sink(rss_node->killer)                  && \
1769                         (rss_node->kill_count                == 0)    \
1770                 )                            ||                   \
1771                 be_is_Barrier(rss_node->irn) ||                   \
1772                 be_is_Keep(rss_node->irn)                         \
1773         )
1774
1775         /* for all u in sat_vals */
1776         for (i = 0; i < n; ++i) {
1777                 rss_irn_t       *u = get_rss_irn(rss, val_arr[i]);
1778                 plist_element_t *el;
1779
1780                 /* ignore nodes where serialization does not help */
1781                 if (IS_UNSERIALIZABLE_NODE(u)) {
1782                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", u->irn));
1783                         continue;
1784                 }
1785
1786                 /* accumulate all descendants of all pkiller(u) */
1787                 bitset_clear_all(bs_ukilldesc);
1788                 foreach_plist(u->pkiller_list, el) {
1789                         ir_node   *irn  = plist_element_get_value(el);
1790                         rss_irn_t *node = get_rss_irn(rss, irn);
1791
1792                         if (! is_Sink(irn))
1793                                 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, irn));
1794                         else
1795                                 continue;
1796
1797                         for (k = ARR_LEN_SAFE(node->descendants) - 1; k >= 0; --k) {
1798                                 if (! is_Sink(node->descendants[k]))
1799                                         bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, node->descendants[k]));
1800                         }
1801                 }
1802
1803                 /* for all v in sat_vals */
1804                 for (j = 0; j < n; ++j) {
1805                         ir_node   *v_irn   = val_arr[j];
1806                         rss_irn_t *v       = get_rss_irn(rss, v_irn);
1807                         unsigned  v_height = get_irn_height(rss->h, v_irn);
1808                         int       omega1, omega2, is_pkiller;
1809
1810                         /* v cannot be serialized with itself
1811                          * ignore nodes where serialization does not help */
1812                         if (i == j || IS_UNSERIALIZABLE_NODE(v)) {
1813                                 if (i != j)
1814                                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", v->irn));
1815                                 continue;
1816                         }
1817
1818                         /* get descendants of v */
1819                         bitset_clear_all(bs_vdesc);
1820                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v_irn));
1821                         for (k = ARR_LEN_SAFE(v->descendants) - 1; k >= 0; --k) {
1822                                 if (! is_Sink(v->descendants[k]))
1823                                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v->descendants[k]));
1824                         }
1825
1826                         /* if v is in pkiller(u) */
1827                         is_pkiller = plist_has_value(u->pkiller_list, v_irn);
1828
1829                         /* for all vv in pkiller(u) */
1830                         foreach_plist(u->pkiller_list, el) {
1831                                 ir_node *vv_irn  = plist_element_get_value(el);
1832                                 int     add_edge;
1833
1834                                 if (is_Sink(vv_irn) || is_cfop(vv_irn))
1835                                         continue;
1836
1837                                 if (is_pkiller)
1838                                         add_edge = vv_irn != v_irn && skip_Proj(vv_irn) != skip_Proj(v_irn);
1839                                 else
1840                                         add_edge = ! heights_reachable_in_block(rss->h, skip_Proj(vv_irn), skip_Proj(v_irn));
1841
1842                                 /*
1843                                         As we add an edge from vv -> v, we have to make sure,
1844                                         that there exists no path from v to vv.
1845                                 */
1846
1847                                 if (add_edge) {
1848                                         unsigned vv_height = get_irn_height(rss->h, vv_irn);
1849                                         unsigned critical_path_cost;
1850                                         unsigned mu1, mu2;
1851
1852                                         /*
1853                                                 mu1 = | descendants(v) cut sat_vals |
1854                                                 the number of saturating values which cannot
1855                                                 be simultaneously alive with u
1856                                         */
1857                                         bitset_copy(bs_tmp, bs_vdesc);
1858                                         mu1 = bitset_popcnt(bitset_and(bs_tmp, bs_sv));
1859
1860                                         /*
1861                                                 mu2 = | accum_desc_all_pkiller(u) without descendants(v) |
1862                                         */
1863                                         if (is_pkiller) {
1864                                                 bitset_copy(bs_tmp, bs_ukilldesc);
1865                                                 mu2 = bitset_popcnt(bitset_andnot(bs_tmp, bs_vdesc));
1866                                         }
1867                                         else {
1868                                                 mu2 = 0;
1869                                         }
1870
1871                                         /* omega1 = mu1 - mu2 */
1872                                         omega1 = mu1 - mu2;
1873
1874                                         if (omega1 != 0)
1875                                                 has_omega1 = 1;
1876
1877                                         /* omega2 = increase of critical path */
1878                                         critical_path_cost =
1879                                                 v_height                        /* longest path from v to sink */
1880                                                 + rss->max_height - vv_height   /* longest path from source to vv */
1881                                                 + 1;                            /* edge */
1882
1883                                         /*
1884                                                 If critical_path_cost > max_height -> the new edge
1885                                                 would increase the longest critical path by the difference.
1886                                         */
1887                                         omega2 = critical_path_cost > rss->max_height ? critical_path_cost - rss->max_height : 0;
1888
1889                                         /* this keeps track of the edge with the best benefit */
1890                                         if (omega1 >= num_regs - n && omega1 < best_benefit) {
1891                                                 min_benefit_edge.src = v_irn;
1892                                                 min_benefit_edge.tgt = vv_irn;
1893
1894                                                 ser_u_omega1 = u;
1895                                                 ser_v_omega1 = v;
1896
1897                                                 best_benefit    = omega1;
1898                                                 ser->new_killer = is_pkiller;
1899                                         }
1900
1901                                         /* this keeps track of the edge with the best omega1 costs where omega2 == 0 */
1902                                         if (omega2 == 0 && omega1 >= num_regs - n && omega1 < best_benefit_omega20) {
1903                                                 min_omega20_edge.src = v_irn;
1904                                                 min_omega20_edge.tgt = vv_irn;
1905
1906                                                 ser_u_omega20 = u;
1907                                                 ser_v_omega20 = v;
1908
1909                                                 best_benefit_omega20 = omega1;
1910                                                 ser->new_killer      = is_pkiller;
1911                                         }
1912
1913                                         best_omega2 = MIN(best_omega2, omega2);
1914
1915                                         DBG((rss->dbg, LEVEL_2, "\t\tfound %+F -> %+F (w1 %d, w2 %d)\n",
1916                                                 v_irn, vv_irn, omega1, omega2));
1917                                 } /* if add_edge */
1918                         } /* for all vv in pkiller(u) */
1919                 } /* for all v in sat_vals */
1920         } /* for all u in sat_vals */
1921
1922         if (! has_omega1)
1923                 return NULL;
1924
1925         if (best_omega2 == 0) {
1926                 ser->u         = ser_u_omega20;
1927                 ser->v         = ser_v_omega20;
1928                 ser->edge->src = min_omega20_edge.src;
1929                 ser->edge->tgt = min_omega20_edge.tgt;
1930                 ser->omega1    = best_benefit_omega20;
1931                 ser->omega2    = best_omega2;
1932         }
1933         else {
1934                 ser->u         = ser_u_omega1;
1935                 ser->v         = ser_v_omega1;
1936                 ser->edge->src = min_benefit_edge.src;
1937                 ser->edge->tgt = min_benefit_edge.tgt;
1938                 ser->omega1    = best_benefit;
1939                 ser->omega2    = best_omega2;
1940         }
1941
1942         return ser;
1943
1944 #undef IS_UNSERIALIZABLE_NODE
1945 }
1946
1947 /**
1948  * Perform the value serialization heuristic and add all
1949  * computed serialization edges as dependencies to the irg.
1950  */
1951 static void perform_value_serialization_heuristic(rss_t *rss) {
1952         bitset_t *arch_nonign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1953         bitset_t *abi_ign_bs     = bitset_alloca(arch_register_class_n_regs(rss->cls));
1954         unsigned available_regs, iteration;
1955         dvg_t    dvg;
1956         ir_nodeset_t *sat_vals;
1957         pset *ser_set = new_pset(cmp_rss_edges, 20);
1958
1959         /* available_regs = R = |arch_non_ignore_regs cut ~abi_ignore_regs| */
1960         arch_put_non_ignore_regs(rss->arch_env, rss->cls, arch_nonign_bs);
1961         be_abi_put_ignore_regs(rss->abi, rss->cls, abi_ign_bs);
1962         bitset_andnot(arch_nonign_bs, abi_ign_bs);
1963         available_regs  = bitset_popcnt(arch_nonign_bs);
1964         //num_live = pset_count(rss->live_block);
1965         //available_regs -= num_live < available_regs ? num_live : 0;
1966
1967         DBG((rss->dbg, LEVEL_1, "\n\t#available regs: %d\n\n", available_regs));
1968
1969         /*
1970                 At first we need to compute the disjoint value DAG (DVG = {V, E_dv}).
1971                 V    = set of all nodes we are currently interested in
1972                 E_dv = there is an edge from u to v iff v is a descendant of killer(u), forall u, v in V
1973         */
1974         ir_nodeset_init_size(&dvg.nodes, plist_count(rss->nodes));
1975         dvg.edges = new_pset(cmp_rss_edges, plist_count(rss->nodes) * 5);
1976         compute_dvg(rss, &dvg);
1977
1978         /*
1979                 Then we perform the heuristic serialization algorithm
1980                 on the DVG which gives us all necessary serialization
1981                 edges.
1982         */
1983         DBG((rss->dbg, LEVEL_1, "\tcomputing maximal antichain:\n"));
1984         iteration = 0;
1985         sat_vals  = compute_maximal_antichain(rss, &dvg, iteration++);
1986         while (sat_vals && (ir_nodeset_size(sat_vals) > available_regs)) {
1987                 serialization_t *ser, best_ser;
1988                 rss_edge_t      *edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*edge));
1989                 ir_node         *dep_src, *dep_tgt;
1990
1991                 best_ser.edge = edge;
1992                 ser = compute_best_admissible_serialization(rss, sat_vals, &best_ser, available_regs);
1993
1994                 DBG((rss->dbg, LEVEL_1, "\tcurrent register saturation %d, target %d\n", ir_nodeset_size(sat_vals), available_regs));
1995
1996                 if (! ser) {
1997                         DBG((rss->dbg, LEVEL_1, "\tno RS improving serialization found, breaking at iteration %d\n", iteration));
1998                         break;
1999                 }
2000
2001                 /* Insert the serialization as dependency edge into the irg. */
2002                 DBG((rss->dbg, LEVEL_1, "\tserializing %+F -> %+F with edge %+F -> %+F and cost %d, %d\n",
2003                         ser->u->irn, ser->v->irn, ser->edge->src, ser->edge->tgt, ser->omega1, ser->omega2));
2004
2005                 if (pset_find(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge)))
2006                         ir_printf("WARNING: serialization %+F -> %+F computed twice!\n", ser->edge->src, ser->edge->tgt);
2007
2008
2009                 pset_insert(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge));
2010
2011                 /* update the dvg */
2012                 update_dvg(rss, &dvg, ser->v, ser->u);
2013                 update_dvg(rss, &dvg, get_rss_irn(rss, ser->edge->src), get_rss_irn(rss, ser->edge->tgt));
2014                 if(sat_vals != NULL) {
2015                         ir_nodeset_destroy(sat_vals);
2016                         free(sat_vals);
2017                 }
2018
2019                 dep_src = skip_Proj(ser->edge->src);
2020                 dep_tgt = ser->edge->tgt;
2021                 add_irn_dep(dep_src, dep_tgt);
2022
2023                 /* Update descendants, consumer and pkillers of the target */
2024                 update_node_info(rss, ser->edge->tgt, ser->edge->src);
2025
2026                 /* TODO: try to find a cheaper way for updating height information */
2027                 rss->max_height = heights_recompute_block(rss->h, rss->block);
2028
2029                 /* Recompute the antichain for next serialization */
2030                 DBG((rss->dbg, LEVEL_1, "\tre-computing maximal antichain:\n"));
2031                 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
2032         }
2033
2034         ir_nodeset_destroy(&dvg.nodes);
2035         del_pset(dvg.edges);
2036 }
2037
2038 /**
2039  * Do initial calculations for a block.
2040  */
2041 static void process_block(ir_node *block, void *env) {
2042         rss_t *rss = env;
2043         int   i, n;
2044         const ir_edge_t *edge;
2045
2046         phase_init(&rss->ph, "rss block preprocessor", rss->irg, PHASE_DEFAULT_GROWTH, init_rss_irn, NULL);
2047
2048         DBG((rss->dbg, LEVEL_1, "preprocessing block %+F\n", block));
2049         rss->block = block;
2050
2051         /* build an index map for all nodes in the current block */
2052         i            = 0;
2053         n            = get_irn_n_edges(block);
2054         NEW_ARR_A(int *, rss->idx_map, n);
2055         foreach_out_edge(block, edge) {
2056                 ir_node *irn      = get_edge_src_irn(edge);
2057                 rss->idx_map[i++] = get_irn_idx(irn);
2058         }
2059         qsort(rss->idx_map, n, sizeof(rss->idx_map[0]), cmp_int);
2060         rss->max_height = heights_recompute_block(rss->h, block);
2061
2062         /* loop over all register classes */
2063         for (i = arch_isa_get_n_reg_class(rss->arch_env->isa) - 1; i >= 0; --i) {
2064                 const arch_register_class_t *cls = arch_isa_get_reg_class(rss->arch_env->isa, i);
2065
2066                 rss->cls = cls;
2067                 DBG((rss->dbg, LEVEL_1, "register class %s\n", arch_register_class_name(cls)));
2068
2069                 /* Get all live value at end of Block having current register class */
2070                 rss->live_block = pset_new_ptr(10);
2071                 be_liveness_end_of_block(rss->liveness, rss->arch_env, rss->cls, rss->block, rss->live_block);
2072
2073                 /* reset the list of interesting nodes */
2074                 plist_clear(rss->nodes);
2075                 plist_insert_back(rss->nodes, _sink);
2076
2077                 /* collect all nodes having a certain register class */
2078                 foreach_out_edge(block, edge) {
2079                         ir_node *irn  = get_edge_src_irn(edge);
2080                         ir_mode *mode = get_irn_mode(irn);
2081
2082                         /*
2083                                 We skip:
2084                                 - mode_T nodes (the projs are asked)
2085                                 - mode_X nodes (control flow nodes are always scheduled last)
2086                                 - Keeps (they are always immediately scheduled)
2087                                 - Phi (same as Keep)
2088                         */
2089                         if (mode == mode_T || mode == mode_X || is_Phi(irn))
2090                                 continue;
2091
2092                         /*
2093                                 In case of a proj, we skip
2094                                 - Barrier (they are a Barrier :)
2095                                 - Start
2096                                 - the Proj itself, as it's scheduled always with it's super node
2097                         */
2098                         if (is_Proj(irn)) {
2099                                 ir_node *pred = get_Proj_pred(irn);
2100                                 if (be_is_Barrier(pred) || is_Start(pred))
2101                                         continue;
2102                         }
2103
2104                         /* calculate the descendants and consumer for each node in the block */
2105                         collect_node_info(rss, skip_Proj(irn));
2106
2107                         if (be_is_Keep(irn))
2108                                 continue;
2109
2110                         if (! arch_irn_is(rss->arch_env, irn, ignore) && arch_get_irn_reg_class(rss->arch_env, irn, -1) == cls) {
2111                                 plist_insert_back(rss->nodes, skip_Proj(irn));
2112                         }
2113                         //}
2114                 }
2115
2116                 /* compute the potential killing set PK(G) */
2117                 compute_pkill_set(rss);
2118
2119                 /* compute the killing function k* */
2120                 compute_killing_function(rss);
2121
2122                 /*
2123                         Compute the heuristic value serialization and
2124                         add the necessary dependencies to the irg.
2125                 */
2126                 perform_value_serialization_heuristic(rss);
2127
2128                 del_pset(rss->live_block);
2129         }
2130
2131         phase_free(&rss->ph);
2132 }
2133
2134 /**
2135  * Register the options.
2136  */
2137 void be_init_schedrss(void) {
2138         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
2139         lc_opt_entry_t *sched_grp = lc_opt_get_grp(be_grp, "sched");
2140         lc_opt_entry_t *rss_grp = lc_opt_get_grp(sched_grp, "rss");
2141
2142         lc_opt_add_table(rss_grp, rss_option_table);
2143 }
2144
2145 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_schedrss);
2146
2147 /**
2148  * Preprocess the irg for scheduling.
2149  */
2150 void rss_schedule_preparation(const be_irg_t *birg) {
2151         ir_graph *irg = be_get_birg_irg(birg);
2152         rss_t rss;
2153
2154         FIRM_DBG_REGISTER(rss.dbg, "firm.be.sched.rss");
2155
2156         //firm_dbg_set_mask(rss.dbg, LEVEL_1 | LEVEL_2 | LEVEL_3);
2157
2158         init_rss_special_nodes(irg);
2159
2160         rss.irg      = irg;
2161         rss.arch_env = be_get_birg_arch_env(birg);
2162         rss.abi      = birg->abi;
2163         rss.h        = heights_new(irg);
2164         rss.nodes    = plist_new();
2165         rss.opts     = &rss_options;
2166         rss.liveness = be_liveness(irg);
2167         irg_block_walk_graph(irg, NULL, process_block, &rss);
2168         heights_free(rss.h);
2169         plist_free(rss.nodes);
2170         be_liveness_free(rss.liveness);
2171
2172         if (birg->main_env->options->dump_flags & DUMP_SCHED)
2173                 be_dump(rss.irg, "-rss", dump_ir_block_graph);
2174 }