- let be_liveness() take the irg, not the birg which was just copied around
[libfirm] / ir / be / beschedrss.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Implementation of a register saturating list scheduler.
23  * @author      Christian Wuerdig
24  * @date        29.08.2006
25  * @version     $Id$
26  *
27  * Implementation of a register saturating list scheduler
28  * as described in: Sid-Ahmed-Ali Touati
29  * Register Saturation in Superscalar and VLIW Codes
30  */
31 #include "config.h"
32
33 #include <limits.h>
34
35 #include "obst.h"
36 #include "debug.h"
37
38 #include "irgraph_t.h"
39 #include "irnode_t.h"
40 #include "iredges_t.h"
41 #include "ircons_t.h"
42 #include "irphase_t.h"
43 #include "irgwalk.h"
44 #include "irdump.h"
45 #include "irtools.h"
46 #include "irbitset.h"
47 #include "irprintf.h"
48 #include "bipartite.h"
49 #include "hungarian.h"
50 #include "plist.h"
51 #include "array_t.h"
52
53 #include "height.h"
54
55 #include "beabi.h"
56 #include "bemodule.h"
57 #include "benode_t.h"
58 #include "besched_t.h"
59 #include "beirg_t.h"
60
61 #include "lc_opts.h"
62 #include "lc_opts_enum.h"
63
64
65 #define ARR_LEN_SAFE(arr) ((arr) != NULL ? ARR_LEN((arr)) : 0)
66
67 #define HASH_RSS_EDGE(edge) ((get_irn_node_nr((edge)->src) << 16) | (get_irn_node_nr((edge)->tgt) & 0xFFFF))
68 #define BSEARCH_IRN_ARR(val, arr) \
69         bsearch(&(val), (arr), ARR_LEN_SAFE((arr)), sizeof((arr)[0]), cmp_irn_idx)
70
71 #define BLOCK_IDX_MAP(rss, irn) bsearch_for_index(get_irn_idx((irn)), (rss)->idx_map, ARR_LEN_SAFE((rss)->idx_map), 1)
72
73 /* the rss options */
74 typedef struct _rss_opts_t {
75         int dump_flags;
76 } rss_opts_t;
77
78 /* Represents a child with associated costs */
79 typedef struct _child {
80         ir_node *irn;
81         float   cost;
82 } child_t;
83
84 /* We need edges for several purposes. */
85 typedef struct _rss_edge {
86         ir_node *src;
87         ir_node *tgt;
88         void    *next;
89 } rss_edge_t;
90
91 /* Represents a connected bipartite component. */
92 typedef struct _cbc {
93         ir_nodeset_t parents;       /**< = S  a set of value producers */
94         ir_nodeset_t children;      /**< = T  a set of value consumers */
95         pset    *kill_edges;    /**< = E  a set of edges (t in T, s in S) such as each s in S gets killed by at least one t in T */
96         int     nr;             /**< a deterministic index for set insertion (used as hash) */
97 } cbc_t;
98
99 /* Represents a disjoint value DAG. */
100 typedef struct _dvg {
101         ir_nodeset_t nodes;
102         pset    *edges;
103 } dvg_t;
104
105 /* Represents a chain of nodes. */
106 typedef struct _chain {
107         plist_t *elements;   /**< List of chain elements */
108         int     nr;          /**< a deterministic index for set insertion (used as hash) */
109 } chain_t;
110
111 typedef struct _rss_irn {
112         plist_t  *consumer_list;    /**< List of consumers */
113         const ir_node **consumer;   /**< Sorted consumer array (needed for faster access) */
114
115         plist_t  *parent_list;      /**< List of parents */
116         plist_t  *pkiller_list;     /**< List of potential killers */
117
118         plist_t  *descendant_list;  /**< List of descendants */
119         const ir_node **descendants;      /**< Sorted descendant array (needed for faster access) */
120
121         const ir_node  *killer;     /**< The selected unique killer */
122         const ir_node  *irn;        /**< The corresponding firm node to this rss_irn */
123
124         chain_t  *chain;            /**< The chain, this node is associated to */
125
126         unsigned desc_walk;         /**< visited flag for collecting descendants */
127         unsigned kill_count;        /**< number of nodes killed by this one */
128
129         unsigned live_out : 1;      /**< irn has consumers outside of it's block */
130         unsigned visited  : 1;      /**< visited flag for bipartite decomposition */
131         unsigned havecons : 1;      /**< visited flag collect consumer */
132         unsigned handled  : 1;      /**< flag indicating whether or not the list structures have been build */
133         unsigned dumped   : 1;      /**< flag indication whether or not this node was dumped */
134 } rss_irn_t;
135
136 /* Represents a serialization edge with associated costs. */
137 typedef struct _serialization {
138         rss_irn_t  *u;       /* the top node */
139         rss_irn_t  *v;       /* the node about to be serialized after u */
140         rss_edge_t *edge;    /* the edge selected for the serialization */
141         int        omega1;   /* estimated: available regs - RS reduction */
142         int        omega2;   /* increase in critical path length */
143         int        new_killer;
144 } serialization_t;
145
146 typedef struct _rss {
147         ir_phase          ph;              /**< Phase to hold some data */
148         heights_t        *h;              /**< The current height object */
149         ir_graph         *irg;            /**< The irg to preprocess */
150         plist_t          *nodes;          /**< The list of interesting nodes */
151         const arch_env_t *arch_env;       /**< The architecture environment */
152         be_abi_irg_t     *abi;            /**< The abi for this irg */
153         pset             *cbc_set;        /**< A set of connected bipartite components */
154         ir_node          *block;          /**< The current block in progress. */
155         int              *idx_map;        /**< Mapping irn indices to per block indices */
156         unsigned         max_height;      /**< maximum height in the current block */
157         rss_opts_t       *opts;           /**< The options */
158         be_lv_t          *liveness;       /**< The liveness information for this irg */
159         ir_nodeset_t      live_block;     /**< Values alive at end of block */
160         const arch_register_class_t *cls; /**< The current register class */
161         DEBUG_ONLY(firm_dbg_module_t *dbg);
162 } rss_t;
163
164 #define get_rss_irn(rss, irn)  (phase_get_or_set_irn_data(&rss->ph, irn))
165
166 /**
167  * We need some special nodes:
168  * a source and a sink for all live-in and live-out values of a block
169  */
170 enum {
171         iro_rss_Source,
172         iro_rss_Sink,
173         iro_rss_last
174 };
175
176 /** The opcode of the rss_Source node once allocated. */
177 static ir_op *op_rss_Source;
178 /** The opcode of the rss_Sink node once allocated. */
179 static ir_op *op_rss_Sink;
180
181 /** The rss_Source node of the current graph. */
182 static ir_node *_source = NULL;
183 /** The rss_Sink node of the current graph. */
184 static ir_node *_sink   = NULL;
185
186 #define is_Source(irn) ((irn) == _source)
187 #define is_Sink(irn)   ((irn) == _sink)
188
189 enum {
190         RSS_DUMP_NONE  = 0,
191         RSS_DUMP_CBC   = 1 << 0,
192         RSS_DUMP_PKG   = 1 << 1,
193         RSS_DUMP_KILL  = 1 << 2,
194         RSS_DUMP_DVG   = 1 << 3,
195         RSS_DUMP_MAXAC = 1 << 4,
196         RSS_DUMP_ALL   = (RSS_DUMP_MAXAC << 1) - 1,
197 };
198
199 static rss_opts_t rss_options = {
200         RSS_DUMP_NONE,
201 };
202
203 static const lc_opt_enum_int_items_t dump_items[] = {
204         { "none",  RSS_DUMP_NONE  },
205         { "cbc",   RSS_DUMP_CBC   },
206         { "pkg",   RSS_DUMP_PKG   },
207         { "kill",  RSS_DUMP_KILL  },
208         { "dvg",   RSS_DUMP_DVG   },
209         { "maxac", RSS_DUMP_MAXAC },
210         { "all",   RSS_DUMP_ALL   },
211         { NULL, 0 }
212 };
213
214 static lc_opt_enum_int_var_t dump_var = {
215         &rss_options.dump_flags, dump_items
216 };
217
218 static const lc_opt_table_entry_t rss_option_table[] = {
219         LC_OPT_ENT_ENUM_MASK("dump", "dump phases", &dump_var),
220         LC_OPT_LAST
221 };
222
223 /******************************************************************************
224  *  _          _                    __                  _   _
225  * | |        | |                  / _|                | | (_)
226  * | |__   ___| |_ __   ___ _ __  | |_ _   _ _ __   ___| |_ _  ___  _ __  ___
227  * | '_ \ / _ \ | '_ \ / _ \ '__| |  _| | | | '_ \ / __| __| |/ _ \| '_ \/ __|
228  * | | | |  __/ | |_) |  __/ |    | | | |_| | | | | (__| |_| | (_) | | | \__ \
229  * |_| |_|\___|_| .__/ \___|_|    |_|  \__,_|_| |_|\___|\__|_|\___/|_| |_|___/
230  *            | |
231  *            |_|
232  ******************************************************************************/
233
234 /**
235  * Acquire opcodes if needed and create source and sink nodes.
236  */
237 static void init_rss_special_nodes(ir_graph *irg) {
238         ir_node *block;
239
240         if (op_rss_Source == NULL) {
241                 int iro_rss_base = get_next_ir_opcodes(iro_rss_last);
242                 op_rss_Source = new_ir_op(iro_rss_base + iro_rss_Source, "rss_Source", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
243                 op_rss_Sink   = new_ir_op(iro_rss_base + iro_rss_Sink,   "rss_Sink",   op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
244         }
245         block   = get_irg_start_block(irg);
246         _source = new_ir_node(NULL, irg, block, op_rss_Source, mode_ANY, 0, NULL);
247         _sink   = new_ir_node(NULL, irg, block, op_rss_Sink, mode_ANY, 0, NULL);
248 }
249
250 static int cmp_int(const void *a, const void *b) {
251         const int *i1 = a;
252         const int *i2 = b;
253
254         return QSORT_CMP(*i1, *i2);
255 }
256
257 static int cmp_child_costs(const void *a, const void *b) {
258         const child_t *c1 = a;
259         const child_t *c2 = b;
260
261         return QSORT_CMP(c1->cost, c2->cost);
262 }
263
264 static int cmp_irn_idx(const void *a, const void *b) {
265         const ir_node *n1 = *(ir_node **)a;
266         const ir_node *n2 = *(ir_node **)b;
267
268         return QSORT_CMP(get_irn_idx(n1), get_irn_idx(n2));
269 }
270
271 static int cmp_rss_edges(const void *a, const void *b) {
272         const rss_edge_t *e1 = a;
273         const rss_edge_t *e2 = b;
274
275         return (e1->src != e2->src) || (e1->tgt != e2->tgt);
276 }
277
278 static int bsearch_for_index(int key, int *arr, size_t len, int force) {
279         int left = 0;
280         int right = len;
281
282         while (right >= left) {
283                 int idx = (left + right) / 2;
284
285                 if (key < arr[idx])
286                         right = idx - 1;
287                 else if (key > arr[idx])
288                         left = idx + 1;
289                 else
290                         return idx;
291         }
292
293         if (force)
294                 assert(0 && "Something is wrong, key not found.");
295         return -1;
296 }
297
298 static const ir_node **build_sorted_array_from_list(plist_t *irn_list, struct obstack *obst) {
299         plist_element_t *el;
300         int             i   = 0;
301         int             len = plist_count(irn_list);
302         const ir_node   **arr = (const ir_node**)NEW_ARR_D(ir_node*, obst, len);
303
304         /* copy the list into the array */
305         foreach_plist(irn_list, el) {
306                 arr[i++] = plist_element_get_value(el);
307         }
308
309         /* sort the array by node index */
310         /* HACK cast for MSVC */
311         qsort((void*)arr, len, sizeof(arr[0]), cmp_irn_idx);
312
313         return arr;
314 }
315
316 /*****************************************************
317  *      _      _                       _
318  *     | |    | |                     (_)
319  *   __| | ___| |__  _   _  __ _  __ _ _ _ __   __ _
320  *  / _` |/ _ \ '_ \| | | |/ _` |/ _` | | '_ \ / _` |
321  * | (_| |  __/ |_) | |_| | (_| | (_| | | | | | (_| |
322  *  \__,_|\___|_.__/ \__,_|\__, |\__, |_|_| |_|\__, |
323  *                          __/ | __/ |         __/ |
324  *                         |___/ |___/         |___/
325  *
326  *****************************************************/
327
328 #ifdef DEBUG_libfirm
329 static void dump_nodeset(ir_nodeset_t *ns, const char *prefix) {
330         ir_nodeset_iterator_t iter;
331         ir_node *irn;
332
333         ir_nodeset_iterator_init(&iter, ns);
334         while( (irn = ir_nodeset_iterator_next(&iter)) != NULL ) {
335                 ir_fprintf(stderr, "%s%+F\n", prefix, irn);
336         }
337 }
338 #endif
339
340 static void build_file_name(rss_t *rss, const char *suffix, size_t suf_len, char *buf, size_t len) {
341         const char *irg_name;
342
343         memset(buf, 0, len);
344         irg_name = get_entity_name(get_irg_entity(rss->irg));
345         snprintf(buf, len - suf_len, "%s-%s-block-%ld",
346                 irg_name, arch_register_class_name(rss->cls), get_irn_node_nr(rss->block));
347         strcat(buf, suffix);
348 }
349
350 /* Dumps all collected bipartite components of current irg as vcg. */
351 static void debug_vcg_dump_bipartite(rss_t *rss) {
352         cbc_t *cbc;
353         FILE  *f;
354         char  file_name[256];
355         static const char suffix[] = "-RSS-CBC.vcg";
356
357         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
358         f = fopen(file_name, "w");
359
360         ir_fprintf(f, "graph: { title: \"connected bipartite component graph of %+F\"\n", rss->irg);
361         fprintf(f, "display_edge_labels: no\n");
362         fprintf(f, "layoutalgorithm: mindepth\n");
363         fprintf(f, "manhattan_edges: yes\n\n");
364
365         foreach_pset(rss->cbc_set, cbc) {
366                 ir_nodeset_iterator_t iter;
367                 ir_node    *n;
368                 rss_edge_t *ke;
369
370                 fprintf(f, "graph: { titel: \"cbc %d\" label: \"cbc %d\" status:clustered color:yellow\n", cbc->nr, cbc->nr);
371                 foreach_ir_nodeset(&cbc->parents, n, iter) {
372                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
373                 }
374                 foreach_ir_nodeset(&cbc->children, n, iter) {
375                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
376                 }
377                 foreach_pset(cbc->kill_edges, ke) {
378                         ir_fprintf(f, "edge: { sourcename: \"n%d_%d\" targetname: \"n%d_%d\" }\n",
379                                 get_irn_node_nr(ke->src), cbc->nr, get_irn_node_nr(ke->tgt), cbc->nr);
380                 }
381                 fprintf(f, "}\n\n");
382         }
383         fprintf(f, "}\n");
384         fclose(f);
385 }
386
387 /* Dump the computed killing function as vcg. */
388 static void debug_vcg_dump_kill(rss_t *rss) {
389         FILE            *f;
390         char            file_name[256];
391         plist_element_t *el;
392         static const char suffix[] = "-RSS-KILL.vcg";
393
394         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
395         f = fopen(file_name, "w");
396
397         ir_fprintf(f, "graph: { title: \"computed kill graph of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
398         fprintf(f, "display_edge_labels: no\n");
399         fprintf(f, "layoutalgorithm: mindepth\n");
400         fprintf(f, "manhattan_edges: yes\n\n");
401
402         {
403                 /* reset dump_flag */
404                 ir_node *irn;
405                 foreach_phase_irn(&rss->ph, irn) {
406                         rss_irn_t *node = get_rss_irn(rss, irn);
407                         node->dumped = 0;
408                 }
409         }
410
411         /* dump all nodes and their killers */
412         foreach_plist(rss->nodes, el) {
413                 ir_node   *irn     = plist_element_get_value(el);
414                 rss_irn_t *rirn    = get_rss_irn(rss, irn);
415                 rss_irn_t *pk_rirn = get_rss_irn(rss, rirn->killer);
416
417                 if (! rirn->dumped) {
418                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
419                         rirn->dumped = 1;
420                 }
421
422                 if (! pk_rirn->dumped) {
423                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(rirn->killer), rirn->killer);
424                         pk_rirn->dumped = 1;
425                 }
426
427                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
428                         get_irn_node_nr(rirn->killer), get_irn_node_nr(irn));
429         }
430
431         fprintf(f, "}\n");
432         fclose(f);
433 }
434
435 /* Dumps the potential killing DAG (PKG) as vcg. */
436 static void debug_vcg_dump_pkg(rss_t *rss, ir_nodeset_t *max_ac, int iteration)
437 {
438         FILE              *f;
439         char              file_name[256];
440         char              suffix[32];
441         static const char suffix1[] = "-RSS-PKG.vcg";
442         static const char suffix2[] = "-RSS-PKG-MAXAC.vcg";
443         plist_element_t   *el;
444
445         if (! max_ac) {
446                 snprintf(suffix, sizeof(suffix), "%s", suffix1);
447         }
448         else {
449                 snprintf(suffix, sizeof(suffix), "-%02d%s", iteration, suffix2);
450         }
451
452         build_file_name(rss, suffix, strlen(suffix) + 1, file_name, sizeof(file_name));
453         f = fopen(file_name, "w");
454
455         ir_fprintf(f, "graph: { title: \"potential killing DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
456         fprintf(f, "display_edge_labels: no\n");
457         fprintf(f, "layoutalgorithm: mindepth\n");
458         fprintf(f, "manhattan_edges: yes\n\n");
459
460         {
461                 /* reset dump_flag */
462                 ir_node *irn;
463                 foreach_phase_irn(&rss->ph, irn) {
464                         rss_irn_t *node = get_rss_irn(rss, irn);
465                         node->dumped = 0;
466                 }
467         }
468
469         foreach_plist(rss->nodes, el) {
470                 ir_node   *irn  = plist_element_get_value(el);
471                 rss_irn_t *rirn = get_rss_irn(rss, irn);
472                 char      *c1   = "";
473                 plist_element_t *k_el;
474
475                 /* dump selected saturating values in yellow */
476                 if (max_ac && ir_nodeset_contains(max_ac, irn))
477                         c1 = "color:yellow";
478
479                 if (! rirn->dumped) {
480                         if (rirn->chain)
481                                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(irn), irn, rirn->chain->nr, c1);
482                         else
483                                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(irn), irn, c1);
484                         rirn->dumped = 1;
485                 }
486
487                 foreach_plist(rirn->pkiller_list, k_el) {
488                         ir_node   *pkiller = plist_element_get_value(k_el);
489                         rss_irn_t *pk_rirn = get_rss_irn(rss, pkiller);
490                         char      *c2      = "";
491
492                         if (max_ac && ir_nodeset_contains(max_ac, pkiller))
493                                 c2 = "color:yellow";
494
495                         if (! pk_rirn->dumped) {
496                                 if (pk_rirn->chain)
497                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(pkiller), pkiller, pk_rirn->chain->nr, c2);
498                                 else
499                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(pkiller), pkiller, c2);
500                                 pk_rirn->dumped = 1;
501                         }
502                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
503                                 get_irn_node_nr(pkiller), get_irn_node_nr(irn));
504                 }
505         }
506         fprintf(f, "}\n");
507         fclose(f);
508 }
509
510 /* Dumps the disjoint value DAG (DVG) as vcg. */
511 static void debug_vcg_dump_dvg(rss_t *rss, dvg_t *dvg) {
512         static const char suffix[] = "-RSS-DVG.vcg";
513         FILE       *f;
514         char       file_name[256];
515         ir_nodeset_iterator_t iter;
516         ir_node    *irn;
517         rss_edge_t *edge;
518
519         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
520         f = fopen(file_name, "w");
521
522         ir_fprintf(f, "graph: { title: \"disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
523         fprintf(f, "display_edge_labels: no\n");
524         fprintf(f, "layoutalgorithm: mindepth\n");
525         fprintf(f, "manhattan_edges: yes\n\n");
526
527         /* dump all nodes */
528         foreach_ir_nodeset(&dvg->nodes, irn, iter) {
529                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
530         }
531
532         /* dump all edges */
533         foreach_pset(dvg->edges, edge) {
534                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
535                         get_irn_node_nr(edge->src), get_irn_node_nr(edge->tgt));
536         }
537
538         fprintf(f, "}\n");
539         fclose(f);
540 }
541
542 #if 0
543 /* Dumps the PKG(DVG). */
544 static void debug_vcg_dump_dvg_pkiller(rss_t *rss, dvg_t *dvg) {
545         static const char suffix[] = "-RSS-DVG-PKG.vcg";
546         FILE       *f;
547         char       file_name[256];
548         ir_nodeset_iterator_t iter;
549         ir_node    *irn;
550
551         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
552         f = fopen(file_name, "w");
553
554         ir_fprintf(f, "graph: { title: \"PKG of disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
555         fprintf(f, "display_edge_labels: no\n");
556         fprintf(f, "layoutalgorithm: mindepth\n");
557         fprintf(f, "manhattan_edges: yes\n\n");
558
559         /* dump all nodes */
560         foreach_ir_nodeset(&dvg->nodes, irn, iter) {
561                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
562         }
563
564         /* dump all edges */
565         foreach_ir_nodeset(&dvg->nodes, irn, iter) {
566                 rss_irn_t       *node = get_rss_irn(rss, irn);
567                 plist_element_t *el;
568
569                 foreach_plist(node->dvg_pkiller_list, el) {
570                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
571                                 get_irn_node_nr(plist_element_get_value(el)), get_irn_node_nr(irn));
572                 }
573         }
574
575         fprintf(f, "}\n");
576         fclose(f);
577 }
578 #endif /* if 0 */
579
580 /**
581  * In case there is no rss information for irn, initialize it.
582  */
583 static void *init_rss_irn(ir_phase *ph, const ir_node *irn, void *old) {
584         rss_irn_t *res = old ? old : phase_alloc(ph, sizeof(res[0]));
585
586         res->descendant_list = plist_obstack_new(phase_obst(ph));
587         res->descendants     = NULL;
588
589         res->consumer_list   = plist_obstack_new(phase_obst(ph));
590         res->consumer        = NULL;
591
592         res->pkiller_list    = plist_obstack_new(phase_obst(ph));
593
594         res->parent_list     = plist_obstack_new(phase_obst(ph));
595
596         res->killer          = NULL;
597         res->irn             = irn;
598         res->chain           = NULL;
599
600         res->kill_count      = 0;
601         res->desc_walk       = 0;
602         res->live_out        = 0;
603         res->visited         = 0;
604         res->handled         = 0;
605         res->dumped          = 0;
606         res->havecons        = 0;
607
608         return res;
609 }
610
611 /**
612  * Collect all nodes data dependent on current node.
613  */
614 static void collect_descendants(rss_t *rss, rss_irn_t *rirn, ir_node *irn, int *got_sink, unsigned cur_desc_walk) {
615         const ir_edge_t *edge;
616         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
617         ir_node         *block    = rss->block;
618         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
619         int             i;
620
621         if (cur_node->desc_walk >= cur_desc_walk)
622                 return;
623         cur_node->desc_walk = cur_desc_walk;
624
625         /* Stop at Barriers */
626         if (be_is_Barrier(irn))
627                 return;
628
629         /* loop over normal and dependency edges */
630         for (i = 0; i < 2; ++i) {
631                 foreach_out_edge_kind(irn, edge, ekind[i]) {
632                         ir_node *user = get_edge_src_irn(edge);
633
634                         /* skip ignore nodes as they do not really contribute to register pressure */
635                         if (arch_irn_is_ignore(user))
636                                 continue;
637
638                         /*
639                                 (a) mode_X means Jump            -> out_edge
640                                 (b) Phi as user of a normal node -> out-edge
641                         */
642                         if (get_irn_mode(user) == mode_X || is_Phi(user)) {
643                                 if (! *got_sink)
644                                         goto force_sink;
645                                 else
646                                         continue;
647                         }
648
649                         if (is_Proj(user)) {
650                                 //if (arch_get_irn_reg_class_out(user) == rss->cls)
651                                         collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
652                         }
653                         else {
654                                 /* check if user lives in block and is not a control flow node */
655                                 if (get_nodes_block(user) == block) {
656                                         if (! plist_has_value(rirn->descendant_list, user)) {
657                                                 plist_insert_back(rirn->descendant_list, user);
658                                                 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", user));
659                                         }
660                                         collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
661                                 }
662                                 else if (! *got_sink) {
663 force_sink:
664                                         /* user lives out of block: add sink as descendant if not already done */
665                                         plist_insert_back(rirn->descendant_list, _sink);
666                                         *got_sink = 1;
667                                         DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", _sink));
668                                 }
669                         }
670                 }
671         }
672 }
673
674 /**
675  * Handles a single consumer.
676  */
677 static void collect_single_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *consumer, int *got_sink) {
678         ir_node *block = rss->block;
679
680         assert(! is_Proj(consumer) && "Cannot handle Projs");
681
682         if (! is_Phi(consumer) && ! is_Block(consumer) && get_nodes_block(consumer) == block) {
683                 if (!arch_irn_is_ignore(consumer) &&
684                                 !plist_has_value(rss_irn->consumer_list, consumer)) {
685                         plist_insert_back(rss_irn->consumer_list, consumer);
686                         DBG((rss->dbg, LEVEL_2, "\t\tconsumer %+F\n", consumer));
687                 }
688         }
689         else {
690                 rss_irn->live_out = 1;
691                 DBG((rss->dbg, LEVEL_2, "\t\tlive out %+F", consumer));
692                 if (! *got_sink) {
693                         plist_insert_back(rss_irn->consumer_list, _sink);
694                         *got_sink = 1;
695                         DB((rss->dbg, LEVEL_2, ", %+F added instead", _sink));
696                 }
697                 DB((rss->dbg, LEVEL_2, "\n"));
698         }
699 }
700
701 /**
702  * Collect all nodes consuming the value(s) produced by current node.
703  */
704 static void collect_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *irn, int *got_sink) {
705         const ir_edge_t *edge;
706         int             i;
707         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
708         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
709
710         if (cur_node->havecons)
711                 return;
712         cur_node->havecons = 1;
713
714         for (i = 0; i < 2; ++i) {
715                 foreach_out_edge_kind(irn, edge, ekind[i]) {
716                         ir_node *consumer = get_edge_src_irn(edge);
717
718                         if (is_Proj(consumer)) {
719                                 //if (arch_get_irn_reg_class_out(consumer) == rss->cls)
720                                         collect_consumer(rss, rss_irn, consumer, got_sink);
721                         }
722                         else
723                                 collect_single_consumer(rss, rss_irn, consumer, got_sink);
724                 }
725         }
726 }
727
728 /**
729  * Collects all consumer and descendant of a irn.
730  */
731 static void collect_node_info(rss_t *rss, ir_node *irn) {
732         static unsigned cur_desc_walk = 0;
733         rss_irn_t       *rss_irn      = get_rss_irn(rss, irn);
734         int             got_sink;
735
736         assert(! is_Proj(irn) && "Cannot handle Projs.");
737
738         /* check if node info is already available */
739         if (rss_irn->handled)
740                 return;
741                 //phase_reinit_single_irn_data(&rss->ph, irn);
742
743         DBG((rss->dbg, LEVEL_1, "\tcomputing consumers of %+F:\n", irn));
744
745         /* collect all consumer */
746         got_sink = 0;
747         collect_consumer(rss, rss_irn, irn, &got_sink);
748
749         /* build sorted consumer array */
750         rss_irn->consumer = build_sorted_array_from_list(rss_irn->consumer_list, phase_obst(&rss->ph));
751
752         DBG((rss->dbg, LEVEL_1, "\tcompute descendants of %+F:\n", irn));
753
754         /* collect descendants */
755         got_sink = 0;
756         collect_descendants(rss, rss_irn, irn, &got_sink, ++cur_desc_walk);
757
758         /* build sorted descendant array */
759         rss_irn->descendants = build_sorted_array_from_list(rss_irn->descendant_list, phase_obst(&rss->ph));
760
761         rss_irn->handled = 1;
762 }
763
764 /**
765  * Checks if v is a potential killer of u.
766  * v is in pkill(u) iff descendants(v) cut consumer(u) is v
767  *
768  * @param rss   The rss object
769  * @param v      The node to check for killer
770  * @param u      The potentially killed value
771  * @return 1 if v is in pkill(u), 0 otherwise
772  */
773 static int is_potential_killer(rss_t *rss, rss_irn_t *v, rss_irn_t *u) {
774         plist_t *list;
775         const ir_node **arr;
776         plist_element_t *el;
777         (void) rss;
778
779         assert(is_Sink(v->irn) || ((plist_count(v->descendant_list) > 0 && v->descendants) || 1));
780         assert(is_Sink(u->irn) || ((plist_count(u->consumer_list)   > 0 && u->consumer)    || 1));
781
782         /* as we loop over the list: loop over the shorter one */
783         if (plist_count(v->descendant_list) > plist_count(u->consumer_list)) {
784                 list = u->consumer_list;
785                 arr  = v->descendants;
786         }
787         else {
788                 list = v->descendant_list;
789                 arr  = u->consumer;
790         }
791
792         /* for each list element: try to find element in array */
793         foreach_plist(list, el) {
794                 ir_node *irn   = plist_element_get_value(el);
795                 ir_node *match = BSEARCH_IRN_ARR(irn, arr);
796
797                 if (match && match != irn)
798                         return 0;
799         }
800
801         return 1;
802 }
803
804 /**
805  * Update descendants, consumer and pkiller list for the given irn.
806  */
807 static void update_node_info(rss_t *rss, ir_node *irn, ir_node *pk_irn) {
808         rss_irn_t *node    = get_rss_irn(rss, irn);
809         rss_irn_t *pkiller = get_rss_irn(rss, pk_irn);
810
811         /* add consumer and rebuild the consumer array */
812         if (! plist_has_value(node->consumer_list, pk_irn)) {
813                 plist_insert_back(node->consumer_list, pk_irn);
814                 node->consumer = build_sorted_array_from_list(node->consumer_list, phase_obst(&rss->ph));
815         }
816
817         /* add pk_irn as descendant, add it's descendants to irn's and rebuild array */
818         if (! plist_has_value(node->descendant_list, pk_irn)) {
819                 plist_element_t *el;
820
821                 plist_insert_back(node->descendant_list, pk_irn);
822
823                 foreach_plist(pkiller->descendant_list, el) {
824                         ir_node *desc = plist_element_get_value(el);
825
826                         if (! plist_has_value(node->descendant_list, desc))
827                                 plist_insert_back(node->descendant_list, desc);
828                 }
829
830                 node->descendants = build_sorted_array_from_list(node->descendant_list, phase_obst(&rss->ph));
831         }
832
833 }
834
835 /**
836  * Compute the potential killing set PK.
837  */
838 static void compute_pkill_set(rss_t *rss) {
839         plist_element_t *u_el, *v_el;
840
841         foreach_plist(rss->nodes, u_el) {
842                 ir_node   *u_irn = plist_element_get_value(u_el);
843                 rss_irn_t *u     = get_rss_irn(rss, u_irn);
844
845                 DBG((rss->dbg, LEVEL_1, "\tcomputing potential killers of %+F:\n", u_irn));
846
847                 /* check each consumer if it is a potential killer  */
848                 foreach_plist(u->consumer_list, v_el) {
849                         ir_node   *v_irn = plist_element_get_value(v_el);
850                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
851
852                         /* check, if v is a potential killer of u */
853                         if (is_potential_killer(rss, v, u)) {
854                                 if (! plist_has_value(u->pkiller_list, v_irn))
855                                         plist_insert_back(u->pkiller_list, v_irn);
856                                 v->kill_count++;
857                                 DBG((rss->dbg, LEVEL_2, "\t\tpotential killer %+F\n", v_irn));
858                         }
859                 }
860
861                 u->killer = _sink;
862         }
863
864         if (rss->opts->dump_flags & RSS_DUMP_PKG)
865                 debug_vcg_dump_pkg(rss, NULL, 0);
866 }
867
868 /**
869  * Build set of killing edges (from values to their potential killers)
870  */
871 static void build_kill_edges(rss_t *rss, pset *epk) {
872         plist_element_t *el, *k_el;
873
874         foreach_plist(rss->nodes, el) {
875                 ir_node    *irn  = plist_element_get_value(el);
876                 rss_irn_t *rirn = get_rss_irn(rss, irn);
877
878                 DBG((rss->dbg, LEVEL_3, "\t\tbuilding kill edges for %+F:\n", irn));
879
880                 foreach_plist(rirn->pkiller_list, k_el) {
881                         ir_node    *pkiller = plist_element_get_value(k_el);
882                         rss_edge_t *ke      = obstack_alloc(phase_obst(&rss->ph), sizeof(*ke));
883
884                         ke->src  = irn;
885                         ke->tgt  = pkiller;
886                         ke->next = NULL;
887
888                         DBG((rss->dbg, LEVEL_3, "\t\t\ttarget %+F\n", pkiller));
889
890                         pset_insert(epk, ke, HASH_RSS_EDGE(ke));
891                 }
892         }
893 }
894
895 #ifdef DEBUG_libfirm
896 /* print the given cbc for debugging purpose */
897 static void debug_print_cbc(firm_dbg_module_t *mod, cbc_t *cbc) {
898         ir_nodeset_iterator_t iter;
899         ir_node    *n;
900         rss_edge_t *ke;
901
902         DBG((mod, LEVEL_3, "\t\tS = set of parents:\n"));
903         foreach_ir_nodeset(&cbc->parents, n, iter) {
904                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
905         }
906         DBG((mod, LEVEL_3, "\t\tT = set of children:\n"));
907         foreach_ir_nodeset(&cbc->children, n, iter) {
908                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
909         }
910         DBG((mod, LEVEL_3, "\t\tE = Edges from producers to consumers\n"));
911         foreach_pset(cbc->kill_edges, ke) {
912                 DBG((mod, LEVEL_3, "\t\t\t%+F -> %+F\n", ke->src, ke->tgt));
913         }
914 }
915 #endif
916
917 /**
918  * Construct the bipartite decomposition.
919  * Sid-Ahmed-Ali Touati, Phd Thesis
920  * Register Pressure in Instruction Level Parallelism, p. 71
921  */
922 static void compute_bipartite_decomposition(rss_t *rss) {
923         pset *epk    = new_pset(cmp_rss_edges, 10);
924         int  cur_num = 0;
925
926         plist_element_t *el;
927
928         DBG((rss->dbg, LEVEL_1, "\tcomputing bipartite decomposition:\n"));
929
930         build_kill_edges(rss, epk);
931
932         foreach_plist(rss->nodes, el) {
933                 ir_node   *u_irn   = plist_element_get_value(el);
934                 rss_irn_t *u       = get_rss_irn(rss, u_irn);
935                 int       p_change = 1;
936                 int       c_change = 1;
937                 int       vrfy_ok  = 1;
938
939                 cbc_t           *cbc;
940                 plist_element_t *el2;
941                 rss_edge_t      *k_edge;
942                 rss_edge_t      *kedge_root = NULL;
943                 ir_node         *t_irn, *s_irn;
944                 ir_nodeset_iterator_t iter;
945
946                 if (u->visited || u_irn == _sink)
947                         continue;
948
949                 DBG((rss->dbg, LEVEL_2, "\t\t%+F choosen:\n", u_irn));
950
951                 cbc     = obstack_alloc(phase_obst(&rss->ph), sizeof(*cbc));
952                 cbc->nr = cur_num++;
953
954                 /* initialize S_cb */
955                 ir_nodeset_init_size(&cbc->parents, 5);
956                 ir_nodeset_insert(&cbc->parents, u_irn);
957                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (init)\n", u_irn));
958
959                 /* E_cb = empty */
960                 cbc->kill_edges = new_pset(cmp_rss_edges, 5);
961
962                 /* each parent gets killed by at least one value from children */
963
964                 /* T_cb = PK_successors(u) */
965                 ir_nodeset_init_size(&cbc->children, 5);
966                 foreach_plist(u->pkiller_list, el2) {
967                         ir_nodeset_insert(&cbc->children, plist_element_get_value(el2));
968                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (init)\n", plist_element_get_value(el2)));
969                 }
970
971                 /*
972                         Now: insert the parents of all children into the parent set
973                         and insert the children of all parents into the children set
974                         until the sets don't change any more
975                 */
976                 while (p_change || c_change) {
977                         ir_nodeset_iterator_t iter;
978                         p_change = c_change = 0;
979
980                         /* accumulate parents */
981                         foreach_ir_nodeset(&cbc->children, t_irn, iter) {
982                                 foreach_pset(epk, k_edge) {
983                                         ir_node *val = k_edge->src;
984
985                                         if (k_edge->tgt == t_irn && ! ir_nodeset_contains(&cbc->parents, val)) {
986                                                 ir_nodeset_insert(&cbc->parents, val);
987                                                 p_change = 1;
988                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (killed by %+F)\n", val, t_irn));
989                                         }
990                                 }
991                         }
992
993                         /* accumulate children */
994                         foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
995                                 foreach_pset(epk, k_edge) {
996                                         ir_node *val = k_edge->tgt;
997
998                                         if (k_edge->src == s_irn  && ! ir_nodeset_contains(&cbc->children, val)) {
999                                                 ir_nodeset_insert(&cbc->children, val);
1000                                                 c_change = 1;
1001                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (kills %+F)\n", val, s_irn));
1002                                         }
1003                                 }
1004                         }
1005                 }
1006
1007                 /* mark all parent values as visited */
1008                 foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
1009                         rss_irn_t *s = get_rss_irn(rss, s_irn);
1010                         s->visited = 1;
1011                         /* assure bipartite property */
1012 #if 0
1013                         if (ir_nodeset_contains(&cbc->children, s_irn)) {
1014                                 ir_nodeset_remove(&cbc->children, s_irn);
1015                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F removed from to children\n", s_irn));
1016                         }
1017 #endif
1018                 }
1019
1020                 /* update edges */
1021                 foreach_pset(epk, k_edge) {
1022                         if (ir_nodeset_contains(&cbc->parents, k_edge->src) &&
1023                             ir_nodeset_contains(&cbc->children, k_edge->tgt)) {
1024                                 pset_insert(cbc->kill_edges, k_edge, HASH_RSS_EDGE(k_edge));
1025                                 /*
1026                                         Link all k_edges which are about to be removed together.
1027                                         Beware: pset_remove kills the iterator.
1028                                 */
1029                                 k_edge->next = kedge_root;
1030                                 kedge_root   = k_edge;
1031                         }
1032                 }
1033
1034                 /* remove all linked k_edges */
1035                 for (; kedge_root; kedge_root = kedge_root->next) {
1036                         pset_remove(epk, kedge_root, HASH_RSS_EDGE(kedge_root));
1037                 }
1038
1039                 /* verify the cbc */
1040                 foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
1041                         int is_killed = 0;
1042
1043                         foreach_pset(cbc->kill_edges, k_edge) {
1044                                 if (k_edge->src == s_irn) {
1045                                         is_killed = 1;
1046                                         pset_break(cbc->kill_edges);
1047                                         break;
1048                                 }
1049                         }
1050
1051                         if (! is_killed) {
1052                                 vrfy_ok = 0;
1053                                 ir_fprintf(stderr, "Warning: parent %+F is not killed in current cbc\n", s_irn);
1054                         }
1055                 }
1056                 assert(vrfy_ok && "Verification of CBC failed");
1057
1058                 /* add the connected bipartite component */
1059                 if (ir_nodeset_size(&cbc->parents) > 0 &&
1060                     ir_nodeset_size(&cbc->children) > 0 &&
1061                         pset_count(cbc->kill_edges) > 0)
1062                         pset_insert(rss->cbc_set, cbc, (unsigned)cbc->nr);
1063                 DBG((rss->dbg, LEVEL_2, "\tbipartite component %d inserted:\n", cbc->nr));
1064                 DEBUG_ONLY(
1065                         if (firm_dbg_get_mask(rss->dbg) & LEVEL_2)
1066                                 debug_print_cbc(rss->dbg, cbc);
1067                 );
1068         }
1069
1070         if (rss->opts->dump_flags & RSS_DUMP_CBC)
1071                 debug_vcg_dump_bipartite(rss);
1072
1073         del_pset(epk);
1074 }
1075
1076 /**
1077  * Select the child with the maximum cost.
1078  */
1079 static child_t *select_child_max_cost(rss_t *rss, ir_nodeset_t *x, ir_nodeset_t *y, child_t *t, cbc_t *cbc) {
1080         ir_node *child;
1081         ir_nodeset_iterator_t iter;
1082         float   max_cost = -1.0f;
1083
1084         DBG((rss->dbg, LEVEL_2, "\t\tcomputing children costs:\n"));
1085
1086         foreach_ir_nodeset(&cbc->children, child, iter) {
1087                 rss_irn_t  *r_child             = get_rss_irn(rss, child);
1088                 int         num_unkilled_parents = 0;
1089                 int         num_descendants;
1090                 rss_edge_t *k_edge;
1091                 float       cost;
1092
1093                 /* get the number of unkilled parents */
1094                 foreach_pset(cbc->kill_edges, k_edge) {
1095                         if (k_edge->tgt == child && ir_nodeset_contains(x, k_edge->src))
1096                                 ++num_unkilled_parents;
1097                 }
1098
1099                 cost = (float)num_unkilled_parents;
1100
1101                 num_descendants = plist_count(r_child->descendant_list) + ir_nodeset_size(y);
1102
1103                 if (num_descendants > 0)
1104                         cost /= (float)num_descendants;
1105
1106                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F, #desc %d, cost %.3f\n", child, num_descendants, cost));
1107
1108                 if (cost > max_cost) {
1109                         t->irn   = child;
1110                         t->cost  = cost;
1111                         max_cost = cost;
1112                 }
1113         }
1114
1115         return t;
1116 }
1117
1118 /**
1119  * Remove all parents from x which are killed by t_irn.
1120  */
1121 static void remove_covered_parents(rss_t *rss, ir_nodeset_t *x, ir_node *t_irn, cbc_t *cbc) {
1122         rss_irn_t  *t = get_rss_irn(rss, t_irn);
1123         rss_edge_t *k_edge;
1124
1125         DBG((rss->dbg, LEVEL_2, "\t\tremoving parents covered by %+F:\n", t_irn));
1126
1127         foreach_pset(cbc->kill_edges, k_edge) {
1128                 if (k_edge->tgt == t_irn && ir_nodeset_contains(x, k_edge->src)) {
1129                         ir_nodeset_remove(x, k_edge->src);
1130                         plist_insert_back(t->parent_list, k_edge->src);
1131                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", k_edge->src));
1132                 }
1133         }
1134 }
1135
1136 static void update_cumulated_descendent_values(rss_t *rss, ir_nodeset_t *y, ir_node *t_irn) {
1137         rss_irn_t *t = get_rss_irn(rss, t_irn);
1138         plist_element_t *el;
1139
1140         DBG((rss->dbg, LEVEL_2, "\t\tupdating cumulated descendant value of %+F:\n", t_irn));
1141
1142         foreach_plist(t->descendant_list, el) {
1143                 ir_nodeset_insert(y, plist_element_get_value(el));
1144                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", plist_element_get_value(el)));
1145         }
1146 }
1147
1148 /**
1149  * Greedy-k: a heuristics for the MMA problem
1150  */
1151 static void compute_killing_function(rss_t *rss) {
1152         cbc_t *cbc;
1153         struct obstack obst;
1154
1155         obstack_init(&obst);
1156
1157         rss->cbc_set = pset_new_ptr(5);
1158         compute_bipartite_decomposition(rss);
1159
1160         /* for all bipartite components do: */
1161         foreach_pset(rss->cbc_set, cbc) {
1162                 ir_node *p;
1163                 ir_nodeset_t x;
1164                 ir_nodeset_t y;
1165                 ir_nodeset_iterator_t iter;
1166                 child_t **sks    = NEW_ARR_F(child_t *, 20);
1167                 int     cur_len  = 0;
1168                 int     cur_size = 20;
1169                 int     i;
1170
1171                 ir_nodeset_init_size(&x, 10);
1172                 ir_nodeset_init_size(&y, 10);
1173
1174                 DBG((rss->dbg, LEVEL_1, "\tcomputing SKS for cbc %d:\n", cbc->nr));
1175                 DBG((rss->dbg, LEVEL_2, "\t\tinitializing parents X:\n"));
1176
1177                 /* X = S_cb (all parents are initially uncovered) */
1178                 foreach_ir_nodeset(&cbc->parents, p, iter) {
1179                         ir_nodeset_insert(&x, p);
1180                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", p));
1181                 }
1182
1183                 /* while X not empty */
1184                 while (ir_nodeset_size(&x) > 0) {
1185                         child_t *t = obstack_alloc(&obst, sizeof(*t));
1186                         memset(t, 0, sizeof(*t));
1187
1188                         t = select_child_max_cost(rss, &x, &y, t, cbc);
1189
1190                         if (cur_len >= cur_size) {
1191                                 ARR_EXTO(child_t *, sks, cur_size * 2);
1192                                 cur_size *= 2;
1193                         }
1194
1195                         DBG((rss->dbg, LEVEL_2, "\t\tinsert child %+F (%.3f) into SKS at pos %d\n", t->irn, t->cost, cur_len));
1196
1197                         sks[cur_len++] = t;
1198                         remove_covered_parents(rss, &x, t->irn, cbc);
1199                         update_cumulated_descendent_values(rss, &y, t->irn);
1200                 }
1201
1202                 ARR_SHRINKLEN(sks, cur_len);
1203
1204                 /* sort SKS in increasing cost order */
1205                 qsort(sks, cur_len, sizeof(sks[0]), cmp_child_costs);
1206
1207                 DBG((rss->dbg, LEVEL_2, "\tprocessing SKS for cbc %d:\n", cbc->nr));
1208
1209                 /* build killing function */
1210                 for (i = cur_len - 1; i >= 0; --i) { /* loop over sks in decreasing cost order */
1211                         child_t         *t = sks[i];
1212                         rss_irn_t       *rt = get_rss_irn(rss, t->irn);
1213                         plist_element_t *p_el;
1214
1215                         DBG((rss->dbg, LEVEL_3, "\t\t\tkiller %+F (%.3f):\n", t->irn, t->cost));
1216
1217                         /* kill all unkilled parents of t */
1218                         foreach_plist(rt->parent_list, p_el) {
1219                                 ir_node    *par = plist_element_get_value(p_el);
1220                                 rss_irn_t *rpar = get_rss_irn(rss, par);
1221
1222                                 if (is_Sink(rpar->killer)) {
1223                                         rpar->killer = t->irn;
1224                                         DBG((rss->dbg, LEVEL_2, "\t\tkill %+F\n", rpar->irn));
1225                                 }
1226                                 else {
1227                                         DBG((rss->dbg, LEVEL_3, "\t\t\tkeeping %+F as killer for %+F\n", rpar->killer, rpar->irn));
1228                                 }
1229                         }
1230                 }
1231
1232                 ir_nodeset_destroy(&x);
1233                 ir_nodeset_destroy(&y);
1234                 DEL_ARR_F(sks);
1235         }
1236
1237         if (rss->opts->dump_flags & RSS_DUMP_KILL)
1238                 debug_vcg_dump_kill(rss);
1239
1240         del_pset(rss->cbc_set);
1241         obstack_free(&obst, NULL);
1242 }
1243
1244 /**
1245  * Adds the edge src -> tgt to the dvg. Checks if reverse edge is already there (asserts).
1246  */
1247 static inline void add_dvg_edge(rss_t *rss, dvg_t *dvg, const ir_node *src, const ir_node *tgt, int have_source) {
1248         rss_edge_t *dvg_edge;
1249         rss_edge_t key;
1250
1251         if (! have_source)
1252                 ir_nodeset_insert(&dvg->nodes, (ir_node *) src);
1253         else
1254                 assert(ir_nodeset_contains(&dvg->nodes, src) && "Wrong assumption");
1255
1256         ir_nodeset_insert(&dvg->nodes, (ir_node *) tgt);
1257
1258         key.src = (ir_node *) tgt;
1259         key.tgt = (ir_node *) src;
1260         assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1261
1262         key.src = (ir_node *) src;
1263         key.tgt = (ir_node *) tgt;
1264         if (NULL != pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1265                 /* add the edge to the DVG */
1266                 dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1267
1268                 dvg_edge->src  = (ir_node *) src;
1269                 dvg_edge->tgt  = (ir_node *) tgt;
1270                 dvg_edge->next = NULL;
1271
1272                 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", src, tgt));
1273                 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1274         }
1275 }
1276
1277 /**
1278  * Computes the disjoint value DAG (DVG).
1279  * BEWARE: It is not made explicitly clear in the Touati paper,
1280  *         but the DVG is meant to be build from the KILLING DAG
1281  */
1282 static void compute_dvg(rss_t *rss, dvg_t *dvg) {
1283         plist_element_t *el;
1284
1285         DBG((rss->dbg, LEVEL_1, "\tcomputing DVG:\n"));
1286
1287         foreach_plist(rss->nodes, el) {
1288                 ir_node    *u_irn    = plist_element_get_value(el);
1289                 rss_irn_t  *u        = get_rss_irn(rss, u_irn);
1290                 rss_irn_t  *u_killer = get_rss_irn(rss, u->killer);
1291                 int        i;
1292
1293                 /* TODO: omit nodes only having sink as pkiller and killing no one */
1294
1295                 /* add an edge to killer */
1296                 add_dvg_edge(rss, dvg, u_irn, u->killer, 0);
1297
1298                 if (is_Sink(u->killer))
1299                         continue;
1300
1301                 /* We add an edge to every killer, from where we could be reached. */
1302                 for (i = ARR_LEN_SAFE(u_killer->descendants) - 1; i >= 0; --i) {
1303                         add_dvg_edge(rss, dvg, u_irn, u_killer->descendants[i], 1);
1304                 }
1305
1306 #if 0
1307
1308                 foreach_plist(rss->nodes, el2) {
1309                         ir_node *v_irn = plist_element_get_value(el2);
1310
1311                         /*
1312                                 There is an edge (u, v) in the DVG iff v is a descendant of the killer(u).
1313                         */
1314                         if (BSEARCH_IRN_ARR(v_irn, u_kill->descendants)) {
1315                                 rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1316                                 rss_edge_t key;
1317
1318                                 /* insert the user into the DVG and append it to the user list of u */
1319                                 ir_nodeset_insert(&dvg->nodes, v_irn);
1320                                 if (! plist_has_value(u->dvg_user_list, v_irn))
1321                                         plist_insert_back(u->dvg_user_list, v_irn);
1322
1323                                 dvg_edge->src  = u_irn;
1324                                 dvg_edge->tgt  = v_irn;
1325                                 dvg_edge->next = NULL;
1326
1327                                 key.src = v_irn;
1328                                 key.tgt = u_irn;
1329
1330                                 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1331
1332                                 /* add the edge to the DVG */
1333                                 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", u_irn, v_irn));
1334                                 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1335                         }
1336                 }
1337 #endif /* if 0 */
1338         }
1339
1340         if (rss->opts->dump_flags & RSS_DUMP_DVG)
1341                 debug_vcg_dump_dvg(rss, dvg);
1342 }
1343
1344 /**
1345  * Updates the dvg structure when a serialization edge from src -> tgt is added.
1346  */
1347 static void update_dvg(rss_t *rss, dvg_t *dvg, rss_irn_t *src, rss_irn_t *tgt) {
1348         int i, j, idx;
1349         rss_edge_t *edge;
1350         rss_edge_t **arr = ALLOCAN(rss_edge_t*, pset_count(dvg->edges));
1351
1352         /*
1353                 Add an edge from serialization target to serialization src:
1354                 src cannot be alive with target
1355         */
1356         add_dvg_edge(rss, dvg, tgt->irn, src->irn, 0);
1357
1358         /* Add edges to src's descendants as well, they also getting serialized. */
1359         for (i = ARR_LEN_SAFE(src->descendants) - 1; i >= 0; --i) {
1360                 add_dvg_edge(rss, dvg, tgt->irn, src->descendants[i], 1);
1361         }
1362
1363         /* We also have to add edges from targets predecessors in dvg */
1364         idx = 0;
1365         /* We cannot insert elements into set over which we iterate ... */
1366         foreach_pset(dvg->edges, edge) {
1367                 if (edge->tgt == tgt->irn) {
1368                         arr[idx++] = edge;
1369                 }
1370         }
1371
1372         for (i = 0; i < idx; ++i) {
1373                 edge = arr[i];
1374                 add_dvg_edge(rss, dvg, edge->src, src->irn, 1);
1375                 for (j = ARR_LEN_SAFE(src->descendants) - 1; j >= 0; --j) {
1376                         add_dvg_edge(rss, dvg, edge->src, src->descendants[j], 1);
1377                 }
1378         }
1379 }
1380
1381 #if 0
1382 /**
1383  * Accumulate all descendants for root into list.
1384  */
1385 static void accumulate_dvg_descendant_values(rss_t *rss, rss_irn_t *root, plist_t *list) {
1386         if (plist_count(root->dvg_user_list) > 0) {
1387                 plist_element_t *el;
1388
1389                 foreach_plist(root->dvg_user_list, el) {
1390                         ir_node   *v_irn = plist_element_get_value(el);
1391                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
1392
1393                         /* add v as descendant */
1394                         if (! plist_has_value(list, v_irn)) {
1395                                 plist_insert_back(list, v_irn);
1396                                 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG descendant %+F\n", v_irn));
1397                         }
1398
1399                         /* accumulate v's descendants */
1400                         accumulate_dvg_descendant_values(rss, v, list);
1401                 }
1402         }
1403 }
1404
1405 /**
1406  * Builds the list of potential killers for each node
1407  * in the given DVG.
1408  * Needs the descendant list for all user as sorted array.
1409  */
1410 static void build_dvg_pkiller_list(rss_t *rss, dvg_t *dvg) {
1411         ir_nodeset_iterator_t iter;
1412         ir_node *irn;
1413
1414         foreach_nodeset(&dvg->nodes, irn, iter) {
1415                 rss_irn_t       *node = get_rss_irn(rss, irn);
1416                 plist_element_t *el, *el2;
1417
1418                 DBG((rss->dbg, DEBUG_DVG, "\t\tbuilding pkiller list for %+F\n", irn));
1419
1420                 /* check each user */
1421                 foreach_plist(node->dvg_user_list, el) {
1422                         ir_node *u_irn = plist_element_get_value(el);
1423
1424                         /* is the current user u_irn not a descendant of any other user -> pkiller */
1425                         foreach_plist(node->dvg_user_list, el2) {
1426                                 ir_node   *v_irn = plist_element_get_value(el2);
1427                                 rss_irn_t *v     = get_rss_irn(rss, v_irn);
1428
1429                                 if (el != el2                             &&
1430                                         ! BSEARCH_IRN_ARR(u_irn, v->dvg_desc) &&
1431                                         ! plist_has_value(node->dvg_pkiller_list, u_irn))
1432                                 {
1433                                         plist_insert_back(node->dvg_pkiller_list, u_irn);
1434                                         DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG pkiller %+F\n", u_irn));
1435                                 }
1436                         }
1437                 }
1438
1439                 node->dvg_pkiller = build_sorted_array_from_list(node->dvg_pkiller_list, phase_obst(&rss->ph));
1440         }
1441
1442         DEBUG_ONLY(
1443                 if (firm_dbg_get_mask(rss->dbg) & DEBUG_DVG)
1444                         debug_vcg_dump_dvg_pkiller(rss, dvg);
1445         );
1446 }
1447
1448 #endif /* if 0 */
1449
1450 /**
1451  * Compute the maximal antichain of the current DVG.
1452  * This is a reimplementation of the MAXIMAL_ANTI_CHAIN function
1453  * from the DDG library 1.1 (DAG.cpp).
1454  */
1455 static ir_nodeset_t *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration) {
1456         int         n               = ir_nodeset_size(&dvg->nodes);
1457         int         *assignment     = ALLOCAN(int, n);
1458         int         *assignment_rev = ALLOCAN(int, n);
1459         int         *idx_map        = ALLOCAN(int, n);
1460         hungarian_problem_t *bp;
1461         ir_nodeset_t *values, *temp;
1462         ir_nodeset_iterator_t iter;
1463         ir_node     *u_irn;
1464         int         i, j, cost, cur_chain, res;
1465         rss_edge_t  *dvg_edge;
1466
1467 #define MAP_IDX(irn) bsearch_for_index(get_irn_idx(irn), idx_map,  n,  1)
1468
1469         if (pset_count(dvg->edges) == 0)
1470                 return NULL;
1471
1472         bp = hungarian_new(n, n, 1, HUNGARIAN_MATCH_NORMAL);
1473
1474         /*
1475                 At first, we build an index map for the nodes in the DVG,
1476                 because we cannot use the irn idx therefore as the resulting
1477                 bipartite data structure would have around 1.2GB.
1478                 So we limit the size to the number of nodes we have in the DVG
1479                 and build a sorted index map for their irn indices.
1480         */
1481         i = 0;
1482         foreach_ir_nodeset(&dvg->nodes, u_irn, iter) {
1483                 idx_map[i++] = get_irn_idx(u_irn);
1484         }
1485         qsort(idx_map, n, sizeof(idx_map[0]), cmp_int);
1486
1487         foreach_pset(dvg->edges, dvg_edge) {
1488                 int idx_u = MAP_IDX(dvg_edge->src);
1489                 int idx_v = MAP_IDX(dvg_edge->tgt);
1490
1491                 /* add the entry to the bipartite data structure */
1492                 hungarian_add(bp, idx_u, idx_v, 1);
1493                 DBG((rss->dbg, LEVEL_3, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1494                         idx_u, dvg_edge->src, idx_v, dvg_edge->tgt));
1495         }
1496 #if 0
1497         /*
1498                 Add a bipartite entry for each pair of nodes (u, v), where exists a
1499                 path in the DVG from u to v, ie. connect all descendants(v) to v.
1500                 desc_dvg(v) = accumulated descendants of all z in dvg_user_list(v)
1501         */
1502         foreach_ir_nodeset(&dvg->nodes, u_irn, iter) {
1503                 rss_irn_t *u        = get_rss_irn(rss, u_irn);
1504                 int       idx_u_irn = MAP_IDX(u_irn);
1505
1506                 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG descendants of %+F:\n", u_irn));
1507
1508                 //plist_clear(u->dvg_desc_list);
1509                 //accumulate_dvg_descendant_values(rss, u, u->dvg_desc_list);
1510
1511                 /*
1512                         FIXME: The array is build on the phase obstack and we cannot free the data.
1513                         So the array get as many times allocated as this function is called.
1514                 */
1515
1516                 /* build the sorted array for faster searches */
1517                 //u->dvg_desc = build_sorted_array_from_list(u->dvg_desc_list, phase_obst(&rss->ph));
1518
1519                 DBG((rss->dbg, DEBUG_MAX_AC, "\t\tadding bipartite entries of %+F:\n", u_irn));
1520
1521                 /* add the bipartite entries for all descendants */
1522                 for (i = ARR_LEN_SAFE(u->dvg_desc) - 1; i >= 0; --i) {
1523                         rss_irn_t *desc    = get_rss_irn(rss, u->dvg_desc[i]);
1524                         int       idx_desc = MAP_IDX(u->dvg_desc[i]);
1525
1526                         /* add the entry to the bipartite data structure */
1527                         hungarian_add(bp, idx_u_irn, idx_desc, 1);
1528                         DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1529                                 idx_u_irn, u_irn, idx_desc, u->dvg_desc[i]));
1530
1531                         need_matching = 1;
1532                 }
1533         }
1534 #endif
1535
1536         /* We want maximum cardinality matching */
1537         hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);
1538
1539 #if 0
1540         DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG pkiller:\n"));
1541         /* beware: the following function needs the dvg_desc array */
1542         build_dvg_pkiller_list(rss, dvg);
1543 #endif
1544
1545         DBG((rss->dbg, LEVEL_2, "\t\tcomputing bipartite matching\n"));
1546         /*
1547                 The maximum cardinality bipartite matching gives us the minimal
1548                 chain partition, which corresponds to the maximum anti chains.
1549         */
1550         res = hungarian_solve(bp, assignment, &cost, 1);
1551         assert(res == 0 && "Bipartite matching failed!");
1552
1553         hungarian_free(bp);
1554         memset(assignment_rev, -1, n * sizeof(assignment_rev[0]));
1555
1556         /* build the reverse assignment, ie. foreach i -> j, add j -> i */
1557         for (i = 0; i < n; ++i) {
1558                 if (assignment[i] >= 0) {
1559                         int j = assignment[i];
1560                         assignment_rev[j] = i;
1561                 }
1562         }
1563
1564         DBG((rss->dbg, LEVEL_2, "\t\tgot assignment with cost %d\n", cost));
1565         DBG((rss->dbg, LEVEL_3, "\t\t\tassignment   ---   reverse assignment\n", cost));
1566         for (i = 0; i < n; ++i) {
1567                 DBG((rss->dbg, LEVEL_3, "\t\t\t%3d -> %3d         %3d -> %3d\n", i, assignment[i], i, assignment_rev[i]));
1568         }
1569
1570         values    = XMALLOC(ir_nodeset_t);
1571         ir_nodeset_init_size(values, 10);
1572         cur_chain = 0;
1573         /* Construction of the minimal chain partition */
1574         for (j = 0; j < n; ++j) {
1575                 /* check nodes, which did not occur as target */
1576                 if (assignment_rev[j] == -1) {
1577                         int       xj      = idx_map[j];
1578                         ir_node   *xj_irn = get_idx_irn(rss->irg, xj);
1579                         rss_irn_t *xj_rss = get_rss_irn(rss, xj_irn);
1580                         chain_t   *c      = obstack_alloc(phase_obst(&rss->ph), sizeof(*c));
1581                         int       source;
1582
1583                         /* there was no source for j -> we have a source of a new chain */
1584                         ir_nodeset_insert(values, xj_irn);
1585
1586                         c->elements = plist_obstack_new(phase_obst(&rss->ph));
1587                         c->nr       = cur_chain++;
1588                         plist_insert_back(c->elements, xj_irn);
1589
1590                         xj_rss->chain = c;
1591
1592                         DBG((rss->dbg, LEVEL_2, "\t\tstarting chain %d:\n", c->nr));
1593                         DBG((rss->dbg, LEVEL_2, "\t\t\t%+F (%d)", xj_irn, j));
1594
1595                         /* follow chain, having j as source */
1596                         source = j;
1597                         while (assignment[source] >= 0) {
1598                                 int       target  = assignment[source];
1599                                 int       irn_idx = idx_map[target];
1600                                 ir_node   *irn    = get_idx_irn(rss->irg, irn_idx);
1601                                 rss_irn_t *node   = get_rss_irn(rss, irn);
1602
1603                                 plist_insert_back(c->elements, irn);
1604                                 node->chain = c;
1605
1606                                 DB((rss->dbg, LEVEL_2, " -> %+F (%d)", irn, target));
1607
1608                                 /* new source = last target */
1609                                 source = target;
1610                         }
1611
1612                         DB((rss->dbg, LEVEL_2, "\n"));
1613                 }
1614         }
1615
1616         /*
1617                 Computing the maximal antichain: Select an element from each
1618                 chain such, such it is parallel with the others.
1619         */
1620         DBG((rss->dbg, LEVEL_1, "\t\tcomputing set of saturation values (MAX AC)\n"));
1621         DBG((rss->dbg, LEVEL_3, "\t\tstarting with:\n"));
1622
1623         DEBUG_ONLY(
1624                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_3)
1625                         dump_nodeset(values, "\t\t\t");
1626         )
1627
1628         temp = NULL;
1629         do {
1630                 /*
1631                         We need an explicit array for the values as
1632                         we cannot iterate multiple times over the same
1633                         set at the same time. :-(((((
1634                         TODO Matze: now we can, so rewrite this...
1635                 */
1636                 int     n         = ir_nodeset_size(values);
1637                 int     i         = 0;
1638                 ir_node **val_arr = NEW_ARR_F(ir_node *, n);
1639
1640                 foreach_ir_nodeset(values, u_irn, iter)
1641                         val_arr[i++] = u_irn;
1642
1643                 if (temp) {
1644                         ir_nodeset_destroy(temp);
1645                         free(temp);
1646                 }
1647
1648                 temp = XMALLOC(ir_nodeset_t);
1649                 ir_nodeset_init_size(temp, 10);
1650
1651                 /* Select all nodes from current value set, having another node in the set as descendant. */
1652                 for (i = 0; i < n; ++i) {
1653                         rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1654
1655                         for (j = 0; j < n; ++j) {
1656                                 if (i != j) {
1657                                         rss_edge_t key;
1658
1659                                         key.src = val_arr[i];
1660                                         key.tgt = val_arr[j];
1661
1662                                         if (pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1663                                                 /* v[j] is descendant of u -> remove u and break */
1664                                                 ir_nodeset_insert(temp, (ir_node *) u->irn);
1665                                                 ir_nodeset_remove(values, u->irn);
1666
1667                                                 DBG((rss->dbg, LEVEL_3, "\t\t\tremoving %+F from values, adding it to temp\n", u->irn));
1668
1669                                                 break;
1670                                         }
1671                                 }
1672                         }
1673                 }
1674
1675                 /* Try to insert the chain predecessor of all selected u's */
1676                 foreach_ir_nodeset(temp, u_irn, iter) {
1677                         rss_irn_t       *u  = get_rss_irn(rss, u_irn);
1678                         chain_t         *c  = u->chain;
1679                         plist_element_t *el = plist_find_value(c->elements, u_irn);
1680
1681                         assert(el && "Missing element in chain!");
1682
1683                         /* If u has predecessor in chain: insert the predecessor */
1684                         if (el == plist_element_get_prev(el)) {
1685                                 ir_nodeset_insert(values, plist_element_get_value(el));
1686                                 DBG((rss->dbg, LEVEL_3, "\t\t\tadding %+F to values\n", plist_element_get_value(el)));
1687                         }
1688                 }
1689
1690
1691                 DEL_ARR_F(val_arr);
1692         } while (ir_nodeset_size(temp) > 0);
1693
1694         DBG((rss->dbg, LEVEL_2, "\t\tfinal set:\n"));
1695         DEBUG_ONLY(
1696                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2) {
1697                         dump_nodeset(values, "\t\t\t");
1698                 }
1699         );
1700
1701         if (rss->opts->dump_flags & RSS_DUMP_MAXAC)
1702                 debug_vcg_dump_pkg(rss, values, iteration);
1703
1704         if(temp != NULL) {
1705                 ir_nodeset_destroy(temp);
1706                 free(temp);
1707         }
1708
1709         return values;
1710
1711 #undef MAP_IDX
1712 }
1713
1714 /**
1715  * Computes the best serialization between two nodes of sat_vals.
1716  */
1717 static serialization_t *compute_best_admissible_serialization(rss_t *rss, ir_nodeset_t *sat_vals, serialization_t *ser, int num_regs) {
1718         int        n                    = ir_nodeset_size(sat_vals);
1719         int        n_idx                = ARR_LEN_SAFE(rss->idx_map);
1720         int        i                    = 0;
1721         ir_node    **val_arr            = ALLOCAN(ir_node*, n);
1722         bitset_t   *bs_sv               = bitset_alloca(n_idx);
1723         bitset_t   *bs_vdesc            = bitset_alloca(n_idx);
1724         bitset_t   *bs_tmp              = bitset_alloca(n_idx);
1725         bitset_t   *bs_ukilldesc        = bitset_alloca(n_idx);
1726         int        best_benefit         = INT_MAX;
1727         int        best_omega2          = INT_MAX;
1728         int        best_benefit_omega20 = INT_MAX;
1729         int        has_omega1           = 0;
1730         int        j, k;
1731         ir_node    *irn;
1732         ir_nodeset_iterator_t iter;
1733         rss_edge_t min_benefit_edge = {NULL, NULL, NULL};
1734         rss_edge_t min_omega20_edge = {NULL, NULL, NULL};
1735         rss_irn_t  *ser_u_omega1 = NULL, *ser_v_omega1 = NULL;
1736         rss_irn_t  *ser_u_omega20 = NULL, *ser_v_omega20 = NULL;
1737
1738         DBG((rss->dbg, LEVEL_1, "\tcomputing admissible serializations:\n"));
1739
1740         /*
1741                 We need an explicit array for the values as
1742                 we cannot iterate multiple times over the same
1743                 set at the same time. :-(((((
1744         */
1745
1746         foreach_ir_nodeset(sat_vals, irn, iter) {
1747                 val_arr[i++] = irn;
1748                 bitset_set(bs_sv, BLOCK_IDX_MAP(rss, irn));
1749         }
1750
1751         /*
1752                 We build all admissible serializations and remember the best found so far.
1753                 For u in sat_vals:
1754                  For v in sat_val:
1755                    if v in pkiller(u): add edge from v to all other pkiller(u)
1756                    else: for all vv in pkiller(u): add edge from v to vv if there exists no path from vv to v
1757         */
1758
1759 /*
1760         A node is unserializable if:
1761         - it has only one killer and this one is Sink
1762     - it kills no other values
1763         In this case there is no serialization which could
1764         reduce the registerpressure
1765 */
1766 #define IS_UNSERIALIZABLE_NODE(rss_node)                  \
1767         (                                                     \
1768                 (                                                 \
1769                         (plist_count(rss_node->pkiller_list) == 1) && \
1770                         is_Sink(rss_node->killer)                  && \
1771                         (rss_node->kill_count                == 0)    \
1772                 )                            ||                   \
1773                 be_is_Barrier(rss_node->irn) ||                   \
1774                 be_is_Keep(rss_node->irn)                         \
1775         )
1776
1777         /* for all u in sat_vals */
1778         for (i = 0; i < n; ++i) {
1779                 rss_irn_t       *u = get_rss_irn(rss, val_arr[i]);
1780                 plist_element_t *el;
1781
1782                 /* ignore nodes where serialization does not help */
1783                 if (IS_UNSERIALIZABLE_NODE(u)) {
1784                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", u->irn));
1785                         continue;
1786                 }
1787
1788                 /* accumulate all descendants of all pkiller(u) */
1789                 bitset_clear_all(bs_ukilldesc);
1790                 foreach_plist(u->pkiller_list, el) {
1791                         ir_node   *irn  = plist_element_get_value(el);
1792                         rss_irn_t *node = get_rss_irn(rss, irn);
1793
1794                         if (! is_Sink(irn))
1795                                 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, irn));
1796                         else
1797                                 continue;
1798
1799                         for (k = ARR_LEN_SAFE(node->descendants) - 1; k >= 0; --k) {
1800                                 if (! is_Sink(node->descendants[k]))
1801                                         bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, node->descendants[k]));
1802                         }
1803                 }
1804
1805                 /* for all v in sat_vals */
1806                 for (j = 0; j < n; ++j) {
1807                         ir_node   *v_irn   = val_arr[j];
1808                         rss_irn_t *v       = get_rss_irn(rss, v_irn);
1809                         unsigned  v_height = get_irn_height(rss->h, v_irn);
1810                         int       omega1, omega2, is_pkiller;
1811
1812                         /* v cannot be serialized with itself
1813                          * ignore nodes where serialization does not help */
1814                         if (i == j || IS_UNSERIALIZABLE_NODE(v)) {
1815 #ifdef DEBUG_libfirm
1816                                 if (i != j)
1817                                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", v->irn));
1818 #endif
1819                                 continue;
1820                         }
1821
1822                         /* get descendants of v */
1823                         bitset_clear_all(bs_vdesc);
1824                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v_irn));
1825                         for (k = ARR_LEN_SAFE(v->descendants) - 1; k >= 0; --k) {
1826                                 if (! is_Sink(v->descendants[k]))
1827                                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v->descendants[k]));
1828                         }
1829
1830                         /* if v is in pkiller(u) */
1831                         is_pkiller = plist_has_value(u->pkiller_list, v_irn);
1832
1833                         /* for all vv in pkiller(u) */
1834                         foreach_plist(u->pkiller_list, el) {
1835                                 ir_node *vv_irn  = plist_element_get_value(el);
1836                                 int     add_edge;
1837
1838                                 if (is_Sink(vv_irn) || is_cfop(vv_irn))
1839                                         continue;
1840
1841                                 if (is_pkiller)
1842                                         add_edge = vv_irn != v_irn && skip_Proj(vv_irn) != skip_Proj(v_irn);
1843                                 else
1844                                         add_edge = ! heights_reachable_in_block(rss->h, skip_Proj(vv_irn), skip_Proj(v_irn));
1845
1846                                 /*
1847                                         As we add an edge from vv -> v, we have to make sure,
1848                                         that there exists no path from v to vv.
1849                                 */
1850
1851                                 if (add_edge) {
1852                                         unsigned vv_height = get_irn_height(rss->h, vv_irn);
1853                                         unsigned critical_path_cost;
1854                                         unsigned mu1, mu2;
1855
1856                                         /*
1857                                                 mu1 = | descendants(v) cut sat_vals |
1858                                                 the number of saturating values which cannot
1859                                                 be simultaneously alive with u
1860                                         */
1861                                         bitset_copy(bs_tmp, bs_vdesc);
1862                                         mu1 = bitset_popcnt(bitset_and(bs_tmp, bs_sv));
1863
1864                                         /*
1865                                                 mu2 = | accum_desc_all_pkiller(u) without descendants(v) |
1866                                         */
1867                                         if (is_pkiller) {
1868                                                 bitset_copy(bs_tmp, bs_ukilldesc);
1869                                                 mu2 = bitset_popcnt(bitset_andnot(bs_tmp, bs_vdesc));
1870                                         }
1871                                         else {
1872                                                 mu2 = 0;
1873                                         }
1874
1875                                         /* omega1 = mu1 - mu2 */
1876                                         omega1 = mu1 - mu2;
1877
1878                                         if (omega1 != 0)
1879                                                 has_omega1 = 1;
1880
1881                                         /* omega2 = increase of critical path */
1882                                         critical_path_cost =
1883                                                 v_height                        /* longest path from v to sink */
1884                                                 + rss->max_height - vv_height   /* longest path from source to vv */
1885                                                 + 1;                            /* edge */
1886
1887                                         /*
1888                                                 If critical_path_cost > max_height -> the new edge
1889                                                 would increase the longest critical path by the difference.
1890                                         */
1891                                         omega2 = critical_path_cost > rss->max_height ? critical_path_cost - rss->max_height : 0;
1892
1893                                         /* this keeps track of the edge with the best benefit */
1894                                         if (omega1 >= num_regs - n && omega1 < best_benefit) {
1895                                                 min_benefit_edge.src = v_irn;
1896                                                 min_benefit_edge.tgt = vv_irn;
1897
1898                                                 ser_u_omega1 = u;
1899                                                 ser_v_omega1 = v;
1900
1901                                                 best_benefit    = omega1;
1902                                                 ser->new_killer = is_pkiller;
1903                                         }
1904
1905                                         /* this keeps track of the edge with the best omega1 costs where omega2 == 0 */
1906                                         if (omega2 == 0 && omega1 >= num_regs - n && omega1 < best_benefit_omega20) {
1907                                                 min_omega20_edge.src = v_irn;
1908                                                 min_omega20_edge.tgt = vv_irn;
1909
1910                                                 ser_u_omega20 = u;
1911                                                 ser_v_omega20 = v;
1912
1913                                                 best_benefit_omega20 = omega1;
1914                                                 ser->new_killer      = is_pkiller;
1915                                         }
1916
1917                                         best_omega2 = MIN(best_omega2, omega2);
1918
1919                                         DBG((rss->dbg, LEVEL_2, "\t\tfound %+F -> %+F (w1 %d, w2 %d)\n",
1920                                                 v_irn, vv_irn, omega1, omega2));
1921                                 } /* if add_edge */
1922                         } /* for all vv in pkiller(u) */
1923                 } /* for all v in sat_vals */
1924         } /* for all u in sat_vals */
1925
1926         if (! has_omega1)
1927                 return NULL;
1928
1929         if (best_omega2 == 0) {
1930                 ser->u         = ser_u_omega20;
1931                 ser->v         = ser_v_omega20;
1932                 ser->edge->src = min_omega20_edge.src;
1933                 ser->edge->tgt = min_omega20_edge.tgt;
1934                 ser->omega1    = best_benefit_omega20;
1935                 ser->omega2    = best_omega2;
1936         }
1937         else {
1938                 ser->u         = ser_u_omega1;
1939                 ser->v         = ser_v_omega1;
1940                 ser->edge->src = min_benefit_edge.src;
1941                 ser->edge->tgt = min_benefit_edge.tgt;
1942                 ser->omega1    = best_benefit;
1943                 ser->omega2    = best_omega2;
1944         }
1945
1946         return ser;
1947
1948 #undef IS_UNSERIALIZABLE_NODE
1949 }
1950
1951 /**
1952  * Perform the value serialization heuristic and add all
1953  * computed serialization edges as dependencies to the irg.
1954  */
1955 static void perform_value_serialization_heuristic(rss_t *rss) {
1956         bitset_t *arch_nonign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1957         bitset_t *abi_ign_bs     = bitset_alloca(arch_register_class_n_regs(rss->cls));
1958         unsigned available_regs, iteration;
1959         dvg_t    dvg;
1960         ir_nodeset_t *sat_vals;
1961         pset *ser_set = new_pset(cmp_rss_edges, 20);
1962
1963         /* available_regs = R = |arch_non_ignore_regs cut ~abi_ignore_regs| */
1964         arch_put_non_ignore_regs(rss->cls, arch_nonign_bs);
1965         be_abi_put_ignore_regs(rss->abi, rss->cls, abi_ign_bs);
1966         bitset_andnot(arch_nonign_bs, abi_ign_bs);
1967         available_regs  = bitset_popcnt(arch_nonign_bs);
1968         //num_live = pset_count(rss->live_block);
1969         //available_regs -= num_live < available_regs ? num_live : 0;
1970
1971         DBG((rss->dbg, LEVEL_1, "\n\t#available regs: %d\n\n", available_regs));
1972
1973         /*
1974                 At first we need to compute the disjoint value DAG (DVG = {V, E_dv}).
1975                 V    = set of all nodes we are currently interested in
1976                 E_dv = there is an edge from u to v iff v is a descendant of killer(u), forall u, v in V
1977         */
1978         ir_nodeset_init_size(&dvg.nodes, plist_count(rss->nodes));
1979         dvg.edges = new_pset(cmp_rss_edges, plist_count(rss->nodes) * 5);
1980         compute_dvg(rss, &dvg);
1981
1982         /*
1983                 Then we perform the heuristic serialization algorithm
1984                 on the DVG which gives us all necessary serialization
1985                 edges.
1986         */
1987         DBG((rss->dbg, LEVEL_1, "\tcomputing maximal antichain:\n"));
1988         iteration = 0;
1989         sat_vals  = compute_maximal_antichain(rss, &dvg, iteration++);
1990         while (sat_vals && (ir_nodeset_size(sat_vals) > available_regs)) {
1991                 serialization_t *ser, best_ser;
1992                 rss_edge_t      *edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*edge));
1993                 ir_node         *dep_src, *dep_tgt;
1994
1995                 best_ser.edge = edge;
1996                 ser = compute_best_admissible_serialization(rss, sat_vals, &best_ser, available_regs);
1997
1998                 DBG((rss->dbg, LEVEL_1, "\tcurrent register saturation %d, target %d\n", ir_nodeset_size(sat_vals), available_regs));
1999
2000                 if (! ser) {
2001                         DBG((rss->dbg, LEVEL_1, "\tno RS improving serialization found, breaking at iteration %d\n", iteration));
2002                         break;
2003                 }
2004
2005                 /* Insert the serialization as dependency edge into the irg. */
2006                 DBG((rss->dbg, LEVEL_1, "\tserializing %+F -> %+F with edge %+F -> %+F and cost %d, %d\n",
2007                         ser->u->irn, ser->v->irn, ser->edge->src, ser->edge->tgt, ser->omega1, ser->omega2));
2008
2009                 if (pset_find(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge)))
2010                         ir_printf("WARNING: serialization %+F -> %+F computed twice!\n", ser->edge->src, ser->edge->tgt);
2011
2012
2013                 pset_insert(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge));
2014
2015                 /* update the dvg */
2016                 update_dvg(rss, &dvg, ser->v, ser->u);
2017                 update_dvg(rss, &dvg, get_rss_irn(rss, ser->edge->src), get_rss_irn(rss, ser->edge->tgt));
2018                 if(sat_vals != NULL) {
2019                         ir_nodeset_destroy(sat_vals);
2020                         free(sat_vals);
2021                 }
2022
2023                 dep_src = skip_Proj(ser->edge->src);
2024                 dep_tgt = ser->edge->tgt;
2025                 add_irn_dep(dep_src, dep_tgt);
2026
2027                 /* Update descendants, consumer and pkillers of the target */
2028                 update_node_info(rss, ser->edge->tgt, ser->edge->src);
2029
2030                 /* TODO: try to find a cheaper way for updating height information */
2031                 rss->max_height = heights_recompute_block(rss->h, rss->block);
2032
2033                 /* Recompute the antichain for next serialization */
2034                 DBG((rss->dbg, LEVEL_1, "\tre-computing maximal antichain:\n"));
2035                 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
2036         }
2037
2038         ir_nodeset_destroy(&dvg.nodes);
2039         del_pset(dvg.edges);
2040 }
2041
2042 /**
2043  * Do initial calculations for a block.
2044  */
2045 static void process_block(ir_node *block, void *env) {
2046         rss_t *rss = env;
2047         int   i, n;
2048         const ir_edge_t *edge;
2049
2050         phase_init(&rss->ph, "rss block preprocessor", rss->irg, PHASE_DEFAULT_GROWTH, init_rss_irn, NULL);
2051
2052         DBG((rss->dbg, LEVEL_1, "preprocessing block %+F\n", block));
2053         rss->block = block;
2054
2055         /* build an index map for all nodes in the current block */
2056         i            = 0;
2057         n            = get_irn_n_edges(block);
2058         NEW_ARR_A(int *, rss->idx_map, n);
2059         foreach_out_edge(block, edge) {
2060                 ir_node *irn      = get_edge_src_irn(edge);
2061                 rss->idx_map[i++] = get_irn_idx(irn);
2062         }
2063         qsort(rss->idx_map, n, sizeof(rss->idx_map[0]), cmp_int);
2064         rss->max_height = heights_recompute_block(rss->h, block);
2065
2066         /* loop over all register classes */
2067         for (i = arch_env_get_n_reg_class(rss->arch_env) - 1; i >= 0; --i) {
2068                 const arch_register_class_t *cls = arch_env_get_reg_class(rss->arch_env, i);
2069
2070                 rss->cls = cls;
2071                 DBG((rss->dbg, LEVEL_1, "register class %s\n", arch_register_class_name(cls)));
2072
2073                 /* Get all live value at end of Block having current register class */
2074                 ir_nodeset_init(&rss->live_block);
2075                 be_liveness_end_of_block(rss->liveness, rss->cls, rss->block, &rss->live_block);
2076
2077                 /* reset the list of interesting nodes */
2078                 plist_clear(rss->nodes);
2079                 plist_insert_back(rss->nodes, _sink);
2080
2081                 /* collect all nodes having a certain register class */
2082                 foreach_out_edge(block, edge) {
2083                         ir_node *irn  = get_edge_src_irn(edge);
2084                         ir_mode *mode = get_irn_mode(irn);
2085
2086                         /*
2087                                 We skip:
2088                                 - mode_T nodes (the projs are asked)
2089                                 - mode_X nodes (control flow nodes are always scheduled last)
2090                                 - Keeps (they are always immediately scheduled)
2091                                 - Phi (same as Keep)
2092                         */
2093                         if (mode == mode_T || mode == mode_X || is_Phi(irn))
2094                                 continue;
2095
2096                         /*
2097                                 In case of a proj, we skip
2098                                 - Barrier (they are a Barrier :)
2099                                 - Start
2100                                 - the Proj itself, as it's scheduled always with it's super node
2101                         */
2102                         if (is_Proj(irn)) {
2103                                 ir_node *pred = get_Proj_pred(irn);
2104                                 if (be_is_Barrier(pred) || is_Start(pred))
2105                                         continue;
2106                         }
2107
2108                         /* calculate the descendants and consumer for each node in the block */
2109                         collect_node_info(rss, skip_Proj(irn));
2110
2111                         if (be_is_Keep(irn))
2112                                 continue;
2113
2114                         if (!arch_irn_is_ignore(irn) &&
2115                                         arch_get_irn_reg_class_out(irn) == cls) {
2116                                 plist_insert_back(rss->nodes, skip_Proj(irn));
2117                         }
2118                         //}
2119                 }
2120
2121                 /* compute the potential killing set PK(G) */
2122                 compute_pkill_set(rss);
2123
2124                 /* compute the killing function k* */
2125                 compute_killing_function(rss);
2126
2127                 /*
2128                         Compute the heuristic value serialization and
2129                         add the necessary dependencies to the irg.
2130                 */
2131                 perform_value_serialization_heuristic(rss);
2132
2133                 ir_nodeset_destroy(&rss->live_block);
2134         }
2135
2136         phase_free(&rss->ph);
2137 }
2138
2139 /**
2140  * Register the options.
2141  */
2142 void be_init_schedrss(void) {
2143         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
2144         lc_opt_entry_t *sched_grp = lc_opt_get_grp(be_grp, "sched");
2145         lc_opt_entry_t *rss_grp = lc_opt_get_grp(sched_grp, "rss");
2146
2147         lc_opt_add_table(rss_grp, rss_option_table);
2148 }
2149
2150 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_schedrss);
2151
2152 /**
2153  * Preprocess the irg for scheduling.
2154  */
2155 void rss_schedule_preparation(be_irg_t *birg) {
2156         ir_graph *irg = be_get_birg_irg(birg);
2157         rss_t rss;
2158
2159         FIRM_DBG_REGISTER(rss.dbg, "firm.be.sched.rss");
2160
2161         //firm_dbg_set_mask(rss.dbg, LEVEL_1 | LEVEL_2 | LEVEL_3);
2162
2163         init_rss_special_nodes(irg);
2164
2165         rss.irg      = irg;
2166         rss.arch_env = be_get_birg_arch_env(birg);
2167         rss.abi      = birg->abi;
2168         rss.h        = heights_new(irg);
2169         rss.nodes    = plist_new();
2170         rss.opts     = &rss_options;
2171         rss.liveness = be_liveness(irg);
2172         be_liveness_assure_sets(rss.liveness);
2173         irg_block_walk_graph(irg, NULL, process_block, &rss);
2174         heights_free(rss.h);
2175         plist_free(rss.nodes);
2176         be_liveness_free(rss.liveness);
2177
2178         if (birg->main_env->options->dump_flags & DUMP_SCHED)
2179                 be_dump(rss.irg, "-rss", dump_ir_block_graph);
2180 }