fix a bug in constraint enforcement; improve phi handling and phi0 construction ...
[libfirm] / ir / be / beschedrss.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Implementation of a register saturating list scheduler.
23  * @author      Christian Wuerdig
24  * @date        29.08.2006
25  * @version     $Id$
26  *
27  * Implementation of a register saturating list scheduler
28  * as described in: Sid-Ahmed-Ali Touati
29  * Register Saturation in Superscalar and VLIW Codes
30  */
31 #include "config.h"
32
33 #include <limits.h>
34
35 #include "obst.h"
36 #include "debug.h"
37
38 #include "irgraph_t.h"
39 #include "irnode_t.h"
40 #include "iredges_t.h"
41 #include "ircons_t.h"
42 #include "irphase_t.h"
43 #include "irgwalk.h"
44 #include "irdump.h"
45 #include "irtools.h"
46 #include "irbitset.h"
47 #include "irprintf.h"
48 #include "irnodeset.h"
49 #include "bipartite.h"
50 #include "hungarian.h"
51 #include "plist.h"
52 #include "array_t.h"
53
54 #include "height.h"
55
56 #include "beabi.h"
57 #include "bemodule.h"
58 #include "benode_t.h"
59 #include "besched.h"
60 #include "beirg.h"
61 #include "belive.h"
62
63 #include "lc_opts.h"
64 #include "lc_opts_enum.h"
65
66
67 #define ARR_LEN_SAFE(arr) ((arr) != NULL ? ARR_LEN((arr)) : 0)
68
69 #define HASH_RSS_EDGE(edge) ((get_irn_node_nr((edge)->src) << 16) | (get_irn_node_nr((edge)->tgt) & 0xFFFF))
70 #define BSEARCH_IRN_ARR(val, arr) \
71         bsearch(&(val), (arr), ARR_LEN_SAFE((arr)), sizeof((arr)[0]), cmp_irn_idx)
72
73 #define BLOCK_IDX_MAP(rss, irn) bsearch_for_index(get_irn_idx((irn)), (rss)->idx_map, ARR_LEN_SAFE((rss)->idx_map), 1)
74
75 /* the rss options */
76 typedef struct _rss_opts_t {
77         int dump_flags;
78 } rss_opts_t;
79
80 /* Represents a child with associated costs */
81 typedef struct _child {
82         ir_node *irn;
83         float   cost;
84 } child_t;
85
86 /* We need edges for several purposes. */
87 typedef struct _rss_edge {
88         ir_node *src;
89         ir_node *tgt;
90         void    *next;
91 } rss_edge_t;
92
93 /* Represents a connected bipartite component. */
94 typedef struct _cbc {
95         ir_nodeset_t parents;       /**< = S  a set of value producers */
96         ir_nodeset_t children;      /**< = T  a set of value consumers */
97         pset    *kill_edges;    /**< = E  a set of edges (t in T, s in S) such as each s in S gets killed by at least one t in T */
98         int     nr;             /**< a deterministic index for set insertion (used as hash) */
99 } cbc_t;
100
101 /* Represents a disjoint value DAG. */
102 typedef struct _dvg {
103         ir_nodeset_t nodes;
104         pset    *edges;
105 } dvg_t;
106
107 /* Represents a chain of nodes. */
108 typedef struct _chain {
109         plist_t *elements;   /**< List of chain elements */
110         int     nr;          /**< a deterministic index for set insertion (used as hash) */
111 } chain_t;
112
113 typedef struct _rss_irn {
114         plist_t  *consumer_list;    /**< List of consumers */
115         const ir_node **consumer;   /**< Sorted consumer array (needed for faster access) */
116
117         plist_t  *parent_list;      /**< List of parents */
118         plist_t  *pkiller_list;     /**< List of potential killers */
119
120         plist_t  *descendant_list;  /**< List of descendants */
121         const ir_node **descendants;      /**< Sorted descendant array (needed for faster access) */
122
123         const ir_node  *killer;     /**< The selected unique killer */
124         const ir_node  *irn;        /**< The corresponding firm node to this rss_irn */
125
126         chain_t  *chain;            /**< The chain, this node is associated to */
127
128         unsigned desc_walk;         /**< visited flag for collecting descendants */
129         unsigned kill_count;        /**< number of nodes killed by this one */
130
131         unsigned live_out : 1;      /**< irn has consumers outside of it's block */
132         unsigned visited  : 1;      /**< visited flag for bipartite decomposition */
133         unsigned havecons : 1;      /**< visited flag collect consumer */
134         unsigned handled  : 1;      /**< flag indicating whether or not the list structures have been build */
135         unsigned dumped   : 1;      /**< flag indication whether or not this node was dumped */
136 } rss_irn_t;
137
138 /* Represents a serialization edge with associated costs. */
139 typedef struct _serialization {
140         rss_irn_t  *u;       /* the top node */
141         rss_irn_t  *v;       /* the node about to be serialized after u */
142         rss_edge_t *edge;    /* the edge selected for the serialization */
143         int        omega1;   /* estimated: available regs - RS reduction */
144         int        omega2;   /* increase in critical path length */
145         int        new_killer;
146 } serialization_t;
147
148 typedef struct _rss {
149         ir_phase          ph;              /**< Phase to hold some data */
150         heights_t        *h;              /**< The current height object */
151         ir_graph         *irg;            /**< The irg to preprocess */
152         plist_t          *nodes;          /**< The list of interesting nodes */
153         const arch_env_t *arch_env;       /**< The architecture environment */
154         be_abi_irg_t     *abi;            /**< The abi for this irg */
155         pset             *cbc_set;        /**< A set of connected bipartite components */
156         ir_node          *block;          /**< The current block in progress. */
157         int              *idx_map;        /**< Mapping irn indices to per block indices */
158         unsigned         max_height;      /**< maximum height in the current block */
159         rss_opts_t       *opts;           /**< The options */
160         be_lv_t          *liveness;       /**< The liveness information for this irg */
161         ir_nodeset_t      live_block;     /**< Values alive at end of block */
162         const arch_register_class_t *cls; /**< The current register class */
163         DEBUG_ONLY(firm_dbg_module_t *dbg);
164 } rss_t;
165
166 #define get_rss_irn(rss, irn)  (phase_get_or_set_irn_data(&rss->ph, irn))
167
168 /**
169  * We need some special nodes:
170  * a source and a sink for all live-in and live-out values of a block
171  */
172 enum {
173         iro_rss_Source,
174         iro_rss_Sink,
175         iro_rss_last
176 };
177
178 /** The opcode of the rss_Source node once allocated. */
179 static ir_op *op_rss_Source;
180 /** The opcode of the rss_Sink node once allocated. */
181 static ir_op *op_rss_Sink;
182
183 /** The rss_Source node of the current graph. */
184 static ir_node *_source = NULL;
185 /** The rss_Sink node of the current graph. */
186 static ir_node *_sink   = NULL;
187
188 #define is_Source(irn) ((irn) == _source)
189 #define is_Sink(irn)   ((irn) == _sink)
190
191 enum {
192         RSS_DUMP_NONE  = 0,
193         RSS_DUMP_CBC   = 1 << 0,
194         RSS_DUMP_PKG   = 1 << 1,
195         RSS_DUMP_KILL  = 1 << 2,
196         RSS_DUMP_DVG   = 1 << 3,
197         RSS_DUMP_MAXAC = 1 << 4,
198         RSS_DUMP_ALL   = (RSS_DUMP_MAXAC << 1) - 1,
199 };
200
201 static rss_opts_t rss_options = {
202         RSS_DUMP_NONE,
203 };
204
205 static const lc_opt_enum_int_items_t dump_items[] = {
206         { "none",  RSS_DUMP_NONE  },
207         { "cbc",   RSS_DUMP_CBC   },
208         { "pkg",   RSS_DUMP_PKG   },
209         { "kill",  RSS_DUMP_KILL  },
210         { "dvg",   RSS_DUMP_DVG   },
211         { "maxac", RSS_DUMP_MAXAC },
212         { "all",   RSS_DUMP_ALL   },
213         { NULL, 0 }
214 };
215
216 static lc_opt_enum_int_var_t dump_var = {
217         &rss_options.dump_flags, dump_items
218 };
219
220 static const lc_opt_table_entry_t rss_option_table[] = {
221         LC_OPT_ENT_ENUM_MASK("dump", "dump phases", &dump_var),
222         LC_OPT_LAST
223 };
224
225 /******************************************************************************
226  *  _          _                    __                  _   _
227  * | |        | |                  / _|                | | (_)
228  * | |__   ___| |_ __   ___ _ __  | |_ _   _ _ __   ___| |_ _  ___  _ __  ___
229  * | '_ \ / _ \ | '_ \ / _ \ '__| |  _| | | | '_ \ / __| __| |/ _ \| '_ \/ __|
230  * | | | |  __/ | |_) |  __/ |    | | | |_| | | | | (__| |_| | (_) | | | \__ \
231  * |_| |_|\___|_| .__/ \___|_|    |_|  \__,_|_| |_|\___|\__|_|\___/|_| |_|___/
232  *            | |
233  *            |_|
234  ******************************************************************************/
235
236 /**
237  * Acquire opcodes if needed and create source and sink nodes.
238  */
239 static void init_rss_special_nodes(ir_graph *irg) {
240         ir_node *block;
241
242         if (op_rss_Source == NULL) {
243                 int iro_rss_base = get_next_ir_opcodes(iro_rss_last);
244                 op_rss_Source = new_ir_op(iro_rss_base + iro_rss_Source, "rss_Source", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
245                 op_rss_Sink   = new_ir_op(iro_rss_base + iro_rss_Sink,   "rss_Sink",   op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
246         }
247         block   = get_irg_start_block(irg);
248         _source = new_ir_node(NULL, irg, block, op_rss_Source, mode_ANY, 0, NULL);
249         _sink   = new_ir_node(NULL, irg, block, op_rss_Sink, mode_ANY, 0, NULL);
250 }
251
252 static int cmp_int(const void *a, const void *b) {
253         const int *i1 = a;
254         const int *i2 = b;
255
256         return QSORT_CMP(*i1, *i2);
257 }
258
259 static int cmp_child_costs(const void *a, const void *b) {
260         const child_t *c1 = a;
261         const child_t *c2 = b;
262
263         return QSORT_CMP(c1->cost, c2->cost);
264 }
265
266 static int cmp_irn_idx(const void *a, const void *b) {
267         const ir_node *n1 = *(ir_node **)a;
268         const ir_node *n2 = *(ir_node **)b;
269
270         return QSORT_CMP(get_irn_idx(n1), get_irn_idx(n2));
271 }
272
273 static int cmp_rss_edges(const void *a, const void *b) {
274         const rss_edge_t *e1 = a;
275         const rss_edge_t *e2 = b;
276
277         return (e1->src != e2->src) || (e1->tgt != e2->tgt);
278 }
279
280 static int bsearch_for_index(int key, int *arr, size_t len, int force) {
281         int left = 0;
282         int right = len;
283
284         while (right >= left) {
285                 int idx = (left + right) / 2;
286
287                 if (key < arr[idx])
288                         right = idx - 1;
289                 else if (key > arr[idx])
290                         left = idx + 1;
291                 else
292                         return idx;
293         }
294
295         if (force)
296                 assert(0 && "Something is wrong, key not found.");
297         return -1;
298 }
299
300 static const ir_node **build_sorted_array_from_list(plist_t *irn_list, struct obstack *obst) {
301         plist_element_t *el;
302         int             i   = 0;
303         int             len = plist_count(irn_list);
304         const ir_node   **arr = (const ir_node**)NEW_ARR_D(ir_node*, obst, len);
305
306         /* copy the list into the array */
307         foreach_plist(irn_list, el) {
308                 arr[i++] = plist_element_get_value(el);
309         }
310
311         /* sort the array by node index */
312         /* HACK cast for MSVC */
313         qsort((void*)arr, len, sizeof(arr[0]), cmp_irn_idx);
314
315         return arr;
316 }
317
318 /*****************************************************
319  *      _      _                       _
320  *     | |    | |                     (_)
321  *   __| | ___| |__  _   _  __ _  __ _ _ _ __   __ _
322  *  / _` |/ _ \ '_ \| | | |/ _` |/ _` | | '_ \ / _` |
323  * | (_| |  __/ |_) | |_| | (_| | (_| | | | | | (_| |
324  *  \__,_|\___|_.__/ \__,_|\__, |\__, |_|_| |_|\__, |
325  *                          __/ | __/ |         __/ |
326  *                         |___/ |___/         |___/
327  *
328  *****************************************************/
329
330 #ifdef DEBUG_libfirm
331 static void dump_nodeset(ir_nodeset_t *ns, const char *prefix) {
332         ir_nodeset_iterator_t iter;
333         ir_node *irn;
334
335         ir_nodeset_iterator_init(&iter, ns);
336         while( (irn = ir_nodeset_iterator_next(&iter)) != NULL ) {
337                 ir_fprintf(stderr, "%s%+F\n", prefix, irn);
338         }
339 }
340 #endif
341
342 static void build_file_name(rss_t *rss, const char *suffix, size_t suf_len, char *buf, size_t len) {
343         const char *irg_name;
344
345         memset(buf, 0, len);
346         irg_name = get_entity_name(get_irg_entity(rss->irg));
347         snprintf(buf, len - suf_len, "%s-%s-block-%ld",
348                 irg_name, arch_register_class_name(rss->cls), get_irn_node_nr(rss->block));
349         strcat(buf, suffix);
350 }
351
352 /* Dumps all collected bipartite components of current irg as vcg. */
353 static void debug_vcg_dump_bipartite(rss_t *rss) {
354         cbc_t *cbc;
355         FILE  *f;
356         char  file_name[256];
357         static const char suffix[] = "-RSS-CBC.vcg";
358
359         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
360         f = fopen(file_name, "w");
361
362         ir_fprintf(f, "graph: { title: \"connected bipartite component graph of %+F\"\n", rss->irg);
363         fprintf(f, "display_edge_labels: no\n");
364         fprintf(f, "layoutalgorithm: mindepth\n");
365         fprintf(f, "manhattan_edges: yes\n\n");
366
367         foreach_pset(rss->cbc_set, cbc) {
368                 ir_nodeset_iterator_t iter;
369                 ir_node    *n;
370                 rss_edge_t *ke;
371
372                 fprintf(f, "graph: { titel: \"cbc %d\" label: \"cbc %d\" status:clustered color:yellow\n", cbc->nr, cbc->nr);
373                 foreach_ir_nodeset(&cbc->parents, n, iter) {
374                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
375                 }
376                 foreach_ir_nodeset(&cbc->children, n, iter) {
377                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
378                 }
379                 foreach_pset(cbc->kill_edges, ke) {
380                         ir_fprintf(f, "edge: { sourcename: \"n%d_%d\" targetname: \"n%d_%d\" }\n",
381                                 get_irn_node_nr(ke->src), cbc->nr, get_irn_node_nr(ke->tgt), cbc->nr);
382                 }
383                 fprintf(f, "}\n\n");
384         }
385         fprintf(f, "}\n");
386         fclose(f);
387 }
388
389 /* Dump the computed killing function as vcg. */
390 static void debug_vcg_dump_kill(rss_t *rss) {
391         FILE            *f;
392         char            file_name[256];
393         plist_element_t *el;
394         static const char suffix[] = "-RSS-KILL.vcg";
395
396         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
397         f = fopen(file_name, "w");
398
399         ir_fprintf(f, "graph: { title: \"computed kill graph of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
400         fprintf(f, "display_edge_labels: no\n");
401         fprintf(f, "layoutalgorithm: mindepth\n");
402         fprintf(f, "manhattan_edges: yes\n\n");
403
404         {
405                 /* reset dump_flag */
406                 ir_node *irn;
407                 foreach_phase_irn(&rss->ph, irn) {
408                         rss_irn_t *node = get_rss_irn(rss, irn);
409                         node->dumped = 0;
410                 }
411         }
412
413         /* dump all nodes and their killers */
414         foreach_plist(rss->nodes, el) {
415                 ir_node   *irn     = plist_element_get_value(el);
416                 rss_irn_t *rirn    = get_rss_irn(rss, irn);
417                 rss_irn_t *pk_rirn = get_rss_irn(rss, rirn->killer);
418
419                 if (! rirn->dumped) {
420                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
421                         rirn->dumped = 1;
422                 }
423
424                 if (! pk_rirn->dumped) {
425                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(rirn->killer), rirn->killer);
426                         pk_rirn->dumped = 1;
427                 }
428
429                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
430                         get_irn_node_nr(rirn->killer), get_irn_node_nr(irn));
431         }
432
433         fprintf(f, "}\n");
434         fclose(f);
435 }
436
437 /* Dumps the potential killing DAG (PKG) as vcg. */
438 static void debug_vcg_dump_pkg(rss_t *rss, ir_nodeset_t *max_ac, int iteration)
439 {
440         FILE              *f;
441         char              file_name[256];
442         char              suffix[32];
443         static const char suffix1[] = "-RSS-PKG.vcg";
444         static const char suffix2[] = "-RSS-PKG-MAXAC.vcg";
445         plist_element_t   *el;
446
447         if (! max_ac) {
448                 snprintf(suffix, sizeof(suffix), "%s", suffix1);
449         }
450         else {
451                 snprintf(suffix, sizeof(suffix), "-%02d%s", iteration, suffix2);
452         }
453
454         build_file_name(rss, suffix, strlen(suffix) + 1, file_name, sizeof(file_name));
455         f = fopen(file_name, "w");
456
457         ir_fprintf(f, "graph: { title: \"potential killing DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
458         fprintf(f, "display_edge_labels: no\n");
459         fprintf(f, "layoutalgorithm: mindepth\n");
460         fprintf(f, "manhattan_edges: yes\n\n");
461
462         {
463                 /* reset dump_flag */
464                 ir_node *irn;
465                 foreach_phase_irn(&rss->ph, irn) {
466                         rss_irn_t *node = get_rss_irn(rss, irn);
467                         node->dumped = 0;
468                 }
469         }
470
471         foreach_plist(rss->nodes, el) {
472                 ir_node   *irn  = plist_element_get_value(el);
473                 rss_irn_t *rirn = get_rss_irn(rss, irn);
474                 char      *c1   = "";
475                 plist_element_t *k_el;
476
477                 /* dump selected saturating values in yellow */
478                 if (max_ac && ir_nodeset_contains(max_ac, irn))
479                         c1 = "color:yellow";
480
481                 if (! rirn->dumped) {
482                         if (rirn->chain)
483                                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(irn), irn, rirn->chain->nr, c1);
484                         else
485                                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(irn), irn, c1);
486                         rirn->dumped = 1;
487                 }
488
489                 foreach_plist(rirn->pkiller_list, k_el) {
490                         ir_node   *pkiller = plist_element_get_value(k_el);
491                         rss_irn_t *pk_rirn = get_rss_irn(rss, pkiller);
492                         char      *c2      = "";
493
494                         if (max_ac && ir_nodeset_contains(max_ac, pkiller))
495                                 c2 = "color:yellow";
496
497                         if (! pk_rirn->dumped) {
498                                 if (pk_rirn->chain)
499                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(pkiller), pkiller, pk_rirn->chain->nr, c2);
500                                 else
501                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(pkiller), pkiller, c2);
502                                 pk_rirn->dumped = 1;
503                         }
504                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
505                                 get_irn_node_nr(pkiller), get_irn_node_nr(irn));
506                 }
507         }
508         fprintf(f, "}\n");
509         fclose(f);
510 }
511
512 /* Dumps the disjoint value DAG (DVG) as vcg. */
513 static void debug_vcg_dump_dvg(rss_t *rss, dvg_t *dvg) {
514         static const char suffix[] = "-RSS-DVG.vcg";
515         FILE       *f;
516         char       file_name[256];
517         ir_nodeset_iterator_t iter;
518         ir_node    *irn;
519         rss_edge_t *edge;
520
521         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
522         f = fopen(file_name, "w");
523
524         ir_fprintf(f, "graph: { title: \"disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
525         fprintf(f, "display_edge_labels: no\n");
526         fprintf(f, "layoutalgorithm: mindepth\n");
527         fprintf(f, "manhattan_edges: yes\n\n");
528
529         /* dump all nodes */
530         foreach_ir_nodeset(&dvg->nodes, irn, iter) {
531                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
532         }
533
534         /* dump all edges */
535         foreach_pset(dvg->edges, edge) {
536                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
537                         get_irn_node_nr(edge->src), get_irn_node_nr(edge->tgt));
538         }
539
540         fprintf(f, "}\n");
541         fclose(f);
542 }
543
544 #if 0
545 /* Dumps the PKG(DVG). */
546 static void debug_vcg_dump_dvg_pkiller(rss_t *rss, dvg_t *dvg) {
547         static const char suffix[] = "-RSS-DVG-PKG.vcg";
548         FILE       *f;
549         char       file_name[256];
550         ir_nodeset_iterator_t iter;
551         ir_node    *irn;
552
553         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
554         f = fopen(file_name, "w");
555
556         ir_fprintf(f, "graph: { title: \"PKG of disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
557         fprintf(f, "display_edge_labels: no\n");
558         fprintf(f, "layoutalgorithm: mindepth\n");
559         fprintf(f, "manhattan_edges: yes\n\n");
560
561         /* dump all nodes */
562         foreach_ir_nodeset(&dvg->nodes, irn, iter) {
563                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
564         }
565
566         /* dump all edges */
567         foreach_ir_nodeset(&dvg->nodes, irn, iter) {
568                 rss_irn_t       *node = get_rss_irn(rss, irn);
569                 plist_element_t *el;
570
571                 foreach_plist(node->dvg_pkiller_list, el) {
572                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
573                                 get_irn_node_nr(plist_element_get_value(el)), get_irn_node_nr(irn));
574                 }
575         }
576
577         fprintf(f, "}\n");
578         fclose(f);
579 }
580 #endif /* if 0 */
581
582 /**
583  * In case there is no rss information for irn, initialize it.
584  */
585 static void *init_rss_irn(ir_phase *ph, const ir_node *irn, void *old) {
586         rss_irn_t *res = old ? old : phase_alloc(ph, sizeof(res[0]));
587
588         res->descendant_list = plist_obstack_new(phase_obst(ph));
589         res->descendants     = NULL;
590
591         res->consumer_list   = plist_obstack_new(phase_obst(ph));
592         res->consumer        = NULL;
593
594         res->pkiller_list    = plist_obstack_new(phase_obst(ph));
595
596         res->parent_list     = plist_obstack_new(phase_obst(ph));
597
598         res->killer          = NULL;
599         res->irn             = irn;
600         res->chain           = NULL;
601
602         res->kill_count      = 0;
603         res->desc_walk       = 0;
604         res->live_out        = 0;
605         res->visited         = 0;
606         res->handled         = 0;
607         res->dumped          = 0;
608         res->havecons        = 0;
609
610         return res;
611 }
612
613 /**
614  * Collect all nodes data dependent on current node.
615  */
616 static void collect_descendants(rss_t *rss, rss_irn_t *rirn, ir_node *irn, int *got_sink, unsigned cur_desc_walk) {
617         const ir_edge_t *edge;
618         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
619         ir_node         *block    = rss->block;
620         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
621         int             i;
622
623         if (cur_node->desc_walk >= cur_desc_walk)
624                 return;
625         cur_node->desc_walk = cur_desc_walk;
626
627         /* Stop at Barriers */
628         if (be_is_Barrier(irn))
629                 return;
630
631         /* loop over normal and dependency edges */
632         for (i = 0; i < 2; ++i) {
633                 foreach_out_edge_kind(irn, edge, ekind[i]) {
634                         ir_node *user = get_edge_src_irn(edge);
635
636                         /* skip ignore nodes as they do not really contribute to register pressure */
637                         if (arch_irn_is_ignore(user))
638                                 continue;
639
640                         /*
641                                 (a) mode_X means Jump            -> out_edge
642                                 (b) Phi as user of a normal node -> out-edge
643                         */
644                         if (get_irn_mode(user) == mode_X || is_Phi(user)) {
645                                 if (! *got_sink)
646                                         goto force_sink;
647                                 else
648                                         continue;
649                         }
650
651                         if (is_Proj(user)) {
652                                 //if (arch_get_irn_reg_class_out(user) == rss->cls)
653                                         collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
654                         }
655                         else {
656                                 /* check if user lives in block and is not a control flow node */
657                                 if (get_nodes_block(user) == block) {
658                                         if (! plist_has_value(rirn->descendant_list, user)) {
659                                                 plist_insert_back(rirn->descendant_list, user);
660                                                 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", user));
661                                         }
662                                         collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
663                                 }
664                                 else if (! *got_sink) {
665 force_sink:
666                                         /* user lives out of block: add sink as descendant if not already done */
667                                         plist_insert_back(rirn->descendant_list, _sink);
668                                         *got_sink = 1;
669                                         DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", _sink));
670                                 }
671                         }
672                 }
673         }
674 }
675
676 /**
677  * Handles a single consumer.
678  */
679 static void collect_single_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *consumer, int *got_sink) {
680         ir_node *block = rss->block;
681
682         assert(! is_Proj(consumer) && "Cannot handle Projs");
683
684         if (! is_Phi(consumer) && ! is_Block(consumer) && get_nodes_block(consumer) == block) {
685                 if (!arch_irn_is_ignore(consumer) &&
686                                 !plist_has_value(rss_irn->consumer_list, consumer)) {
687                         plist_insert_back(rss_irn->consumer_list, consumer);
688                         DBG((rss->dbg, LEVEL_2, "\t\tconsumer %+F\n", consumer));
689                 }
690         }
691         else {
692                 rss_irn->live_out = 1;
693                 DBG((rss->dbg, LEVEL_2, "\t\tlive out %+F", consumer));
694                 if (! *got_sink) {
695                         plist_insert_back(rss_irn->consumer_list, _sink);
696                         *got_sink = 1;
697                         DB((rss->dbg, LEVEL_2, ", %+F added instead", _sink));
698                 }
699                 DB((rss->dbg, LEVEL_2, "\n"));
700         }
701 }
702
703 /**
704  * Collect all nodes consuming the value(s) produced by current node.
705  */
706 static void collect_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *irn, int *got_sink) {
707         const ir_edge_t *edge;
708         int             i;
709         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
710         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
711
712         if (cur_node->havecons)
713                 return;
714         cur_node->havecons = 1;
715
716         for (i = 0; i < 2; ++i) {
717                 foreach_out_edge_kind(irn, edge, ekind[i]) {
718                         ir_node *consumer = get_edge_src_irn(edge);
719
720                         if (is_Proj(consumer)) {
721                                 //if (arch_get_irn_reg_class_out(consumer) == rss->cls)
722                                         collect_consumer(rss, rss_irn, consumer, got_sink);
723                         }
724                         else
725                                 collect_single_consumer(rss, rss_irn, consumer, got_sink);
726                 }
727         }
728 }
729
730 /**
731  * Collects all consumer and descendant of a irn.
732  */
733 static void collect_node_info(rss_t *rss, ir_node *irn) {
734         static unsigned cur_desc_walk = 0;
735         rss_irn_t       *rss_irn      = get_rss_irn(rss, irn);
736         int             got_sink;
737
738         assert(! is_Proj(irn) && "Cannot handle Projs.");
739
740         /* check if node info is already available */
741         if (rss_irn->handled)
742                 return;
743                 //phase_reinit_single_irn_data(&rss->ph, irn);
744
745         DBG((rss->dbg, LEVEL_1, "\tcomputing consumers of %+F:\n", irn));
746
747         /* collect all consumer */
748         got_sink = 0;
749         collect_consumer(rss, rss_irn, irn, &got_sink);
750
751         /* build sorted consumer array */
752         rss_irn->consumer = build_sorted_array_from_list(rss_irn->consumer_list, phase_obst(&rss->ph));
753
754         DBG((rss->dbg, LEVEL_1, "\tcompute descendants of %+F:\n", irn));
755
756         /* collect descendants */
757         got_sink = 0;
758         collect_descendants(rss, rss_irn, irn, &got_sink, ++cur_desc_walk);
759
760         /* build sorted descendant array */
761         rss_irn->descendants = build_sorted_array_from_list(rss_irn->descendant_list, phase_obst(&rss->ph));
762
763         rss_irn->handled = 1;
764 }
765
766 /**
767  * Checks if v is a potential killer of u.
768  * v is in pkill(u) iff descendants(v) cut consumer(u) is v
769  *
770  * @param rss   The rss object
771  * @param v      The node to check for killer
772  * @param u      The potentially killed value
773  * @return 1 if v is in pkill(u), 0 otherwise
774  */
775 static int is_potential_killer(rss_t *rss, rss_irn_t *v, rss_irn_t *u) {
776         plist_t *list;
777         const ir_node **arr;
778         plist_element_t *el;
779         (void) rss;
780
781         assert(is_Sink(v->irn) || ((plist_count(v->descendant_list) > 0 && v->descendants) || 1));
782         assert(is_Sink(u->irn) || ((plist_count(u->consumer_list)   > 0 && u->consumer)    || 1));
783
784         /* as we loop over the list: loop over the shorter one */
785         if (plist_count(v->descendant_list) > plist_count(u->consumer_list)) {
786                 list = u->consumer_list;
787                 arr  = v->descendants;
788         }
789         else {
790                 list = v->descendant_list;
791                 arr  = u->consumer;
792         }
793
794         /* for each list element: try to find element in array */
795         foreach_plist(list, el) {
796                 ir_node *irn   = plist_element_get_value(el);
797                 ir_node *match = BSEARCH_IRN_ARR(irn, arr);
798
799                 if (match && match != irn)
800                         return 0;
801         }
802
803         return 1;
804 }
805
806 /**
807  * Update descendants, consumer and pkiller list for the given irn.
808  */
809 static void update_node_info(rss_t *rss, ir_node *irn, ir_node *pk_irn) {
810         rss_irn_t *node    = get_rss_irn(rss, irn);
811         rss_irn_t *pkiller = get_rss_irn(rss, pk_irn);
812
813         /* add consumer and rebuild the consumer array */
814         if (! plist_has_value(node->consumer_list, pk_irn)) {
815                 plist_insert_back(node->consumer_list, pk_irn);
816                 node->consumer = build_sorted_array_from_list(node->consumer_list, phase_obst(&rss->ph));
817         }
818
819         /* add pk_irn as descendant, add it's descendants to irn's and rebuild array */
820         if (! plist_has_value(node->descendant_list, pk_irn)) {
821                 plist_element_t *el;
822
823                 plist_insert_back(node->descendant_list, pk_irn);
824
825                 foreach_plist(pkiller->descendant_list, el) {
826                         ir_node *desc = plist_element_get_value(el);
827
828                         if (! plist_has_value(node->descendant_list, desc))
829                                 plist_insert_back(node->descendant_list, desc);
830                 }
831
832                 node->descendants = build_sorted_array_from_list(node->descendant_list, phase_obst(&rss->ph));
833         }
834
835 }
836
837 /**
838  * Compute the potential killing set PK.
839  */
840 static void compute_pkill_set(rss_t *rss) {
841         plist_element_t *u_el, *v_el;
842
843         foreach_plist(rss->nodes, u_el) {
844                 ir_node   *u_irn = plist_element_get_value(u_el);
845                 rss_irn_t *u     = get_rss_irn(rss, u_irn);
846
847                 DBG((rss->dbg, LEVEL_1, "\tcomputing potential killers of %+F:\n", u_irn));
848
849                 /* check each consumer if it is a potential killer  */
850                 foreach_plist(u->consumer_list, v_el) {
851                         ir_node   *v_irn = plist_element_get_value(v_el);
852                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
853
854                         /* check, if v is a potential killer of u */
855                         if (is_potential_killer(rss, v, u)) {
856                                 if (! plist_has_value(u->pkiller_list, v_irn))
857                                         plist_insert_back(u->pkiller_list, v_irn);
858                                 v->kill_count++;
859                                 DBG((rss->dbg, LEVEL_2, "\t\tpotential killer %+F\n", v_irn));
860                         }
861                 }
862
863                 u->killer = _sink;
864         }
865
866         if (rss->opts->dump_flags & RSS_DUMP_PKG)
867                 debug_vcg_dump_pkg(rss, NULL, 0);
868 }
869
870 /**
871  * Build set of killing edges (from values to their potential killers)
872  */
873 static void build_kill_edges(rss_t *rss, pset *epk) {
874         plist_element_t *el, *k_el;
875
876         foreach_plist(rss->nodes, el) {
877                 ir_node    *irn  = plist_element_get_value(el);
878                 rss_irn_t *rirn = get_rss_irn(rss, irn);
879
880                 DBG((rss->dbg, LEVEL_3, "\t\tbuilding kill edges for %+F:\n", irn));
881
882                 foreach_plist(rirn->pkiller_list, k_el) {
883                         ir_node    *pkiller = plist_element_get_value(k_el);
884                         rss_edge_t *ke      = obstack_alloc(phase_obst(&rss->ph), sizeof(*ke));
885
886                         ke->src  = irn;
887                         ke->tgt  = pkiller;
888                         ke->next = NULL;
889
890                         DBG((rss->dbg, LEVEL_3, "\t\t\ttarget %+F\n", pkiller));
891
892                         pset_insert(epk, ke, HASH_RSS_EDGE(ke));
893                 }
894         }
895 }
896
897 #ifdef DEBUG_libfirm
898 /* print the given cbc for debugging purpose */
899 static void debug_print_cbc(firm_dbg_module_t *mod, cbc_t *cbc) {
900         ir_nodeset_iterator_t iter;
901         ir_node    *n;
902         rss_edge_t *ke;
903
904         DBG((mod, LEVEL_3, "\t\tS = set of parents:\n"));
905         foreach_ir_nodeset(&cbc->parents, n, iter) {
906                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
907         }
908         DBG((mod, LEVEL_3, "\t\tT = set of children:\n"));
909         foreach_ir_nodeset(&cbc->children, n, iter) {
910                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
911         }
912         DBG((mod, LEVEL_3, "\t\tE = Edges from producers to consumers\n"));
913         foreach_pset(cbc->kill_edges, ke) {
914                 DBG((mod, LEVEL_3, "\t\t\t%+F -> %+F\n", ke->src, ke->tgt));
915         }
916 }
917 #endif
918
919 /**
920  * Construct the bipartite decomposition.
921  * Sid-Ahmed-Ali Touati, Phd Thesis
922  * Register Pressure in Instruction Level Parallelism, p. 71
923  */
924 static void compute_bipartite_decomposition(rss_t *rss) {
925         pset *epk    = new_pset(cmp_rss_edges, 10);
926         int  cur_num = 0;
927
928         plist_element_t *el;
929
930         DBG((rss->dbg, LEVEL_1, "\tcomputing bipartite decomposition:\n"));
931
932         build_kill_edges(rss, epk);
933
934         foreach_plist(rss->nodes, el) {
935                 ir_node   *u_irn   = plist_element_get_value(el);
936                 rss_irn_t *u       = get_rss_irn(rss, u_irn);
937                 int       p_change = 1;
938                 int       c_change = 1;
939                 int       vrfy_ok  = 1;
940
941                 cbc_t           *cbc;
942                 plist_element_t *el2;
943                 rss_edge_t      *k_edge;
944                 rss_edge_t      *kedge_root = NULL;
945                 ir_node         *t_irn, *s_irn;
946                 ir_nodeset_iterator_t iter;
947
948                 if (u->visited || u_irn == _sink)
949                         continue;
950
951                 DBG((rss->dbg, LEVEL_2, "\t\t%+F choosen:\n", u_irn));
952
953                 cbc     = obstack_alloc(phase_obst(&rss->ph), sizeof(*cbc));
954                 cbc->nr = cur_num++;
955
956                 /* initialize S_cb */
957                 ir_nodeset_init_size(&cbc->parents, 5);
958                 ir_nodeset_insert(&cbc->parents, u_irn);
959                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (init)\n", u_irn));
960
961                 /* E_cb = empty */
962                 cbc->kill_edges = new_pset(cmp_rss_edges, 5);
963
964                 /* each parent gets killed by at least one value from children */
965
966                 /* T_cb = PK_successors(u) */
967                 ir_nodeset_init_size(&cbc->children, 5);
968                 foreach_plist(u->pkiller_list, el2) {
969                         ir_nodeset_insert(&cbc->children, plist_element_get_value(el2));
970                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (init)\n", plist_element_get_value(el2)));
971                 }
972
973                 /*
974                         Now: insert the parents of all children into the parent set
975                         and insert the children of all parents into the children set
976                         until the sets don't change any more
977                 */
978                 while (p_change || c_change) {
979                         ir_nodeset_iterator_t iter;
980                         p_change = c_change = 0;
981
982                         /* accumulate parents */
983                         foreach_ir_nodeset(&cbc->children, t_irn, iter) {
984                                 foreach_pset(epk, k_edge) {
985                                         ir_node *val = k_edge->src;
986
987                                         if (k_edge->tgt == t_irn && ! ir_nodeset_contains(&cbc->parents, val)) {
988                                                 ir_nodeset_insert(&cbc->parents, val);
989                                                 p_change = 1;
990                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (killed by %+F)\n", val, t_irn));
991                                         }
992                                 }
993                         }
994
995                         /* accumulate children */
996                         foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
997                                 foreach_pset(epk, k_edge) {
998                                         ir_node *val = k_edge->tgt;
999
1000                                         if (k_edge->src == s_irn  && ! ir_nodeset_contains(&cbc->children, val)) {
1001                                                 ir_nodeset_insert(&cbc->children, val);
1002                                                 c_change = 1;
1003                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (kills %+F)\n", val, s_irn));
1004                                         }
1005                                 }
1006                         }
1007                 }
1008
1009                 /* mark all parent values as visited */
1010                 foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
1011                         rss_irn_t *s = get_rss_irn(rss, s_irn);
1012                         s->visited = 1;
1013                         /* assure bipartite property */
1014 #if 0
1015                         if (ir_nodeset_contains(&cbc->children, s_irn)) {
1016                                 ir_nodeset_remove(&cbc->children, s_irn);
1017                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F removed from to children\n", s_irn));
1018                         }
1019 #endif
1020                 }
1021
1022                 /* update edges */
1023                 foreach_pset(epk, k_edge) {
1024                         if (ir_nodeset_contains(&cbc->parents, k_edge->src) &&
1025                             ir_nodeset_contains(&cbc->children, k_edge->tgt)) {
1026                                 pset_insert(cbc->kill_edges, k_edge, HASH_RSS_EDGE(k_edge));
1027                                 /*
1028                                         Link all k_edges which are about to be removed together.
1029                                         Beware: pset_remove kills the iterator.
1030                                 */
1031                                 k_edge->next = kedge_root;
1032                                 kedge_root   = k_edge;
1033                         }
1034                 }
1035
1036                 /* remove all linked k_edges */
1037                 for (; kedge_root; kedge_root = kedge_root->next) {
1038                         pset_remove(epk, kedge_root, HASH_RSS_EDGE(kedge_root));
1039                 }
1040
1041                 /* verify the cbc */
1042                 foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
1043                         int is_killed = 0;
1044
1045                         foreach_pset(cbc->kill_edges, k_edge) {
1046                                 if (k_edge->src == s_irn) {
1047                                         is_killed = 1;
1048                                         pset_break(cbc->kill_edges);
1049                                         break;
1050                                 }
1051                         }
1052
1053                         if (! is_killed) {
1054                                 vrfy_ok = 0;
1055                                 ir_fprintf(stderr, "Warning: parent %+F is not killed in current cbc\n", s_irn);
1056                         }
1057                 }
1058                 assert(vrfy_ok && "Verification of CBC failed");
1059
1060                 /* add the connected bipartite component */
1061                 if (ir_nodeset_size(&cbc->parents) > 0 &&
1062                     ir_nodeset_size(&cbc->children) > 0 &&
1063                         pset_count(cbc->kill_edges) > 0)
1064                         pset_insert(rss->cbc_set, cbc, (unsigned)cbc->nr);
1065                 DBG((rss->dbg, LEVEL_2, "\tbipartite component %d inserted:\n", cbc->nr));
1066                 DEBUG_ONLY(
1067                         if (firm_dbg_get_mask(rss->dbg) & LEVEL_2)
1068                                 debug_print_cbc(rss->dbg, cbc);
1069                 );
1070         }
1071
1072         if (rss->opts->dump_flags & RSS_DUMP_CBC)
1073                 debug_vcg_dump_bipartite(rss);
1074
1075         del_pset(epk);
1076 }
1077
1078 /**
1079  * Select the child with the maximum cost.
1080  */
1081 static child_t *select_child_max_cost(rss_t *rss, ir_nodeset_t *x, ir_nodeset_t *y, child_t *t, cbc_t *cbc) {
1082         ir_node *child;
1083         ir_nodeset_iterator_t iter;
1084         float   max_cost = -1.0f;
1085
1086         DBG((rss->dbg, LEVEL_2, "\t\tcomputing children costs:\n"));
1087
1088         foreach_ir_nodeset(&cbc->children, child, iter) {
1089                 rss_irn_t  *r_child             = get_rss_irn(rss, child);
1090                 int         num_unkilled_parents = 0;
1091                 int         num_descendants;
1092                 rss_edge_t *k_edge;
1093                 float       cost;
1094
1095                 /* get the number of unkilled parents */
1096                 foreach_pset(cbc->kill_edges, k_edge) {
1097                         if (k_edge->tgt == child && ir_nodeset_contains(x, k_edge->src))
1098                                 ++num_unkilled_parents;
1099                 }
1100
1101                 cost = (float)num_unkilled_parents;
1102
1103                 num_descendants = plist_count(r_child->descendant_list) + ir_nodeset_size(y);
1104
1105                 if (num_descendants > 0)
1106                         cost /= (float)num_descendants;
1107
1108                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F, #desc %d, cost %.3f\n", child, num_descendants, cost));
1109
1110                 if (cost > max_cost) {
1111                         t->irn   = child;
1112                         t->cost  = cost;
1113                         max_cost = cost;
1114                 }
1115         }
1116
1117         return t;
1118 }
1119
1120 /**
1121  * Remove all parents from x which are killed by t_irn.
1122  */
1123 static void remove_covered_parents(rss_t *rss, ir_nodeset_t *x, ir_node *t_irn, cbc_t *cbc) {
1124         rss_irn_t  *t = get_rss_irn(rss, t_irn);
1125         rss_edge_t *k_edge;
1126
1127         DBG((rss->dbg, LEVEL_2, "\t\tremoving parents covered by %+F:\n", t_irn));
1128
1129         foreach_pset(cbc->kill_edges, k_edge) {
1130                 if (k_edge->tgt == t_irn && ir_nodeset_contains(x, k_edge->src)) {
1131                         ir_nodeset_remove(x, k_edge->src);
1132                         plist_insert_back(t->parent_list, k_edge->src);
1133                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", k_edge->src));
1134                 }
1135         }
1136 }
1137
1138 static void update_cumulated_descendent_values(rss_t *rss, ir_nodeset_t *y, ir_node *t_irn) {
1139         rss_irn_t *t = get_rss_irn(rss, t_irn);
1140         plist_element_t *el;
1141
1142         DBG((rss->dbg, LEVEL_2, "\t\tupdating cumulated descendant value of %+F:\n", t_irn));
1143
1144         foreach_plist(t->descendant_list, el) {
1145                 ir_nodeset_insert(y, plist_element_get_value(el));
1146                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", plist_element_get_value(el)));
1147         }
1148 }
1149
1150 /**
1151  * Greedy-k: a heuristics for the MMA problem
1152  */
1153 static void compute_killing_function(rss_t *rss) {
1154         cbc_t *cbc;
1155         struct obstack obst;
1156
1157         obstack_init(&obst);
1158
1159         rss->cbc_set = pset_new_ptr(5);
1160         compute_bipartite_decomposition(rss);
1161
1162         /* for all bipartite components do: */
1163         foreach_pset(rss->cbc_set, cbc) {
1164                 ir_node *p;
1165                 ir_nodeset_t x;
1166                 ir_nodeset_t y;
1167                 ir_nodeset_iterator_t iter;
1168                 child_t **sks    = NEW_ARR_F(child_t *, 20);
1169                 int     cur_len  = 0;
1170                 int     cur_size = 20;
1171                 int     i;
1172
1173                 ir_nodeset_init_size(&x, 10);
1174                 ir_nodeset_init_size(&y, 10);
1175
1176                 DBG((rss->dbg, LEVEL_1, "\tcomputing SKS for cbc %d:\n", cbc->nr));
1177                 DBG((rss->dbg, LEVEL_2, "\t\tinitializing parents X:\n"));
1178
1179                 /* X = S_cb (all parents are initially uncovered) */
1180                 foreach_ir_nodeset(&cbc->parents, p, iter) {
1181                         ir_nodeset_insert(&x, p);
1182                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", p));
1183                 }
1184
1185                 /* while X not empty */
1186                 while (ir_nodeset_size(&x) > 0) {
1187                         child_t *t = obstack_alloc(&obst, sizeof(*t));
1188                         memset(t, 0, sizeof(*t));
1189
1190                         t = select_child_max_cost(rss, &x, &y, t, cbc);
1191
1192                         if (cur_len >= cur_size) {
1193                                 ARR_EXTO(child_t *, sks, cur_size * 2);
1194                                 cur_size *= 2;
1195                         }
1196
1197                         DBG((rss->dbg, LEVEL_2, "\t\tinsert child %+F (%.3f) into SKS at pos %d\n", t->irn, t->cost, cur_len));
1198
1199                         sks[cur_len++] = t;
1200                         remove_covered_parents(rss, &x, t->irn, cbc);
1201                         update_cumulated_descendent_values(rss, &y, t->irn);
1202                 }
1203
1204                 ARR_SHRINKLEN(sks, cur_len);
1205
1206                 /* sort SKS in increasing cost order */
1207                 qsort(sks, cur_len, sizeof(sks[0]), cmp_child_costs);
1208
1209                 DBG((rss->dbg, LEVEL_2, "\tprocessing SKS for cbc %d:\n", cbc->nr));
1210
1211                 /* build killing function */
1212                 for (i = cur_len - 1; i >= 0; --i) { /* loop over sks in decreasing cost order */
1213                         child_t         *t = sks[i];
1214                         rss_irn_t       *rt = get_rss_irn(rss, t->irn);
1215                         plist_element_t *p_el;
1216
1217                         DBG((rss->dbg, LEVEL_3, "\t\t\tkiller %+F (%.3f):\n", t->irn, t->cost));
1218
1219                         /* kill all unkilled parents of t */
1220                         foreach_plist(rt->parent_list, p_el) {
1221                                 ir_node    *par = plist_element_get_value(p_el);
1222                                 rss_irn_t *rpar = get_rss_irn(rss, par);
1223
1224                                 if (is_Sink(rpar->killer)) {
1225                                         rpar->killer = t->irn;
1226                                         DBG((rss->dbg, LEVEL_2, "\t\tkill %+F\n", rpar->irn));
1227                                 }
1228                                 else {
1229                                         DBG((rss->dbg, LEVEL_3, "\t\t\tkeeping %+F as killer for %+F\n", rpar->killer, rpar->irn));
1230                                 }
1231                         }
1232                 }
1233
1234                 ir_nodeset_destroy(&x);
1235                 ir_nodeset_destroy(&y);
1236                 DEL_ARR_F(sks);
1237         }
1238
1239         if (rss->opts->dump_flags & RSS_DUMP_KILL)
1240                 debug_vcg_dump_kill(rss);
1241
1242         del_pset(rss->cbc_set);
1243         obstack_free(&obst, NULL);
1244 }
1245
1246 /**
1247  * Adds the edge src -> tgt to the dvg. Checks if reverse edge is already there (asserts).
1248  */
1249 static inline void add_dvg_edge(rss_t *rss, dvg_t *dvg, const ir_node *src, const ir_node *tgt, int have_source) {
1250         rss_edge_t *dvg_edge;
1251         rss_edge_t key;
1252
1253         if (! have_source)
1254                 ir_nodeset_insert(&dvg->nodes, (ir_node *) src);
1255         else
1256                 assert(ir_nodeset_contains(&dvg->nodes, src) && "Wrong assumption");
1257
1258         ir_nodeset_insert(&dvg->nodes, (ir_node *) tgt);
1259
1260         key.src = (ir_node *) tgt;
1261         key.tgt = (ir_node *) src;
1262         assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1263
1264         key.src = (ir_node *) src;
1265         key.tgt = (ir_node *) tgt;
1266         if (NULL != pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1267                 /* add the edge to the DVG */
1268                 dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1269
1270                 dvg_edge->src  = (ir_node *) src;
1271                 dvg_edge->tgt  = (ir_node *) tgt;
1272                 dvg_edge->next = NULL;
1273
1274                 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", src, tgt));
1275                 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1276         }
1277 }
1278
1279 /**
1280  * Computes the disjoint value DAG (DVG).
1281  * BEWARE: It is not made explicitly clear in the Touati paper,
1282  *         but the DVG is meant to be build from the KILLING DAG
1283  */
1284 static void compute_dvg(rss_t *rss, dvg_t *dvg) {
1285         plist_element_t *el;
1286
1287         DBG((rss->dbg, LEVEL_1, "\tcomputing DVG:\n"));
1288
1289         foreach_plist(rss->nodes, el) {
1290                 ir_node    *u_irn    = plist_element_get_value(el);
1291                 rss_irn_t  *u        = get_rss_irn(rss, u_irn);
1292                 rss_irn_t  *u_killer = get_rss_irn(rss, u->killer);
1293                 int        i;
1294
1295                 /* TODO: omit nodes only having sink as pkiller and killing no one */
1296
1297                 /* add an edge to killer */
1298                 add_dvg_edge(rss, dvg, u_irn, u->killer, 0);
1299
1300                 if (is_Sink(u->killer))
1301                         continue;
1302
1303                 /* We add an edge to every killer, from where we could be reached. */
1304                 for (i = ARR_LEN_SAFE(u_killer->descendants) - 1; i >= 0; --i) {
1305                         add_dvg_edge(rss, dvg, u_irn, u_killer->descendants[i], 1);
1306                 }
1307
1308 #if 0
1309
1310                 foreach_plist(rss->nodes, el2) {
1311                         ir_node *v_irn = plist_element_get_value(el2);
1312
1313                         /*
1314                                 There is an edge (u, v) in the DVG iff v is a descendant of the killer(u).
1315                         */
1316                         if (BSEARCH_IRN_ARR(v_irn, u_kill->descendants)) {
1317                                 rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1318                                 rss_edge_t key;
1319
1320                                 /* insert the user into the DVG and append it to the user list of u */
1321                                 ir_nodeset_insert(&dvg->nodes, v_irn);
1322                                 if (! plist_has_value(u->dvg_user_list, v_irn))
1323                                         plist_insert_back(u->dvg_user_list, v_irn);
1324
1325                                 dvg_edge->src  = u_irn;
1326                                 dvg_edge->tgt  = v_irn;
1327                                 dvg_edge->next = NULL;
1328
1329                                 key.src = v_irn;
1330                                 key.tgt = u_irn;
1331
1332                                 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1333
1334                                 /* add the edge to the DVG */
1335                                 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", u_irn, v_irn));
1336                                 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1337                         }
1338                 }
1339 #endif /* if 0 */
1340         }
1341
1342         if (rss->opts->dump_flags & RSS_DUMP_DVG)
1343                 debug_vcg_dump_dvg(rss, dvg);
1344 }
1345
1346 /**
1347  * Updates the dvg structure when a serialization edge from src -> tgt is added.
1348  */
1349 static void update_dvg(rss_t *rss, dvg_t *dvg, rss_irn_t *src, rss_irn_t *tgt) {
1350         int i, j, idx;
1351         rss_edge_t *edge;
1352         rss_edge_t **arr = ALLOCAN(rss_edge_t*, pset_count(dvg->edges));
1353
1354         /*
1355                 Add an edge from serialization target to serialization src:
1356                 src cannot be alive with target
1357         */
1358         add_dvg_edge(rss, dvg, tgt->irn, src->irn, 0);
1359
1360         /* Add edges to src's descendants as well, they also getting serialized. */
1361         for (i = ARR_LEN_SAFE(src->descendants) - 1; i >= 0; --i) {
1362                 add_dvg_edge(rss, dvg, tgt->irn, src->descendants[i], 1);
1363         }
1364
1365         /* We also have to add edges from targets predecessors in dvg */
1366         idx = 0;
1367         /* We cannot insert elements into set over which we iterate ... */
1368         foreach_pset(dvg->edges, edge) {
1369                 if (edge->tgt == tgt->irn) {
1370                         arr[idx++] = edge;
1371                 }
1372         }
1373
1374         for (i = 0; i < idx; ++i) {
1375                 edge = arr[i];
1376                 add_dvg_edge(rss, dvg, edge->src, src->irn, 1);
1377                 for (j = ARR_LEN_SAFE(src->descendants) - 1; j >= 0; --j) {
1378                         add_dvg_edge(rss, dvg, edge->src, src->descendants[j], 1);
1379                 }
1380         }
1381 }
1382
1383 #if 0
1384 /**
1385  * Accumulate all descendants for root into list.
1386  */
1387 static void accumulate_dvg_descendant_values(rss_t *rss, rss_irn_t *root, plist_t *list) {
1388         if (plist_count(root->dvg_user_list) > 0) {
1389                 plist_element_t *el;
1390
1391                 foreach_plist(root->dvg_user_list, el) {
1392                         ir_node   *v_irn = plist_element_get_value(el);
1393                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
1394
1395                         /* add v as descendant */
1396                         if (! plist_has_value(list, v_irn)) {
1397                                 plist_insert_back(list, v_irn);
1398                                 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG descendant %+F\n", v_irn));
1399                         }
1400
1401                         /* accumulate v's descendants */
1402                         accumulate_dvg_descendant_values(rss, v, list);
1403                 }
1404         }
1405 }
1406
1407 /**
1408  * Builds the list of potential killers for each node
1409  * in the given DVG.
1410  * Needs the descendant list for all user as sorted array.
1411  */
1412 static void build_dvg_pkiller_list(rss_t *rss, dvg_t *dvg) {
1413         ir_nodeset_iterator_t iter;
1414         ir_node *irn;
1415
1416         foreach_nodeset(&dvg->nodes, irn, iter) {
1417                 rss_irn_t       *node = get_rss_irn(rss, irn);
1418                 plist_element_t *el, *el2;
1419
1420                 DBG((rss->dbg, DEBUG_DVG, "\t\tbuilding pkiller list for %+F\n", irn));
1421
1422                 /* check each user */
1423                 foreach_plist(node->dvg_user_list, el) {
1424                         ir_node *u_irn = plist_element_get_value(el);
1425
1426                         /* is the current user u_irn not a descendant of any other user -> pkiller */
1427                         foreach_plist(node->dvg_user_list, el2) {
1428                                 ir_node   *v_irn = plist_element_get_value(el2);
1429                                 rss_irn_t *v     = get_rss_irn(rss, v_irn);
1430
1431                                 if (el != el2                             &&
1432                                         ! BSEARCH_IRN_ARR(u_irn, v->dvg_desc) &&
1433                                         ! plist_has_value(node->dvg_pkiller_list, u_irn))
1434                                 {
1435                                         plist_insert_back(node->dvg_pkiller_list, u_irn);
1436                                         DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG pkiller %+F\n", u_irn));
1437                                 }
1438                         }
1439                 }
1440
1441                 node->dvg_pkiller = build_sorted_array_from_list(node->dvg_pkiller_list, phase_obst(&rss->ph));
1442         }
1443
1444         DEBUG_ONLY(
1445                 if (firm_dbg_get_mask(rss->dbg) & DEBUG_DVG)
1446                         debug_vcg_dump_dvg_pkiller(rss, dvg);
1447         );
1448 }
1449
1450 #endif /* if 0 */
1451
1452 /**
1453  * Compute the maximal antichain of the current DVG.
1454  * This is a reimplementation of the MAXIMAL_ANTI_CHAIN function
1455  * from the DDG library 1.1 (DAG.cpp).
1456  */
1457 static ir_nodeset_t *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration) {
1458         int         n               = ir_nodeset_size(&dvg->nodes);
1459         int         *assignment     = ALLOCAN(int, n);
1460         int         *assignment_rev = ALLOCAN(int, n);
1461         int         *idx_map        = ALLOCAN(int, n);
1462         hungarian_problem_t *bp;
1463         ir_nodeset_t *values, *temp;
1464         ir_nodeset_iterator_t iter;
1465         ir_node     *u_irn;
1466         int         i, j, cost, cur_chain, res;
1467         rss_edge_t  *dvg_edge;
1468
1469 #define MAP_IDX(irn) bsearch_for_index(get_irn_idx(irn), idx_map,  n,  1)
1470
1471         if (pset_count(dvg->edges) == 0)
1472                 return NULL;
1473
1474         bp = hungarian_new(n, n, HUNGARIAN_MATCH_NORMAL);
1475
1476         /*
1477                 At first, we build an index map for the nodes in the DVG,
1478                 because we cannot use the irn idx therefore as the resulting
1479                 bipartite data structure would have around 1.2GB.
1480                 So we limit the size to the number of nodes we have in the DVG
1481                 and build a sorted index map for their irn indices.
1482         */
1483         i = 0;
1484         foreach_ir_nodeset(&dvg->nodes, u_irn, iter) {
1485                 idx_map[i++] = get_irn_idx(u_irn);
1486         }
1487         qsort(idx_map, n, sizeof(idx_map[0]), cmp_int);
1488
1489         foreach_pset(dvg->edges, dvg_edge) {
1490                 int idx_u = MAP_IDX(dvg_edge->src);
1491                 int idx_v = MAP_IDX(dvg_edge->tgt);
1492
1493                 /* add the entry to the bipartite data structure */
1494                 hungarian_add(bp, idx_u, idx_v, 1);
1495                 DBG((rss->dbg, LEVEL_3, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1496                         idx_u, dvg_edge->src, idx_v, dvg_edge->tgt));
1497         }
1498 #if 0
1499         /*
1500                 Add a bipartite entry for each pair of nodes (u, v), where exists a
1501                 path in the DVG from u to v, ie. connect all descendants(v) to v.
1502                 desc_dvg(v) = accumulated descendants of all z in dvg_user_list(v)
1503         */
1504         foreach_ir_nodeset(&dvg->nodes, u_irn, iter) {
1505                 rss_irn_t *u        = get_rss_irn(rss, u_irn);
1506                 int       idx_u_irn = MAP_IDX(u_irn);
1507
1508                 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG descendants of %+F:\n", u_irn));
1509
1510                 //plist_clear(u->dvg_desc_list);
1511                 //accumulate_dvg_descendant_values(rss, u, u->dvg_desc_list);
1512
1513                 /*
1514                         FIXME: The array is build on the phase obstack and we cannot free the data.
1515                         So the array get as many times allocated as this function is called.
1516                 */
1517
1518                 /* build the sorted array for faster searches */
1519                 //u->dvg_desc = build_sorted_array_from_list(u->dvg_desc_list, phase_obst(&rss->ph));
1520
1521                 DBG((rss->dbg, DEBUG_MAX_AC, "\t\tadding bipartite entries of %+F:\n", u_irn));
1522
1523                 /* add the bipartite entries for all descendants */
1524                 for (i = ARR_LEN_SAFE(u->dvg_desc) - 1; i >= 0; --i) {
1525                         rss_irn_t *desc    = get_rss_irn(rss, u->dvg_desc[i]);
1526                         int       idx_desc = MAP_IDX(u->dvg_desc[i]);
1527
1528                         /* add the entry to the bipartite data structure */
1529                         hungarian_add(bp, idx_u_irn, idx_desc, 1);
1530                         DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1531                                 idx_u_irn, u_irn, idx_desc, u->dvg_desc[i]));
1532
1533                         need_matching = 1;
1534                 }
1535         }
1536 #endif
1537
1538         /* We want maximum cardinality matching */
1539         hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);
1540
1541 #if 0
1542         DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG pkiller:\n"));
1543         /* beware: the following function needs the dvg_desc array */
1544         build_dvg_pkiller_list(rss, dvg);
1545 #endif
1546
1547         DBG((rss->dbg, LEVEL_2, "\t\tcomputing bipartite matching\n"));
1548         /*
1549                 The maximum cardinality bipartite matching gives us the minimal
1550                 chain partition, which corresponds to the maximum anti chains.
1551         */
1552         res = hungarian_solve(bp, assignment, &cost, 1);
1553         assert(res == 0 && "Bipartite matching failed!");
1554
1555         hungarian_free(bp);
1556         memset(assignment_rev, -1, n * sizeof(assignment_rev[0]));
1557
1558         /* build the reverse assignment, ie. foreach i -> j, add j -> i */
1559         for (i = 0; i < n; ++i) {
1560                 if (assignment[i] >= 0) {
1561                         int j = assignment[i];
1562                         assignment_rev[j] = i;
1563                 }
1564         }
1565
1566         DBG((rss->dbg, LEVEL_2, "\t\tgot assignment with cost %d\n", cost));
1567         DBG((rss->dbg, LEVEL_3, "\t\t\tassignment   ---   reverse assignment\n", cost));
1568         for (i = 0; i < n; ++i) {
1569                 DBG((rss->dbg, LEVEL_3, "\t\t\t%3d -> %3d         %3d -> %3d\n", i, assignment[i], i, assignment_rev[i]));
1570         }
1571
1572         values    = XMALLOC(ir_nodeset_t);
1573         ir_nodeset_init_size(values, 10);
1574         cur_chain = 0;
1575         /* Construction of the minimal chain partition */
1576         for (j = 0; j < n; ++j) {
1577                 /* check nodes, which did not occur as target */
1578                 if (assignment_rev[j] == -1) {
1579                         int       xj      = idx_map[j];
1580                         ir_node   *xj_irn = get_idx_irn(rss->irg, xj);
1581                         rss_irn_t *xj_rss = get_rss_irn(rss, xj_irn);
1582                         chain_t   *c      = obstack_alloc(phase_obst(&rss->ph), sizeof(*c));
1583                         int       source;
1584
1585                         /* there was no source for j -> we have a source of a new chain */
1586                         ir_nodeset_insert(values, xj_irn);
1587
1588                         c->elements = plist_obstack_new(phase_obst(&rss->ph));
1589                         c->nr       = cur_chain++;
1590                         plist_insert_back(c->elements, xj_irn);
1591
1592                         xj_rss->chain = c;
1593
1594                         DBG((rss->dbg, LEVEL_2, "\t\tstarting chain %d:\n", c->nr));
1595                         DBG((rss->dbg, LEVEL_2, "\t\t\t%+F (%d)", xj_irn, j));
1596
1597                         /* follow chain, having j as source */
1598                         source = j;
1599                         while (assignment[source] >= 0) {
1600                                 int       target  = assignment[source];
1601                                 int       irn_idx = idx_map[target];
1602                                 ir_node   *irn    = get_idx_irn(rss->irg, irn_idx);
1603                                 rss_irn_t *node   = get_rss_irn(rss, irn);
1604
1605                                 plist_insert_back(c->elements, irn);
1606                                 node->chain = c;
1607
1608                                 DB((rss->dbg, LEVEL_2, " -> %+F (%d)", irn, target));
1609
1610                                 /* new source = last target */
1611                                 source = target;
1612                         }
1613
1614                         DB((rss->dbg, LEVEL_2, "\n"));
1615                 }
1616         }
1617
1618         /*
1619                 Computing the maximal antichain: Select an element from each
1620                 chain such, such it is parallel with the others.
1621         */
1622         DBG((rss->dbg, LEVEL_1, "\t\tcomputing set of saturation values (MAX AC)\n"));
1623         DBG((rss->dbg, LEVEL_3, "\t\tstarting with:\n"));
1624
1625         DEBUG_ONLY(
1626                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_3)
1627                         dump_nodeset(values, "\t\t\t");
1628         )
1629
1630         temp = NULL;
1631         do {
1632                 /*
1633                         We need an explicit array for the values as
1634                         we cannot iterate multiple times over the same
1635                         set at the same time. :-(((((
1636                         TODO Matze: now we can, so rewrite this...
1637                 */
1638                 int     n         = ir_nodeset_size(values);
1639                 int     i         = 0;
1640                 ir_node **val_arr = NEW_ARR_F(ir_node *, n);
1641
1642                 foreach_ir_nodeset(values, u_irn, iter)
1643                         val_arr[i++] = u_irn;
1644
1645                 if (temp) {
1646                         ir_nodeset_destroy(temp);
1647                         free(temp);
1648                 }
1649
1650                 temp = XMALLOC(ir_nodeset_t);
1651                 ir_nodeset_init_size(temp, 10);
1652
1653                 /* Select all nodes from current value set, having another node in the set as descendant. */
1654                 for (i = 0; i < n; ++i) {
1655                         rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1656
1657                         for (j = 0; j < n; ++j) {
1658                                 if (i != j) {
1659                                         rss_edge_t key;
1660
1661                                         key.src = val_arr[i];
1662                                         key.tgt = val_arr[j];
1663
1664                                         if (pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1665                                                 /* v[j] is descendant of u -> remove u and break */
1666                                                 ir_nodeset_insert(temp, (ir_node *) u->irn);
1667                                                 ir_nodeset_remove(values, u->irn);
1668
1669                                                 DBG((rss->dbg, LEVEL_3, "\t\t\tremoving %+F from values, adding it to temp\n", u->irn));
1670
1671                                                 break;
1672                                         }
1673                                 }
1674                         }
1675                 }
1676
1677                 /* Try to insert the chain predecessor of all selected u's */
1678                 foreach_ir_nodeset(temp, u_irn, iter) {
1679                         rss_irn_t       *u  = get_rss_irn(rss, u_irn);
1680                         chain_t         *c  = u->chain;
1681                         plist_element_t *el = plist_find_value(c->elements, u_irn);
1682
1683                         assert(el && "Missing element in chain!");
1684
1685                         /* If u has predecessor in chain: insert the predecessor */
1686                         if (el == plist_element_get_prev(el)) {
1687                                 ir_nodeset_insert(values, plist_element_get_value(el));
1688                                 DBG((rss->dbg, LEVEL_3, "\t\t\tadding %+F to values\n", plist_element_get_value(el)));
1689                         }
1690                 }
1691
1692
1693                 DEL_ARR_F(val_arr);
1694         } while (ir_nodeset_size(temp) > 0);
1695
1696         DBG((rss->dbg, LEVEL_2, "\t\tfinal set:\n"));
1697         DEBUG_ONLY(
1698                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2) {
1699                         dump_nodeset(values, "\t\t\t");
1700                 }
1701         );
1702
1703         if (rss->opts->dump_flags & RSS_DUMP_MAXAC)
1704                 debug_vcg_dump_pkg(rss, values, iteration);
1705
1706         if(temp != NULL) {
1707                 ir_nodeset_destroy(temp);
1708                 free(temp);
1709         }
1710
1711         return values;
1712
1713 #undef MAP_IDX
1714 }
1715
1716 /**
1717  * Computes the best serialization between two nodes of sat_vals.
1718  */
1719 static serialization_t *compute_best_admissible_serialization(rss_t *rss, ir_nodeset_t *sat_vals, serialization_t *ser, int num_regs) {
1720         int        n                    = ir_nodeset_size(sat_vals);
1721         int        n_idx                = ARR_LEN_SAFE(rss->idx_map);
1722         int        i                    = 0;
1723         ir_node    **val_arr            = ALLOCAN(ir_node*, n);
1724         bitset_t   *bs_sv               = bitset_alloca(n_idx);
1725         bitset_t   *bs_vdesc            = bitset_alloca(n_idx);
1726         bitset_t   *bs_tmp              = bitset_alloca(n_idx);
1727         bitset_t   *bs_ukilldesc        = bitset_alloca(n_idx);
1728         int        best_benefit         = INT_MAX;
1729         int        best_omega2          = INT_MAX;
1730         int        best_benefit_omega20 = INT_MAX;
1731         int        has_omega1           = 0;
1732         int        j, k;
1733         ir_node    *irn;
1734         ir_nodeset_iterator_t iter;
1735         rss_edge_t min_benefit_edge = {NULL, NULL, NULL};
1736         rss_edge_t min_omega20_edge = {NULL, NULL, NULL};
1737         rss_irn_t  *ser_u_omega1 = NULL, *ser_v_omega1 = NULL;
1738         rss_irn_t  *ser_u_omega20 = NULL, *ser_v_omega20 = NULL;
1739
1740         DBG((rss->dbg, LEVEL_1, "\tcomputing admissible serializations:\n"));
1741
1742         /*
1743                 We need an explicit array for the values as
1744                 we cannot iterate multiple times over the same
1745                 set at the same time. :-(((((
1746         */
1747
1748         foreach_ir_nodeset(sat_vals, irn, iter) {
1749                 val_arr[i++] = irn;
1750                 bitset_set(bs_sv, BLOCK_IDX_MAP(rss, irn));
1751         }
1752
1753         /*
1754                 We build all admissible serializations and remember the best found so far.
1755                 For u in sat_vals:
1756                  For v in sat_val:
1757                    if v in pkiller(u): add edge from v to all other pkiller(u)
1758                    else: for all vv in pkiller(u): add edge from v to vv if there exists no path from vv to v
1759         */
1760
1761 /*
1762         A node is unserializable if:
1763         - it has only one killer and this one is Sink
1764     - it kills no other values
1765         In this case there is no serialization which could
1766         reduce the registerpressure
1767 */
1768 #define IS_UNSERIALIZABLE_NODE(rss_node)                  \
1769         (                                                     \
1770                 (                                                 \
1771                         (plist_count(rss_node->pkiller_list) == 1) && \
1772                         is_Sink(rss_node->killer)                  && \
1773                         (rss_node->kill_count                == 0)    \
1774                 )                            ||                   \
1775                 be_is_Barrier(rss_node->irn) ||                   \
1776                 be_is_Keep(rss_node->irn)                         \
1777         )
1778
1779         /* for all u in sat_vals */
1780         for (i = 0; i < n; ++i) {
1781                 rss_irn_t       *u = get_rss_irn(rss, val_arr[i]);
1782                 plist_element_t *el;
1783
1784                 /* ignore nodes where serialization does not help */
1785                 if (IS_UNSERIALIZABLE_NODE(u)) {
1786                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", u->irn));
1787                         continue;
1788                 }
1789
1790                 /* accumulate all descendants of all pkiller(u) */
1791                 bitset_clear_all(bs_ukilldesc);
1792                 foreach_plist(u->pkiller_list, el) {
1793                         ir_node   *irn  = plist_element_get_value(el);
1794                         rss_irn_t *node = get_rss_irn(rss, irn);
1795
1796                         if (! is_Sink(irn))
1797                                 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, irn));
1798                         else
1799                                 continue;
1800
1801                         for (k = ARR_LEN_SAFE(node->descendants) - 1; k >= 0; --k) {
1802                                 if (! is_Sink(node->descendants[k]))
1803                                         bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, node->descendants[k]));
1804                         }
1805                 }
1806
1807                 /* for all v in sat_vals */
1808                 for (j = 0; j < n; ++j) {
1809                         ir_node   *v_irn   = val_arr[j];
1810                         rss_irn_t *v       = get_rss_irn(rss, v_irn);
1811                         unsigned  v_height = get_irn_height(rss->h, v_irn);
1812                         int       omega1, omega2, is_pkiller;
1813
1814                         /* v cannot be serialized with itself
1815                          * ignore nodes where serialization does not help */
1816                         if (i == j || IS_UNSERIALIZABLE_NODE(v)) {
1817 #ifdef DEBUG_libfirm
1818                                 if (i != j)
1819                                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", v->irn));
1820 #endif
1821                                 continue;
1822                         }
1823
1824                         /* get descendants of v */
1825                         bitset_clear_all(bs_vdesc);
1826                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v_irn));
1827                         for (k = ARR_LEN_SAFE(v->descendants) - 1; k >= 0; --k) {
1828                                 if (! is_Sink(v->descendants[k]))
1829                                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v->descendants[k]));
1830                         }
1831
1832                         /* if v is in pkiller(u) */
1833                         is_pkiller = plist_has_value(u->pkiller_list, v_irn);
1834
1835                         /* for all vv in pkiller(u) */
1836                         foreach_plist(u->pkiller_list, el) {
1837                                 ir_node *vv_irn  = plist_element_get_value(el);
1838                                 int     add_edge;
1839
1840                                 if (is_Sink(vv_irn) || is_cfop(vv_irn))
1841                                         continue;
1842
1843                                 if (is_pkiller)
1844                                         add_edge = vv_irn != v_irn && skip_Proj(vv_irn) != skip_Proj(v_irn);
1845                                 else
1846                                         add_edge = ! heights_reachable_in_block(rss->h, skip_Proj(vv_irn), skip_Proj(v_irn));
1847
1848                                 /*
1849                                         As we add an edge from vv -> v, we have to make sure,
1850                                         that there exists no path from v to vv.
1851                                 */
1852
1853                                 if (add_edge) {
1854                                         unsigned vv_height = get_irn_height(rss->h, vv_irn);
1855                                         unsigned critical_path_cost;
1856                                         unsigned mu1, mu2;
1857
1858                                         /*
1859                                                 mu1 = | descendants(v) cut sat_vals |
1860                                                 the number of saturating values which cannot
1861                                                 be simultaneously alive with u
1862                                         */
1863                                         bitset_copy(bs_tmp, bs_vdesc);
1864                                         mu1 = bitset_popcnt(bitset_and(bs_tmp, bs_sv));
1865
1866                                         /*
1867                                                 mu2 = | accum_desc_all_pkiller(u) without descendants(v) |
1868                                         */
1869                                         if (is_pkiller) {
1870                                                 bitset_copy(bs_tmp, bs_ukilldesc);
1871                                                 mu2 = bitset_popcnt(bitset_andnot(bs_tmp, bs_vdesc));
1872                                         }
1873                                         else {
1874                                                 mu2 = 0;
1875                                         }
1876
1877                                         /* omega1 = mu1 - mu2 */
1878                                         omega1 = mu1 - mu2;
1879
1880                                         if (omega1 != 0)
1881                                                 has_omega1 = 1;
1882
1883                                         /* omega2 = increase of critical path */
1884                                         critical_path_cost =
1885                                                 v_height                        /* longest path from v to sink */
1886                                                 + rss->max_height - vv_height   /* longest path from source to vv */
1887                                                 + 1;                            /* edge */
1888
1889                                         /*
1890                                                 If critical_path_cost > max_height -> the new edge
1891                                                 would increase the longest critical path by the difference.
1892                                         */
1893                                         omega2 = critical_path_cost > rss->max_height ? critical_path_cost - rss->max_height : 0;
1894
1895                                         /* this keeps track of the edge with the best benefit */
1896                                         if (omega1 >= num_regs - n && omega1 < best_benefit) {
1897                                                 min_benefit_edge.src = v_irn;
1898                                                 min_benefit_edge.tgt = vv_irn;
1899
1900                                                 ser_u_omega1 = u;
1901                                                 ser_v_omega1 = v;
1902
1903                                                 best_benefit    = omega1;
1904                                                 ser->new_killer = is_pkiller;
1905                                         }
1906
1907                                         /* this keeps track of the edge with the best omega1 costs where omega2 == 0 */
1908                                         if (omega2 == 0 && omega1 >= num_regs - n && omega1 < best_benefit_omega20) {
1909                                                 min_omega20_edge.src = v_irn;
1910                                                 min_omega20_edge.tgt = vv_irn;
1911
1912                                                 ser_u_omega20 = u;
1913                                                 ser_v_omega20 = v;
1914
1915                                                 best_benefit_omega20 = omega1;
1916                                                 ser->new_killer      = is_pkiller;
1917                                         }
1918
1919                                         best_omega2 = MIN(best_omega2, omega2);
1920
1921                                         DBG((rss->dbg, LEVEL_2, "\t\tfound %+F -> %+F (w1 %d, w2 %d)\n",
1922                                                 v_irn, vv_irn, omega1, omega2));
1923                                 } /* if add_edge */
1924                         } /* for all vv in pkiller(u) */
1925                 } /* for all v in sat_vals */
1926         } /* for all u in sat_vals */
1927
1928         if (! has_omega1)
1929                 return NULL;
1930
1931         if (best_omega2 == 0) {
1932                 ser->u         = ser_u_omega20;
1933                 ser->v         = ser_v_omega20;
1934                 ser->edge->src = min_omega20_edge.src;
1935                 ser->edge->tgt = min_omega20_edge.tgt;
1936                 ser->omega1    = best_benefit_omega20;
1937                 ser->omega2    = best_omega2;
1938         }
1939         else {
1940                 ser->u         = ser_u_omega1;
1941                 ser->v         = ser_v_omega1;
1942                 ser->edge->src = min_benefit_edge.src;
1943                 ser->edge->tgt = min_benefit_edge.tgt;
1944                 ser->omega1    = best_benefit;
1945                 ser->omega2    = best_omega2;
1946         }
1947
1948         return ser;
1949
1950 #undef IS_UNSERIALIZABLE_NODE
1951 }
1952
1953 /**
1954  * Perform the value serialization heuristic and add all
1955  * computed serialization edges as dependencies to the irg.
1956  */
1957 static void perform_value_serialization_heuristic(rss_t *rss) {
1958         bitset_t *arch_nonign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1959         bitset_t *abi_ign_bs     = bitset_alloca(arch_register_class_n_regs(rss->cls));
1960         unsigned available_regs, iteration;
1961         dvg_t    dvg;
1962         ir_nodeset_t *sat_vals;
1963         pset *ser_set = new_pset(cmp_rss_edges, 20);
1964
1965         /* available_regs = R = |arch_non_ignore_regs cut ~abi_ignore_regs| */
1966         arch_put_non_ignore_regs(rss->cls, arch_nonign_bs);
1967         be_abi_put_ignore_regs(rss->abi, rss->cls, abi_ign_bs);
1968         bitset_andnot(arch_nonign_bs, abi_ign_bs);
1969         available_regs  = bitset_popcnt(arch_nonign_bs);
1970         //num_live = pset_count(rss->live_block);
1971         //available_regs -= num_live < available_regs ? num_live : 0;
1972
1973         DBG((rss->dbg, LEVEL_1, "\n\t#available regs: %d\n\n", available_regs));
1974
1975         /*
1976                 At first we need to compute the disjoint value DAG (DVG = {V, E_dv}).
1977                 V    = set of all nodes we are currently interested in
1978                 E_dv = there is an edge from u to v iff v is a descendant of killer(u), forall u, v in V
1979         */
1980         ir_nodeset_init_size(&dvg.nodes, plist_count(rss->nodes));
1981         dvg.edges = new_pset(cmp_rss_edges, plist_count(rss->nodes) * 5);
1982         compute_dvg(rss, &dvg);
1983
1984         /*
1985                 Then we perform the heuristic serialization algorithm
1986                 on the DVG which gives us all necessary serialization
1987                 edges.
1988         */
1989         DBG((rss->dbg, LEVEL_1, "\tcomputing maximal antichain:\n"));
1990         iteration = 0;
1991         sat_vals  = compute_maximal_antichain(rss, &dvg, iteration++);
1992         while (sat_vals && (ir_nodeset_size(sat_vals) > available_regs)) {
1993                 serialization_t *ser, best_ser;
1994                 rss_edge_t      *edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*edge));
1995                 ir_node         *dep_src, *dep_tgt;
1996
1997                 best_ser.edge = edge;
1998                 ser = compute_best_admissible_serialization(rss, sat_vals, &best_ser, available_regs);
1999
2000                 DBG((rss->dbg, LEVEL_1, "\tcurrent register saturation %d, target %d\n", ir_nodeset_size(sat_vals), available_regs));
2001
2002                 if (! ser) {
2003                         DBG((rss->dbg, LEVEL_1, "\tno RS improving serialization found, breaking at iteration %d\n", iteration));
2004                         break;
2005                 }
2006
2007                 /* Insert the serialization as dependency edge into the irg. */
2008                 DBG((rss->dbg, LEVEL_1, "\tserializing %+F -> %+F with edge %+F -> %+F and cost %d, %d\n",
2009                         ser->u->irn, ser->v->irn, ser->edge->src, ser->edge->tgt, ser->omega1, ser->omega2));
2010
2011                 if (pset_find(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge)))
2012                         ir_printf("WARNING: serialization %+F -> %+F computed twice!\n", ser->edge->src, ser->edge->tgt);
2013
2014
2015                 pset_insert(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge));
2016
2017                 /* update the dvg */
2018                 update_dvg(rss, &dvg, ser->v, ser->u);
2019                 update_dvg(rss, &dvg, get_rss_irn(rss, ser->edge->src), get_rss_irn(rss, ser->edge->tgt));
2020                 if(sat_vals != NULL) {
2021                         ir_nodeset_destroy(sat_vals);
2022                         free(sat_vals);
2023                 }
2024
2025                 dep_src = skip_Proj(ser->edge->src);
2026                 dep_tgt = ser->edge->tgt;
2027                 add_irn_dep(dep_src, dep_tgt);
2028
2029                 /* Update descendants, consumer and pkillers of the target */
2030                 update_node_info(rss, ser->edge->tgt, ser->edge->src);
2031
2032                 /* TODO: try to find a cheaper way for updating height information */
2033                 rss->max_height = heights_recompute_block(rss->h, rss->block);
2034
2035                 /* Recompute the antichain for next serialization */
2036                 DBG((rss->dbg, LEVEL_1, "\tre-computing maximal antichain:\n"));
2037                 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
2038         }
2039
2040         ir_nodeset_destroy(&dvg.nodes);
2041         del_pset(dvg.edges);
2042 }
2043
2044 /**
2045  * Do initial calculations for a block.
2046  */
2047 static void process_block(ir_node *block, void *env) {
2048         rss_t *rss = env;
2049         int   i, n;
2050         const ir_edge_t *edge;
2051
2052         phase_init(&rss->ph, "rss block preprocessor", rss->irg, PHASE_DEFAULT_GROWTH, init_rss_irn, NULL);
2053
2054         DBG((rss->dbg, LEVEL_1, "preprocessing block %+F\n", block));
2055         rss->block = block;
2056
2057         /* build an index map for all nodes in the current block */
2058         i            = 0;
2059         n            = get_irn_n_edges(block);
2060         NEW_ARR_A(int *, rss->idx_map, n);
2061         foreach_out_edge(block, edge) {
2062                 ir_node *irn      = get_edge_src_irn(edge);
2063                 rss->idx_map[i++] = get_irn_idx(irn);
2064         }
2065         qsort(rss->idx_map, n, sizeof(rss->idx_map[0]), cmp_int);
2066         rss->max_height = heights_recompute_block(rss->h, block);
2067
2068         /* loop over all register classes */
2069         for (i = arch_env_get_n_reg_class(rss->arch_env) - 1; i >= 0; --i) {
2070                 const arch_register_class_t *cls = arch_env_get_reg_class(rss->arch_env, i);
2071
2072                 rss->cls = cls;
2073                 DBG((rss->dbg, LEVEL_1, "register class %s\n", arch_register_class_name(cls)));
2074
2075                 /* Get all live value at end of Block having current register class */
2076                 ir_nodeset_init(&rss->live_block);
2077                 be_liveness_end_of_block(rss->liveness, rss->cls, rss->block, &rss->live_block);
2078
2079                 /* reset the list of interesting nodes */
2080                 plist_clear(rss->nodes);
2081                 plist_insert_back(rss->nodes, _sink);
2082
2083                 /* collect all nodes having a certain register class */
2084                 foreach_out_edge(block, edge) {
2085                         ir_node *irn  = get_edge_src_irn(edge);
2086                         ir_mode *mode = get_irn_mode(irn);
2087
2088                         /*
2089                                 We skip:
2090                                 - mode_T nodes (the projs are asked)
2091                                 - mode_X nodes (control flow nodes are always scheduled last)
2092                                 - Keeps (they are always immediately scheduled)
2093                                 - Phi (same as Keep)
2094                         */
2095                         if (mode == mode_T || mode == mode_X || is_Phi(irn))
2096                                 continue;
2097
2098                         /*
2099                                 In case of a proj, we skip
2100                                 - Barrier (they are a Barrier :)
2101                                 - Start
2102                                 - the Proj itself, as it's scheduled always with it's super node
2103                         */
2104                         if (is_Proj(irn)) {
2105                                 ir_node *pred = get_Proj_pred(irn);
2106                                 if (be_is_Barrier(pred) || is_Start(pred))
2107                                         continue;
2108                         }
2109
2110                         /* calculate the descendants and consumer for each node in the block */
2111                         collect_node_info(rss, skip_Proj(irn));
2112
2113                         if (be_is_Keep(irn))
2114                                 continue;
2115
2116                         if (!arch_irn_is_ignore(irn) &&
2117                                         arch_get_irn_reg_class_out(irn) == cls) {
2118                                 plist_insert_back(rss->nodes, skip_Proj(irn));
2119                         }
2120                         //}
2121                 }
2122
2123                 /* compute the potential killing set PK(G) */
2124                 compute_pkill_set(rss);
2125
2126                 /* compute the killing function k* */
2127                 compute_killing_function(rss);
2128
2129                 /*
2130                         Compute the heuristic value serialization and
2131                         add the necessary dependencies to the irg.
2132                 */
2133                 perform_value_serialization_heuristic(rss);
2134
2135                 ir_nodeset_destroy(&rss->live_block);
2136         }
2137
2138         phase_free(&rss->ph);
2139 }
2140
2141 /**
2142  * Register the options.
2143  */
2144 void be_init_schedrss(void) {
2145         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
2146         lc_opt_entry_t *sched_grp = lc_opt_get_grp(be_grp, "sched");
2147         lc_opt_entry_t *rss_grp = lc_opt_get_grp(sched_grp, "rss");
2148
2149         lc_opt_add_table(rss_grp, rss_option_table);
2150 }
2151
2152 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_schedrss);
2153
2154 /**
2155  * Preprocess the irg for scheduling.
2156  */
2157 void rss_schedule_preparation(be_irg_t *birg) {
2158         ir_graph *irg = be_get_birg_irg(birg);
2159         rss_t rss;
2160
2161         FIRM_DBG_REGISTER(rss.dbg, "firm.be.sched.rss");
2162
2163         //firm_dbg_set_mask(rss.dbg, LEVEL_1 | LEVEL_2 | LEVEL_3);
2164
2165         init_rss_special_nodes(irg);
2166
2167         rss.irg      = irg;
2168         rss.arch_env = be_get_birg_arch_env(birg);
2169         rss.abi      = birg->abi;
2170         rss.h        = heights_new(irg);
2171         rss.nodes    = plist_new();
2172         rss.opts     = &rss_options;
2173         rss.liveness = be_liveness(irg);
2174         be_liveness_assure_sets(rss.liveness);
2175         irg_block_walk_graph(irg, NULL, process_block, &rss);
2176         heights_free(rss.h);
2177         plist_free(rss.nodes);
2178         be_liveness_free(rss.liveness);
2179
2180         if (birg->main_env->options->dump_flags & DUMP_SCHED)
2181                 be_dump(rss.irg, "-rss", dump_ir_block_graph);
2182 }