04261cf50c650a2495a9253f57ab937a9ea74f78
[libfirm] / ir / be / beschedrss.c
1 /*
2  * Copyright (C) 1995-2007 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * Implementation of a register saturating list scheduler
22  * as described in: Sid-Ahmed-Ali Touati
23  * Register Saturation in Superscalar and VLIW Codes
24  *
25  * @license This file protected by GPL -  GNU GENERAL PUBLIC LICENSE.
26  * @author  Christian Wuerdig
27  * @date    29.08.2006
28  * @cvs-id  $Id$
29  */
30 #ifdef HAVE_CONFIG_H
31 #include "config.h"
32 #endif
33
34 #include <limits.h>
35
36 #include "obst.h"
37 #include "debug.h"
38
39 #include "irgraph_t.h"
40 #include "irnode_t.h"
41 #include "iredges_t.h"
42 #include "ircons_t.h"
43 #include "irphase_t.h"
44 #include "irgwalk.h"
45 #include "irdump.h"
46 #include "irtools.h"
47 #include "irbitset.h"
48 #include "irprintf.h"
49 #include "bipartite.h"
50 #include "hungarian.h"
51 #include "plist.h"
52
53 #include "height.h"
54
55 #include "beabi.h"
56 #include "bemodule.h"
57 #include "benode_t.h"
58 #include "besched_t.h"
59 #include "beirg_t.h"
60
61 #include <libcore/lc_opts.h>
62 #include <libcore/lc_opts_enum.h>
63
64
65 #define ARR_LEN_SAFE(arr) ((arr) != NULL ? ARR_LEN((arr)) : 0)
66
67 #define HASH_RSS_EDGE(edge) ((get_irn_node_nr((edge)->src) << 16) | (get_irn_node_nr((edge)->tgt) & 0xFFFF))
68 #define BSEARCH_IRN_ARR(val, arr) \
69         bsearch(&(val), (arr), ARR_LEN_SAFE((arr)), sizeof((arr)[0]), cmp_irn_idx)
70
71 #define BLOCK_IDX_MAP(rss, irn) bsearch_for_index(get_irn_idx((irn)), (rss)->idx_map, ARR_LEN_SAFE((rss)->idx_map), 1)
72
73 /* the rss options */
74 typedef struct _rss_opts_t {
75         int dump_flags;
76 } rss_opts_t;
77
78 /* Represents a child with associated costs */
79 typedef struct _child {
80         ir_node *irn;
81         float   cost;
82 } child_t;
83
84 /* We need edges for several purposes. */
85 typedef struct _rss_edge {
86         ir_node *src;
87         ir_node *tgt;
88         void    *next;
89 } rss_edge_t;
90
91 /* Represents a connected bipartite component. */
92 typedef struct _cbc {
93         ir_nodeset_t parents;       /**< = S  a set of value producers */
94         ir_nodeset_t children;      /**< = T  a set of value consumers */
95         pset    *kill_edges;    /**< = E  a set of edges (t in T, s in S) such as each s in S gets killed by at least one t in T */
96         int     nr;             /**< a deterministic index for set insertion (used as hash) */
97 } cbc_t;
98
99 /* Represents a disjoint value DAG. */
100 typedef struct _dvg {
101         ir_nodeset_t nodes;
102         pset    *edges;
103 } dvg_t;
104
105 /* Represents a chain of nodes. */
106 typedef struct _chain {
107         plist_t *elements;   /**< List of chain elements */
108         int     nr;          /**< a deterministic index for set insertion (used as hash) */
109 } chain_t;
110
111 typedef struct _rss_irn {
112         plist_t  *consumer_list;    /**< List of consumers */
113         ir_node **consumer;         /**< Sorted consumer array (needed for faster access) */
114
115         plist_t  *parent_list;      /**< List of parents */
116         plist_t  *pkiller_list;     /**< List of potential killers */
117
118         plist_t  *descendant_list;  /**< List of descendants */
119         ir_node **descendants;      /**< Sorted descendant array (needed for faster access) */
120
121         ir_node  *killer;           /**< The selected unique killer */
122         ir_node  *irn;              /**< The corresponding firm node to this rss_irn */
123
124         chain_t  *chain;            /**< The chain, this node is associated to */
125
126         unsigned desc_walk;         /**< visited flag for collecting descendants */
127         unsigned kill_count;        /**< number of nodes killed by this one */
128
129         unsigned live_out : 1;      /**< irn has consumers outside of it's block */
130         unsigned visited  : 1;      /**< visited flag for bipartite decomposition */
131         unsigned havecons : 1;      /**< visited flag collect consumer */
132         unsigned handled  : 1;      /**< flag indicating whether or not the list structures have been build */
133         unsigned dumped   : 1;      /**< flag indication whether or not this node was dumped */
134 } rss_irn_t;
135
136 /* Represents a serialization edge with associated costs. */
137 typedef struct _serialization {
138         rss_irn_t  *u;       /* the top node */
139         rss_irn_t  *v;       /* the node about to be serialized after u */
140         rss_edge_t *edge;    /* the edge selected for the serialization */
141         int        omega1;   /* estimated: available regs - RS reduction */
142         int        omega2;   /* increase in critical path length */
143         int        new_killer;
144 } serialization_t;
145
146 typedef struct _rss {
147         ir_phase          ph;              /**< Phase to hold some data */
148         heights_t        *h;              /**< The current height object */
149         ir_graph         *irg;            /**< The irg to preprocess */
150         plist_t          *nodes;          /**< The list of interesting nodes */
151         const arch_env_t *arch_env;       /**< The architecture environment */
152         be_abi_irg_t     *abi;            /**< The abi for this irg */
153         pset             *cbc_set;        /**< A set of connected bipartite components */
154         ir_node          *block;          /**< The current block in progress. */
155         int              *idx_map;        /**< Mapping irn indices to per block indices */
156         unsigned         max_height;      /**< maximum height in the current block */
157         rss_opts_t       *opts;           /**< The options */
158         be_lv_t          *liveness;       /**< The liveness information for this irg */
159         pset             *live_block;     /**< Values alive at end of block */
160         const arch_register_class_t *cls; /**< The current register class */
161         DEBUG_ONLY(firm_dbg_module_t *dbg);
162 } rss_t;
163
164 #define get_rss_irn(rss, irn)  (phase_get_or_set_irn_data(&rss->ph, irn))
165
166 /**
167  * We need some special nodes:
168  * a source and a sink for all live-in and live-out values of a block
169  */
170 enum {
171         iro_rss_Source,
172         iro_rss_Sink,
173         iro_rss_last
174 };
175
176 /** The opcode of the rss_Source node once allocated. */
177 static ir_op *op_rss_Source;
178 /** The opcode of the rss_Sink node once allocated. */
179 static ir_op *op_rss_Sink;
180
181 /** The rss_Source node of the current graph. */
182 static ir_node *_source = NULL;
183 /** The rss_Sink node of the current graph. */
184 static ir_node *_sink   = NULL;
185
186 #define is_Source(irn) ((irn) == _source)
187 #define is_Sink(irn)   ((irn) == _sink)
188
189 enum {
190         RSS_DUMP_NONE  = 0,
191         RSS_DUMP_CBC   = 1 << 0,
192         RSS_DUMP_PKG   = 1 << 1,
193         RSS_DUMP_KILL  = 1 << 2,
194         RSS_DUMP_DVG   = 1 << 3,
195         RSS_DUMP_MAXAC = 1 << 4,
196         RSS_DUMP_ALL   = (RSS_DUMP_MAXAC << 1) - 1,
197 };
198
199 static rss_opts_t rss_options = {
200         RSS_DUMP_NONE,
201 };
202
203 static const lc_opt_enum_int_items_t dump_items[] = {
204         { "none",  RSS_DUMP_NONE  },
205         { "cbc",   RSS_DUMP_CBC   },
206         { "pkg",   RSS_DUMP_PKG   },
207         { "kill",  RSS_DUMP_KILL  },
208         { "dvg",   RSS_DUMP_DVG   },
209         { "maxac", RSS_DUMP_MAXAC },
210         { "all",   RSS_DUMP_ALL   },
211         { NULL, 0 }
212 };
213
214 static lc_opt_enum_int_var_t dump_var = {
215         &rss_options.dump_flags, dump_items
216 };
217
218 static const lc_opt_table_entry_t rss_option_table[] = {
219         LC_OPT_ENT_ENUM_MASK("dump", "dump phases", &dump_var),
220         { NULL }
221 };
222
223 /******************************************************************************
224  *  _          _                    __                  _   _
225  * | |        | |                  / _|                | | (_)
226  * | |__   ___| |_ __   ___ _ __  | |_ _   _ _ __   ___| |_ _  ___  _ __  ___
227  * | '_ \ / _ \ | '_ \ / _ \ '__| |  _| | | | '_ \ / __| __| |/ _ \| '_ \/ __|
228  * | | | |  __/ | |_) |  __/ |    | | | |_| | | | | (__| |_| | (_) | | | \__ \
229  * |_| |_|\___|_| .__/ \___|_|    |_|  \__,_|_| |_|\___|\__|_|\___/|_| |_|___/
230  *            | |
231  *            |_|
232  ******************************************************************************/
233
234 /**
235  * Acquire opcodes if needed and create source and sink nodes.
236  */
237 static void init_rss_special_nodes(ir_graph *irg) {
238         ir_node *block;
239
240         if (op_rss_Source == NULL) {
241                 int iro_rss_base = get_next_ir_opcodes(iro_rss_last);
242                 op_rss_Source = new_ir_op(iro_rss_base + iro_rss_Source, "rss_Source", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
243                 op_rss_Sink   = new_ir_op(iro_rss_base + iro_rss_Sink,   "rss_Sink",   op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
244         }
245         block   = get_irg_start_block(irg);
246         _source = new_ir_node(NULL, irg, block, op_rss_Source, mode_ANY, 0, NULL);
247         _sink   = new_ir_node(NULL, irg, block, op_rss_Sink, mode_ANY, 0, NULL);
248 }
249
250 static int cmp_int(const void *a, const void *b) {
251         const int *i1 = a;
252         const int *i2 = b;
253
254         return QSORT_CMP(*i1, *i2);
255 }
256
257 static int cmp_child_costs(const void *a, const void *b) {
258         const child_t *c1 = a;
259         const child_t *c2 = b;
260
261         return QSORT_CMP(c1->cost, c2->cost);
262 }
263
264 static int cmp_irn_idx(const void *a, const void *b) {
265         const ir_node *n1 = *(ir_node **)a;
266         const ir_node *n2 = *(ir_node **)b;
267
268         return QSORT_CMP(get_irn_idx(n1), get_irn_idx(n2));
269 }
270
271 static int cmp_rss_edges(const void *a, const void *b) {
272         const rss_edge_t *e1 = a;
273         const rss_edge_t *e2 = b;
274
275         return (e1->src != e2->src) || (e1->tgt != e2->tgt);
276 }
277
278 static int bsearch_for_index(int key, int *arr, size_t len, int force) {
279         int left = 0;
280         int right = len;
281
282         while (right >= left) {
283                 int idx = (left + right) / 2;
284
285                 if (key < arr[idx])
286                         right = idx - 1;
287                 else if (key > arr[idx])
288                         left = idx + 1;
289                 else
290                         return idx;
291         }
292
293         if (force)
294                 assert(0 && "Something is wrong, key not found.");
295         return -1;
296 }
297
298 static ir_node **build_sorted_array_from_list(plist_t *irn_list, struct obstack *obst) {
299         plist_element_t *el;
300         int     i     = 0;
301         int     len   = plist_count(irn_list);
302         ir_node **arr = NEW_ARR_D(ir_node *, obst, len);
303
304         /* copy the list into the array */
305         foreach_plist(irn_list, el) {
306                 arr[i++] = plist_element_get_value(el);
307         }
308
309         /* sort the array by node index */
310         qsort(arr, len, sizeof(arr[0]), cmp_irn_idx);
311
312         return arr;
313 }
314
315 /*****************************************************
316  *      _      _                       _
317  *     | |    | |                     (_)
318  *   __| | ___| |__  _   _  __ _  __ _ _ _ __   __ _
319  *  / _` |/ _ \ '_ \| | | |/ _` |/ _` | | '_ \ / _` |
320  * | (_| |  __/ |_) | |_| | (_| | (_| | | | | | (_| |
321  *  \__,_|\___|_.__/ \__,_|\__, |\__, |_|_| |_|\__, |
322  *                          __/ | __/ |         __/ |
323  *                         |___/ |___/         |___/
324  *
325  *****************************************************/
326
327 #ifdef DEBUG_libfirm
328 static void dump_nodeset(ir_nodeset_t *ns, const char *prefix) {
329         ir_nodeset_iterator_t iter;
330         ir_node *irn;
331
332         ir_nodeset_iterator_init(&iter, ns);
333         while( (irn = ir_nodeset_iterator_next(&iter)) != NULL ) {
334                 ir_fprintf(stderr, "%s%+F\n", prefix, irn);
335         }
336 }
337 #endif
338
339 static void build_file_name(rss_t *rss, const char *suffix, size_t suf_len, char *buf, size_t len) {
340         const char *irg_name;
341
342         memset(buf, 0, len);
343         irg_name = get_entity_name(get_irg_entity(rss->irg));
344         snprintf(buf, len - suf_len, "%s-%s-block-%ld",
345                 irg_name, arch_register_class_name(rss->cls), get_irn_node_nr(rss->block));
346         strcat(buf, suffix);
347 }
348
349 /* Dumps all collected bipartite components of current irg as vcg. */
350 static void debug_vcg_dump_bipartite(rss_t *rss) {
351         cbc_t *cbc;
352         FILE  *f;
353         char  file_name[256];
354         static const char suffix[] = "-RSS-CBC.vcg";
355
356         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
357         f = fopen(file_name, "w");
358
359         ir_fprintf(f, "graph: { title: \"connected bipartite component graph of %+F\"\n", rss->irg);
360         fprintf(f, "display_edge_labels: no\n");
361         fprintf(f, "layoutalgorithm: mindepth\n");
362         fprintf(f, "manhattan_edges: yes\n\n");
363
364         foreach_pset(rss->cbc_set, cbc) {
365                 ir_nodeset_iterator_t iter;
366                 ir_node    *n;
367                 rss_edge_t *ke;
368
369                 fprintf(f, "graph: { titel: \"cbc %d\" label: \"cbc %d\" status:clustered color:yellow\n", cbc->nr, cbc->nr);
370                 foreach_ir_nodeset(&cbc->parents, n, iter) {
371                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
372                 }
373                 foreach_ir_nodeset(&cbc->children, n, iter) {
374                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
375                 }
376                 foreach_pset(cbc->kill_edges, ke) {
377                         ir_fprintf(f, "edge: { sourcename: \"n%d_%d\" targetname: \"n%d_%d\" }\n",
378                                 get_irn_node_nr(ke->src), cbc->nr, get_irn_node_nr(ke->tgt), cbc->nr);
379                 }
380                 fprintf(f, "}\n\n");
381         }
382         fprintf(f, "}\n");
383         fclose(f);
384 }
385
386 /* Dump the computed killing function as vcg. */
387 static void debug_vcg_dump_kill(rss_t *rss) {
388         FILE            *f;
389         char            file_name[256];
390         plist_element_t *el;
391         static const char suffix[] = "-RSS-KILL.vcg";
392
393         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
394         f = fopen(file_name, "w");
395
396         ir_fprintf(f, "graph: { title: \"computed kill graph of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
397         fprintf(f, "display_edge_labels: no\n");
398         fprintf(f, "layoutalgorithm: mindepth\n");
399         fprintf(f, "manhattan_edges: yes\n\n");
400
401         {
402                 /* reset dump_flag */
403                 ir_node *irn;
404                 foreach_phase_irn(&rss->ph, irn) {
405                         rss_irn_t *node = get_rss_irn(rss, irn);
406                         node->dumped = 0;
407                 }
408         }
409
410         /* dump all nodes and their killers */
411         foreach_plist(rss->nodes, el) {
412                 ir_node   *irn     = plist_element_get_value(el);
413                 rss_irn_t *rirn    = get_rss_irn(rss, irn);
414                 rss_irn_t *pk_rirn = get_rss_irn(rss, rirn->killer);
415
416                 if (! rirn->dumped) {
417                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
418                         rirn->dumped = 1;
419                 }
420
421                 if (! pk_rirn->dumped) {
422                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(rirn->killer), rirn->killer);
423                         pk_rirn->dumped = 1;
424                 }
425
426                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
427                         get_irn_node_nr(rirn->killer), get_irn_node_nr(irn));
428         }
429
430         fprintf(f, "}\n");
431         fclose(f);
432 }
433
434 /* Dumps the potential killing DAG (PKG) as vcg. */
435 static void debug_vcg_dump_pkg(rss_t *rss, ir_nodeset_t *max_ac, int iteration) {
436         FILE              *f;
437         char              file_name[256];
438         char              *suffix   = alloca(32);
439         static const char suffix1[] = "-RSS-PKG.vcg";
440         static const char suffix2[] = "-RSS-PKG-MAXAC.vcg";
441         plist_element_t   *el;
442
443         if (! max_ac) {
444                 snprintf(suffix, 32, "%s", suffix1);
445         }
446         else {
447                 snprintf(suffix, 32, "-%02d%s", iteration, suffix2);
448         }
449
450         build_file_name(rss, suffix, strlen(suffix) + 1, file_name, sizeof(file_name));
451         f = fopen(file_name, "w");
452
453         ir_fprintf(f, "graph: { title: \"potential killing DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
454         fprintf(f, "display_edge_labels: no\n");
455         fprintf(f, "layoutalgorithm: mindepth\n");
456         fprintf(f, "manhattan_edges: yes\n\n");
457
458         {
459                 /* reset dump_flag */
460                 ir_node *irn;
461                 foreach_phase_irn(&rss->ph, irn) {
462                         rss_irn_t *node = get_rss_irn(rss, irn);
463                         node->dumped = 0;
464                 }
465         }
466
467         foreach_plist(rss->nodes, el) {
468                 ir_node   *irn  = plist_element_get_value(el);
469                 rss_irn_t *rirn = get_rss_irn(rss, irn);
470                 char      *c1   = "";
471                 plist_element_t *k_el;
472
473                 /* dump selected saturating values in yellow */
474                 if (max_ac && ir_nodeset_contains(max_ac, irn))
475                         c1 = "color:yellow";
476
477                 if (! rirn->dumped) {
478                         if (rirn->chain)
479                                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(irn), irn, rirn->chain->nr, c1);
480                         else
481                                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(irn), irn, c1);
482                         rirn->dumped = 1;
483                 }
484
485                 foreach_plist(rirn->pkiller_list, k_el) {
486                         ir_node   *pkiller = plist_element_get_value(k_el);
487                         rss_irn_t *pk_rirn = get_rss_irn(rss, pkiller);
488                         char      *c2      = "";
489
490                         if (max_ac && ir_nodeset_contains(max_ac, pkiller))
491                                 c2 = "color:yellow";
492
493                         if (! pk_rirn->dumped) {
494                                 if (pk_rirn->chain)
495                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(pkiller), pkiller, pk_rirn->chain->nr, c2);
496                                 else
497                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(pkiller), pkiller, c2);
498                                 pk_rirn->dumped = 1;
499                         }
500                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
501                                 get_irn_node_nr(pkiller), get_irn_node_nr(irn));
502                 }
503         }
504         fprintf(f, "}\n");
505         fclose(f);
506 }
507
508 /* Dumps the disjoint value DAG (DVG) as vcg. */
509 static void debug_vcg_dump_dvg(rss_t *rss, dvg_t *dvg) {
510         static const char suffix[] = "-RSS-DVG.vcg";
511         FILE       *f;
512         char       file_name[256];
513         ir_nodeset_iterator_t iter;
514         ir_node    *irn;
515         rss_edge_t *edge;
516
517         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
518         f = fopen(file_name, "w");
519
520         ir_fprintf(f, "graph: { title: \"disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
521         fprintf(f, "display_edge_labels: no\n");
522         fprintf(f, "layoutalgorithm: mindepth\n");
523         fprintf(f, "manhattan_edges: yes\n\n");
524
525         /* dump all nodes */
526         foreach_ir_nodeset(&dvg->nodes, irn, iter) {
527                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
528         }
529
530         /* dump all edges */
531         foreach_pset(dvg->edges, edge) {
532                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
533                         get_irn_node_nr(edge->src), get_irn_node_nr(edge->tgt));
534         }
535
536         fprintf(f, "}\n");
537         fclose(f);
538 }
539
540 #if 0
541 /* Dumps the PKG(DVG). */
542 static void debug_vcg_dump_dvg_pkiller(rss_t *rss, dvg_t *dvg) {
543         static const char suffix[] = "-RSS-DVG-PKG.vcg";
544         FILE       *f;
545         char       file_name[256];
546         ir_nodeset_iterator_t iter;
547         ir_node    *irn;
548
549         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
550         f = fopen(file_name, "w");
551
552         ir_fprintf(f, "graph: { title: \"PKG of disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
553         fprintf(f, "display_edge_labels: no\n");
554         fprintf(f, "layoutalgorithm: mindepth\n");
555         fprintf(f, "manhattan_edges: yes\n\n");
556
557         /* dump all nodes */
558         foreach_ir_nodeset(&dvg->nodes, irn, iter) {
559                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
560         }
561
562         /* dump all edges */
563         foreach_ir_nodeset(&dvg->nodes, irn, iter) {
564                 rss_irn_t       *node = get_rss_irn(rss, irn);
565                 plist_element_t *el;
566
567                 foreach_plist(node->dvg_pkiller_list, el) {
568                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
569                                 get_irn_node_nr(plist_element_get_value(el)), get_irn_node_nr(irn));
570                 }
571         }
572
573         fprintf(f, "}\n");
574         fclose(f);
575 }
576 #endif /* if 0 */
577
578 /**
579  * In case there is no rss information for irn, initialize it.
580  */
581 static void *init_rss_irn(ir_phase *ph, ir_node *irn, void *old) {
582         rss_irn_t *res = old ? old : phase_alloc(ph, sizeof(res[0]));
583
584         res->descendant_list = plist_obstack_new(phase_obst(ph));
585         res->descendants     = NULL;
586
587         res->consumer_list   = plist_obstack_new(phase_obst(ph));
588         res->consumer        = NULL;
589
590         res->pkiller_list    = plist_obstack_new(phase_obst(ph));
591
592         res->parent_list     = plist_obstack_new(phase_obst(ph));
593
594         res->killer          = NULL;
595         res->irn             = irn;
596         res->chain           = NULL;
597
598         res->kill_count      = 0;
599         res->desc_walk       = 0;
600         res->live_out        = 0;
601         res->visited         = 0;
602         res->handled         = 0;
603         res->dumped          = 0;
604         res->havecons        = 0;
605
606         return res;
607 }
608
609 /**
610  * Collect all nodes data dependent on current node.
611  */
612 static void collect_descendants(rss_t *rss, rss_irn_t *rirn, ir_node *irn, int *got_sink, unsigned cur_desc_walk) {
613         const ir_edge_t *edge;
614         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
615         ir_node         *block    = rss->block;
616         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
617         int             i;
618
619         if (cur_node->desc_walk >= cur_desc_walk)
620                 return;
621         cur_node->desc_walk = cur_desc_walk;
622
623         /* Stop at Barriers */
624         if (be_is_Barrier(irn))
625                 return;
626
627         /* loop over normal and dependency edges */
628         for (i = 0; i < 2; ++i) {
629                 foreach_out_edge_kind(irn, edge, ekind[i]) {
630                         ir_node *user = get_edge_src_irn(edge);
631
632                         /* skip ignore nodes as they do not really contribute to register pressure */
633                         if (arch_irn_is(rss->arch_env, user, ignore))
634                                 continue;
635
636                         /*
637                                 (a) mode_X means Jump            -> out_edge
638                                 (b) Phi as user of a normal node -> out-edge
639                         */
640                         if (get_irn_mode(user) == mode_X || is_Phi(user)) {
641                                 if (! *got_sink)
642                                         goto force_sink;
643                                 else
644                                         continue;
645                         }
646
647                         if (is_Proj(user)) {
648
649                                 //if (arch_get_irn_reg_class(rss->arch_env, user, -1) == rss->cls)
650                                         collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
651                         }
652                         else {
653                                 /* check if user lives in block and is not a control flow node */
654                                 if (get_nodes_block(user) == block) {
655                                         if (! plist_has_value(rirn->descendant_list, user)) {
656                                                 plist_insert_back(rirn->descendant_list, user);
657                                                 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", user));
658                                         }
659                                         collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
660                                 }
661                                 else if (! *got_sink) {
662 force_sink:
663                                         /* user lives out of block: add sink as descendant if not already done */
664                                         plist_insert_back(rirn->descendant_list, _sink);
665                                         *got_sink = 1;
666                                         DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", _sink));
667                                 }
668                         }
669                 }
670         }
671 }
672
673 /**
674  * Handles a single consumer.
675  */
676 static void collect_single_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *consumer, int *got_sink) {
677         ir_node *block = rss->block;
678
679         assert(! is_Proj(consumer) && "Cannot handle Projs");
680
681         if (! is_Phi(consumer) && ! is_Block(consumer) && get_nodes_block(consumer) == block) {
682                 if (! arch_irn_is(rss->arch_env, consumer, ignore) && ! plist_has_value(rss_irn->consumer_list, consumer)) {
683                         plist_insert_back(rss_irn->consumer_list, consumer);
684                         DBG((rss->dbg, LEVEL_2, "\t\tconsumer %+F\n", consumer));
685                 }
686         }
687         else {
688                 rss_irn->live_out = 1;
689                 DBG((rss->dbg, LEVEL_2, "\t\tlive out %+F", consumer));
690                 if (! *got_sink) {
691                         plist_insert_back(rss_irn->consumer_list, _sink);
692                         *got_sink = 1;
693                         DB((rss->dbg, LEVEL_2, ", %+F added instead", _sink));
694                 }
695                 DB((rss->dbg, LEVEL_2, "\n"));
696         }
697 }
698
699 /**
700  * Collect all nodes consuming the value(s) produced by current node.
701  */
702 static void collect_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *irn, int *got_sink) {
703         const ir_edge_t *edge;
704         int             i;
705         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
706         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
707
708         if (cur_node->havecons)
709                 return;
710         cur_node->havecons = 1;
711
712         for (i = 0; i < 2; ++i) {
713                 foreach_out_edge_kind(irn, edge, ekind[i]) {
714                         ir_node *consumer = get_edge_src_irn(edge);
715
716                         if (is_Proj(consumer)) {
717                                 //if (arch_get_irn_reg_class(rss->arch_env, consumer, -1) == rss->cls)
718                                         collect_consumer(rss, rss_irn, consumer, got_sink);
719                         }
720                         else
721                                 collect_single_consumer(rss, rss_irn, consumer, got_sink);
722                 }
723         }
724 }
725
726 /**
727  * Collects all consumer and descendant of a irn.
728  */
729 static void collect_node_info(rss_t *rss, ir_node *irn) {
730         static unsigned cur_desc_walk = 0;
731         rss_irn_t       *rss_irn      = get_rss_irn(rss, irn);
732         int             got_sink;
733
734         assert(! is_Proj(irn) && "Cannot handle Projs.");
735
736         /* check if node info is already available */
737         if (rss_irn->handled)
738                 return;
739                 //phase_reinit_single_irn_data(&rss->ph, irn);
740
741         DBG((rss->dbg, LEVEL_1, "\tcomputing consumers of %+F:\n", irn));
742
743         /* collect all consumer */
744         got_sink = 0;
745         collect_consumer(rss, rss_irn, irn, &got_sink);
746
747         /* build sorted consumer array */
748         rss_irn->consumer = build_sorted_array_from_list(rss_irn->consumer_list, phase_obst(&rss->ph));
749
750         DBG((rss->dbg, LEVEL_1, "\tcompute descendants of %+F:\n", irn));
751
752         /* collect descendants */
753         got_sink = 0;
754         collect_descendants(rss, rss_irn, irn, &got_sink, ++cur_desc_walk);
755
756         /* build sorted descendant array */
757         rss_irn->descendants = build_sorted_array_from_list(rss_irn->descendant_list, phase_obst(&rss->ph));
758
759         rss_irn->handled = 1;
760 }
761
762 /**
763  * Checks if v is a potential killer of u.
764  * v is in pkill(u) iff descendants(v) cut consumer(u) is v
765  *
766  * @param rss   The rss object
767  * @param v      The node to check for killer
768  * @param u      The potentially killed value
769  * @return 1 if v is in pkill(u), 0 otherwise
770  */
771 static int is_potential_killer(rss_t *rss, rss_irn_t *v, rss_irn_t *u) {
772         plist_t *list;
773         ir_node **arr;
774         plist_element_t *el;
775
776         assert(is_Sink(v->irn) || ((plist_count(v->descendant_list) > 0 && v->descendants) || 1));
777         assert(is_Sink(u->irn) || ((plist_count(u->consumer_list)   > 0 && u->consumer)    || 1));
778
779         /* as we loop over the list: loop over the shorter one */
780         if (plist_count(v->descendant_list) > plist_count(u->consumer_list)) {
781                 list = u->consumer_list;
782                 arr  = v->descendants;
783         }
784         else {
785                 list = v->descendant_list;
786                 arr  = u->consumer;
787         }
788
789         /* for each list element: try to find element in array */
790         foreach_plist(list, el) {
791                 ir_node *irn   = plist_element_get_value(el);
792                 ir_node *match = BSEARCH_IRN_ARR(irn, arr);
793
794                 if (match && match != irn)
795                         return 0;
796         }
797
798         return 1;
799 }
800
801 /**
802  * Update descendants, consumer and pkiller list for the given irn.
803  */
804 static void update_node_info(rss_t *rss, ir_node *irn, ir_node *pk_irn) {
805         rss_irn_t *node    = get_rss_irn(rss, irn);
806         rss_irn_t *pkiller = get_rss_irn(rss, pk_irn);
807
808         /* add consumer and rebuild the consumer array */
809         if (! plist_has_value(node->consumer_list, pk_irn)) {
810                 plist_insert_back(node->consumer_list, pk_irn);
811                 node->consumer = build_sorted_array_from_list(node->consumer_list, phase_obst(&rss->ph));
812         }
813
814         /* add pk_irn as descendant, add it's descendants to irn's and rebuild array */
815         if (! plist_has_value(node->descendant_list, pk_irn)) {
816                 plist_element_t *el;
817
818                 plist_insert_back(node->descendant_list, pk_irn);
819
820                 foreach_plist(pkiller->descendant_list, el) {
821                         ir_node *desc = plist_element_get_value(el);
822
823                         if (! plist_has_value(node->descendant_list, desc))
824                                 plist_insert_back(node->descendant_list, desc);
825                 }
826
827                 node->descendants = build_sorted_array_from_list(node->descendant_list, phase_obst(&rss->ph));
828         }
829
830 }
831
832 /**
833  * Compute the potential killing set PK.
834  */
835 static void compute_pkill_set(rss_t *rss) {
836         plist_element_t *u_el, *v_el;
837
838         foreach_plist(rss->nodes, u_el) {
839                 ir_node   *u_irn = plist_element_get_value(u_el);
840                 rss_irn_t *u     = get_rss_irn(rss, u_irn);
841
842                 DBG((rss->dbg, LEVEL_1, "\tcomputing potential killers of %+F:\n", u_irn));
843
844                 /* check each consumer if it is a potential killer  */
845                 foreach_plist(u->consumer_list, v_el) {
846                         ir_node   *v_irn = plist_element_get_value(v_el);
847                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
848
849                         /* check, if v is a potential killer of u */
850                         if (is_potential_killer(rss, v, u)) {
851                                 if (! plist_has_value(u->pkiller_list, v_irn))
852                                         plist_insert_back(u->pkiller_list, v_irn);
853                                 v->kill_count++;
854                                 DBG((rss->dbg, LEVEL_2, "\t\tpotential killer %+F\n", v_irn));
855                         }
856                 }
857
858                 u->killer = _sink;
859         }
860
861         if (rss->opts->dump_flags & RSS_DUMP_PKG)
862                 debug_vcg_dump_pkg(rss, NULL, 0);
863 }
864
865 /**
866  * Build set of killing edges (from values to their potential killers)
867  */
868 static void build_kill_edges(rss_t *rss, pset *epk) {
869         plist_element_t *el, *k_el;
870
871         foreach_plist(rss->nodes, el) {
872                 ir_node    *irn  = plist_element_get_value(el);
873                 rss_irn_t *rirn = get_rss_irn(rss, irn);
874
875                 DBG((rss->dbg, LEVEL_3, "\t\tbuilding kill edges for %+F:\n", irn));
876
877                 foreach_plist(rirn->pkiller_list, k_el) {
878                         ir_node    *pkiller = plist_element_get_value(k_el);
879                         rss_edge_t *ke      = obstack_alloc(phase_obst(&rss->ph), sizeof(*ke));
880
881                         ke->src  = irn;
882                         ke->tgt  = pkiller;
883                         ke->next = NULL;
884
885                         DBG((rss->dbg, LEVEL_3, "\t\t\ttarget %+F\n", pkiller));
886
887                         pset_insert(epk, ke, HASH_RSS_EDGE(ke));
888                 }
889         }
890 }
891
892 #ifdef DEBUG_libfirm
893 /* print the given cbc for debugging purpose */
894 static void debug_print_cbc(firm_dbg_module_t *mod, cbc_t *cbc) {
895         ir_nodeset_iterator_t iter;
896         ir_node    *n;
897         rss_edge_t *ke;
898
899         DBG((mod, LEVEL_3, "\t\tS = set of parents:\n"));
900         foreach_ir_nodeset(&cbc->parents, n, iter) {
901                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
902         }
903         DBG((mod, LEVEL_3, "\t\tT = set of children:\n"));
904         foreach_ir_nodeset(&cbc->children, n, iter) {
905                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
906         }
907         DBG((mod, LEVEL_3, "\t\tE = Edges from producers to consumers\n"));
908         foreach_pset(cbc->kill_edges, ke) {
909                 DBG((mod, LEVEL_3, "\t\t\t%+F -> %+F\n", ke->src, ke->tgt));
910         }
911 }
912 #endif
913
914 /**
915  * Construct the bipartite decomposition.
916  * Sid-Ahmed-Ali Touati, Phd Thesis
917  * Register Pressure in Instruction Level Parallelism, p. 71
918  */
919 static void compute_bipartite_decomposition(rss_t *rss) {
920         pset *epk    = new_pset(cmp_rss_edges, 10);
921         int  cur_num = 0;
922
923         plist_element_t *el;
924
925         DBG((rss->dbg, LEVEL_1, "\tcomputing bipartite decomposition:\n"));
926
927         build_kill_edges(rss, epk);
928
929         foreach_plist(rss->nodes, el) {
930                 ir_node   *u_irn   = plist_element_get_value(el);
931                 rss_irn_t *u       = get_rss_irn(rss, u_irn);
932                 int       p_change = 1;
933                 int       c_change = 1;
934                 int       vrfy_ok  = 1;
935
936                 cbc_t           *cbc;
937                 plist_element_t *el2;
938                 rss_edge_t      *k_edge;
939                 rss_edge_t      *kedge_root = NULL;
940                 ir_node         *t_irn, *s_irn;
941                 ir_nodeset_iterator_t iter;
942
943                 if (u->visited || u_irn == _sink)
944                         continue;
945
946                 DBG((rss->dbg, LEVEL_2, "\t\t%+F choosen:\n", u_irn));
947
948                 cbc     = obstack_alloc(phase_obst(&rss->ph), sizeof(*cbc));
949                 cbc->nr = cur_num++;
950
951                 /* initialize S_cb */
952                 ir_nodeset_init_size(&cbc->parents, 5);
953                 ir_nodeset_insert(&cbc->parents, u_irn);
954                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (init)\n", u_irn));
955
956                 /* E_cb = empty */
957                 cbc->kill_edges = new_pset(cmp_rss_edges, 5);
958
959                 /* each parent gets killed by at least one value from children */
960
961                 /* T_cb = PK_successors(u) */
962                 ir_nodeset_init_size(&cbc->children, 5);
963                 foreach_plist(u->pkiller_list, el2) {
964                         ir_nodeset_insert(&cbc->children, plist_element_get_value(el2));
965                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (init)\n", plist_element_get_value(el2)));
966                 }
967
968                 /*
969                         Now: insert the parents of all children into the parent set
970                         and insert the children of all parents into the children set
971                         until the sets don't change any more
972                 */
973                 while (p_change || c_change) {
974                         ir_nodeset_iterator_t iter;
975                         p_change = c_change = 0;
976
977                         /* accumulate parents */
978                         foreach_ir_nodeset(&cbc->children, t_irn, iter) {
979                                 foreach_pset(epk, k_edge) {
980                                         ir_node *val = k_edge->src;
981
982                                         if (k_edge->tgt == t_irn && ! ir_nodeset_contains(&cbc->parents, val)) {
983                                                 ir_nodeset_insert(&cbc->parents, val);
984                                                 p_change = 1;
985                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (killed by %+F)\n", val, t_irn));
986                                         }
987                                 }
988                         }
989
990                         /* accumulate children */
991                         foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
992                                 foreach_pset(epk, k_edge) {
993                                         ir_node *val = k_edge->tgt;
994
995                                         if (k_edge->src == s_irn  && ! ir_nodeset_contains(&cbc->children, val)) {
996                                                 ir_nodeset_insert(&cbc->children, val);
997                                                 c_change = 1;
998                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (kills %+F)\n", val, s_irn));
999                                         }
1000                                 }
1001                         }
1002                 }
1003
1004                 /* mark all parent values as visited */
1005                 foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
1006                         rss_irn_t *s = get_rss_irn(rss, s_irn);
1007                         s->visited = 1;
1008                         /* assure bipartite property */
1009 #if 0
1010                         if (ir_nodeset_contains(&cbc->children, s_irn)) {
1011                                 ir_nodeset_remove(&cbc->children, s_irn);
1012                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F removed from to children\n", s_irn));
1013                         }
1014 #endif
1015                 }
1016
1017                 /* update edges */
1018                 foreach_pset(epk, k_edge) {
1019                         if (ir_nodeset_contains(&cbc->parents, k_edge->src) &&
1020                             ir_nodeset_contains(&cbc->children, k_edge->tgt)) {
1021                                 pset_insert(cbc->kill_edges, k_edge, HASH_RSS_EDGE(k_edge));
1022                                 /*
1023                                         Link all k_edges which are about to be removed together.
1024                                         Beware: pset_remove kills the iterator.
1025                                 */
1026                                 k_edge->next = kedge_root;
1027                                 kedge_root   = k_edge;
1028                         }
1029                 }
1030
1031                 /* remove all linked k_edges */
1032                 for (; kedge_root; kedge_root = kedge_root->next) {
1033                         pset_remove(epk, kedge_root, HASH_RSS_EDGE(kedge_root));
1034                 }
1035
1036                 /* verify the cbc */
1037                 foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
1038                         int is_killed = 0;
1039
1040                         foreach_pset(cbc->kill_edges, k_edge) {
1041                                 if (k_edge->src == s_irn) {
1042                                         is_killed = 1;
1043                                         pset_break(cbc->kill_edges);
1044                                         break;
1045                                 }
1046                         }
1047
1048                         if (! is_killed) {
1049                                 vrfy_ok = 0;
1050                                 ir_fprintf(stderr, "Warning: parent %+F is not killed in current cbc\n", s_irn);
1051                         }
1052                 }
1053                 assert(vrfy_ok && "Verification of CBC failed");
1054
1055                 /* add the connected bipartite component */
1056                 if (ir_nodeset_size(&cbc->parents) > 0 &&
1057                     ir_nodeset_size(&cbc->children) > 0 &&
1058                         pset_count(cbc->kill_edges) > 0)
1059                         pset_insert(rss->cbc_set, cbc, (unsigned)cbc->nr);
1060                 DBG((rss->dbg, LEVEL_2, "\tbipartite component %d inserted:\n", cbc->nr));
1061                 DEBUG_ONLY(
1062                         if (firm_dbg_get_mask(rss->dbg) & LEVEL_2)
1063                                 debug_print_cbc(rss->dbg, cbc);
1064                 );
1065         }
1066
1067         if (rss->opts->dump_flags & RSS_DUMP_CBC)
1068                 debug_vcg_dump_bipartite(rss);
1069
1070         del_pset(epk);
1071 }
1072
1073 /**
1074  * Select the child with the maximum cost.
1075  */
1076 static child_t *select_child_max_cost(rss_t *rss, ir_nodeset_t *x, ir_nodeset_t *y, child_t *t, cbc_t *cbc) {
1077         ir_node *child;
1078         ir_nodeset_iterator_t iter;
1079         float   max_cost = -1.0f;
1080
1081         DBG((rss->dbg, LEVEL_2, "\t\tcomputing children costs:\n"));
1082
1083         foreach_ir_nodeset(&cbc->children, child, iter) {
1084                 rss_irn_t  *r_child             = get_rss_irn(rss, child);
1085                 int         num_unkilled_parents = 0;
1086                 int         num_descendants;
1087                 rss_edge_t *k_edge;
1088                 float       cost;
1089
1090                 /* get the number of unkilled parents */
1091                 foreach_pset(cbc->kill_edges, k_edge) {
1092                         if (k_edge->tgt == child && ir_nodeset_contains(x, k_edge->src))
1093                                 ++num_unkilled_parents;
1094                 }
1095
1096                 cost = (float)num_unkilled_parents;
1097
1098                 num_descendants = plist_count(r_child->descendant_list) + ir_nodeset_size(y);
1099
1100                 if (num_descendants > 0)
1101                         cost /= (float)num_descendants;
1102
1103                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F, #desc %d, cost %.3f\n", child, num_descendants, cost));
1104
1105                 if (cost > max_cost) {
1106                         t->irn   = child;
1107                         t->cost  = cost;
1108                         max_cost = cost;
1109                 }
1110         }
1111
1112         return t;
1113 }
1114
1115 /**
1116  * Remove all parents from x which are killed by t_irn.
1117  */
1118 static void remove_covered_parents(rss_t *rss, ir_nodeset_t *x, ir_node *t_irn, cbc_t *cbc) {
1119         rss_irn_t  *t = get_rss_irn(rss, t_irn);
1120         rss_edge_t *k_edge;
1121
1122         DBG((rss->dbg, LEVEL_2, "\t\tremoving parents covered by %+F:\n", t_irn));
1123
1124         foreach_pset(cbc->kill_edges, k_edge) {
1125                 if (k_edge->tgt == t_irn && ir_nodeset_contains(x, k_edge->src)) {
1126                         ir_nodeset_remove(x, k_edge->src);
1127                         plist_insert_back(t->parent_list, k_edge->src);
1128                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", k_edge->src));
1129                 }
1130         }
1131 }
1132
1133 static void update_cumulated_descendent_values(rss_t *rss, ir_nodeset_t *y, ir_node *t_irn) {
1134         rss_irn_t *t = get_rss_irn(rss, t_irn);
1135         plist_element_t *el;
1136
1137         DBG((rss->dbg, LEVEL_2, "\t\tupdating cumulated descendant value of %+F:\n", t_irn));
1138
1139         foreach_plist(t->descendant_list, el) {
1140                 ir_nodeset_insert(y, plist_element_get_value(el));
1141                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", plist_element_get_value(el)));
1142         }
1143 }
1144
1145 /**
1146  * Greedy-k: a heuristics for the MMA problem
1147  */
1148 static void compute_killing_function(rss_t *rss) {
1149         cbc_t *cbc;
1150         struct obstack obst;
1151
1152         obstack_init(&obst);
1153
1154         rss->cbc_set = pset_new_ptr(5);
1155         compute_bipartite_decomposition(rss);
1156
1157         /* for all bipartite components do: */
1158         foreach_pset(rss->cbc_set, cbc) {
1159                 ir_node *p;
1160                 ir_nodeset_t x;
1161                 ir_nodeset_t y;
1162                 ir_nodeset_iterator_t iter;
1163                 child_t **sks    = NEW_ARR_F(child_t *, 20);
1164                 int     cur_len  = 0;
1165                 int     cur_size = 20;
1166                 int     i;
1167
1168                 ir_nodeset_init_size(&x, 10);
1169                 ir_nodeset_init_size(&y, 10);
1170
1171                 DBG((rss->dbg, LEVEL_1, "\tcomputing SKS for cbc %d:\n", cbc->nr));
1172                 DBG((rss->dbg, LEVEL_2, "\t\tinitializing parents X:\n"));
1173
1174                 /* X = S_cb (all parents are initially uncovered) */
1175                 foreach_ir_nodeset(&cbc->parents, p, iter) {
1176                         ir_nodeset_insert(&x, p);
1177                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", p));
1178                 }
1179
1180                 /* while X not empty */
1181                 while (ir_nodeset_size(&x) > 0) {
1182                         child_t *t = obstack_alloc(&obst, sizeof(*t));
1183                         memset(t, 0, sizeof(*t));
1184
1185                         t = select_child_max_cost(rss, &x, &y, t, cbc);
1186
1187                         if (cur_len >= cur_size) {
1188                                 ARR_EXTO(child_t *, sks, cur_size * 2);
1189                                 cur_size *= 2;
1190                         }
1191
1192                         DBG((rss->dbg, LEVEL_2, "\t\tinsert child %+F (%.3f) into SKS at pos %d\n", t->irn, t->cost, cur_len));
1193
1194                         sks[cur_len++] = t;
1195                         remove_covered_parents(rss, &x, t->irn, cbc);
1196                         update_cumulated_descendent_values(rss, &y, t->irn);
1197                 }
1198
1199                 ARR_SHRINKLEN(sks, cur_len);
1200
1201                 /* sort SKS in increasing cost order */
1202                 qsort(sks, cur_len, sizeof(sks[0]), cmp_child_costs);
1203
1204                 DBG((rss->dbg, LEVEL_2, "\tprocessing SKS for cbc %d:\n", cbc->nr));
1205
1206                 /* build killing function */
1207                 for (i = cur_len - 1; i >= 0; --i) { /* loop over sks in decreasing cost order */
1208                         child_t         *t = sks[i];
1209                         rss_irn_t       *rt = get_rss_irn(rss, t->irn);
1210                         plist_element_t *p_el;
1211
1212                         DBG((rss->dbg, LEVEL_3, "\t\t\tkiller %+F (%.3f):\n", t->irn, t->cost));
1213
1214                         /* kill all unkilled parents of t */
1215                         foreach_plist(rt->parent_list, p_el) {
1216                                 ir_node    *par = plist_element_get_value(p_el);
1217                                 rss_irn_t *rpar = get_rss_irn(rss, par);
1218
1219                                 if (is_Sink(rpar->killer)) {
1220                                         rpar->killer = t->irn;
1221                                         DBG((rss->dbg, LEVEL_2, "\t\tkill %+F\n", rpar->irn));
1222                                 }
1223                                 else {
1224                                         DBG((rss->dbg, LEVEL_3, "\t\t\tkeeping %+F as killer for %+F\n", rpar->killer, rpar->irn));
1225                                 }
1226                         }
1227                 }
1228
1229                 ir_nodeset_destroy(&x);
1230                 ir_nodeset_destroy(&y);
1231                 DEL_ARR_F(sks);
1232         }
1233
1234         if (rss->opts->dump_flags & RSS_DUMP_KILL)
1235                 debug_vcg_dump_kill(rss);
1236
1237         del_pset(rss->cbc_set);
1238         obstack_free(&obst, NULL);
1239 }
1240
1241 /**
1242  * Adds the edge src -> tgt to the dvg. Checks if reverse edge is already there (asserts).
1243  */
1244 static INLINE void add_dvg_edge(rss_t *rss, dvg_t *dvg, ir_node *src, ir_node *tgt, int have_source) {
1245         rss_edge_t *dvg_edge;
1246         rss_edge_t key;
1247
1248         if (! have_source)
1249                 ir_nodeset_insert(&dvg->nodes, src);
1250         else
1251                 assert(ir_nodeset_contains(&dvg->nodes, src) && "Wrong assumption");
1252
1253         ir_nodeset_insert(&dvg->nodes, tgt);
1254
1255         key.src = tgt;
1256         key.tgt = src;
1257         assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1258
1259         key.src = src;
1260         key.tgt = tgt;
1261         if (NULL != pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1262                 /* add the edge to the DVG */
1263                 dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1264
1265                 dvg_edge->src  = src;
1266                 dvg_edge->tgt  = tgt;
1267                 dvg_edge->next = NULL;
1268
1269                 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", src, tgt));
1270                 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1271         }
1272 }
1273
1274 /**
1275  * Computes the disjoint value DAG (DVG).
1276  * BEWARE: It is not made explicitly clear in the Touati paper,
1277  *         but the DVG is meant to be build from the KILLING DAG
1278  */
1279 static void compute_dvg(rss_t *rss, dvg_t *dvg) {
1280         plist_element_t *el;
1281
1282         DBG((rss->dbg, LEVEL_1, "\tcomputing DVG:\n"));
1283
1284         foreach_plist(rss->nodes, el) {
1285                 ir_node    *u_irn    = plist_element_get_value(el);
1286                 rss_irn_t  *u        = get_rss_irn(rss, u_irn);
1287                 rss_irn_t  *u_killer = get_rss_irn(rss, u->killer);
1288                 int        i;
1289
1290                 /* TODO: omit nodes only having sink as pkiller and killing no one */
1291
1292                 /* add an edge to killer */
1293                 add_dvg_edge(rss, dvg, u_irn, u->killer, 0);
1294
1295                 if (is_Sink(u->killer))
1296                         continue;
1297
1298                 /* We add an edge to every killer, from where we could be reached. */
1299                 for (i = ARR_LEN_SAFE(u_killer->descendants) - 1; i >= 0; --i) {
1300                         add_dvg_edge(rss, dvg, u_irn, u_killer->descendants[i], 1);
1301                 }
1302
1303 #if 0
1304
1305                 foreach_plist(rss->nodes, el2) {
1306                         ir_node *v_irn = plist_element_get_value(el2);
1307
1308                         /*
1309                                 There is an edge (u, v) in the DVG iff v is a descendant of the killer(u).
1310                         */
1311                         if (BSEARCH_IRN_ARR(v_irn, u_kill->descendants)) {
1312                                 rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1313                                 rss_edge_t key;
1314
1315                                 /* insert the user into the DVG and append it to the user list of u */
1316                                 ir_nodeset_insert(&dvg->nodes, v_irn);
1317                                 if (! plist_has_value(u->dvg_user_list, v_irn))
1318                                         plist_insert_back(u->dvg_user_list, v_irn);
1319
1320                                 dvg_edge->src  = u_irn;
1321                                 dvg_edge->tgt  = v_irn;
1322                                 dvg_edge->next = NULL;
1323
1324                                 key.src = v_irn;
1325                                 key.tgt = u_irn;
1326
1327                                 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1328
1329                                 /* add the edge to the DVG */
1330                                 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", u_irn, v_irn));
1331                                 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1332                         }
1333                 }
1334 #endif /* if 0 */
1335         }
1336
1337         if (rss->opts->dump_flags & RSS_DUMP_DVG)
1338                 debug_vcg_dump_dvg(rss, dvg);
1339 }
1340
1341 /**
1342  * Updates the dvg structure when a serialization edge from src -> tgt is added.
1343  */
1344 static void update_dvg(rss_t *rss, dvg_t *dvg, rss_irn_t *src, rss_irn_t *tgt) {
1345         int i, j, idx;
1346         rss_edge_t *edge;
1347         rss_edge_t **arr = alloca(pset_count(dvg->edges) * sizeof(arr[0]));
1348
1349         /*
1350                 Add an edge from serialization target to serialization src:
1351                 src cannot be alive with target
1352         */
1353         add_dvg_edge(rss, dvg, tgt->irn, src->irn, 0);
1354
1355         /* Add edges to src's descendants as well, they also getting serialized. */
1356         for (i = ARR_LEN_SAFE(src->descendants) - 1; i >= 0; --i) {
1357                 add_dvg_edge(rss, dvg, tgt->irn, src->descendants[i], 1);
1358         }
1359
1360         /* We also have to add edges from targets predecessors in dvg */
1361         idx = 0;
1362         /* We cannot insert elements into set over which we iterate ... */
1363         foreach_pset(dvg->edges, edge) {
1364                 if (edge->tgt == tgt->irn) {
1365                         arr[idx++] = edge;
1366                 }
1367         }
1368
1369         for (i = 0; i < idx; ++i) {
1370                 edge = arr[i];
1371                 add_dvg_edge(rss, dvg, edge->src, src->irn, 1);
1372                 for (j = ARR_LEN_SAFE(src->descendants) - 1; j >= 0; --j) {
1373                         add_dvg_edge(rss, dvg, edge->src, src->descendants[j], 1);
1374                 }
1375         }
1376 }
1377
1378 #if 0
1379 /**
1380  * Accumulate all descendants for root into list.
1381  */
1382 static void accumulate_dvg_descendant_values(rss_t *rss, rss_irn_t *root, plist_t *list) {
1383         if (plist_count(root->dvg_user_list) > 0) {
1384                 plist_element_t *el;
1385
1386                 foreach_plist(root->dvg_user_list, el) {
1387                         ir_node   *v_irn = plist_element_get_value(el);
1388                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
1389
1390                         /* add v as descendant */
1391                         if (! plist_has_value(list, v_irn)) {
1392                                 plist_insert_back(list, v_irn);
1393                                 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG descendant %+F\n", v_irn));
1394                         }
1395
1396                         /* accumulate v's descendants */
1397                         accumulate_dvg_descendant_values(rss, v, list);
1398                 }
1399         }
1400 }
1401
1402 /**
1403  * Builds the list of potential killers for each node
1404  * in the given DVG.
1405  * Needs the descendant list for all user as sorted array.
1406  */
1407 static void build_dvg_pkiller_list(rss_t *rss, dvg_t *dvg) {
1408         ir_nodeset_iterator_t iter;
1409         ir_node *irn;
1410
1411         foreach_nodeset(&dvg->nodes, irn, iter) {
1412                 rss_irn_t       *node = get_rss_irn(rss, irn);
1413                 plist_element_t *el, *el2;
1414
1415                 DBG((rss->dbg, DEBUG_DVG, "\t\tbuilding pkiller list for %+F\n", irn));
1416
1417                 /* check each user */
1418                 foreach_plist(node->dvg_user_list, el) {
1419                         ir_node *u_irn = plist_element_get_value(el);
1420
1421                         /* is the current user u_irn not a descendant of any other user -> pkiller */
1422                         foreach_plist(node->dvg_user_list, el2) {
1423                                 ir_node   *v_irn = plist_element_get_value(el2);
1424                                 rss_irn_t *v     = get_rss_irn(rss, v_irn);
1425
1426                                 if (el != el2                             &&
1427                                         ! BSEARCH_IRN_ARR(u_irn, v->dvg_desc) &&
1428                                         ! plist_has_value(node->dvg_pkiller_list, u_irn))
1429                                 {
1430                                         plist_insert_back(node->dvg_pkiller_list, u_irn);
1431                                         DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG pkiller %+F\n", u_irn));
1432                                 }
1433                         }
1434                 }
1435
1436                 node->dvg_pkiller = build_sorted_array_from_list(node->dvg_pkiller_list, phase_obst(&rss->ph));
1437         }
1438
1439         DEBUG_ONLY(
1440                 if (firm_dbg_get_mask(rss->dbg) & DEBUG_DVG)
1441                         debug_vcg_dump_dvg_pkiller(rss, dvg);
1442         );
1443 }
1444
1445 #endif /* if 0 */
1446
1447 /**
1448  * Compute the maximal antichain of the current DVG.
1449  * This is a reimplementation of the MAXIMAL_ANTI_CHAIN function
1450  * from the DDG library 1.1 (DAG.cpp).
1451  */
1452 static ir_nodeset_t *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration) {
1453         int         n               = ir_nodeset_size(&dvg->nodes);
1454         int         *assignment     = alloca(n * sizeof(assignment[0]));
1455         int         *assignment_rev = alloca(n * sizeof(assignment_rev[0]));
1456         int         *idx_map        = alloca(n * sizeof(idx_map[0]));
1457         hungarian_problem_t *bp;
1458         ir_nodeset_t *values, *temp;
1459         ir_nodeset_iterator_t iter;
1460         ir_node     *u_irn;
1461         int         i, j, cost, cur_chain, res;
1462         rss_edge_t  *dvg_edge;
1463
1464 #define MAP_IDX(irn) bsearch_for_index(get_irn_idx(irn), idx_map,  n,  1)
1465
1466         if (pset_count(dvg->edges) == 0)
1467                 return NULL;
1468
1469         bp = hungarian_new(n, n, 1, HUNGARIAN_MATCH_NORMAL);
1470
1471         /*
1472                 At first, we build an index map for the nodes in the DVG,
1473                 because we cannot use the irn idx therefore as the resulting
1474                 bipartite data structure would have around 1.2GB.
1475                 So we limit the size to the number of nodes we have in the DVG
1476                 and build a sorted index map for their irn indices.
1477         */
1478         i = 0;
1479         foreach_ir_nodeset(&dvg->nodes, u_irn, iter) {
1480                 idx_map[i++] = get_irn_idx(u_irn);
1481         }
1482         qsort(idx_map, n, sizeof(idx_map[0]), cmp_int);
1483
1484         foreach_pset(dvg->edges, dvg_edge) {
1485                 int idx_u = MAP_IDX(dvg_edge->src);
1486                 int idx_v = MAP_IDX(dvg_edge->tgt);
1487
1488                 /* add the entry to the bipartite data structure */
1489                 hungarian_add(bp, idx_u, idx_v, 1);
1490                 DBG((rss->dbg, LEVEL_3, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1491                         idx_u, dvg_edge->src, idx_v, dvg_edge->tgt));
1492         }
1493 #if 0
1494         /*
1495                 Add a bipartite entry for each pair of nodes (u, v), where exists a
1496                 path in the DVG from u to v, ie. connect all descendants(v) to v.
1497                 desc_dvg(v) = accumulated descendants of all z in dvg_user_list(v)
1498         */
1499         foreach_ir_nodeset(&dvg->nodes, u_irn, iter) {
1500                 rss_irn_t *u        = get_rss_irn(rss, u_irn);
1501                 int       idx_u_irn = MAP_IDX(u_irn);
1502
1503                 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG descendants of %+F:\n", u_irn));
1504
1505                 //plist_clear(u->dvg_desc_list);
1506                 //accumulate_dvg_descendant_values(rss, u, u->dvg_desc_list);
1507
1508                 /*
1509                         FIXME: The array is build on the phase obstack and we cannot free the data.
1510                         So the array get as many times allocated as this function is called.
1511                 */
1512
1513                 /* build the sorted array for faster searches */
1514                 //u->dvg_desc = build_sorted_array_from_list(u->dvg_desc_list, phase_obst(&rss->ph));
1515
1516                 DBG((rss->dbg, DEBUG_MAX_AC, "\t\tadding bipartite entries of %+F:\n", u_irn));
1517
1518                 /* add the bipartite entries for all descendants */
1519                 for (i = ARR_LEN_SAFE(u->dvg_desc) - 1; i >= 0; --i) {
1520                         rss_irn_t *desc    = get_rss_irn(rss, u->dvg_desc[i]);
1521                         int       idx_desc = MAP_IDX(u->dvg_desc[i]);
1522
1523                         /* add the entry to the bipartite data structure */
1524                         hungarian_add(bp, idx_u_irn, idx_desc, 1);
1525                         DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1526                                 idx_u_irn, u_irn, idx_desc, u->dvg_desc[i]));
1527
1528                         need_matching = 1;
1529                 }
1530         }
1531 #endif
1532
1533         /* We want maximum cardinality matching */
1534         hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);
1535
1536 #if 0
1537         DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG pkiller:\n"));
1538         /* beware: the following function needs the dvg_desc array */
1539         build_dvg_pkiller_list(rss, dvg);
1540 #endif
1541
1542         DBG((rss->dbg, LEVEL_2, "\t\tcomputing bipartite matching\n"));
1543         /*
1544                 The maximum cardinality bipartite matching gives us the minimal
1545                 chain partition, which corresponds to the maximum anti chains.
1546         */
1547         res = hungarian_solve(bp, assignment, &cost, 1);
1548         assert(res == 0 && "Bipartite matching failed!");
1549
1550         hungarian_free(bp);
1551         memset(assignment_rev, -1, n * sizeof(assignment_rev[0]));
1552
1553         /* build the reverse assignment, ie. foreach i -> j, add j -> i */
1554         for (i = 0; i < n; ++i) {
1555                 if (assignment[i] >= 0) {
1556                         int j = assignment[i];
1557                         assignment_rev[j] = i;
1558                 }
1559         }
1560
1561         DBG((rss->dbg, LEVEL_2, "\t\tgot assignment with cost %d\n", cost));
1562         DBG((rss->dbg, LEVEL_3, "\t\t\tassignment   ---   reverse assignment\n", cost));
1563         for (i = 0; i < n; ++i) {
1564                 DBG((rss->dbg, LEVEL_3, "\t\t\t%3d -> %3d         %3d -> %3d\n", i, assignment[i], i, assignment_rev[i]));
1565         }
1566
1567         values    = xmalloc(sizeof(values[0]));
1568         ir_nodeset_init_size(values, 10);
1569         cur_chain = 0;
1570         /* Construction of the minimal chain partition */
1571         for (j = 0; j < n; ++j) {
1572                 /* check nodes, which did not occur as target */
1573                 if (assignment_rev[j] == -1) {
1574                         int       xj      = idx_map[j];
1575                         ir_node   *xj_irn = get_idx_irn(rss->irg, xj);
1576                         rss_irn_t *xj_rss = get_rss_irn(rss, xj_irn);
1577                         chain_t   *c      = obstack_alloc(phase_obst(&rss->ph), sizeof(*c));
1578                         int       source;
1579
1580                         /* there was no source for j -> we have a source of a new chain */
1581                         ir_nodeset_insert(values, xj_irn);
1582
1583                         c->elements = plist_obstack_new(phase_obst(&rss->ph));
1584                         c->nr       = cur_chain++;
1585                         plist_insert_back(c->elements, xj_irn);
1586
1587                         xj_rss->chain = c;
1588
1589                         DBG((rss->dbg, LEVEL_2, "\t\tstarting chain %d:\n", c->nr));
1590                         DBG((rss->dbg, LEVEL_2, "\t\t\t%+F (%d)", xj_irn, j));
1591
1592                         /* follow chain, having j as source */
1593                         source = j;
1594                         while (assignment[source] >= 0) {
1595                                 int       target  = assignment[source];
1596                                 int       irn_idx = idx_map[target];
1597                                 ir_node   *irn    = get_idx_irn(rss->irg, irn_idx);
1598                                 rss_irn_t *node   = get_rss_irn(rss, irn);
1599
1600                                 plist_insert_back(c->elements, irn);
1601                                 node->chain = c;
1602
1603                                 DB((rss->dbg, LEVEL_2, " -> %+F (%d)", irn, target));
1604
1605                                 /* new source = last target */
1606                                 source = target;
1607                         }
1608
1609                         DB((rss->dbg, LEVEL_2, "\n"));
1610                 }
1611         }
1612
1613         /*
1614                 Computing the maximal antichain: Select an element from each
1615                 chain such, such it is parallel with the others.
1616         */
1617         DBG((rss->dbg, LEVEL_1, "\t\tcomputing set of saturation values (MAX AC)\n"));
1618         DBG((rss->dbg, LEVEL_3, "\t\tstarting with:\n"));
1619
1620         DEBUG_ONLY(
1621                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_3)
1622                         dump_nodeset(values, "\t\t\t");
1623         )
1624
1625         temp = NULL;
1626         do {
1627                 /*
1628                         We need an explicit array for the values as
1629                         we cannot iterate multiple times over the same
1630                         set at the same time. :-(((((
1631                         TODO Matze: now we can, so rewrite this...
1632                 */
1633                 int     n         = ir_nodeset_size(values);
1634                 int     i         = 0;
1635                 ir_node **val_arr = NEW_ARR_F(ir_node *, n);
1636
1637                 foreach_ir_nodeset(values, u_irn, iter)
1638                         val_arr[i++] = u_irn;
1639
1640                 if (temp) {
1641                         ir_nodeset_destroy(temp);
1642                         free(temp);
1643                 }
1644
1645                 temp = xmalloc(sizeof(temp[0]));
1646                 ir_nodeset_init_size(temp, 10);
1647
1648                 /* Select all nodes from current value set, having another node in the set as descendant. */
1649                 for (i = 0; i < n; ++i) {
1650                         rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1651
1652                         for (j = 0; j < n; ++j) {
1653                                 if (i != j) {
1654                                         rss_edge_t key;
1655
1656                                         key.src = val_arr[i];
1657                                         key.tgt = val_arr[j];
1658
1659                                         if (pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1660                                                 /* v[j] is descendant of u -> remove u and break */
1661                                                 ir_nodeset_insert(temp, u->irn);
1662                                                 ir_nodeset_remove(values, u->irn);
1663
1664                                                 DBG((rss->dbg, LEVEL_3, "\t\t\tremoving %+F from values, adding it to temp\n", u->irn));
1665
1666                                                 break;
1667                                         }
1668                                 }
1669                         }
1670                 }
1671
1672                 /* Try to insert the chain predecessor of all selected u's */
1673                 foreach_ir_nodeset(temp, u_irn, iter) {
1674                         rss_irn_t       *u  = get_rss_irn(rss, u_irn);
1675                         chain_t         *c  = u->chain;
1676                         plist_element_t *el = plist_find_value(c->elements, u_irn);
1677
1678                         assert(el && "Missing element in chain!");
1679
1680                         /* If u has predecessor in chain: insert the predecessor */
1681                         if (el == plist_element_get_prev(el)) {
1682                                 ir_nodeset_insert(values, plist_element_get_value(el));
1683                                 DBG((rss->dbg, LEVEL_3, "\t\t\tadding %+F to values\n", plist_element_get_value(el)));
1684                         }
1685                 }
1686
1687
1688                 DEL_ARR_F(val_arr);
1689         } while (ir_nodeset_size(temp) > 0);
1690
1691         DBG((rss->dbg, LEVEL_2, "\t\tfinal set:\n"));
1692         DEBUG_ONLY(
1693                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2) {
1694                         dump_nodeset(values, "\t\t\t");
1695                 }
1696         );
1697
1698         if (rss->opts->dump_flags & RSS_DUMP_MAXAC)
1699                 debug_vcg_dump_pkg(rss, values, iteration);
1700
1701         if(temp != NULL) {
1702                 ir_nodeset_destroy(temp);
1703                 free(temp);
1704         }
1705
1706         return values;
1707
1708 #undef MAP_IDX
1709 }
1710
1711 /**
1712  * Computes the best serialization between two nodes of sat_vals.
1713  */
1714 static serialization_t *compute_best_admissible_serialization(rss_t *rss, ir_nodeset_t *sat_vals, serialization_t *ser, int num_regs) {
1715         int        n                    = ir_nodeset_size(sat_vals);
1716         int        n_idx                = ARR_LEN_SAFE(rss->idx_map);
1717         int        i                    = 0;
1718         ir_node    **val_arr            = alloca(n * sizeof(val_arr[0]));
1719         bitset_t   *bs_sv               = bitset_alloca(n_idx);
1720         bitset_t   *bs_vdesc            = bitset_alloca(n_idx);
1721         bitset_t   *bs_tmp              = bitset_alloca(n_idx);
1722         bitset_t   *bs_ukilldesc        = bitset_alloca(n_idx);
1723         int        best_benefit         = INT_MAX;
1724         int        best_omega2          = INT_MAX;
1725         int        best_benefit_omega20 = INT_MAX;
1726         int        has_omega1           = 0;
1727         int        j, k;
1728         ir_node    *irn;
1729         ir_nodeset_iterator_t iter;
1730         rss_edge_t min_benefit_edge;
1731         rss_edge_t min_omega20_edge;
1732         rss_irn_t  *ser_u_omega1 = NULL, *ser_v_omega1 = NULL;
1733         rss_irn_t  *ser_u_omega20 = NULL, *ser_v_omega20 = NULL;
1734
1735         DBG((rss->dbg, LEVEL_1, "\tcomputing admissible serializations:\n"));
1736
1737         /*
1738                 We need an explicit array for the values as
1739                 we cannot iterate multiple times over the same
1740                 set at the same time. :-(((((
1741         */
1742
1743         foreach_ir_nodeset(sat_vals, irn, iter) {
1744                 val_arr[i++] = irn;
1745                 bitset_set(bs_sv, BLOCK_IDX_MAP(rss, irn));
1746         }
1747
1748         /*
1749                 We build all admissible serializations and remember the best found so far.
1750                 For u in sat_vals:
1751                  For v in sat_val:
1752                    if v in pkiller(u): add edge from v to all other pkiller(u)
1753                    else: for all vv in pkiller(u): add edge from v to vv if there exists no path from vv to v
1754         */
1755
1756 /*
1757         A node is unserializable if:
1758         - it has only one killer and this one is Sink
1759     - it kills no other values
1760         In this case there is no serialization which could
1761         reduce the registerpressure
1762 */
1763 #define IS_UNSERIALIZABLE_NODE(rss_node)                  \
1764         (                                                     \
1765                 (                                                 \
1766                         (plist_count(rss_node->pkiller_list) == 1) && \
1767                         is_Sink(rss_node->killer)                  && \
1768                         (rss_node->kill_count                == 0)    \
1769                 )                            ||                   \
1770                 be_is_Barrier(rss_node->irn) ||                   \
1771                 be_is_Keep(rss_node->irn)                         \
1772         )
1773
1774         /* for all u in sat_vals */
1775         for (i = 0; i < n; ++i) {
1776                 rss_irn_t       *u = get_rss_irn(rss, val_arr[i]);
1777                 plist_element_t *el;
1778
1779                 /* ignore nodes where serialization does not help */
1780                 if (IS_UNSERIALIZABLE_NODE(u)) {
1781                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", u->irn));
1782                         continue;
1783                 }
1784
1785                 /* accumulate all descendants of all pkiller(u) */
1786                 bitset_clear_all(bs_ukilldesc);
1787                 foreach_plist(u->pkiller_list, el) {
1788                         ir_node   *irn  = plist_element_get_value(el);
1789                         rss_irn_t *node = get_rss_irn(rss, irn);
1790
1791                         if (! is_Sink(irn))
1792                                 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, irn));
1793                         else
1794                                 continue;
1795
1796                         for (k = ARR_LEN_SAFE(node->descendants) - 1; k >= 0; --k) {
1797                                 if (! is_Sink(node->descendants[k]))
1798                                         bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, node->descendants[k]));
1799                         }
1800                 }
1801
1802                 /* for all v in sat_vals */
1803                 for (j = 0; j < n; ++j) {
1804                         ir_node   *v_irn   = val_arr[j];
1805                         rss_irn_t *v       = get_rss_irn(rss, v_irn);
1806                         unsigned  v_height = get_irn_height(rss->h, v_irn);
1807                         int       omega1, omega2, is_pkiller;
1808
1809                         /* v cannot be serialized with itself
1810                          * ignore nodes where serialization does not help */
1811                         if (i == j || IS_UNSERIALIZABLE_NODE(v)) {
1812                                 if (i != j)
1813                                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", v->irn));
1814                                 continue;
1815                         }
1816
1817                         /* get descendants of v */
1818                         bitset_clear_all(bs_vdesc);
1819                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v_irn));
1820                         for (k = ARR_LEN_SAFE(v->descendants) - 1; k >= 0; --k) {
1821                                 if (! is_Sink(v->descendants[k]))
1822                                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v->descendants[k]));
1823                         }
1824
1825                         /* if v is in pkiller(u) */
1826                         is_pkiller = plist_has_value(u->pkiller_list, v_irn);
1827
1828                         /* for all vv in pkiller(u) */
1829                         foreach_plist(u->pkiller_list, el) {
1830                                 ir_node *vv_irn  = plist_element_get_value(el);
1831                                 int     add_edge;
1832
1833                                 if (is_Sink(vv_irn) || is_cfop(vv_irn))
1834                                         continue;
1835
1836                                 if (is_pkiller)
1837                                         add_edge = vv_irn != v_irn && skip_Proj(vv_irn) != skip_Proj(v_irn);
1838                                 else
1839                                         add_edge = ! heights_reachable_in_block(rss->h, skip_Proj(vv_irn), skip_Proj(v_irn));
1840
1841                                 /*
1842                                         As we add an edge from vv -> v, we have to make sure,
1843                                         that there exists no path from v to vv.
1844                                 */
1845
1846                                 if (add_edge) {
1847                                         unsigned vv_height = get_irn_height(rss->h, vv_irn);
1848                                         unsigned critical_path_cost;
1849                                         unsigned mu1, mu2;
1850
1851                                         /*
1852                                                 mu1 = | descendants(v) cut sat_vals |
1853                                                 the number of saturating values which cannot
1854                                                 be simultaneously alive with u
1855                                         */
1856                                         bitset_copy(bs_tmp, bs_vdesc);
1857                                         mu1 = bitset_popcnt(bitset_and(bs_tmp, bs_sv));
1858
1859                                         /*
1860                                                 mu2 = | accum_desc_all_pkiller(u) without descendants(v) |
1861                                         */
1862                                         if (is_pkiller) {
1863                                                 bitset_copy(bs_tmp, bs_ukilldesc);
1864                                                 mu2 = bitset_popcnt(bitset_andnot(bs_tmp, bs_vdesc));
1865                                         }
1866                                         else {
1867                                                 mu2 = 0;
1868                                         }
1869
1870                                         /* omega1 = mu1 - mu2 */
1871                                         omega1 = mu1 - mu2;
1872
1873                                         if (omega1 != 0)
1874                                                 has_omega1 = 1;
1875
1876                                         /* omega2 = increase of critical path */
1877                                         critical_path_cost =
1878                                                 v_height                        /* longest path from v to sink */
1879                                                 + rss->max_height - vv_height   /* longest path from source to vv */
1880                                                 + 1;                            /* edge */
1881
1882                                         /*
1883                                                 If critical_path_cost > max_height -> the new edge
1884                                                 would increase the longest critical path by the difference.
1885                                         */
1886                                         omega2 = critical_path_cost > rss->max_height ? critical_path_cost - rss->max_height : 0;
1887
1888                                         /* this keeps track of the edge with the best benefit */
1889                                         if (omega1 >= num_regs - n && omega1 < best_benefit) {
1890                                                 min_benefit_edge.src = v_irn;
1891                                                 min_benefit_edge.tgt = vv_irn;
1892
1893                                                 ser_u_omega1 = u;
1894                                                 ser_v_omega1 = v;
1895
1896                                                 best_benefit    = omega1;
1897                                                 ser->new_killer = is_pkiller;
1898                                         }
1899
1900                                         /* this keeps track of the edge with the best omega1 costs where omega2 == 0 */
1901                                         if (omega2 == 0 && omega1 >= num_regs - n && omega1 < best_benefit_omega20) {
1902                                                 min_omega20_edge.src = v_irn;
1903                                                 min_omega20_edge.tgt = vv_irn;
1904
1905                                                 ser_u_omega20 = u;
1906                                                 ser_v_omega20 = v;
1907
1908                                                 best_benefit_omega20 = omega1;
1909                                                 ser->new_killer      = is_pkiller;
1910                                         }
1911
1912                                         best_omega2 = MIN(best_omega2, omega2);
1913
1914                                         DBG((rss->dbg, LEVEL_2, "\t\tfound %+F -> %+F (w1 %d, w2 %d)\n",
1915                                                 v_irn, vv_irn, omega1, omega2));
1916                                 } /* if add_edge */
1917                         } /* for all vv in pkiller(u) */
1918                 } /* for all v in sat_vals */
1919         } /* for all u in sat_vals */
1920
1921         if (! has_omega1)
1922                 return NULL;
1923
1924         if (best_omega2 == 0) {
1925                 ser->u         = ser_u_omega20;
1926                 ser->v         = ser_v_omega20;
1927                 ser->edge->src = min_omega20_edge.src;
1928                 ser->edge->tgt = min_omega20_edge.tgt;
1929                 ser->omega1    = best_benefit_omega20;
1930                 ser->omega2    = best_omega2;
1931         }
1932         else {
1933                 ser->u         = ser_u_omega1;
1934                 ser->v         = ser_v_omega1;
1935                 ser->edge->src = min_benefit_edge.src;
1936                 ser->edge->tgt = min_benefit_edge.tgt;
1937                 ser->omega1    = best_benefit;
1938                 ser->omega2    = best_omega2;
1939         }
1940
1941         return ser;
1942
1943 #undef IS_UNSERIALIZABLE_NODE
1944 }
1945
1946 /**
1947  * Perform the value serialization heuristic and add all
1948  * computed serialization edges as dependencies to the irg.
1949  */
1950 static void perform_value_serialization_heuristic(rss_t *rss) {
1951         bitset_t *arch_nonign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1952         bitset_t *abi_ign_bs     = bitset_alloca(arch_register_class_n_regs(rss->cls));
1953         unsigned available_regs, iteration;
1954         dvg_t    dvg;
1955         ir_nodeset_t *sat_vals;
1956         pset *ser_set = new_pset(cmp_rss_edges, 20);
1957
1958         /* available_regs = R = |arch_non_ignore_regs cut ~abi_ignore_regs| */
1959         arch_put_non_ignore_regs(rss->arch_env, rss->cls, arch_nonign_bs);
1960         be_abi_put_ignore_regs(rss->abi, rss->cls, abi_ign_bs);
1961         bitset_andnot(arch_nonign_bs, abi_ign_bs);
1962         available_regs  = bitset_popcnt(arch_nonign_bs);
1963         //num_live = pset_count(rss->live_block);
1964         //available_regs -= num_live < available_regs ? num_live : 0;
1965
1966         DBG((rss->dbg, LEVEL_1, "\n\t#available regs: %d\n\n", available_regs));
1967
1968         /*
1969                 At first we need to compute the disjoint value DAG (DVG = {V, E_dv}).
1970                 V    = set of all nodes we are currently interested in
1971                 E_dv = there is an edge from u to v iff v is a descendant of killer(u), forall u, v in V
1972         */
1973         ir_nodeset_init_size(&dvg.nodes, plist_count(rss->nodes));
1974         dvg.edges = new_pset(cmp_rss_edges, plist_count(rss->nodes) * 5);
1975         compute_dvg(rss, &dvg);
1976
1977         /*
1978                 Then we perform the heuristic serialization algorithm
1979                 on the DVG which gives us all necessary serialization
1980                 edges.
1981         */
1982         DBG((rss->dbg, LEVEL_1, "\tcomputing maximal antichain:\n"));
1983         iteration = 0;
1984         sat_vals  = compute_maximal_antichain(rss, &dvg, iteration++);
1985         while (sat_vals && (ir_nodeset_size(sat_vals) > available_regs)) {
1986                 serialization_t *ser, best_ser;
1987                 rss_edge_t      *edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*edge));
1988                 ir_node         *dep_src, *dep_tgt;
1989
1990                 best_ser.edge = edge;
1991                 ser = compute_best_admissible_serialization(rss, sat_vals, &best_ser, available_regs);
1992
1993                 DBG((rss->dbg, LEVEL_1, "\tcurrent register saturation %d, target %d\n", ir_nodeset_size(sat_vals), available_regs));
1994
1995                 if (! ser) {
1996                         DBG((rss->dbg, LEVEL_1, "\tno RS improving serialization found, breaking at iteration %d\n", iteration));
1997                         break;
1998                 }
1999
2000                 /* Insert the serialization as dependency edge into the irg. */
2001                 DBG((rss->dbg, LEVEL_1, "\tserializing %+F -> %+F with edge %+F -> %+F and cost %d, %d\n",
2002                         ser->u->irn, ser->v->irn, ser->edge->src, ser->edge->tgt, ser->omega1, ser->omega2));
2003
2004                 if (pset_find(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge)))
2005                         ir_printf("WARNING: serialization %+F -> %+F computed twice!\n", ser->edge->src, ser->edge->tgt);
2006
2007
2008                 pset_insert(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge));
2009
2010                 /* update the dvg */
2011                 update_dvg(rss, &dvg, ser->v, ser->u);
2012                 update_dvg(rss, &dvg, get_rss_irn(rss, ser->edge->src), get_rss_irn(rss, ser->edge->tgt));
2013                 if(sat_vals != NULL) {
2014                         ir_nodeset_destroy(sat_vals);
2015                         free(sat_vals);
2016                 }
2017
2018                 dep_src = skip_Proj(ser->edge->src);
2019                 dep_tgt = ser->edge->tgt;
2020                 add_irn_dep(dep_src, dep_tgt);
2021
2022                 /* Update descendants, consumer and pkillers of the target */
2023                 update_node_info(rss, ser->edge->tgt, ser->edge->src);
2024
2025                 /* TODO: try to find a cheaper way for updating height information */
2026                 rss->max_height = heights_recompute_block(rss->h, rss->block);
2027
2028                 /* Recompute the antichain for next serialization */
2029                 DBG((rss->dbg, LEVEL_1, "\tre-computing maximal antichain:\n"));
2030                 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
2031         }
2032
2033         ir_nodeset_destroy(&dvg.nodes);
2034         del_pset(dvg.edges);
2035 }
2036
2037 /**
2038  * Do initial calculations for a block.
2039  */
2040 static void process_block(ir_node *block, void *env) {
2041         rss_t *rss = env;
2042         int   i, n;
2043         const ir_edge_t *edge;
2044
2045         phase_init(&rss->ph, "rss block preprocessor", rss->irg, PHASE_DEFAULT_GROWTH, init_rss_irn, NULL);
2046
2047         DBG((rss->dbg, LEVEL_1, "preprocessing block %+F\n", block));
2048         rss->block = block;
2049
2050         /* build an index map for all nodes in the current block */
2051         i            = 0;
2052         n            = get_irn_n_edges(block);
2053         NEW_ARR_A(int *, rss->idx_map, n);
2054         foreach_out_edge(block, edge) {
2055                 ir_node *irn      = get_edge_src_irn(edge);
2056                 rss->idx_map[i++] = get_irn_idx(irn);
2057         }
2058         qsort(rss->idx_map, n, sizeof(rss->idx_map[0]), cmp_int);
2059         rss->max_height = heights_recompute_block(rss->h, block);
2060
2061         /* loop over all register classes */
2062         for (i = arch_isa_get_n_reg_class(rss->arch_env->isa) - 1; i >= 0; --i) {
2063                 const arch_register_class_t *cls = arch_isa_get_reg_class(rss->arch_env->isa, i);
2064
2065                 rss->cls = cls;
2066                 DBG((rss->dbg, LEVEL_1, "register class %s\n", arch_register_class_name(cls)));
2067
2068                 /* Get all live value at end of Block having current register class */
2069                 rss->live_block = pset_new_ptr(10);
2070                 be_liveness_end_of_block(rss->liveness, rss->arch_env, rss->cls, rss->block, rss->live_block);
2071
2072                 /* reset the list of interesting nodes */
2073                 plist_clear(rss->nodes);
2074                 plist_insert_back(rss->nodes, _sink);
2075
2076                 /* collect all nodes having a certain register class */
2077                 foreach_out_edge(block, edge) {
2078                         ir_node *irn  = get_edge_src_irn(edge);
2079                         ir_mode *mode = get_irn_mode(irn);
2080
2081                         /*
2082                                 We skip:
2083                                 - mode_T nodes (the projs are asked)
2084                                 - mode_X nodes (control flow nodes are always scheduled last)
2085                                 - Keeps (they are always immediately scheduled)
2086                                 - Phi (same as Keep)
2087                         */
2088                         if (mode == mode_T || mode == mode_X || is_Phi(irn))
2089                                 continue;
2090
2091                         /*
2092                                 In case of a proj, we skip
2093                                 - Barrier (they are a Barrier :)
2094                                 - Start
2095                                 - the Proj itself, as it's scheduled always with it's super node
2096                         */
2097                         if (is_Proj(irn)) {
2098                                 ir_node *pred = get_Proj_pred(irn);
2099                                 if (be_is_Barrier(pred) || is_Start(pred))
2100                                         continue;
2101                         }
2102
2103                         /* calculate the descendants and consumer for each node in the block */
2104                         collect_node_info(rss, skip_Proj(irn));
2105
2106                         if (be_is_Keep(irn))
2107                                 continue;
2108
2109                         if (! arch_irn_is(rss->arch_env, irn, ignore) && arch_get_irn_reg_class(rss->arch_env, irn, -1) == cls) {
2110                                 plist_insert_back(rss->nodes, skip_Proj(irn));
2111                         }
2112                         //}
2113                 }
2114
2115                 /* compute the potential killing set PK(G) */
2116                 compute_pkill_set(rss);
2117
2118                 /* compute the killing function k* */
2119                 compute_killing_function(rss);
2120
2121                 /*
2122                         Compute the heuristic value serialization and
2123                         add the necessary dependencies to the irg.
2124                 */
2125                 perform_value_serialization_heuristic(rss);
2126
2127                 del_pset(rss->live_block);
2128         }
2129
2130         phase_free(&rss->ph);
2131 }
2132
2133 /**
2134  * Register the options.
2135  */
2136 void be_init_schedrss(void) {
2137         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
2138         lc_opt_entry_t *sched_grp = lc_opt_get_grp(be_grp, "sched");
2139         lc_opt_entry_t *rss_grp = lc_opt_get_grp(sched_grp, "rss");
2140
2141         lc_opt_add_table(rss_grp, rss_option_table);
2142 }
2143
2144 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_schedrss);
2145
2146 /**
2147  * Preprocess the irg for scheduling.
2148  */
2149 void rss_schedule_preparation(const be_irg_t *birg) {
2150         ir_graph *irg = be_get_birg_irg(birg);
2151         rss_t rss;
2152
2153         FIRM_DBG_REGISTER(rss.dbg, "firm.be.sched.rss");
2154
2155         //firm_dbg_set_mask(rss.dbg, LEVEL_1 | LEVEL_2 | LEVEL_3);
2156
2157         init_rss_special_nodes(irg);
2158
2159         rss.irg      = irg;
2160         rss.arch_env = be_get_birg_arch_env(birg);
2161         rss.abi      = birg->abi;
2162         rss.h        = heights_new(irg);
2163         rss.nodes    = plist_new();
2164         rss.opts     = &rss_options;
2165         rss.liveness = be_liveness(irg);
2166         irg_block_walk_graph(irg, NULL, process_block, &rss);
2167         heights_free(rss.h);
2168         plist_free(rss.nodes);
2169         be_liveness_free(rss.liveness);
2170
2171         if (birg->main_env->options->dump_flags & DUMP_SCHED)
2172                 be_dump(rss.irg, "-rss", dump_ir_block_graph);
2173 }