cleanup irphase: phase_reinit is a special case and doesn't polute the main callback...
[libfirm] / ir / be / beschedrss.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Implementation of a register saturating list scheduler.
23  * @author      Christian Wuerdig
24  * @date        29.08.2006
25  * @version     $Id$
26  *
27  * Implementation of a register saturating list scheduler
28  * as described in: Sid-Ahmed-Ali Touati
29  * Register Saturation in Superscalar and VLIW Codes
30  */
31 #include "config.h"
32
33 #include "beschedrss.h"
34
35 #include <limits.h>
36
37 #include "obst.h"
38 #include "debug.h"
39
40 #include "irgraph_t.h"
41 #include "irnode_t.h"
42 #include "iredges_t.h"
43 #include "ircons_t.h"
44 #include "irphase_t.h"
45 #include "irgwalk.h"
46 #include "irdump.h"
47 #include "irtools.h"
48 #include "irbitset.h"
49 #include "irprintf.h"
50 #include "irnodeset.h"
51 #include "bipartite.h"
52 #include "hungarian.h"
53 #include "plist.h"
54 #include "array_t.h"
55
56 #include "height.h"
57
58 #include "beabi.h"
59 #include "bemodule.h"
60 #include "benode.h"
61 #include "besched.h"
62 #include "beirg.h"
63 #include "belive.h"
64
65 #include "lc_opts.h"
66 #include "lc_opts_enum.h"
67
68
69 #define ARR_LEN_SAFE(arr) ((arr) != NULL ? ARR_LEN((arr)) : 0)
70
71 #define HASH_RSS_EDGE(edge) ((get_irn_node_nr((edge)->src) << 16) | (get_irn_node_nr((edge)->tgt) & 0xFFFF))
72 #define BSEARCH_IRN_ARR(val, arr) \
73         bsearch(&(val), (arr), ARR_LEN_SAFE((arr)), sizeof((arr)[0]), cmp_irn_idx)
74
75 #define BLOCK_IDX_MAP(rss, irn) bsearch_for_index(get_irn_idx((irn)), (rss)->idx_map, ARR_LEN_SAFE((rss)->idx_map), 1)
76
77 /* the rss options */
78 typedef struct _rss_opts_t {
79         int dump_flags;
80 } rss_opts_t;
81
82 /* Represents a child with associated costs */
83 typedef struct _child {
84         ir_node *irn;
85         float   cost;
86 } child_t;
87
88 /* We need edges for several purposes. */
89 typedef struct _rss_edge {
90         ir_node *src;
91         ir_node *tgt;
92         void    *next;
93 } rss_edge_t;
94
95 /* Represents a connected bipartite component. */
96 typedef struct _cbc {
97         ir_nodeset_t parents;       /**< = S  a set of value producers */
98         ir_nodeset_t children;      /**< = T  a set of value consumers */
99         pset    *kill_edges;    /**< = E  a set of edges (t in T, s in S) such as each s in S gets killed by at least one t in T */
100         int     nr;             /**< a deterministic index for set insertion (used as hash) */
101 } cbc_t;
102
103 /* Represents a disjoint value DAG. */
104 typedef struct _dvg {
105         ir_nodeset_t nodes;
106         pset    *edges;
107 } dvg_t;
108
109 /* Represents a chain of nodes. */
110 typedef struct _chain {
111         plist_t *elements;   /**< List of chain elements */
112         int     nr;          /**< a deterministic index for set insertion (used as hash) */
113 } chain_t;
114
115 typedef struct _rss_irn {
116         plist_t  *consumer_list;    /**< List of consumers */
117         const ir_node **consumer;   /**< Sorted consumer array (needed for faster access) */
118
119         plist_t  *parent_list;      /**< List of parents */
120         plist_t  *pkiller_list;     /**< List of potential killers */
121
122         plist_t  *descendant_list;  /**< List of descendants */
123         const ir_node **descendants;      /**< Sorted descendant array (needed for faster access) */
124
125         const ir_node  *killer;     /**< The selected unique killer */
126         const ir_node  *irn;        /**< The corresponding firm node to this rss_irn */
127
128         chain_t  *chain;            /**< The chain, this node is associated to */
129
130         unsigned desc_walk;         /**< visited flag for collecting descendants */
131         unsigned kill_count;        /**< number of nodes killed by this one */
132
133         unsigned live_out : 1;      /**< irn has consumers outside of it's block */
134         unsigned visited  : 1;      /**< visited flag for bipartite decomposition */
135         unsigned havecons : 1;      /**< visited flag collect consumer */
136         unsigned handled  : 1;      /**< flag indicating whether or not the list structures have been build */
137         unsigned dumped   : 1;      /**< flag indication whether or not this node was dumped */
138 } rss_irn_t;
139
140 /* Represents a serialization edge with associated costs. */
141 typedef struct _serialization {
142         rss_irn_t  *u;       /* the top node */
143         rss_irn_t  *v;       /* the node about to be serialized after u */
144         rss_edge_t *edge;    /* the edge selected for the serialization */
145         int        omega1;   /* estimated: available regs - RS reduction */
146         int        omega2;   /* increase in critical path length */
147         int        new_killer;
148 } serialization_t;
149
150 typedef struct _rss {
151         ir_phase          ph;              /**< Phase to hold some data */
152         heights_t        *h;              /**< The current height object */
153         ir_graph         *irg;            /**< The irg to preprocess */
154         plist_t          *nodes;          /**< The list of interesting nodes */
155         const arch_env_t *arch_env;       /**< The architecture environment */
156         be_abi_irg_t     *abi;            /**< The abi for this irg */
157         pset             *cbc_set;        /**< A set of connected bipartite components */
158         ir_node          *block;          /**< The current block in progress. */
159         int              *idx_map;        /**< Mapping irn indices to per block indices */
160         unsigned         max_height;      /**< maximum height in the current block */
161         rss_opts_t       *opts;           /**< The options */
162         be_lv_t          *liveness;       /**< The liveness information for this irg */
163         ir_nodeset_t      live_block;     /**< Values alive at end of block */
164         const arch_register_class_t *cls; /**< The current register class */
165         DEBUG_ONLY(firm_dbg_module_t *dbg);
166 } rss_t;
167
168 #define get_rss_irn(rss, irn)  (phase_get_or_set_irn_data(&rss->ph, irn))
169
170 /**
171  * We need some special nodes:
172  * a source and a sink for all live-in and live-out values of a block
173  */
174 enum {
175         iro_rss_Source,
176         iro_rss_Sink,
177         iro_rss_last
178 };
179
180 /** The opcode of the rss_Source node once allocated. */
181 static ir_op *op_rss_Source;
182 /** The opcode of the rss_Sink node once allocated. */
183 static ir_op *op_rss_Sink;
184
185 /** The rss_Source node of the current graph. */
186 static ir_node *_source = NULL;
187 /** The rss_Sink node of the current graph. */
188 static ir_node *_sink   = NULL;
189
190 #define is_Source(irn) ((irn) == _source)
191 #define is_Sink(irn)   ((irn) == _sink)
192
193 enum {
194         RSS_DUMP_NONE  = 0,
195         RSS_DUMP_CBC   = 1 << 0,
196         RSS_DUMP_PKG   = 1 << 1,
197         RSS_DUMP_KILL  = 1 << 2,
198         RSS_DUMP_DVG   = 1 << 3,
199         RSS_DUMP_MAXAC = 1 << 4,
200         RSS_DUMP_ALL   = (RSS_DUMP_MAXAC << 1) - 1,
201 };
202
203 static rss_opts_t rss_options = {
204         RSS_DUMP_NONE,
205 };
206
207 static const lc_opt_enum_int_items_t dump_items[] = {
208         { "none",  RSS_DUMP_NONE  },
209         { "cbc",   RSS_DUMP_CBC   },
210         { "pkg",   RSS_DUMP_PKG   },
211         { "kill",  RSS_DUMP_KILL  },
212         { "dvg",   RSS_DUMP_DVG   },
213         { "maxac", RSS_DUMP_MAXAC },
214         { "all",   RSS_DUMP_ALL   },
215         { NULL, 0 }
216 };
217
218 static lc_opt_enum_int_var_t dump_var = {
219         &rss_options.dump_flags, dump_items
220 };
221
222 static const lc_opt_table_entry_t rss_option_table[] = {
223         LC_OPT_ENT_ENUM_MASK("dump", "dump phases", &dump_var),
224         LC_OPT_LAST
225 };
226
227 /******************************************************************************
228  *  _          _                    __                  _   _
229  * | |        | |                  / _|                | | (_)
230  * | |__   ___| |_ __   ___ _ __  | |_ _   _ _ __   ___| |_ _  ___  _ __  ___
231  * | '_ \ / _ \ | '_ \ / _ \ '__| |  _| | | | '_ \ / __| __| |/ _ \| '_ \/ __|
232  * | | | |  __/ | |_) |  __/ |    | | | |_| | | | | (__| |_| | (_) | | | \__ \
233  * |_| |_|\___|_| .__/ \___|_|    |_|  \__,_|_| |_|\___|\__|_|\___/|_| |_|___/
234  *            | |
235  *            |_|
236  ******************************************************************************/
237
238 /**
239  * Acquire opcodes if needed and create source and sink nodes.
240  */
241 static void init_rss_special_nodes(ir_graph *irg)
242 {
243         ir_node *block;
244
245         if (op_rss_Source == NULL) {
246                 int iro_rss_base = get_next_ir_opcodes(iro_rss_last);
247                 op_rss_Source = new_ir_op(iro_rss_base + iro_rss_Source, "rss_Source", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
248                 op_rss_Sink   = new_ir_op(iro_rss_base + iro_rss_Sink,   "rss_Sink",   op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
249         }
250         block   = get_irg_start_block(irg);
251         _source = new_ir_node(NULL, irg, block, op_rss_Source, mode_ANY, 0, NULL);
252         _sink   = new_ir_node(NULL, irg, block, op_rss_Sink, mode_ANY, 0, NULL);
253 }
254
255 static int cmp_int(const void *a, const void *b)
256 {
257         const int *i1 = a;
258         const int *i2 = b;
259
260         return QSORT_CMP(*i1, *i2);
261 }
262
263 static int cmp_child_costs(const void *a, const void *b)
264 {
265         const child_t *c1 = a;
266         const child_t *c2 = b;
267
268         return QSORT_CMP(c1->cost, c2->cost);
269 }
270
271 static int cmp_irn_idx(const void *a, const void *b)
272 {
273         const ir_node *n1 = *(ir_node **)a;
274         const ir_node *n2 = *(ir_node **)b;
275
276         return QSORT_CMP(get_irn_idx(n1), get_irn_idx(n2));
277 }
278
279 static int cmp_rss_edges(const void *a, const void *b)
280 {
281         const rss_edge_t *e1 = a;
282         const rss_edge_t *e2 = b;
283
284         return (e1->src != e2->src) || (e1->tgt != e2->tgt);
285 }
286
287 static int bsearch_for_index(int key, int *arr, size_t len, int force)
288 {
289         int left = 0;
290         int right = len;
291
292         while (right >= left) {
293                 int idx = (left + right) / 2;
294
295                 if (key < arr[idx])
296                         right = idx - 1;
297                 else if (key > arr[idx])
298                         left = idx + 1;
299                 else
300                         return idx;
301         }
302
303         if (force)
304                 assert(0 && "Something is wrong, key not found.");
305         return -1;
306 }
307
308 static const ir_node **build_sorted_array_from_list(plist_t *irn_list, struct obstack *obst)
309 {
310         plist_element_t *el;
311         int             i   = 0;
312         int             len = plist_count(irn_list);
313         const ir_node   **arr = (const ir_node**)NEW_ARR_D(ir_node*, obst, len);
314
315         /* copy the list into the array */
316         foreach_plist(irn_list, el) {
317                 arr[i++] = plist_element_get_value(el);
318         }
319
320         /* sort the array by node index */
321         /* HACK cast for MSVC */
322         qsort((void*)arr, len, sizeof(arr[0]), cmp_irn_idx);
323
324         return arr;
325 }
326
327 /*****************************************************
328  *      _      _                       _
329  *     | |    | |                     (_)
330  *   __| | ___| |__  _   _  __ _  __ _ _ _ __   __ _
331  *  / _` |/ _ \ '_ \| | | |/ _` |/ _` | | '_ \ / _` |
332  * | (_| |  __/ |_) | |_| | (_| | (_| | | | | | (_| |
333  *  \__,_|\___|_.__/ \__,_|\__, |\__, |_|_| |_|\__, |
334  *                          __/ | __/ |         __/ |
335  *                         |___/ |___/         |___/
336  *
337  *****************************************************/
338
339 #ifdef DEBUG_libfirm
340 static void dump_nodeset(ir_nodeset_t *ns, const char *prefix)
341 {
342         ir_nodeset_iterator_t iter;
343         ir_node *irn;
344
345         ir_nodeset_iterator_init(&iter, ns);
346         while ( (irn = ir_nodeset_iterator_next(&iter)) != NULL ) {
347                 ir_fprintf(stderr, "%s%+F\n", prefix, irn);
348         }
349 }
350 #endif
351
352 static void build_file_name(rss_t *rss, const char *suffix, size_t suf_len, char *buf, size_t len)
353 {
354         const char *irg_name;
355
356         memset(buf, 0, len);
357         irg_name = get_entity_name(get_irg_entity(rss->irg));
358         snprintf(buf, len - suf_len, "%s-%s-block-%ld",
359                 irg_name, arch_register_class_name(rss->cls), get_irn_node_nr(rss->block));
360         strcat(buf, suffix);
361 }
362
363 /* Dumps all collected bipartite components of current irg as vcg. */
364 static void debug_vcg_dump_bipartite(rss_t *rss)
365 {
366         cbc_t *cbc;
367         FILE  *f;
368         char  file_name[256];
369         static const char suffix[] = "-RSS-CBC.vcg";
370
371         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
372         f = fopen(file_name, "w");
373
374         ir_fprintf(f, "graph: { title: \"connected bipartite component graph of %+F\"\n", rss->irg);
375         fprintf(f, "display_edge_labels: no\n");
376         fprintf(f, "layoutalgorithm: mindepth\n");
377         fprintf(f, "manhattan_edges: yes\n\n");
378
379         foreach_pset(rss->cbc_set, cbc) {
380                 ir_nodeset_iterator_t iter;
381                 ir_node    *n;
382                 rss_edge_t *ke;
383
384                 fprintf(f, "graph: { titel: \"cbc %d\" label: \"cbc %d\" status:clustered color:yellow\n", cbc->nr, cbc->nr);
385                 foreach_ir_nodeset(&cbc->parents, n, iter) {
386                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
387                 }
388                 foreach_ir_nodeset(&cbc->children, n, iter) {
389                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
390                 }
391                 foreach_pset(cbc->kill_edges, ke) {
392                         ir_fprintf(f, "edge: { sourcename: \"n%d_%d\" targetname: \"n%d_%d\" }\n",
393                                 get_irn_node_nr(ke->src), cbc->nr, get_irn_node_nr(ke->tgt), cbc->nr);
394                 }
395                 fprintf(f, "}\n\n");
396         }
397         fprintf(f, "}\n");
398         fclose(f);
399 }
400
401 /* Dump the computed killing function as vcg. */
402 static void debug_vcg_dump_kill(rss_t *rss)
403 {
404         FILE            *f;
405         char            file_name[256];
406         plist_element_t *el;
407         static const char suffix[] = "-RSS-KILL.vcg";
408
409         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
410         f = fopen(file_name, "w");
411
412         ir_fprintf(f, "graph: { title: \"computed kill graph of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
413         fprintf(f, "display_edge_labels: no\n");
414         fprintf(f, "layoutalgorithm: mindepth\n");
415         fprintf(f, "manhattan_edges: yes\n\n");
416
417         {
418                 /* reset dump_flag */
419                 ir_node *irn;
420                 foreach_phase_irn(&rss->ph, irn) {
421                         rss_irn_t *node = get_rss_irn(rss, irn);
422                         node->dumped = 0;
423                 }
424         }
425
426         /* dump all nodes and their killers */
427         foreach_plist(rss->nodes, el) {
428                 ir_node   *irn     = plist_element_get_value(el);
429                 rss_irn_t *rirn    = get_rss_irn(rss, irn);
430                 rss_irn_t *pk_rirn = get_rss_irn(rss, rirn->killer);
431
432                 if (! rirn->dumped) {
433                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
434                         rirn->dumped = 1;
435                 }
436
437                 if (! pk_rirn->dumped) {
438                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(rirn->killer), rirn->killer);
439                         pk_rirn->dumped = 1;
440                 }
441
442                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
443                         get_irn_node_nr(rirn->killer), get_irn_node_nr(irn));
444         }
445
446         fprintf(f, "}\n");
447         fclose(f);
448 }
449
450 /* Dumps the potential killing DAG (PKG) as vcg. */
451 static void debug_vcg_dump_pkg(rss_t *rss, ir_nodeset_t *max_ac, int iteration)
452 {
453         FILE              *f;
454         char              file_name[256];
455         char              suffix[32];
456         static const char suffix1[] = "-RSS-PKG.vcg";
457         static const char suffix2[] = "-RSS-PKG-MAXAC.vcg";
458         plist_element_t   *el;
459
460         if (! max_ac) {
461                 snprintf(suffix, sizeof(suffix), "%s", suffix1);
462         }
463         else {
464                 snprintf(suffix, sizeof(suffix), "-%02d%s", iteration, suffix2);
465         }
466
467         build_file_name(rss, suffix, strlen(suffix) + 1, file_name, sizeof(file_name));
468         f = fopen(file_name, "w");
469
470         ir_fprintf(f, "graph: { title: \"potential killing DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
471         fprintf(f, "display_edge_labels: no\n");
472         fprintf(f, "layoutalgorithm: mindepth\n");
473         fprintf(f, "manhattan_edges: yes\n\n");
474
475         {
476                 /* reset dump_flag */
477                 ir_node *irn;
478                 foreach_phase_irn(&rss->ph, irn) {
479                         rss_irn_t *node = get_rss_irn(rss, irn);
480                         node->dumped = 0;
481                 }
482         }
483
484         foreach_plist(rss->nodes, el) {
485                 ir_node   *irn  = plist_element_get_value(el);
486                 rss_irn_t *rirn = get_rss_irn(rss, irn);
487                 char      *c1   = "";
488                 plist_element_t *k_el;
489
490                 /* dump selected saturating values in yellow */
491                 if (max_ac && ir_nodeset_contains(max_ac, irn))
492                         c1 = "color:yellow";
493
494                 if (! rirn->dumped) {
495                         if (rirn->chain)
496                                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(irn), irn, rirn->chain->nr, c1);
497                         else
498                                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(irn), irn, c1);
499                         rirn->dumped = 1;
500                 }
501
502                 foreach_plist(rirn->pkiller_list, k_el) {
503                         ir_node   *pkiller = plist_element_get_value(k_el);
504                         rss_irn_t *pk_rirn = get_rss_irn(rss, pkiller);
505                         char      *c2      = "";
506
507                         if (max_ac && ir_nodeset_contains(max_ac, pkiller))
508                                 c2 = "color:yellow";
509
510                         if (! pk_rirn->dumped) {
511                                 if (pk_rirn->chain)
512                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(pkiller), pkiller, pk_rirn->chain->nr, c2);
513                                 else
514                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(pkiller), pkiller, c2);
515                                 pk_rirn->dumped = 1;
516                         }
517                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
518                                 get_irn_node_nr(pkiller), get_irn_node_nr(irn));
519                 }
520         }
521         fprintf(f, "}\n");
522         fclose(f);
523 }
524
525 /* Dumps the disjoint value DAG (DVG) as vcg. */
526 static void debug_vcg_dump_dvg(rss_t *rss, dvg_t *dvg)
527 {
528         static const char suffix[] = "-RSS-DVG.vcg";
529         FILE       *f;
530         char       file_name[256];
531         ir_nodeset_iterator_t iter;
532         ir_node    *irn;
533         rss_edge_t *edge;
534
535         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
536         f = fopen(file_name, "w");
537
538         ir_fprintf(f, "graph: { title: \"disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
539         fprintf(f, "display_edge_labels: no\n");
540         fprintf(f, "layoutalgorithm: mindepth\n");
541         fprintf(f, "manhattan_edges: yes\n\n");
542
543         /* dump all nodes */
544         foreach_ir_nodeset(&dvg->nodes, irn, iter) {
545                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
546         }
547
548         /* dump all edges */
549         foreach_pset(dvg->edges, edge) {
550                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
551                         get_irn_node_nr(edge->src), get_irn_node_nr(edge->tgt));
552         }
553
554         fprintf(f, "}\n");
555         fclose(f);
556 }
557
558 #if 0
559 /* Dumps the PKG(DVG). */
560 static void debug_vcg_dump_dvg_pkiller(rss_t *rss, dvg_t *dvg)
561 {
562         static const char suffix[] = "-RSS-DVG-PKG.vcg";
563         FILE       *f;
564         char       file_name[256];
565         ir_nodeset_iterator_t iter;
566         ir_node    *irn;
567
568         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
569         f = fopen(file_name, "w");
570
571         ir_fprintf(f, "graph: { title: \"PKG of disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
572         fprintf(f, "display_edge_labels: no\n");
573         fprintf(f, "layoutalgorithm: mindepth\n");
574         fprintf(f, "manhattan_edges: yes\n\n");
575
576         /* dump all nodes */
577         foreach_ir_nodeset(&dvg->nodes, irn, iter) {
578                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
579         }
580
581         /* dump all edges */
582         foreach_ir_nodeset(&dvg->nodes, irn, iter) {
583                 rss_irn_t       *node = get_rss_irn(rss, irn);
584                 plist_element_t *el;
585
586                 foreach_plist(node->dvg_pkiller_list, el) {
587                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
588                                 get_irn_node_nr(plist_element_get_value(el)), get_irn_node_nr(irn));
589                 }
590         }
591
592         fprintf(f, "}\n");
593         fclose(f);
594 }
595 #endif /* if 0 */
596
597 /**
598  * In case there is no rss information for irn, initialize it.
599  */
600 static void *init_rss_irn(ir_phase *ph, const ir_node *irn)
601 {
602         rss_irn_t *res = phase_alloc(ph, sizeof(res[0]));
603
604         res->descendant_list = plist_obstack_new(phase_obst(ph));
605         res->descendants     = NULL;
606
607         res->consumer_list   = plist_obstack_new(phase_obst(ph));
608         res->consumer        = NULL;
609
610         res->pkiller_list    = plist_obstack_new(phase_obst(ph));
611
612         res->parent_list     = plist_obstack_new(phase_obst(ph));
613
614         res->killer          = NULL;
615         res->irn             = irn;
616         res->chain           = NULL;
617
618         res->kill_count      = 0;
619         res->desc_walk       = 0;
620         res->live_out        = 0;
621         res->visited         = 0;
622         res->handled         = 0;
623         res->dumped          = 0;
624         res->havecons        = 0;
625
626         return res;
627 }
628
629 /**
630  * Collect all nodes data dependent on current node.
631  */
632 static void collect_descendants(rss_t *rss, rss_irn_t *rirn, ir_node *irn, int *got_sink, unsigned cur_desc_walk)
633 {
634         const ir_edge_t *edge;
635         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
636         ir_node         *block    = rss->block;
637         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
638         int             i;
639
640         if (cur_node->desc_walk >= cur_desc_walk)
641                 return;
642         cur_node->desc_walk = cur_desc_walk;
643
644         /* Stop at Barriers */
645         if (be_is_Barrier(irn))
646                 return;
647
648         /* loop over normal and dependency edges */
649         for (i = 0; i < 2; ++i) {
650                 foreach_out_edge_kind(irn, edge, ekind[i]) {
651                         ir_node *user = get_edge_src_irn(edge);
652
653                         /* skip ignore nodes as they do not really contribute to register pressure */
654                         if (arch_irn_is_ignore(user))
655                                 continue;
656
657                         /*
658                                 (a) mode_X means Jump            -> out_edge
659                                 (b) Phi as user of a normal node -> out-edge
660                         */
661                         if (get_irn_mode(user) == mode_X || is_Phi(user)) {
662                                 if (! *got_sink)
663                                         goto force_sink;
664                                 else
665                                         continue;
666                         }
667
668                         if (is_Proj(user)) {
669                                 //if (arch_get_irn_reg_class_out(user) == rss->cls)
670                                         collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
671                         }
672                         else {
673                                 /* check if user lives in block and is not a control flow node */
674                                 if (get_nodes_block(user) == block) {
675                                         if (! plist_has_value(rirn->descendant_list, user)) {
676                                                 plist_insert_back(rirn->descendant_list, user);
677                                                 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", user));
678                                         }
679                                         collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
680                                 }
681                                 else if (! *got_sink) {
682 force_sink:
683                                         /* user lives out of block: add sink as descendant if not already done */
684                                         plist_insert_back(rirn->descendant_list, _sink);
685                                         *got_sink = 1;
686                                         DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", _sink));
687                                 }
688                         }
689                 }
690         }
691 }
692
693 /**
694  * Handles a single consumer.
695  */
696 static void collect_single_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *consumer, int *got_sink)
697 {
698         ir_node *block = rss->block;
699
700         assert(! is_Proj(consumer) && "Cannot handle Projs");
701
702         if (! is_Phi(consumer) && ! is_Block(consumer) && get_nodes_block(consumer) == block) {
703                 if (!arch_irn_is_ignore(consumer) &&
704                                 !plist_has_value(rss_irn->consumer_list, consumer)) {
705                         plist_insert_back(rss_irn->consumer_list, consumer);
706                         DBG((rss->dbg, LEVEL_2, "\t\tconsumer %+F\n", consumer));
707                 }
708         }
709         else {
710                 rss_irn->live_out = 1;
711                 DBG((rss->dbg, LEVEL_2, "\t\tlive out %+F", consumer));
712                 if (! *got_sink) {
713                         plist_insert_back(rss_irn->consumer_list, _sink);
714                         *got_sink = 1;
715                         DB((rss->dbg, LEVEL_2, ", %+F added instead", _sink));
716                 }
717                 DB((rss->dbg, LEVEL_2, "\n"));
718         }
719 }
720
721 /**
722  * Collect all nodes consuming the value(s) produced by current node.
723  */
724 static void collect_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *irn, int *got_sink)
725 {
726         const ir_edge_t *edge;
727         int             i;
728         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
729         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
730
731         if (cur_node->havecons)
732                 return;
733         cur_node->havecons = 1;
734
735         for (i = 0; i < 2; ++i) {
736                 foreach_out_edge_kind(irn, edge, ekind[i]) {
737                         ir_node *consumer = get_edge_src_irn(edge);
738
739                         if (is_Proj(consumer)) {
740                                 //if (arch_get_irn_reg_class_out(consumer) == rss->cls)
741                                         collect_consumer(rss, rss_irn, consumer, got_sink);
742                         }
743                         else
744                                 collect_single_consumer(rss, rss_irn, consumer, got_sink);
745                 }
746         }
747 }
748
749 /**
750  * Collects all consumer and descendant of a irn.
751  */
752 static void collect_node_info(rss_t *rss, ir_node *irn)
753 {
754         static unsigned cur_desc_walk = 0;
755         rss_irn_t       *rss_irn      = get_rss_irn(rss, irn);
756         int             got_sink;
757
758         assert(! is_Proj(irn) && "Cannot handle Projs.");
759
760         /* check if node info is already available */
761         if (rss_irn->handled)
762                 return;
763
764         DBG((rss->dbg, LEVEL_1, "\tcomputing consumers of %+F:\n", irn));
765
766         /* collect all consumer */
767         got_sink = 0;
768         collect_consumer(rss, rss_irn, irn, &got_sink);
769
770         /* build sorted consumer array */
771         rss_irn->consumer = build_sorted_array_from_list(rss_irn->consumer_list, phase_obst(&rss->ph));
772
773         DBG((rss->dbg, LEVEL_1, "\tcompute descendants of %+F:\n", irn));
774
775         /* collect descendants */
776         got_sink = 0;
777         collect_descendants(rss, rss_irn, irn, &got_sink, ++cur_desc_walk);
778
779         /* build sorted descendant array */
780         rss_irn->descendants = build_sorted_array_from_list(rss_irn->descendant_list, phase_obst(&rss->ph));
781
782         rss_irn->handled = 1;
783 }
784
785 /**
786  * Checks if v is a potential killer of u.
787  * v is in pkill(u) iff descendants(v) cut consumer(u) is v
788  *
789  * @param rss   The rss object
790  * @param v      The node to check for killer
791  * @param u      The potentially killed value
792  * @return 1 if v is in pkill(u), 0 otherwise
793  */
794 static int is_potential_killer(rss_t *rss, rss_irn_t *v, rss_irn_t *u)
795 {
796         plist_t *list;
797         const ir_node **arr;
798         plist_element_t *el;
799         (void) rss;
800
801         /* as we loop over the list: loop over the shorter one */
802         if (plist_count(v->descendant_list) > plist_count(u->consumer_list)) {
803                 list = u->consumer_list;
804                 arr  = v->descendants;
805         }
806         else {
807                 list = v->descendant_list;
808                 arr  = u->consumer;
809         }
810
811         /* for each list element: try to find element in array */
812         foreach_plist(list, el) {
813                 ir_node *irn   = plist_element_get_value(el);
814                 ir_node *match = BSEARCH_IRN_ARR(irn, arr);
815
816                 if (match && match != irn)
817                         return 0;
818         }
819
820         return 1;
821 }
822
823 /**
824  * Update descendants, consumer and pkiller list for the given irn.
825  */
826 static void update_node_info(rss_t *rss, ir_node *irn, ir_node *pk_irn)
827 {
828         rss_irn_t *node    = get_rss_irn(rss, irn);
829         rss_irn_t *pkiller = get_rss_irn(rss, pk_irn);
830
831         /* add consumer and rebuild the consumer array */
832         if (! plist_has_value(node->consumer_list, pk_irn)) {
833                 plist_insert_back(node->consumer_list, pk_irn);
834                 node->consumer = build_sorted_array_from_list(node->consumer_list, phase_obst(&rss->ph));
835         }
836
837         /* add pk_irn as descendant, add it's descendants to irn's and rebuild array */
838         if (! plist_has_value(node->descendant_list, pk_irn)) {
839                 plist_element_t *el;
840
841                 plist_insert_back(node->descendant_list, pk_irn);
842
843                 foreach_plist(pkiller->descendant_list, el) {
844                         ir_node *desc = plist_element_get_value(el);
845
846                         if (! plist_has_value(node->descendant_list, desc))
847                                 plist_insert_back(node->descendant_list, desc);
848                 }
849
850                 node->descendants = build_sorted_array_from_list(node->descendant_list, phase_obst(&rss->ph));
851         }
852
853 }
854
855 /**
856  * Compute the potential killing set PK.
857  */
858 static void compute_pkill_set(rss_t *rss)
859 {
860         plist_element_t *u_el, *v_el;
861
862         foreach_plist(rss->nodes, u_el) {
863                 ir_node   *u_irn = plist_element_get_value(u_el);
864                 rss_irn_t *u     = get_rss_irn(rss, u_irn);
865
866                 DBG((rss->dbg, LEVEL_1, "\tcomputing potential killers of %+F:\n", u_irn));
867
868                 /* check each consumer if it is a potential killer  */
869                 foreach_plist(u->consumer_list, v_el) {
870                         ir_node   *v_irn = plist_element_get_value(v_el);
871                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
872
873                         /* check, if v is a potential killer of u */
874                         if (is_potential_killer(rss, v, u)) {
875                                 if (! plist_has_value(u->pkiller_list, v_irn))
876                                         plist_insert_back(u->pkiller_list, v_irn);
877                                 v->kill_count++;
878                                 DBG((rss->dbg, LEVEL_2, "\t\tpotential killer %+F\n", v_irn));
879                         }
880                 }
881
882                 u->killer = _sink;
883         }
884
885         if (rss->opts->dump_flags & RSS_DUMP_PKG)
886                 debug_vcg_dump_pkg(rss, NULL, 0);
887 }
888
889 /**
890  * Build set of killing edges (from values to their potential killers)
891  */
892 static void build_kill_edges(rss_t *rss, pset *epk)
893 {
894         plist_element_t *el, *k_el;
895
896         foreach_plist(rss->nodes, el) {
897                 ir_node    *irn  = plist_element_get_value(el);
898                 rss_irn_t *rirn = get_rss_irn(rss, irn);
899
900                 DBG((rss->dbg, LEVEL_3, "\t\tbuilding kill edges for %+F:\n", irn));
901
902                 foreach_plist(rirn->pkiller_list, k_el) {
903                         ir_node    *pkiller = plist_element_get_value(k_el);
904                         rss_edge_t *ke      = OALLOC(phase_obst(&rss->ph), rss_edge_t);
905
906                         ke->src  = irn;
907                         ke->tgt  = pkiller;
908                         ke->next = NULL;
909
910                         DBG((rss->dbg, LEVEL_3, "\t\t\ttarget %+F\n", pkiller));
911
912                         pset_insert(epk, ke, HASH_RSS_EDGE(ke));
913                 }
914         }
915 }
916
917 #ifdef DEBUG_libfirm
918 /* print the given cbc for debugging purpose */
919 static void debug_print_cbc(firm_dbg_module_t *mod, cbc_t *cbc)
920 {
921         ir_nodeset_iterator_t iter;
922         ir_node    *n;
923         rss_edge_t *ke;
924
925         DBG((mod, LEVEL_3, "\t\tS = set of parents:\n"));
926         foreach_ir_nodeset(&cbc->parents, n, iter) {
927                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
928         }
929         DBG((mod, LEVEL_3, "\t\tT = set of children:\n"));
930         foreach_ir_nodeset(&cbc->children, n, iter) {
931                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
932         }
933         DBG((mod, LEVEL_3, "\t\tE = Edges from producers to consumers\n"));
934         foreach_pset(cbc->kill_edges, ke) {
935                 DBG((mod, LEVEL_3, "\t\t\t%+F -> %+F\n", ke->src, ke->tgt));
936         }
937 }
938 #endif
939
940 /**
941  * Construct the bipartite decomposition.
942  * Sid-Ahmed-Ali Touati, Phd Thesis
943  * Register Pressure in Instruction Level Parallelism, p. 71
944  */
945 static void compute_bipartite_decomposition(rss_t *rss)
946 {
947         pset *epk    = new_pset(cmp_rss_edges, 10);
948         int  cur_num = 0;
949
950         plist_element_t *el;
951
952         DBG((rss->dbg, LEVEL_1, "\tcomputing bipartite decomposition:\n"));
953
954         build_kill_edges(rss, epk);
955
956         foreach_plist(rss->nodes, el) {
957                 ir_node   *u_irn   = plist_element_get_value(el);
958                 rss_irn_t *u       = get_rss_irn(rss, u_irn);
959                 int       p_change = 1;
960                 int       c_change = 1;
961                 int       vrfy_ok  = 1;
962
963                 cbc_t           *cbc;
964                 plist_element_t *el2;
965                 rss_edge_t      *k_edge;
966                 rss_edge_t      *kedge_root = NULL;
967                 ir_node         *t_irn, *s_irn;
968                 ir_nodeset_iterator_t iter;
969
970                 if (u->visited || u_irn == _sink)
971                         continue;
972
973                 DBG((rss->dbg, LEVEL_2, "\t\t%+F choosen:\n", u_irn));
974
975                 cbc     = OALLOC(phase_obst(&rss->ph), cbc_t);
976                 cbc->nr = cur_num++;
977
978                 /* initialize S_cb */
979                 ir_nodeset_init_size(&cbc->parents, 5);
980                 ir_nodeset_insert(&cbc->parents, u_irn);
981                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (init)\n", u_irn));
982
983                 /* E_cb = empty */
984                 cbc->kill_edges = new_pset(cmp_rss_edges, 5);
985
986                 /* each parent gets killed by at least one value from children */
987
988                 /* T_cb = PK_successors(u) */
989                 ir_nodeset_init_size(&cbc->children, 5);
990                 foreach_plist(u->pkiller_list, el2) {
991                         ir_nodeset_insert(&cbc->children, plist_element_get_value(el2));
992                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (init)\n", plist_element_get_value(el2)));
993                 }
994
995                 /*
996                         Now: insert the parents of all children into the parent set
997                         and insert the children of all parents into the children set
998                         until the sets don't change any more
999                 */
1000                 while (p_change || c_change) {
1001                         ir_nodeset_iterator_t iter;
1002                         p_change = c_change = 0;
1003
1004                         /* accumulate parents */
1005                         foreach_ir_nodeset(&cbc->children, t_irn, iter) {
1006                                 foreach_pset(epk, k_edge) {
1007                                         ir_node *val = k_edge->src;
1008
1009                                         if (k_edge->tgt == t_irn && ! ir_nodeset_contains(&cbc->parents, val)) {
1010                                                 ir_nodeset_insert(&cbc->parents, val);
1011                                                 p_change = 1;
1012                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (killed by %+F)\n", val, t_irn));
1013                                         }
1014                                 }
1015                         }
1016
1017                         /* accumulate children */
1018                         foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
1019                                 foreach_pset(epk, k_edge) {
1020                                         ir_node *val = k_edge->tgt;
1021
1022                                         if (k_edge->src == s_irn  && ! ir_nodeset_contains(&cbc->children, val)) {
1023                                                 ir_nodeset_insert(&cbc->children, val);
1024                                                 c_change = 1;
1025                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (kills %+F)\n", val, s_irn));
1026                                         }
1027                                 }
1028                         }
1029                 }
1030
1031                 /* mark all parent values as visited */
1032                 foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
1033                         rss_irn_t *s = get_rss_irn(rss, s_irn);
1034                         s->visited = 1;
1035                         /* assure bipartite property */
1036 #if 0
1037                         if (ir_nodeset_contains(&cbc->children, s_irn)) {
1038                                 ir_nodeset_remove(&cbc->children, s_irn);
1039                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F removed from to children\n", s_irn));
1040                         }
1041 #endif
1042                 }
1043
1044                 /* update edges */
1045                 foreach_pset(epk, k_edge) {
1046                         if (ir_nodeset_contains(&cbc->parents, k_edge->src) &&
1047                             ir_nodeset_contains(&cbc->children, k_edge->tgt)) {
1048                                 pset_insert(cbc->kill_edges, k_edge, HASH_RSS_EDGE(k_edge));
1049                                 /*
1050                                         Link all k_edges which are about to be removed together.
1051                                         Beware: pset_remove kills the iterator.
1052                                 */
1053                                 k_edge->next = kedge_root;
1054                                 kedge_root   = k_edge;
1055                         }
1056                 }
1057
1058                 /* remove all linked k_edges */
1059                 for (; kedge_root; kedge_root = kedge_root->next) {
1060                         pset_remove(epk, kedge_root, HASH_RSS_EDGE(kedge_root));
1061                 }
1062
1063                 /* verify the cbc */
1064                 foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
1065                         int is_killed = 0;
1066
1067                         foreach_pset(cbc->kill_edges, k_edge) {
1068                                 if (k_edge->src == s_irn) {
1069                                         is_killed = 1;
1070                                         pset_break(cbc->kill_edges);
1071                                         break;
1072                                 }
1073                         }
1074
1075                         if (! is_killed) {
1076                                 vrfy_ok = 0;
1077                                 ir_fprintf(stderr, "Warning: parent %+F is not killed in current cbc\n", s_irn);
1078                         }
1079                 }
1080                 assert(vrfy_ok && "Verification of CBC failed");
1081
1082                 /* add the connected bipartite component */
1083                 if (ir_nodeset_size(&cbc->parents) > 0 &&
1084                     ir_nodeset_size(&cbc->children) > 0 &&
1085                         pset_count(cbc->kill_edges) > 0)
1086                         pset_insert(rss->cbc_set, cbc, (unsigned)cbc->nr);
1087                 DBG((rss->dbg, LEVEL_2, "\tbipartite component %d inserted:\n", cbc->nr));
1088                 DEBUG_ONLY(
1089                         if (firm_dbg_get_mask(rss->dbg) & LEVEL_2)
1090                                 debug_print_cbc(rss->dbg, cbc);
1091                 );
1092         }
1093
1094         if (rss->opts->dump_flags & RSS_DUMP_CBC)
1095                 debug_vcg_dump_bipartite(rss);
1096
1097         del_pset(epk);
1098 }
1099
1100 /**
1101  * Select the child with the maximum cost.
1102  */
1103 static child_t *select_child_max_cost(rss_t *rss, ir_nodeset_t *x, ir_nodeset_t *y, child_t *t, cbc_t *cbc)
1104 {
1105         ir_node *child;
1106         ir_nodeset_iterator_t iter;
1107         float   max_cost = -1.0f;
1108
1109         DBG((rss->dbg, LEVEL_2, "\t\tcomputing children costs:\n"));
1110
1111         foreach_ir_nodeset(&cbc->children, child, iter) {
1112                 rss_irn_t  *r_child             = get_rss_irn(rss, child);
1113                 int         num_unkilled_parents = 0;
1114                 int         num_descendants;
1115                 rss_edge_t *k_edge;
1116                 float       cost;
1117
1118                 /* get the number of unkilled parents */
1119                 foreach_pset(cbc->kill_edges, k_edge) {
1120                         if (k_edge->tgt == child && ir_nodeset_contains(x, k_edge->src))
1121                                 ++num_unkilled_parents;
1122                 }
1123
1124                 cost = (float)num_unkilled_parents;
1125
1126                 num_descendants = plist_count(r_child->descendant_list) + ir_nodeset_size(y);
1127
1128                 if (num_descendants > 0)
1129                         cost /= (float)num_descendants;
1130
1131                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F, #desc %d, cost %.3f\n", child, num_descendants, cost));
1132
1133                 if (cost > max_cost) {
1134                         t->irn   = child;
1135                         t->cost  = cost;
1136                         max_cost = cost;
1137                 }
1138         }
1139
1140         return t;
1141 }
1142
1143 /**
1144  * Remove all parents from x which are killed by t_irn.
1145  */
1146 static void remove_covered_parents(rss_t *rss, ir_nodeset_t *x, ir_node *t_irn, cbc_t *cbc)
1147 {
1148         rss_irn_t  *t = get_rss_irn(rss, t_irn);
1149         rss_edge_t *k_edge;
1150
1151         DBG((rss->dbg, LEVEL_2, "\t\tremoving parents covered by %+F:\n", t_irn));
1152
1153         foreach_pset(cbc->kill_edges, k_edge) {
1154                 if (k_edge->tgt == t_irn && ir_nodeset_contains(x, k_edge->src)) {
1155                         ir_nodeset_remove(x, k_edge->src);
1156                         plist_insert_back(t->parent_list, k_edge->src);
1157                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", k_edge->src));
1158                 }
1159         }
1160 }
1161
1162 static void update_cumulated_descendent_values(rss_t *rss, ir_nodeset_t *y, ir_node *t_irn)
1163 {
1164         rss_irn_t *t = get_rss_irn(rss, t_irn);
1165         plist_element_t *el;
1166
1167         DBG((rss->dbg, LEVEL_2, "\t\tupdating cumulated descendant value of %+F:\n", t_irn));
1168
1169         foreach_plist(t->descendant_list, el) {
1170                 ir_nodeset_insert(y, plist_element_get_value(el));
1171                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", plist_element_get_value(el)));
1172         }
1173 }
1174
1175 /**
1176  * Greedy-k: a heuristics for the MMA problem
1177  */
1178 static void compute_killing_function(rss_t *rss)
1179 {
1180         cbc_t *cbc;
1181         struct obstack obst;
1182
1183         obstack_init(&obst);
1184
1185         rss->cbc_set = pset_new_ptr(5);
1186         compute_bipartite_decomposition(rss);
1187
1188         /* for all bipartite components do: */
1189         foreach_pset(rss->cbc_set, cbc) {
1190                 ir_node *p;
1191                 ir_nodeset_t x;
1192                 ir_nodeset_t y;
1193                 ir_nodeset_iterator_t iter;
1194                 child_t **sks    = NEW_ARR_F(child_t *, 20);
1195                 int     cur_len  = 0;
1196                 int     cur_size = 20;
1197                 int     i;
1198
1199                 ir_nodeset_init_size(&x, 10);
1200                 ir_nodeset_init_size(&y, 10);
1201
1202                 DBG((rss->dbg, LEVEL_1, "\tcomputing SKS for cbc %d:\n", cbc->nr));
1203                 DBG((rss->dbg, LEVEL_2, "\t\tinitializing parents X:\n"));
1204
1205                 /* X = S_cb (all parents are initially uncovered) */
1206                 foreach_ir_nodeset(&cbc->parents, p, iter) {
1207                         ir_nodeset_insert(&x, p);
1208                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", p));
1209                 }
1210
1211                 /* while X not empty */
1212                 while (ir_nodeset_size(&x) > 0) {
1213                         child_t *t = OALLOCZ(&obst, child_t);
1214
1215                         t = select_child_max_cost(rss, &x, &y, t, cbc);
1216
1217                         if (cur_len >= cur_size) {
1218                                 ARR_EXTO(child_t *, sks, cur_size * 2);
1219                                 cur_size *= 2;
1220                         }
1221
1222                         DBG((rss->dbg, LEVEL_2, "\t\tinsert child %+F (%.3f) into SKS at pos %d\n", t->irn, t->cost, cur_len));
1223
1224                         sks[cur_len++] = t;
1225                         remove_covered_parents(rss, &x, t->irn, cbc);
1226                         update_cumulated_descendent_values(rss, &y, t->irn);
1227                 }
1228
1229                 ARR_SHRINKLEN(sks, cur_len);
1230
1231                 /* sort SKS in increasing cost order */
1232                 qsort(sks, cur_len, sizeof(sks[0]), cmp_child_costs);
1233
1234                 DBG((rss->dbg, LEVEL_2, "\tprocessing SKS for cbc %d:\n", cbc->nr));
1235
1236                 /* build killing function */
1237                 for (i = cur_len - 1; i >= 0; --i) { /* loop over sks in decreasing cost order */
1238                         child_t         *t = sks[i];
1239                         rss_irn_t       *rt = get_rss_irn(rss, t->irn);
1240                         plist_element_t *p_el;
1241
1242                         DBG((rss->dbg, LEVEL_3, "\t\t\tkiller %+F (%.3f):\n", t->irn, t->cost));
1243
1244                         /* kill all unkilled parents of t */
1245                         foreach_plist(rt->parent_list, p_el) {
1246                                 ir_node    *par = plist_element_get_value(p_el);
1247                                 rss_irn_t *rpar = get_rss_irn(rss, par);
1248
1249                                 if (is_Sink(rpar->killer)) {
1250                                         rpar->killer = t->irn;
1251                                         DBG((rss->dbg, LEVEL_2, "\t\tkill %+F\n", rpar->irn));
1252                                 }
1253                                 else {
1254                                         DBG((rss->dbg, LEVEL_3, "\t\t\tkeeping %+F as killer for %+F\n", rpar->killer, rpar->irn));
1255                                 }
1256                         }
1257                 }
1258
1259                 ir_nodeset_destroy(&x);
1260                 ir_nodeset_destroy(&y);
1261                 DEL_ARR_F(sks);
1262         }
1263
1264         if (rss->opts->dump_flags & RSS_DUMP_KILL)
1265                 debug_vcg_dump_kill(rss);
1266
1267         del_pset(rss->cbc_set);
1268         obstack_free(&obst, NULL);
1269 }
1270
1271 /**
1272  * Adds the edge src -> tgt to the dvg. Checks if reverse edge is already there (asserts).
1273  */
1274 static inline void add_dvg_edge(rss_t *rss, dvg_t *dvg, const ir_node *src, const ir_node *tgt, int have_source)
1275 {
1276         rss_edge_t *dvg_edge;
1277         rss_edge_t key;
1278
1279         if (! have_source)
1280                 ir_nodeset_insert(&dvg->nodes, (ir_node *) src);
1281         else
1282                 assert(ir_nodeset_contains(&dvg->nodes, src) && "Wrong assumption");
1283
1284         ir_nodeset_insert(&dvg->nodes, (ir_node *) tgt);
1285
1286         key.src = (ir_node *) tgt;
1287         key.tgt = (ir_node *) src;
1288         assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1289
1290         key.src = (ir_node *) src;
1291         key.tgt = (ir_node *) tgt;
1292         if (NULL != pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1293                 /* add the edge to the DVG */
1294                 dvg_edge = OALLOC(phase_obst(&rss->ph), rss_edge_t);
1295
1296                 dvg_edge->src  = (ir_node *) src;
1297                 dvg_edge->tgt  = (ir_node *) tgt;
1298                 dvg_edge->next = NULL;
1299
1300                 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", src, tgt));
1301                 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1302         }
1303 }
1304
1305 /**
1306  * Computes the disjoint value DAG (DVG).
1307  * BEWARE: It is not made explicitly clear in the Touati paper,
1308  *         but the DVG is meant to be build from the KILLING DAG
1309  */
1310 static void compute_dvg(rss_t *rss, dvg_t *dvg)
1311 {
1312         plist_element_t *el;
1313
1314         DBG((rss->dbg, LEVEL_1, "\tcomputing DVG:\n"));
1315
1316         foreach_plist(rss->nodes, el) {
1317                 ir_node    *u_irn    = plist_element_get_value(el);
1318                 rss_irn_t  *u        = get_rss_irn(rss, u_irn);
1319                 rss_irn_t  *u_killer = get_rss_irn(rss, u->killer);
1320                 int        i;
1321
1322                 /* TODO: omit nodes only having sink as pkiller and killing no one */
1323
1324                 /* add an edge to killer */
1325                 add_dvg_edge(rss, dvg, u_irn, u->killer, 0);
1326
1327                 if (is_Sink(u->killer))
1328                         continue;
1329
1330                 /* We add an edge to every killer, from where we could be reached. */
1331                 for (i = ARR_LEN_SAFE(u_killer->descendants) - 1; i >= 0; --i) {
1332                         add_dvg_edge(rss, dvg, u_irn, u_killer->descendants[i], 1);
1333                 }
1334
1335 #if 0
1336
1337                 foreach_plist(rss->nodes, el2) {
1338                         ir_node *v_irn = plist_element_get_value(el2);
1339
1340                         /*
1341                                 There is an edge (u, v) in the DVG iff v is a descendant of the killer(u).
1342                         */
1343                         if (BSEARCH_IRN_ARR(v_irn, u_kill->descendants)) {
1344                                 rss_edge_t *dvg_edge = OALLOC(phase_obst(&rss->ph), rss_edge_t);
1345                                 rss_edge_t key;
1346
1347                                 /* insert the user into the DVG and append it to the user list of u */
1348                                 ir_nodeset_insert(&dvg->nodes, v_irn);
1349                                 if (! plist_has_value(u->dvg_user_list, v_irn))
1350                                         plist_insert_back(u->dvg_user_list, v_irn);
1351
1352                                 dvg_edge->src  = u_irn;
1353                                 dvg_edge->tgt  = v_irn;
1354                                 dvg_edge->next = NULL;
1355
1356                                 key.src = v_irn;
1357                                 key.tgt = u_irn;
1358
1359                                 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1360
1361                                 /* add the edge to the DVG */
1362                                 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", u_irn, v_irn));
1363                                 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1364                         }
1365                 }
1366 #endif /* if 0 */
1367         }
1368
1369         if (rss->opts->dump_flags & RSS_DUMP_DVG)
1370                 debug_vcg_dump_dvg(rss, dvg);
1371 }
1372
1373 /**
1374  * Updates the dvg structure when a serialization edge from src -> tgt is added.
1375  */
1376 static void update_dvg(rss_t *rss, dvg_t *dvg, rss_irn_t *src, rss_irn_t *tgt)
1377 {
1378         int i, j, idx;
1379         rss_edge_t *edge;
1380         rss_edge_t **arr = ALLOCAN(rss_edge_t*, pset_count(dvg->edges));
1381
1382         /*
1383                 Add an edge from serialization target to serialization src:
1384                 src cannot be alive with target
1385         */
1386         add_dvg_edge(rss, dvg, tgt->irn, src->irn, 0);
1387
1388         /* Add edges to src's descendants as well, they also getting serialized. */
1389         for (i = ARR_LEN_SAFE(src->descendants) - 1; i >= 0; --i) {
1390                 add_dvg_edge(rss, dvg, tgt->irn, src->descendants[i], 1);
1391         }
1392
1393         /* We also have to add edges from targets predecessors in dvg */
1394         idx = 0;
1395         /* We cannot insert elements into set over which we iterate ... */
1396         foreach_pset(dvg->edges, edge) {
1397                 if (edge->tgt == tgt->irn) {
1398                         arr[idx++] = edge;
1399                 }
1400         }
1401
1402         for (i = 0; i < idx; ++i) {
1403                 edge = arr[i];
1404                 add_dvg_edge(rss, dvg, edge->src, src->irn, 1);
1405                 for (j = ARR_LEN_SAFE(src->descendants) - 1; j >= 0; --j) {
1406                         add_dvg_edge(rss, dvg, edge->src, src->descendants[j], 1);
1407                 }
1408         }
1409 }
1410
1411 #if 0
1412 /**
1413  * Accumulate all descendants for root into list.
1414  */
1415 static void accumulate_dvg_descendant_values(rss_t *rss, rss_irn_t *root, plist_t *list)
1416 {
1417         if (plist_count(root->dvg_user_list) > 0) {
1418                 plist_element_t *el;
1419
1420                 foreach_plist(root->dvg_user_list, el) {
1421                         ir_node   *v_irn = plist_element_get_value(el);
1422                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
1423
1424                         /* add v as descendant */
1425                         if (! plist_has_value(list, v_irn)) {
1426                                 plist_insert_back(list, v_irn);
1427                                 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG descendant %+F\n", v_irn));
1428                         }
1429
1430                         /* accumulate v's descendants */
1431                         accumulate_dvg_descendant_values(rss, v, list);
1432                 }
1433         }
1434 }
1435
1436 /**
1437  * Builds the list of potential killers for each node
1438  * in the given DVG.
1439  * Needs the descendant list for all user as sorted array.
1440  */
1441 static void build_dvg_pkiller_list(rss_t *rss, dvg_t *dvg)
1442 {
1443         ir_nodeset_iterator_t iter;
1444         ir_node *irn;
1445
1446         foreach_nodeset(&dvg->nodes, irn, iter) {
1447                 rss_irn_t       *node = get_rss_irn(rss, irn);
1448                 plist_element_t *el, *el2;
1449
1450                 DBG((rss->dbg, DEBUG_DVG, "\t\tbuilding pkiller list for %+F\n", irn));
1451
1452                 /* check each user */
1453                 foreach_plist(node->dvg_user_list, el) {
1454                         ir_node *u_irn = plist_element_get_value(el);
1455
1456                         /* is the current user u_irn not a descendant of any other user -> pkiller */
1457                         foreach_plist(node->dvg_user_list, el2) {
1458                                 ir_node   *v_irn = plist_element_get_value(el2);
1459                                 rss_irn_t *v     = get_rss_irn(rss, v_irn);
1460
1461                                 if (el != el2                             &&
1462                                         ! BSEARCH_IRN_ARR(u_irn, v->dvg_desc) &&
1463                                         ! plist_has_value(node->dvg_pkiller_list, u_irn))
1464                                 {
1465                                         plist_insert_back(node->dvg_pkiller_list, u_irn);
1466                                         DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG pkiller %+F\n", u_irn));
1467                                 }
1468                         }
1469                 }
1470
1471                 node->dvg_pkiller = build_sorted_array_from_list(node->dvg_pkiller_list, phase_obst(&rss->ph));
1472         }
1473
1474         DEBUG_ONLY(
1475                 if (firm_dbg_get_mask(rss->dbg) & DEBUG_DVG)
1476                         debug_vcg_dump_dvg_pkiller(rss, dvg);
1477         );
1478 }
1479
1480 #endif /* if 0 */
1481
1482 /**
1483  * Compute the maximal antichain of the current DVG.
1484  * This is a reimplementation of the MAXIMAL_ANTI_CHAIN function
1485  * from the DDG library 1.1 (DAG.cpp).
1486  */
1487 static ir_nodeset_t *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration)
1488 {
1489         int         n               = ir_nodeset_size(&dvg->nodes);
1490         int         *assignment     = ALLOCAN(int, n);
1491         int         *assignment_rev = ALLOCAN(int, n);
1492         int         *idx_map        = ALLOCAN(int, n);
1493         hungarian_problem_t *bp;
1494         ir_nodeset_t *values, *temp;
1495         ir_nodeset_iterator_t iter;
1496         ir_node     *u_irn;
1497         int         i, j, cost, cur_chain, res;
1498         rss_edge_t  *dvg_edge;
1499
1500 #define MAP_IDX(irn) bsearch_for_index(get_irn_idx(irn), idx_map,  n,  1)
1501
1502         if (pset_count(dvg->edges) == 0)
1503                 return NULL;
1504
1505         bp = hungarian_new(n, n, HUNGARIAN_MATCH_NORMAL);
1506
1507         /*
1508                 At first, we build an index map for the nodes in the DVG,
1509                 because we cannot use the irn idx therefore as the resulting
1510                 bipartite data structure would have around 1.2GB.
1511                 So we limit the size to the number of nodes we have in the DVG
1512                 and build a sorted index map for their irn indices.
1513         */
1514         i = 0;
1515         foreach_ir_nodeset(&dvg->nodes, u_irn, iter) {
1516                 idx_map[i++] = get_irn_idx(u_irn);
1517         }
1518         qsort(idx_map, n, sizeof(idx_map[0]), cmp_int);
1519
1520         foreach_pset(dvg->edges, dvg_edge) {
1521                 int idx_u = MAP_IDX(dvg_edge->src);
1522                 int idx_v = MAP_IDX(dvg_edge->tgt);
1523
1524                 /* add the entry to the bipartite data structure */
1525                 hungarian_add(bp, idx_u, idx_v, 1);
1526                 DBG((rss->dbg, LEVEL_3, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1527                         idx_u, dvg_edge->src, idx_v, dvg_edge->tgt));
1528         }
1529 #if 0
1530         /*
1531                 Add a bipartite entry for each pair of nodes (u, v), where exists a
1532                 path in the DVG from u to v, ie. connect all descendants(v) to v.
1533                 desc_dvg(v) = accumulated descendants of all z in dvg_user_list(v)
1534         */
1535         foreach_ir_nodeset(&dvg->nodes, u_irn, iter) {
1536                 rss_irn_t *u        = get_rss_irn(rss, u_irn);
1537                 int       idx_u_irn = MAP_IDX(u_irn);
1538
1539                 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG descendants of %+F:\n", u_irn));
1540
1541                 //plist_clear(u->dvg_desc_list);
1542                 //accumulate_dvg_descendant_values(rss, u, u->dvg_desc_list);
1543
1544                 /*
1545                         FIXME: The array is build on the phase obstack and we cannot free the data.
1546                         So the array get as many times allocated as this function is called.
1547                 */
1548
1549                 /* build the sorted array for faster searches */
1550                 //u->dvg_desc = build_sorted_array_from_list(u->dvg_desc_list, phase_obst(&rss->ph));
1551
1552                 DBG((rss->dbg, DEBUG_MAX_AC, "\t\tadding bipartite entries of %+F:\n", u_irn));
1553
1554                 /* add the bipartite entries for all descendants */
1555                 for (i = ARR_LEN_SAFE(u->dvg_desc) - 1; i >= 0; --i) {
1556                         rss_irn_t *desc    = get_rss_irn(rss, u->dvg_desc[i]);
1557                         int       idx_desc = MAP_IDX(u->dvg_desc[i]);
1558
1559                         /* add the entry to the bipartite data structure */
1560                         hungarian_add(bp, idx_u_irn, idx_desc, 1);
1561                         DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1562                                 idx_u_irn, u_irn, idx_desc, u->dvg_desc[i]));
1563
1564                         need_matching = 1;
1565                 }
1566         }
1567 #endif
1568
1569         /* We want maximum cardinality matching */
1570         hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);
1571
1572 #if 0
1573         DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG pkiller:\n"));
1574         /* beware: the following function needs the dvg_desc array */
1575         build_dvg_pkiller_list(rss, dvg);
1576 #endif
1577
1578         DBG((rss->dbg, LEVEL_2, "\t\tcomputing bipartite matching\n"));
1579         /*
1580                 The maximum cardinality bipartite matching gives us the minimal
1581                 chain partition, which corresponds to the maximum anti chains.
1582         */
1583         res = hungarian_solve(bp, assignment, &cost, 1);
1584         assert(res == 0 && "Bipartite matching failed!");
1585
1586         hungarian_free(bp);
1587         memset(assignment_rev, -1, n * sizeof(assignment_rev[0]));
1588
1589         /* build the reverse assignment, ie. foreach i -> j, add j -> i */
1590         for (i = 0; i < n; ++i) {
1591                 if (assignment[i] >= 0) {
1592                         int j = assignment[i];
1593                         assignment_rev[j] = i;
1594                 }
1595         }
1596
1597         DBG((rss->dbg, LEVEL_2, "\t\tgot assignment with cost %d\n", cost));
1598         DBG((rss->dbg, LEVEL_3, "\t\t\tassignment   ---   reverse assignment\n", cost));
1599         for (i = 0; i < n; ++i) {
1600                 DBG((rss->dbg, LEVEL_3, "\t\t\t%3d -> %3d         %3d -> %3d\n", i, assignment[i], i, assignment_rev[i]));
1601         }
1602
1603         values    = XMALLOC(ir_nodeset_t);
1604         ir_nodeset_init_size(values, 10);
1605         cur_chain = 0;
1606         /* Construction of the minimal chain partition */
1607         for (j = 0; j < n; ++j) {
1608                 /* check nodes, which did not occur as target */
1609                 if (assignment_rev[j] == -1) {
1610                         int       xj      = idx_map[j];
1611                         ir_node   *xj_irn = get_idx_irn(rss->irg, xj);
1612                         rss_irn_t *xj_rss = get_rss_irn(rss, xj_irn);
1613                         chain_t   *c      = OALLOC(phase_obst(&rss->ph), chain_t);
1614                         int       source;
1615
1616                         /* there was no source for j -> we have a source of a new chain */
1617                         ir_nodeset_insert(values, xj_irn);
1618
1619                         c->elements = plist_obstack_new(phase_obst(&rss->ph));
1620                         c->nr       = cur_chain++;
1621                         plist_insert_back(c->elements, xj_irn);
1622
1623                         xj_rss->chain = c;
1624
1625                         DBG((rss->dbg, LEVEL_2, "\t\tstarting chain %d:\n", c->nr));
1626                         DBG((rss->dbg, LEVEL_2, "\t\t\t%+F (%d)", xj_irn, j));
1627
1628                         /* follow chain, having j as source */
1629                         source = j;
1630                         while (assignment[source] >= 0) {
1631                                 int       target  = assignment[source];
1632                                 int       irn_idx = idx_map[target];
1633                                 ir_node   *irn    = get_idx_irn(rss->irg, irn_idx);
1634                                 rss_irn_t *node   = get_rss_irn(rss, irn);
1635
1636                                 plist_insert_back(c->elements, irn);
1637                                 node->chain = c;
1638
1639                                 DB((rss->dbg, LEVEL_2, " -> %+F (%d)", irn, target));
1640
1641                                 /* new source = last target */
1642                                 source = target;
1643                         }
1644
1645                         DB((rss->dbg, LEVEL_2, "\n"));
1646                 }
1647         }
1648
1649         /*
1650                 Computing the maximal antichain: Select an element from each
1651                 chain such, such it is parallel with the others.
1652         */
1653         DBG((rss->dbg, LEVEL_1, "\t\tcomputing set of saturation values (MAX AC)\n"));
1654         DBG((rss->dbg, LEVEL_3, "\t\tstarting with:\n"));
1655
1656         DEBUG_ONLY(
1657                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_3)
1658                         dump_nodeset(values, "\t\t\t");
1659         )
1660
1661         temp = NULL;
1662         do {
1663                 /*
1664                         We need an explicit array for the values as
1665                         we cannot iterate multiple times over the same
1666                         set at the same time. :-(((((
1667                         TODO Matze: now we can, so rewrite this...
1668                 */
1669                 int     n         = ir_nodeset_size(values);
1670                 int     i         = 0;
1671                 ir_node **val_arr = NEW_ARR_F(ir_node *, n);
1672
1673                 foreach_ir_nodeset(values, u_irn, iter)
1674                         val_arr[i++] = u_irn;
1675
1676                 if (temp) {
1677                         ir_nodeset_destroy(temp);
1678                         free(temp);
1679                 }
1680
1681                 temp = XMALLOC(ir_nodeset_t);
1682                 ir_nodeset_init_size(temp, 10);
1683
1684                 /* Select all nodes from current value set, having another node in the set as descendant. */
1685                 for (i = 0; i < n; ++i) {
1686                         rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1687
1688                         for (j = 0; j < n; ++j) {
1689                                 if (i != j) {
1690                                         rss_edge_t key;
1691
1692                                         key.src = val_arr[i];
1693                                         key.tgt = val_arr[j];
1694
1695                                         if (pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1696                                                 /* v[j] is descendant of u -> remove u and break */
1697                                                 ir_nodeset_insert(temp, (ir_node *) u->irn);
1698                                                 ir_nodeset_remove(values, u->irn);
1699
1700                                                 DBG((rss->dbg, LEVEL_3, "\t\t\tremoving %+F from values, adding it to temp\n", u->irn));
1701
1702                                                 break;
1703                                         }
1704                                 }
1705                         }
1706                 }
1707
1708                 /* Try to insert the chain predecessor of all selected u's */
1709                 foreach_ir_nodeset(temp, u_irn, iter) {
1710                         rss_irn_t       *u  = get_rss_irn(rss, u_irn);
1711                         chain_t         *c  = u->chain;
1712                         plist_element_t *el = plist_find_value(c->elements, u_irn);
1713
1714                         assert(el && "Missing element in chain!");
1715
1716                         /* If u has predecessor in chain: insert the predecessor */
1717                         if (el == plist_element_get_prev(el)) {
1718                                 ir_nodeset_insert(values, plist_element_get_value(el));
1719                                 DBG((rss->dbg, LEVEL_3, "\t\t\tadding %+F to values\n", plist_element_get_value(el)));
1720                         }
1721                 }
1722
1723
1724                 DEL_ARR_F(val_arr);
1725         } while (ir_nodeset_size(temp) > 0);
1726
1727         DBG((rss->dbg, LEVEL_2, "\t\tfinal set:\n"));
1728         DEBUG_ONLY(
1729                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2) {
1730                         dump_nodeset(values, "\t\t\t");
1731                 }
1732         );
1733
1734         if (rss->opts->dump_flags & RSS_DUMP_MAXAC)
1735                 debug_vcg_dump_pkg(rss, values, iteration);
1736
1737         if (temp != NULL) {
1738                 ir_nodeset_destroy(temp);
1739                 free(temp);
1740         }
1741
1742         return values;
1743
1744 #undef MAP_IDX
1745 }
1746
1747 /**
1748  * Computes the best serialization between two nodes of sat_vals.
1749  */
1750 static serialization_t *compute_best_admissible_serialization(rss_t *rss, ir_nodeset_t *sat_vals, serialization_t *ser, int num_regs)
1751 {
1752         int        n                    = ir_nodeset_size(sat_vals);
1753         int        n_idx                = ARR_LEN_SAFE(rss->idx_map);
1754         int        i                    = 0;
1755         ir_node    **val_arr            = ALLOCAN(ir_node*, n);
1756         bitset_t   *bs_sv               = bitset_alloca(n_idx);
1757         bitset_t   *bs_vdesc            = bitset_alloca(n_idx);
1758         bitset_t   *bs_tmp              = bitset_alloca(n_idx);
1759         bitset_t   *bs_ukilldesc        = bitset_alloca(n_idx);
1760         int        best_benefit         = INT_MAX;
1761         int        best_omega2          = INT_MAX;
1762         int        best_benefit_omega20 = INT_MAX;
1763         int        has_omega1           = 0;
1764         int        j, k;
1765         ir_node    *irn;
1766         ir_nodeset_iterator_t iter;
1767         rss_edge_t min_benefit_edge = {NULL, NULL, NULL};
1768         rss_edge_t min_omega20_edge = {NULL, NULL, NULL};
1769         rss_irn_t  *ser_u_omega1 = NULL, *ser_v_omega1 = NULL;
1770         rss_irn_t  *ser_u_omega20 = NULL, *ser_v_omega20 = NULL;
1771
1772         DBG((rss->dbg, LEVEL_1, "\tcomputing admissible serializations:\n"));
1773
1774         /*
1775                 We need an explicit array for the values as
1776                 we cannot iterate multiple times over the same
1777                 set at the same time. :-(((((
1778         */
1779
1780         foreach_ir_nodeset(sat_vals, irn, iter) {
1781                 val_arr[i++] = irn;
1782                 bitset_set(bs_sv, BLOCK_IDX_MAP(rss, irn));
1783         }
1784
1785         /*
1786                 We build all admissible serializations and remember the best found so far.
1787                 For u in sat_vals:
1788                  For v in sat_val:
1789                    if v in pkiller(u): add edge from v to all other pkiller(u)
1790                    else: for all vv in pkiller(u): add edge from v to vv if there exists no path from vv to v
1791         */
1792
1793 /*
1794         A node is unserializable if:
1795         - it has only one killer and this one is Sink
1796     - it kills no other values
1797         In this case there is no serialization which could
1798         reduce the registerpressure
1799 */
1800 #define IS_UNSERIALIZABLE_NODE(rss_node)                  \
1801         (                                                     \
1802                 (                                                 \
1803                         (plist_count(rss_node->pkiller_list) == 1) && \
1804                         is_Sink(rss_node->killer)                  && \
1805                         (rss_node->kill_count                == 0)    \
1806                 )                            ||                   \
1807                 be_is_Barrier(rss_node->irn) ||                   \
1808                 be_is_Keep(rss_node->irn)                         \
1809         )
1810
1811         /* for all u in sat_vals */
1812         for (i = 0; i < n; ++i) {
1813                 rss_irn_t       *u = get_rss_irn(rss, val_arr[i]);
1814                 plist_element_t *el;
1815
1816                 /* ignore nodes where serialization does not help */
1817                 if (IS_UNSERIALIZABLE_NODE(u)) {
1818                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", u->irn));
1819                         continue;
1820                 }
1821
1822                 /* accumulate all descendants of all pkiller(u) */
1823                 bitset_clear_all(bs_ukilldesc);
1824                 foreach_plist(u->pkiller_list, el) {
1825                         ir_node   *irn  = plist_element_get_value(el);
1826                         rss_irn_t *node = get_rss_irn(rss, irn);
1827
1828                         if (! is_Sink(irn))
1829                                 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, irn));
1830                         else
1831                                 continue;
1832
1833                         for (k = ARR_LEN_SAFE(node->descendants) - 1; k >= 0; --k) {
1834                                 if (! is_Sink(node->descendants[k]))
1835                                         bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, node->descendants[k]));
1836                         }
1837                 }
1838
1839                 /* for all v in sat_vals */
1840                 for (j = 0; j < n; ++j) {
1841                         ir_node   *v_irn   = val_arr[j];
1842                         rss_irn_t *v       = get_rss_irn(rss, v_irn);
1843                         unsigned  v_height = get_irn_height(rss->h, v_irn);
1844                         int       omega1, omega2, is_pkiller;
1845
1846                         /* v cannot be serialized with itself
1847                          * ignore nodes where serialization does not help */
1848                         if (i == j || IS_UNSERIALIZABLE_NODE(v)) {
1849 #ifdef DEBUG_libfirm
1850                                 if (i != j)
1851                                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", v->irn));
1852 #endif
1853                                 continue;
1854                         }
1855
1856                         /* get descendants of v */
1857                         bitset_clear_all(bs_vdesc);
1858                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v_irn));
1859                         for (k = ARR_LEN_SAFE(v->descendants) - 1; k >= 0; --k) {
1860                                 if (! is_Sink(v->descendants[k]))
1861                                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v->descendants[k]));
1862                         }
1863
1864                         /* if v is in pkiller(u) */
1865                         is_pkiller = plist_has_value(u->pkiller_list, v_irn);
1866
1867                         /* for all vv in pkiller(u) */
1868                         foreach_plist(u->pkiller_list, el) {
1869                                 ir_node *vv_irn  = plist_element_get_value(el);
1870                                 int     add_edge;
1871
1872                                 if (is_Sink(vv_irn) || is_cfop(vv_irn))
1873                                         continue;
1874
1875                                 if (is_pkiller)
1876                                         add_edge = vv_irn != v_irn && skip_Proj(vv_irn) != skip_Proj(v_irn);
1877                                 else
1878                                         add_edge = ! heights_reachable_in_block(rss->h, skip_Proj(vv_irn), skip_Proj(v_irn));
1879
1880                                 /*
1881                                         As we add an edge from vv -> v, we have to make sure,
1882                                         that there exists no path from v to vv.
1883                                 */
1884
1885                                 if (add_edge) {
1886                                         unsigned vv_height = get_irn_height(rss->h, vv_irn);
1887                                         unsigned critical_path_cost;
1888                                         unsigned mu1, mu2;
1889
1890                                         /*
1891                                                 mu1 = | descendants(v) cut sat_vals |
1892                                                 the number of saturating values which cannot
1893                                                 be simultaneously alive with u
1894                                         */
1895                                         bitset_copy(bs_tmp, bs_vdesc);
1896                                         bitset_and(bs_tmp, bs_sv);
1897                                         mu1 = bitset_popcount(bs_tmp);
1898
1899                                         /*
1900                                                 mu2 = | accum_desc_all_pkiller(u) without descendants(v) |
1901                                         */
1902                                         if (is_pkiller) {
1903                                                 bitset_copy(bs_tmp, bs_ukilldesc);
1904                                                 bitset_andnot(bs_tmp, bs_vdesc);
1905                                                 mu2 = bitset_popcount(bs_tmp);
1906                                         }
1907                                         else {
1908                                                 mu2 = 0;
1909                                         }
1910
1911                                         /* omega1 = mu1 - mu2 */
1912                                         omega1 = mu1 - mu2;
1913
1914                                         if (omega1 != 0)
1915                                                 has_omega1 = 1;
1916
1917                                         /* omega2 = increase of critical path */
1918                                         critical_path_cost =
1919                                                 v_height                        /* longest path from v to sink */
1920                                                 + rss->max_height - vv_height   /* longest path from source to vv */
1921                                                 + 1;                            /* edge */
1922
1923                                         /*
1924                                                 If critical_path_cost > max_height -> the new edge
1925                                                 would increase the longest critical path by the difference.
1926                                         */
1927                                         omega2 = critical_path_cost > rss->max_height ? critical_path_cost - rss->max_height : 0;
1928
1929                                         /* this keeps track of the edge with the best benefit */
1930                                         if (omega1 >= num_regs - n && omega1 < best_benefit) {
1931                                                 min_benefit_edge.src = v_irn;
1932                                                 min_benefit_edge.tgt = vv_irn;
1933
1934                                                 ser_u_omega1 = u;
1935                                                 ser_v_omega1 = v;
1936
1937                                                 best_benefit    = omega1;
1938                                                 ser->new_killer = is_pkiller;
1939                                         }
1940
1941                                         /* this keeps track of the edge with the best omega1 costs where omega2 == 0 */
1942                                         if (omega2 == 0 && omega1 >= num_regs - n && omega1 < best_benefit_omega20) {
1943                                                 min_omega20_edge.src = v_irn;
1944                                                 min_omega20_edge.tgt = vv_irn;
1945
1946                                                 ser_u_omega20 = u;
1947                                                 ser_v_omega20 = v;
1948
1949                                                 best_benefit_omega20 = omega1;
1950                                                 ser->new_killer      = is_pkiller;
1951                                         }
1952
1953                                         best_omega2 = MIN(best_omega2, omega2);
1954
1955                                         DBG((rss->dbg, LEVEL_2, "\t\tfound %+F -> %+F (w1 %d, w2 %d)\n",
1956                                                 v_irn, vv_irn, omega1, omega2));
1957                                 } /* if add_edge */
1958                         } /* for all vv in pkiller(u) */
1959                 } /* for all v in sat_vals */
1960         } /* for all u in sat_vals */
1961
1962         if (! has_omega1)
1963                 return NULL;
1964
1965         if (best_omega2 == 0) {
1966                 ser->u         = ser_u_omega20;
1967                 ser->v         = ser_v_omega20;
1968                 ser->edge->src = min_omega20_edge.src;
1969                 ser->edge->tgt = min_omega20_edge.tgt;
1970                 ser->omega1    = best_benefit_omega20;
1971                 ser->omega2    = best_omega2;
1972         }
1973         else {
1974                 ser->u         = ser_u_omega1;
1975                 ser->v         = ser_v_omega1;
1976                 ser->edge->src = min_benefit_edge.src;
1977                 ser->edge->tgt = min_benefit_edge.tgt;
1978                 ser->omega1    = best_benefit;
1979                 ser->omega2    = best_omega2;
1980         }
1981
1982         return ser;
1983
1984 #undef IS_UNSERIALIZABLE_NODE
1985 }
1986
1987 /**
1988  * Perform the value serialization heuristic and add all
1989  * computed serialization edges as dependencies to the irg.
1990  */
1991 static void perform_value_serialization_heuristic(rss_t *rss)
1992 {
1993         bitset_t *arch_nonign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1994         bitset_t *abi_ign_bs     = bitset_alloca(arch_register_class_n_regs(rss->cls));
1995         unsigned available_regs, iteration;
1996         dvg_t    dvg;
1997         ir_nodeset_t *sat_vals;
1998         pset *ser_set = new_pset(cmp_rss_edges, 20);
1999
2000         /* available_regs = R = |arch_non_ignore_regs cut ~abi_ignore_regs| */
2001         arch_put_non_ignore_regs(rss->cls, arch_nonign_bs);
2002         be_abi_put_ignore_regs(rss->abi, rss->cls, abi_ign_bs);
2003         bitset_andnot(arch_nonign_bs, abi_ign_bs);
2004         available_regs  = bitset_popcount(arch_nonign_bs);
2005         //num_live = pset_count(rss->live_block);
2006         //available_regs -= num_live < available_regs ? num_live : 0;
2007
2008         DBG((rss->dbg, LEVEL_1, "\n\t#available regs: %d\n\n", available_regs));
2009
2010         /*
2011                 At first we need to compute the disjoint value DAG (DVG = {V, E_dv}).
2012                 V    = set of all nodes we are currently interested in
2013                 E_dv = there is an edge from u to v iff v is a descendant of killer(u), forall u, v in V
2014         */
2015         ir_nodeset_init_size(&dvg.nodes, plist_count(rss->nodes));
2016         dvg.edges = new_pset(cmp_rss_edges, plist_count(rss->nodes) * 5);
2017         compute_dvg(rss, &dvg);
2018
2019         /*
2020                 Then we perform the heuristic serialization algorithm
2021                 on the DVG which gives us all necessary serialization
2022                 edges.
2023         */
2024         DBG((rss->dbg, LEVEL_1, "\tcomputing maximal antichain:\n"));
2025         iteration = 0;
2026         sat_vals  = compute_maximal_antichain(rss, &dvg, iteration++);
2027         while (sat_vals && (ir_nodeset_size(sat_vals) > available_regs)) {
2028                 serialization_t *ser, best_ser;
2029                 rss_edge_t      *edge = OALLOC(phase_obst(&rss->ph), rss_edge_t);
2030                 ir_node         *dep_src, *dep_tgt;
2031
2032                 best_ser.edge = edge;
2033                 ser = compute_best_admissible_serialization(rss, sat_vals, &best_ser, available_regs);
2034
2035                 DBG((rss->dbg, LEVEL_1, "\tcurrent register saturation %d, target %d\n", ir_nodeset_size(sat_vals), available_regs));
2036
2037                 if (! ser) {
2038                         DBG((rss->dbg, LEVEL_1, "\tno RS improving serialization found, breaking at iteration %d\n", iteration));
2039                         break;
2040                 }
2041
2042                 /* Insert the serialization as dependency edge into the irg. */
2043                 DBG((rss->dbg, LEVEL_1, "\tserializing %+F -> %+F with edge %+F -> %+F and cost %d, %d\n",
2044                         ser->u->irn, ser->v->irn, ser->edge->src, ser->edge->tgt, ser->omega1, ser->omega2));
2045
2046                 if (pset_find(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge)))
2047                         ir_printf("WARNING: serialization %+F -> %+F computed twice!\n", ser->edge->src, ser->edge->tgt);
2048
2049
2050                 pset_insert(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge));
2051
2052                 /* update the dvg */
2053                 update_dvg(rss, &dvg, ser->v, ser->u);
2054                 update_dvg(rss, &dvg, get_rss_irn(rss, ser->edge->src), get_rss_irn(rss, ser->edge->tgt));
2055                 if (sat_vals != NULL) {
2056                         ir_nodeset_destroy(sat_vals);
2057                         free(sat_vals);
2058                 }
2059
2060                 dep_src = skip_Proj(ser->edge->src);
2061                 dep_tgt = ser->edge->tgt;
2062                 add_irn_dep(dep_src, dep_tgt);
2063
2064                 /* Update descendants, consumer and pkillers of the target */
2065                 update_node_info(rss, ser->edge->tgt, ser->edge->src);
2066
2067                 /* TODO: try to find a cheaper way for updating height information */
2068                 rss->max_height = heights_recompute_block(rss->h, rss->block);
2069
2070                 /* Recompute the antichain for next serialization */
2071                 DBG((rss->dbg, LEVEL_1, "\tre-computing maximal antichain:\n"));
2072                 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
2073         }
2074
2075         ir_nodeset_destroy(&dvg.nodes);
2076         del_pset(dvg.edges);
2077 }
2078
2079 /**
2080  * Do initial calculations for a block.
2081  */
2082 static void process_block(ir_node *block, void *env)
2083 {
2084         rss_t *rss = env;
2085         int   i, n;
2086         const ir_edge_t *edge;
2087
2088         phase_init(&rss->ph, rss->irg, init_rss_irn);
2089
2090         DBG((rss->dbg, LEVEL_1, "preprocessing block %+F\n", block));
2091         rss->block = block;
2092
2093         /* build an index map for all nodes in the current block */
2094         i            = 0;
2095         n            = get_irn_n_edges(block);
2096         NEW_ARR_A(int *, rss->idx_map, n);
2097         foreach_out_edge(block, edge) {
2098                 ir_node *irn      = get_edge_src_irn(edge);
2099                 rss->idx_map[i++] = get_irn_idx(irn);
2100         }
2101         qsort(rss->idx_map, n, sizeof(rss->idx_map[0]), cmp_int);
2102         rss->max_height = heights_recompute_block(rss->h, block);
2103
2104         /* loop over all register classes */
2105         for (i = arch_env_get_n_reg_class(rss->arch_env) - 1; i >= 0; --i) {
2106                 const arch_register_class_t *cls = arch_env_get_reg_class(rss->arch_env, i);
2107
2108                 rss->cls = cls;
2109                 DBG((rss->dbg, LEVEL_1, "register class %s\n", arch_register_class_name(cls)));
2110
2111                 /* Get all live value at end of Block having current register class */
2112                 ir_nodeset_init(&rss->live_block);
2113                 be_liveness_end_of_block(rss->liveness, rss->cls, rss->block, &rss->live_block);
2114
2115                 /* reset the list of interesting nodes */
2116                 plist_clear(rss->nodes);
2117                 plist_insert_back(rss->nodes, _sink);
2118
2119                 /* collect all nodes having a certain register class */
2120                 foreach_out_edge(block, edge) {
2121                         ir_node *irn  = get_edge_src_irn(edge);
2122                         ir_mode *mode = get_irn_mode(irn);
2123
2124                         /*
2125                                 We skip:
2126                                 - mode_T nodes (the projs are asked)
2127                                 - mode_X nodes (control flow nodes are always scheduled last)
2128                                 - Keeps (they are always immediately scheduled)
2129                                 - Phi (same as Keep)
2130                         */
2131                         if (mode == mode_T || mode == mode_X || is_Phi(irn))
2132                                 continue;
2133
2134                         /*
2135                                 In case of a proj, we skip
2136                                 - Barrier (they are a Barrier :)
2137                                 - Start
2138                                 - the Proj itself, as it's scheduled always with it's super node
2139                         */
2140                         if (is_Proj(irn)) {
2141                                 ir_node *pred = get_Proj_pred(irn);
2142                                 if (be_is_Barrier(pred) || be_is_Start(pred))
2143                                         continue;
2144                         }
2145
2146                         /* calculate the descendants and consumer for each node in the block */
2147                         collect_node_info(rss, skip_Proj(irn));
2148
2149                         if (be_is_Keep(irn))
2150                                 continue;
2151
2152                         if (!arch_irn_is_ignore(irn) &&
2153                                         arch_get_irn_reg_class_out(irn) == cls) {
2154                                 plist_insert_back(rss->nodes, skip_Proj(irn));
2155                         }
2156                         //}
2157                 }
2158
2159                 /* compute the potential killing set PK(G) */
2160                 compute_pkill_set(rss);
2161
2162                 /* compute the killing function k* */
2163                 compute_killing_function(rss);
2164
2165                 /*
2166                         Compute the heuristic value serialization and
2167                         add the necessary dependencies to the irg.
2168                 */
2169                 perform_value_serialization_heuristic(rss);
2170
2171                 ir_nodeset_destroy(&rss->live_block);
2172         }
2173
2174         phase_deinit(&rss->ph);
2175 }
2176
2177 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_schedrss);
2178 void be_init_schedrss(void)
2179 {
2180         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
2181         lc_opt_entry_t *sched_grp = lc_opt_get_grp(be_grp, "sched");
2182         lc_opt_entry_t *rss_grp = lc_opt_get_grp(sched_grp, "rss");
2183
2184         lc_opt_add_table(rss_grp, rss_option_table);
2185 }
2186
2187 /**
2188  * Preprocess the irg for scheduling.
2189  */
2190 void rss_schedule_preparation(ir_graph *irg)
2191 {
2192         rss_t rss;
2193
2194         FIRM_DBG_REGISTER(rss.dbg, "firm.be.sched.rss");
2195
2196         //firm_dbg_set_mask(rss.dbg, LEVEL_1 | LEVEL_2 | LEVEL_3);
2197
2198         init_rss_special_nodes(irg);
2199
2200         rss.irg      = irg;
2201         rss.arch_env = be_get_irg_arch_env(irg);
2202         rss.abi      = be_get_irg_abi(irg);
2203         rss.h        = heights_new(irg);
2204         rss.nodes    = plist_new();
2205         rss.opts     = &rss_options;
2206         rss.liveness = be_liveness(irg);
2207         be_liveness_assure_sets(rss.liveness);
2208         irg_block_walk_graph(irg, NULL, process_block, &rss);
2209         heights_free(rss.h);
2210         plist_free(rss.nodes);
2211         be_liveness_free(rss.liveness);
2212
2213         if (be_get_irg_options(irg)->dump_flags & DUMP_SCHED)
2214                 dump_ir_graph(irg, "rss");
2215 }