fix an offset bug in allocating memory for irg phases
[libfirm] / ir / be / beschedrss.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Implementation of a register saturating list scheduler.
23  * @author      Christian Wuerdig
24  * @date        29.08.2006
25  * @version     $Id$
26  *
27  * Implementation of a register saturating list scheduler
28  * as described in: Sid-Ahmed-Ali Touati
29  * Register Saturation in Superscalar and VLIW Codes
30  */
31 #include "config.h"
32
33 #include <limits.h>
34
35 #include "obst.h"
36 #include "debug.h"
37
38 #include "irgraph_t.h"
39 #include "irnode_t.h"
40 #include "iredges_t.h"
41 #include "ircons_t.h"
42 #include "irphase_t.h"
43 #include "irgwalk.h"
44 #include "irdump.h"
45 #include "irtools.h"
46 #include "irbitset.h"
47 #include "irprintf.h"
48 #include "irnodeset.h"
49 #include "bipartite.h"
50 #include "hungarian.h"
51 #include "plist.h"
52 #include "array_t.h"
53
54 #include "height.h"
55
56 #include "beabi.h"
57 #include "bemodule.h"
58 #include "benode.h"
59 #include "besched.h"
60 #include "beirg.h"
61 #include "belive.h"
62
63 #include "lc_opts.h"
64 #include "lc_opts_enum.h"
65
66
67 #define ARR_LEN_SAFE(arr) ((arr) != NULL ? ARR_LEN((arr)) : 0)
68
69 #define HASH_RSS_EDGE(edge) ((get_irn_node_nr((edge)->src) << 16) | (get_irn_node_nr((edge)->tgt) & 0xFFFF))
70 #define BSEARCH_IRN_ARR(val, arr) \
71         bsearch(&(val), (arr), ARR_LEN_SAFE((arr)), sizeof((arr)[0]), cmp_irn_idx)
72
73 #define BLOCK_IDX_MAP(rss, irn) bsearch_for_index(get_irn_idx((irn)), (rss)->idx_map, ARR_LEN_SAFE((rss)->idx_map), 1)
74
75 /* the rss options */
76 typedef struct _rss_opts_t {
77         int dump_flags;
78 } rss_opts_t;
79
80 /* Represents a child with associated costs */
81 typedef struct _child {
82         ir_node *irn;
83         float   cost;
84 } child_t;
85
86 /* We need edges for several purposes. */
87 typedef struct _rss_edge {
88         ir_node *src;
89         ir_node *tgt;
90         void    *next;
91 } rss_edge_t;
92
93 /* Represents a connected bipartite component. */
94 typedef struct _cbc {
95         ir_nodeset_t parents;       /**< = S  a set of value producers */
96         ir_nodeset_t children;      /**< = T  a set of value consumers */
97         pset    *kill_edges;    /**< = E  a set of edges (t in T, s in S) such as each s in S gets killed by at least one t in T */
98         int     nr;             /**< a deterministic index for set insertion (used as hash) */
99 } cbc_t;
100
101 /* Represents a disjoint value DAG. */
102 typedef struct _dvg {
103         ir_nodeset_t nodes;
104         pset    *edges;
105 } dvg_t;
106
107 /* Represents a chain of nodes. */
108 typedef struct _chain {
109         plist_t *elements;   /**< List of chain elements */
110         int     nr;          /**< a deterministic index for set insertion (used as hash) */
111 } chain_t;
112
113 typedef struct _rss_irn {
114         plist_t  *consumer_list;    /**< List of consumers */
115         const ir_node **consumer;   /**< Sorted consumer array (needed for faster access) */
116
117         plist_t  *parent_list;      /**< List of parents */
118         plist_t  *pkiller_list;     /**< List of potential killers */
119
120         plist_t  *descendant_list;  /**< List of descendants */
121         const ir_node **descendants;      /**< Sorted descendant array (needed for faster access) */
122
123         const ir_node  *killer;     /**< The selected unique killer */
124         const ir_node  *irn;        /**< The corresponding firm node to this rss_irn */
125
126         chain_t  *chain;            /**< The chain, this node is associated to */
127
128         unsigned desc_walk;         /**< visited flag for collecting descendants */
129         unsigned kill_count;        /**< number of nodes killed by this one */
130
131         unsigned live_out : 1;      /**< irn has consumers outside of it's block */
132         unsigned visited  : 1;      /**< visited flag for bipartite decomposition */
133         unsigned havecons : 1;      /**< visited flag collect consumer */
134         unsigned handled  : 1;      /**< flag indicating whether or not the list structures have been build */
135         unsigned dumped   : 1;      /**< flag indication whether or not this node was dumped */
136 } rss_irn_t;
137
138 /* Represents a serialization edge with associated costs. */
139 typedef struct _serialization {
140         rss_irn_t  *u;       /* the top node */
141         rss_irn_t  *v;       /* the node about to be serialized after u */
142         rss_edge_t *edge;    /* the edge selected for the serialization */
143         int        omega1;   /* estimated: available regs - RS reduction */
144         int        omega2;   /* increase in critical path length */
145         int        new_killer;
146 } serialization_t;
147
148 typedef struct _rss {
149         ir_phase          ph;              /**< Phase to hold some data */
150         heights_t        *h;              /**< The current height object */
151         ir_graph         *irg;            /**< The irg to preprocess */
152         plist_t          *nodes;          /**< The list of interesting nodes */
153         const arch_env_t *arch_env;       /**< The architecture environment */
154         be_abi_irg_t     *abi;            /**< The abi for this irg */
155         pset             *cbc_set;        /**< A set of connected bipartite components */
156         ir_node          *block;          /**< The current block in progress. */
157         int              *idx_map;        /**< Mapping irn indices to per block indices */
158         unsigned         max_height;      /**< maximum height in the current block */
159         rss_opts_t       *opts;           /**< The options */
160         be_lv_t          *liveness;       /**< The liveness information for this irg */
161         ir_nodeset_t      live_block;     /**< Values alive at end of block */
162         const arch_register_class_t *cls; /**< The current register class */
163         DEBUG_ONLY(firm_dbg_module_t *dbg);
164 } rss_t;
165
166 #define get_rss_irn(rss, irn)  (phase_get_or_set_irn_data(&rss->ph, irn))
167
168 /**
169  * We need some special nodes:
170  * a source and a sink for all live-in and live-out values of a block
171  */
172 enum {
173         iro_rss_Source,
174         iro_rss_Sink,
175         iro_rss_last
176 };
177
178 /** The opcode of the rss_Source node once allocated. */
179 static ir_op *op_rss_Source;
180 /** The opcode of the rss_Sink node once allocated. */
181 static ir_op *op_rss_Sink;
182
183 /** The rss_Source node of the current graph. */
184 static ir_node *_source = NULL;
185 /** The rss_Sink node of the current graph. */
186 static ir_node *_sink   = NULL;
187
188 #define is_Source(irn) ((irn) == _source)
189 #define is_Sink(irn)   ((irn) == _sink)
190
191 enum {
192         RSS_DUMP_NONE  = 0,
193         RSS_DUMP_CBC   = 1 << 0,
194         RSS_DUMP_PKG   = 1 << 1,
195         RSS_DUMP_KILL  = 1 << 2,
196         RSS_DUMP_DVG   = 1 << 3,
197         RSS_DUMP_MAXAC = 1 << 4,
198         RSS_DUMP_ALL   = (RSS_DUMP_MAXAC << 1) - 1,
199 };
200
201 static rss_opts_t rss_options = {
202         RSS_DUMP_NONE,
203 };
204
205 static const lc_opt_enum_int_items_t dump_items[] = {
206         { "none",  RSS_DUMP_NONE  },
207         { "cbc",   RSS_DUMP_CBC   },
208         { "pkg",   RSS_DUMP_PKG   },
209         { "kill",  RSS_DUMP_KILL  },
210         { "dvg",   RSS_DUMP_DVG   },
211         { "maxac", RSS_DUMP_MAXAC },
212         { "all",   RSS_DUMP_ALL   },
213         { NULL, 0 }
214 };
215
216 static lc_opt_enum_int_var_t dump_var = {
217         &rss_options.dump_flags, dump_items
218 };
219
220 static const lc_opt_table_entry_t rss_option_table[] = {
221         LC_OPT_ENT_ENUM_MASK("dump", "dump phases", &dump_var),
222         LC_OPT_LAST
223 };
224
225 /******************************************************************************
226  *  _          _                    __                  _   _
227  * | |        | |                  / _|                | | (_)
228  * | |__   ___| |_ __   ___ _ __  | |_ _   _ _ __   ___| |_ _  ___  _ __  ___
229  * | '_ \ / _ \ | '_ \ / _ \ '__| |  _| | | | '_ \ / __| __| |/ _ \| '_ \/ __|
230  * | | | |  __/ | |_) |  __/ |    | | | |_| | | | | (__| |_| | (_) | | | \__ \
231  * |_| |_|\___|_| .__/ \___|_|    |_|  \__,_|_| |_|\___|\__|_|\___/|_| |_|___/
232  *            | |
233  *            |_|
234  ******************************************************************************/
235
236 /**
237  * Acquire opcodes if needed and create source and sink nodes.
238  */
239 static void init_rss_special_nodes(ir_graph *irg)
240 {
241         ir_node *block;
242
243         if (op_rss_Source == NULL) {
244                 int iro_rss_base = get_next_ir_opcodes(iro_rss_last);
245                 op_rss_Source = new_ir_op(iro_rss_base + iro_rss_Source, "rss_Source", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
246                 op_rss_Sink   = new_ir_op(iro_rss_base + iro_rss_Sink,   "rss_Sink",   op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
247         }
248         block   = get_irg_start_block(irg);
249         _source = new_ir_node(NULL, irg, block, op_rss_Source, mode_ANY, 0, NULL);
250         _sink   = new_ir_node(NULL, irg, block, op_rss_Sink, mode_ANY, 0, NULL);
251 }
252
253 static int cmp_int(const void *a, const void *b)
254 {
255         const int *i1 = a;
256         const int *i2 = b;
257
258         return QSORT_CMP(*i1, *i2);
259 }
260
261 static int cmp_child_costs(const void *a, const void *b)
262 {
263         const child_t *c1 = a;
264         const child_t *c2 = b;
265
266         return QSORT_CMP(c1->cost, c2->cost);
267 }
268
269 static int cmp_irn_idx(const void *a, const void *b)
270 {
271         const ir_node *n1 = *(ir_node **)a;
272         const ir_node *n2 = *(ir_node **)b;
273
274         return QSORT_CMP(get_irn_idx(n1), get_irn_idx(n2));
275 }
276
277 static int cmp_rss_edges(const void *a, const void *b)
278 {
279         const rss_edge_t *e1 = a;
280         const rss_edge_t *e2 = b;
281
282         return (e1->src != e2->src) || (e1->tgt != e2->tgt);
283 }
284
285 static int bsearch_for_index(int key, int *arr, size_t len, int force)
286 {
287         int left = 0;
288         int right = len;
289
290         while (right >= left) {
291                 int idx = (left + right) / 2;
292
293                 if (key < arr[idx])
294                         right = idx - 1;
295                 else if (key > arr[idx])
296                         left = idx + 1;
297                 else
298                         return idx;
299         }
300
301         if (force)
302                 assert(0 && "Something is wrong, key not found.");
303         return -1;
304 }
305
306 static const ir_node **build_sorted_array_from_list(plist_t *irn_list, struct obstack *obst)
307 {
308         plist_element_t *el;
309         int             i   = 0;
310         int             len = plist_count(irn_list);
311         const ir_node   **arr = (const ir_node**)NEW_ARR_D(ir_node*, obst, len);
312
313         /* copy the list into the array */
314         foreach_plist(irn_list, el) {
315                 arr[i++] = plist_element_get_value(el);
316         }
317
318         /* sort the array by node index */
319         /* HACK cast for MSVC */
320         qsort((void*)arr, len, sizeof(arr[0]), cmp_irn_idx);
321
322         return arr;
323 }
324
325 /*****************************************************
326  *      _      _                       _
327  *     | |    | |                     (_)
328  *   __| | ___| |__  _   _  __ _  __ _ _ _ __   __ _
329  *  / _` |/ _ \ '_ \| | | |/ _` |/ _` | | '_ \ / _` |
330  * | (_| |  __/ |_) | |_| | (_| | (_| | | | | | (_| |
331  *  \__,_|\___|_.__/ \__,_|\__, |\__, |_|_| |_|\__, |
332  *                          __/ | __/ |         __/ |
333  *                         |___/ |___/         |___/
334  *
335  *****************************************************/
336
337 #ifdef DEBUG_libfirm
338 static void dump_nodeset(ir_nodeset_t *ns, const char *prefix)
339 {
340         ir_nodeset_iterator_t iter;
341         ir_node *irn;
342
343         ir_nodeset_iterator_init(&iter, ns);
344         while ( (irn = ir_nodeset_iterator_next(&iter)) != NULL ) {
345                 ir_fprintf(stderr, "%s%+F\n", prefix, irn);
346         }
347 }
348 #endif
349
350 static void build_file_name(rss_t *rss, const char *suffix, size_t suf_len, char *buf, size_t len)
351 {
352         const char *irg_name;
353
354         memset(buf, 0, len);
355         irg_name = get_entity_name(get_irg_entity(rss->irg));
356         snprintf(buf, len - suf_len, "%s-%s-block-%ld",
357                 irg_name, arch_register_class_name(rss->cls), get_irn_node_nr(rss->block));
358         strcat(buf, suffix);
359 }
360
361 /* Dumps all collected bipartite components of current irg as vcg. */
362 static void debug_vcg_dump_bipartite(rss_t *rss)
363 {
364         cbc_t *cbc;
365         FILE  *f;
366         char  file_name[256];
367         static const char suffix[] = "-RSS-CBC.vcg";
368
369         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
370         f = fopen(file_name, "w");
371
372         ir_fprintf(f, "graph: { title: \"connected bipartite component graph of %+F\"\n", rss->irg);
373         fprintf(f, "display_edge_labels: no\n");
374         fprintf(f, "layoutalgorithm: mindepth\n");
375         fprintf(f, "manhattan_edges: yes\n\n");
376
377         foreach_pset(rss->cbc_set, cbc) {
378                 ir_nodeset_iterator_t iter;
379                 ir_node    *n;
380                 rss_edge_t *ke;
381
382                 fprintf(f, "graph: { titel: \"cbc %d\" label: \"cbc %d\" status:clustered color:yellow\n", cbc->nr, cbc->nr);
383                 foreach_ir_nodeset(&cbc->parents, n, iter) {
384                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
385                 }
386                 foreach_ir_nodeset(&cbc->children, n, iter) {
387                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
388                 }
389                 foreach_pset(cbc->kill_edges, ke) {
390                         ir_fprintf(f, "edge: { sourcename: \"n%d_%d\" targetname: \"n%d_%d\" }\n",
391                                 get_irn_node_nr(ke->src), cbc->nr, get_irn_node_nr(ke->tgt), cbc->nr);
392                 }
393                 fprintf(f, "}\n\n");
394         }
395         fprintf(f, "}\n");
396         fclose(f);
397 }
398
399 /* Dump the computed killing function as vcg. */
400 static void debug_vcg_dump_kill(rss_t *rss)
401 {
402         FILE            *f;
403         char            file_name[256];
404         plist_element_t *el;
405         static const char suffix[] = "-RSS-KILL.vcg";
406
407         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
408         f = fopen(file_name, "w");
409
410         ir_fprintf(f, "graph: { title: \"computed kill graph of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
411         fprintf(f, "display_edge_labels: no\n");
412         fprintf(f, "layoutalgorithm: mindepth\n");
413         fprintf(f, "manhattan_edges: yes\n\n");
414
415         {
416                 /* reset dump_flag */
417                 ir_node *irn;
418                 foreach_phase_irn(&rss->ph, irn) {
419                         rss_irn_t *node = get_rss_irn(rss, irn);
420                         node->dumped = 0;
421                 }
422         }
423
424         /* dump all nodes and their killers */
425         foreach_plist(rss->nodes, el) {
426                 ir_node   *irn     = plist_element_get_value(el);
427                 rss_irn_t *rirn    = get_rss_irn(rss, irn);
428                 rss_irn_t *pk_rirn = get_rss_irn(rss, rirn->killer);
429
430                 if (! rirn->dumped) {
431                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
432                         rirn->dumped = 1;
433                 }
434
435                 if (! pk_rirn->dumped) {
436                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(rirn->killer), rirn->killer);
437                         pk_rirn->dumped = 1;
438                 }
439
440                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
441                         get_irn_node_nr(rirn->killer), get_irn_node_nr(irn));
442         }
443
444         fprintf(f, "}\n");
445         fclose(f);
446 }
447
448 /* Dumps the potential killing DAG (PKG) as vcg. */
449 static void debug_vcg_dump_pkg(rss_t *rss, ir_nodeset_t *max_ac, int iteration)
450 {
451         FILE              *f;
452         char              file_name[256];
453         char              suffix[32];
454         static const char suffix1[] = "-RSS-PKG.vcg";
455         static const char suffix2[] = "-RSS-PKG-MAXAC.vcg";
456         plist_element_t   *el;
457
458         if (! max_ac) {
459                 snprintf(suffix, sizeof(suffix), "%s", suffix1);
460         }
461         else {
462                 snprintf(suffix, sizeof(suffix), "-%02d%s", iteration, suffix2);
463         }
464
465         build_file_name(rss, suffix, strlen(suffix) + 1, file_name, sizeof(file_name));
466         f = fopen(file_name, "w");
467
468         ir_fprintf(f, "graph: { title: \"potential killing DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
469         fprintf(f, "display_edge_labels: no\n");
470         fprintf(f, "layoutalgorithm: mindepth\n");
471         fprintf(f, "manhattan_edges: yes\n\n");
472
473         {
474                 /* reset dump_flag */
475                 ir_node *irn;
476                 foreach_phase_irn(&rss->ph, irn) {
477                         rss_irn_t *node = get_rss_irn(rss, irn);
478                         node->dumped = 0;
479                 }
480         }
481
482         foreach_plist(rss->nodes, el) {
483                 ir_node   *irn  = plist_element_get_value(el);
484                 rss_irn_t *rirn = get_rss_irn(rss, irn);
485                 char      *c1   = "";
486                 plist_element_t *k_el;
487
488                 /* dump selected saturating values in yellow */
489                 if (max_ac && ir_nodeset_contains(max_ac, irn))
490                         c1 = "color:yellow";
491
492                 if (! rirn->dumped) {
493                         if (rirn->chain)
494                                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(irn), irn, rirn->chain->nr, c1);
495                         else
496                                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(irn), irn, c1);
497                         rirn->dumped = 1;
498                 }
499
500                 foreach_plist(rirn->pkiller_list, k_el) {
501                         ir_node   *pkiller = plist_element_get_value(k_el);
502                         rss_irn_t *pk_rirn = get_rss_irn(rss, pkiller);
503                         char      *c2      = "";
504
505                         if (max_ac && ir_nodeset_contains(max_ac, pkiller))
506                                 c2 = "color:yellow";
507
508                         if (! pk_rirn->dumped) {
509                                 if (pk_rirn->chain)
510                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(pkiller), pkiller, pk_rirn->chain->nr, c2);
511                                 else
512                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(pkiller), pkiller, c2);
513                                 pk_rirn->dumped = 1;
514                         }
515                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
516                                 get_irn_node_nr(pkiller), get_irn_node_nr(irn));
517                 }
518         }
519         fprintf(f, "}\n");
520         fclose(f);
521 }
522
523 /* Dumps the disjoint value DAG (DVG) as vcg. */
524 static void debug_vcg_dump_dvg(rss_t *rss, dvg_t *dvg)
525 {
526         static const char suffix[] = "-RSS-DVG.vcg";
527         FILE       *f;
528         char       file_name[256];
529         ir_nodeset_iterator_t iter;
530         ir_node    *irn;
531         rss_edge_t *edge;
532
533         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
534         f = fopen(file_name, "w");
535
536         ir_fprintf(f, "graph: { title: \"disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
537         fprintf(f, "display_edge_labels: no\n");
538         fprintf(f, "layoutalgorithm: mindepth\n");
539         fprintf(f, "manhattan_edges: yes\n\n");
540
541         /* dump all nodes */
542         foreach_ir_nodeset(&dvg->nodes, irn, iter) {
543                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
544         }
545
546         /* dump all edges */
547         foreach_pset(dvg->edges, edge) {
548                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
549                         get_irn_node_nr(edge->src), get_irn_node_nr(edge->tgt));
550         }
551
552         fprintf(f, "}\n");
553         fclose(f);
554 }
555
556 #if 0
557 /* Dumps the PKG(DVG). */
558 static void debug_vcg_dump_dvg_pkiller(rss_t *rss, dvg_t *dvg)
559 {
560         static const char suffix[] = "-RSS-DVG-PKG.vcg";
561         FILE       *f;
562         char       file_name[256];
563         ir_nodeset_iterator_t iter;
564         ir_node    *irn;
565
566         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
567         f = fopen(file_name, "w");
568
569         ir_fprintf(f, "graph: { title: \"PKG of disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
570         fprintf(f, "display_edge_labels: no\n");
571         fprintf(f, "layoutalgorithm: mindepth\n");
572         fprintf(f, "manhattan_edges: yes\n\n");
573
574         /* dump all nodes */
575         foreach_ir_nodeset(&dvg->nodes, irn, iter) {
576                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
577         }
578
579         /* dump all edges */
580         foreach_ir_nodeset(&dvg->nodes, irn, iter) {
581                 rss_irn_t       *node = get_rss_irn(rss, irn);
582                 plist_element_t *el;
583
584                 foreach_plist(node->dvg_pkiller_list, el) {
585                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
586                                 get_irn_node_nr(plist_element_get_value(el)), get_irn_node_nr(irn));
587                 }
588         }
589
590         fprintf(f, "}\n");
591         fclose(f);
592 }
593 #endif /* if 0 */
594
595 /**
596  * In case there is no rss information for irn, initialize it.
597  */
598 static void *init_rss_irn(ir_phase *ph, const ir_node *irn, void *old)
599 {
600         rss_irn_t *res = old ? old : phase_alloc(ph, sizeof(res[0]));
601
602         res->descendant_list = plist_obstack_new(phase_obst(ph));
603         res->descendants     = NULL;
604
605         res->consumer_list   = plist_obstack_new(phase_obst(ph));
606         res->consumer        = NULL;
607
608         res->pkiller_list    = plist_obstack_new(phase_obst(ph));
609
610         res->parent_list     = plist_obstack_new(phase_obst(ph));
611
612         res->killer          = NULL;
613         res->irn             = irn;
614         res->chain           = NULL;
615
616         res->kill_count      = 0;
617         res->desc_walk       = 0;
618         res->live_out        = 0;
619         res->visited         = 0;
620         res->handled         = 0;
621         res->dumped          = 0;
622         res->havecons        = 0;
623
624         return res;
625 }
626
627 /**
628  * Collect all nodes data dependent on current node.
629  */
630 static void collect_descendants(rss_t *rss, rss_irn_t *rirn, ir_node *irn, int *got_sink, unsigned cur_desc_walk)
631 {
632         const ir_edge_t *edge;
633         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
634         ir_node         *block    = rss->block;
635         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
636         int             i;
637
638         if (cur_node->desc_walk >= cur_desc_walk)
639                 return;
640         cur_node->desc_walk = cur_desc_walk;
641
642         /* Stop at Barriers */
643         if (be_is_Barrier(irn))
644                 return;
645
646         /* loop over normal and dependency edges */
647         for (i = 0; i < 2; ++i) {
648                 foreach_out_edge_kind(irn, edge, ekind[i]) {
649                         ir_node *user = get_edge_src_irn(edge);
650
651                         /* skip ignore nodes as they do not really contribute to register pressure */
652                         if (arch_irn_is_ignore(user))
653                                 continue;
654
655                         /*
656                                 (a) mode_X means Jump            -> out_edge
657                                 (b) Phi as user of a normal node -> out-edge
658                         */
659                         if (get_irn_mode(user) == mode_X || is_Phi(user)) {
660                                 if (! *got_sink)
661                                         goto force_sink;
662                                 else
663                                         continue;
664                         }
665
666                         if (is_Proj(user)) {
667                                 //if (arch_get_irn_reg_class_out(user) == rss->cls)
668                                         collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
669                         }
670                         else {
671                                 /* check if user lives in block and is not a control flow node */
672                                 if (get_nodes_block(user) == block) {
673                                         if (! plist_has_value(rirn->descendant_list, user)) {
674                                                 plist_insert_back(rirn->descendant_list, user);
675                                                 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", user));
676                                         }
677                                         collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
678                                 }
679                                 else if (! *got_sink) {
680 force_sink:
681                                         /* user lives out of block: add sink as descendant if not already done */
682                                         plist_insert_back(rirn->descendant_list, _sink);
683                                         *got_sink = 1;
684                                         DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", _sink));
685                                 }
686                         }
687                 }
688         }
689 }
690
691 /**
692  * Handles a single consumer.
693  */
694 static void collect_single_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *consumer, int *got_sink)
695 {
696         ir_node *block = rss->block;
697
698         assert(! is_Proj(consumer) && "Cannot handle Projs");
699
700         if (! is_Phi(consumer) && ! is_Block(consumer) && get_nodes_block(consumer) == block) {
701                 if (!arch_irn_is_ignore(consumer) &&
702                                 !plist_has_value(rss_irn->consumer_list, consumer)) {
703                         plist_insert_back(rss_irn->consumer_list, consumer);
704                         DBG((rss->dbg, LEVEL_2, "\t\tconsumer %+F\n", consumer));
705                 }
706         }
707         else {
708                 rss_irn->live_out = 1;
709                 DBG((rss->dbg, LEVEL_2, "\t\tlive out %+F", consumer));
710                 if (! *got_sink) {
711                         plist_insert_back(rss_irn->consumer_list, _sink);
712                         *got_sink = 1;
713                         DB((rss->dbg, LEVEL_2, ", %+F added instead", _sink));
714                 }
715                 DB((rss->dbg, LEVEL_2, "\n"));
716         }
717 }
718
719 /**
720  * Collect all nodes consuming the value(s) produced by current node.
721  */
722 static void collect_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *irn, int *got_sink)
723 {
724         const ir_edge_t *edge;
725         int             i;
726         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
727         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
728
729         if (cur_node->havecons)
730                 return;
731         cur_node->havecons = 1;
732
733         for (i = 0; i < 2; ++i) {
734                 foreach_out_edge_kind(irn, edge, ekind[i]) {
735                         ir_node *consumer = get_edge_src_irn(edge);
736
737                         if (is_Proj(consumer)) {
738                                 //if (arch_get_irn_reg_class_out(consumer) == rss->cls)
739                                         collect_consumer(rss, rss_irn, consumer, got_sink);
740                         }
741                         else
742                                 collect_single_consumer(rss, rss_irn, consumer, got_sink);
743                 }
744         }
745 }
746
747 /**
748  * Collects all consumer and descendant of a irn.
749  */
750 static void collect_node_info(rss_t *rss, ir_node *irn)
751 {
752         static unsigned cur_desc_walk = 0;
753         rss_irn_t       *rss_irn      = get_rss_irn(rss, irn);
754         int             got_sink;
755
756         assert(! is_Proj(irn) && "Cannot handle Projs.");
757
758         /* check if node info is already available */
759         if (rss_irn->handled)
760                 return;
761                 //phase_reinit_single_irn_data(&rss->ph, irn);
762
763         DBG((rss->dbg, LEVEL_1, "\tcomputing consumers of %+F:\n", irn));
764
765         /* collect all consumer */
766         got_sink = 0;
767         collect_consumer(rss, rss_irn, irn, &got_sink);
768
769         /* build sorted consumer array */
770         rss_irn->consumer = build_sorted_array_from_list(rss_irn->consumer_list, phase_obst(&rss->ph));
771
772         DBG((rss->dbg, LEVEL_1, "\tcompute descendants of %+F:\n", irn));
773
774         /* collect descendants */
775         got_sink = 0;
776         collect_descendants(rss, rss_irn, irn, &got_sink, ++cur_desc_walk);
777
778         /* build sorted descendant array */
779         rss_irn->descendants = build_sorted_array_from_list(rss_irn->descendant_list, phase_obst(&rss->ph));
780
781         rss_irn->handled = 1;
782 }
783
784 /**
785  * Checks if v is a potential killer of u.
786  * v is in pkill(u) iff descendants(v) cut consumer(u) is v
787  *
788  * @param rss   The rss object
789  * @param v      The node to check for killer
790  * @param u      The potentially killed value
791  * @return 1 if v is in pkill(u), 0 otherwise
792  */
793 static int is_potential_killer(rss_t *rss, rss_irn_t *v, rss_irn_t *u)
794 {
795         plist_t *list;
796         const ir_node **arr;
797         plist_element_t *el;
798         (void) rss;
799
800         assert(is_Sink(v->irn) || ((plist_count(v->descendant_list) > 0 && v->descendants) || 1));
801         assert(is_Sink(u->irn) || ((plist_count(u->consumer_list)   > 0 && u->consumer)    || 1));
802
803         /* as we loop over the list: loop over the shorter one */
804         if (plist_count(v->descendant_list) > plist_count(u->consumer_list)) {
805                 list = u->consumer_list;
806                 arr  = v->descendants;
807         }
808         else {
809                 list = v->descendant_list;
810                 arr  = u->consumer;
811         }
812
813         /* for each list element: try to find element in array */
814         foreach_plist(list, el) {
815                 ir_node *irn   = plist_element_get_value(el);
816                 ir_node *match = BSEARCH_IRN_ARR(irn, arr);
817
818                 if (match && match != irn)
819                         return 0;
820         }
821
822         return 1;
823 }
824
825 /**
826  * Update descendants, consumer and pkiller list for the given irn.
827  */
828 static void update_node_info(rss_t *rss, ir_node *irn, ir_node *pk_irn)
829 {
830         rss_irn_t *node    = get_rss_irn(rss, irn);
831         rss_irn_t *pkiller = get_rss_irn(rss, pk_irn);
832
833         /* add consumer and rebuild the consumer array */
834         if (! plist_has_value(node->consumer_list, pk_irn)) {
835                 plist_insert_back(node->consumer_list, pk_irn);
836                 node->consumer = build_sorted_array_from_list(node->consumer_list, phase_obst(&rss->ph));
837         }
838
839         /* add pk_irn as descendant, add it's descendants to irn's and rebuild array */
840         if (! plist_has_value(node->descendant_list, pk_irn)) {
841                 plist_element_t *el;
842
843                 plist_insert_back(node->descendant_list, pk_irn);
844
845                 foreach_plist(pkiller->descendant_list, el) {
846                         ir_node *desc = plist_element_get_value(el);
847
848                         if (! plist_has_value(node->descendant_list, desc))
849                                 plist_insert_back(node->descendant_list, desc);
850                 }
851
852                 node->descendants = build_sorted_array_from_list(node->descendant_list, phase_obst(&rss->ph));
853         }
854
855 }
856
857 /**
858  * Compute the potential killing set PK.
859  */
860 static void compute_pkill_set(rss_t *rss)
861 {
862         plist_element_t *u_el, *v_el;
863
864         foreach_plist(rss->nodes, u_el) {
865                 ir_node   *u_irn = plist_element_get_value(u_el);
866                 rss_irn_t *u     = get_rss_irn(rss, u_irn);
867
868                 DBG((rss->dbg, LEVEL_1, "\tcomputing potential killers of %+F:\n", u_irn));
869
870                 /* check each consumer if it is a potential killer  */
871                 foreach_plist(u->consumer_list, v_el) {
872                         ir_node   *v_irn = plist_element_get_value(v_el);
873                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
874
875                         /* check, if v is a potential killer of u */
876                         if (is_potential_killer(rss, v, u)) {
877                                 if (! plist_has_value(u->pkiller_list, v_irn))
878                                         plist_insert_back(u->pkiller_list, v_irn);
879                                 v->kill_count++;
880                                 DBG((rss->dbg, LEVEL_2, "\t\tpotential killer %+F\n", v_irn));
881                         }
882                 }
883
884                 u->killer = _sink;
885         }
886
887         if (rss->opts->dump_flags & RSS_DUMP_PKG)
888                 debug_vcg_dump_pkg(rss, NULL, 0);
889 }
890
891 /**
892  * Build set of killing edges (from values to their potential killers)
893  */
894 static void build_kill_edges(rss_t *rss, pset *epk)
895 {
896         plist_element_t *el, *k_el;
897
898         foreach_plist(rss->nodes, el) {
899                 ir_node    *irn  = plist_element_get_value(el);
900                 rss_irn_t *rirn = get_rss_irn(rss, irn);
901
902                 DBG((rss->dbg, LEVEL_3, "\t\tbuilding kill edges for %+F:\n", irn));
903
904                 foreach_plist(rirn->pkiller_list, k_el) {
905                         ir_node    *pkiller = plist_element_get_value(k_el);
906                         rss_edge_t *ke      = OALLOC(phase_obst(&rss->ph), rss_edge_t);
907
908                         ke->src  = irn;
909                         ke->tgt  = pkiller;
910                         ke->next = NULL;
911
912                         DBG((rss->dbg, LEVEL_3, "\t\t\ttarget %+F\n", pkiller));
913
914                         pset_insert(epk, ke, HASH_RSS_EDGE(ke));
915                 }
916         }
917 }
918
919 #ifdef DEBUG_libfirm
920 /* print the given cbc for debugging purpose */
921 static void debug_print_cbc(firm_dbg_module_t *mod, cbc_t *cbc)
922 {
923         ir_nodeset_iterator_t iter;
924         ir_node    *n;
925         rss_edge_t *ke;
926
927         DBG((mod, LEVEL_3, "\t\tS = set of parents:\n"));
928         foreach_ir_nodeset(&cbc->parents, n, iter) {
929                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
930         }
931         DBG((mod, LEVEL_3, "\t\tT = set of children:\n"));
932         foreach_ir_nodeset(&cbc->children, n, iter) {
933                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
934         }
935         DBG((mod, LEVEL_3, "\t\tE = Edges from producers to consumers\n"));
936         foreach_pset(cbc->kill_edges, ke) {
937                 DBG((mod, LEVEL_3, "\t\t\t%+F -> %+F\n", ke->src, ke->tgt));
938         }
939 }
940 #endif
941
942 /**
943  * Construct the bipartite decomposition.
944  * Sid-Ahmed-Ali Touati, Phd Thesis
945  * Register Pressure in Instruction Level Parallelism, p. 71
946  */
947 static void compute_bipartite_decomposition(rss_t *rss)
948 {
949         pset *epk    = new_pset(cmp_rss_edges, 10);
950         int  cur_num = 0;
951
952         plist_element_t *el;
953
954         DBG((rss->dbg, LEVEL_1, "\tcomputing bipartite decomposition:\n"));
955
956         build_kill_edges(rss, epk);
957
958         foreach_plist(rss->nodes, el) {
959                 ir_node   *u_irn   = plist_element_get_value(el);
960                 rss_irn_t *u       = get_rss_irn(rss, u_irn);
961                 int       p_change = 1;
962                 int       c_change = 1;
963                 int       vrfy_ok  = 1;
964
965                 cbc_t           *cbc;
966                 plist_element_t *el2;
967                 rss_edge_t      *k_edge;
968                 rss_edge_t      *kedge_root = NULL;
969                 ir_node         *t_irn, *s_irn;
970                 ir_nodeset_iterator_t iter;
971
972                 if (u->visited || u_irn == _sink)
973                         continue;
974
975                 DBG((rss->dbg, LEVEL_2, "\t\t%+F choosen:\n", u_irn));
976
977                 cbc     = OALLOC(phase_obst(&rss->ph), cbc_t);
978                 cbc->nr = cur_num++;
979
980                 /* initialize S_cb */
981                 ir_nodeset_init_size(&cbc->parents, 5);
982                 ir_nodeset_insert(&cbc->parents, u_irn);
983                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (init)\n", u_irn));
984
985                 /* E_cb = empty */
986                 cbc->kill_edges = new_pset(cmp_rss_edges, 5);
987
988                 /* each parent gets killed by at least one value from children */
989
990                 /* T_cb = PK_successors(u) */
991                 ir_nodeset_init_size(&cbc->children, 5);
992                 foreach_plist(u->pkiller_list, el2) {
993                         ir_nodeset_insert(&cbc->children, plist_element_get_value(el2));
994                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (init)\n", plist_element_get_value(el2)));
995                 }
996
997                 /*
998                         Now: insert the parents of all children into the parent set
999                         and insert the children of all parents into the children set
1000                         until the sets don't change any more
1001                 */
1002                 while (p_change || c_change) {
1003                         ir_nodeset_iterator_t iter;
1004                         p_change = c_change = 0;
1005
1006                         /* accumulate parents */
1007                         foreach_ir_nodeset(&cbc->children, t_irn, iter) {
1008                                 foreach_pset(epk, k_edge) {
1009                                         ir_node *val = k_edge->src;
1010
1011                                         if (k_edge->tgt == t_irn && ! ir_nodeset_contains(&cbc->parents, val)) {
1012                                                 ir_nodeset_insert(&cbc->parents, val);
1013                                                 p_change = 1;
1014                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (killed by %+F)\n", val, t_irn));
1015                                         }
1016                                 }
1017                         }
1018
1019                         /* accumulate children */
1020                         foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
1021                                 foreach_pset(epk, k_edge) {
1022                                         ir_node *val = k_edge->tgt;
1023
1024                                         if (k_edge->src == s_irn  && ! ir_nodeset_contains(&cbc->children, val)) {
1025                                                 ir_nodeset_insert(&cbc->children, val);
1026                                                 c_change = 1;
1027                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (kills %+F)\n", val, s_irn));
1028                                         }
1029                                 }
1030                         }
1031                 }
1032
1033                 /* mark all parent values as visited */
1034                 foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
1035                         rss_irn_t *s = get_rss_irn(rss, s_irn);
1036                         s->visited = 1;
1037                         /* assure bipartite property */
1038 #if 0
1039                         if (ir_nodeset_contains(&cbc->children, s_irn)) {
1040                                 ir_nodeset_remove(&cbc->children, s_irn);
1041                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F removed from to children\n", s_irn));
1042                         }
1043 #endif
1044                 }
1045
1046                 /* update edges */
1047                 foreach_pset(epk, k_edge) {
1048                         if (ir_nodeset_contains(&cbc->parents, k_edge->src) &&
1049                             ir_nodeset_contains(&cbc->children, k_edge->tgt)) {
1050                                 pset_insert(cbc->kill_edges, k_edge, HASH_RSS_EDGE(k_edge));
1051                                 /*
1052                                         Link all k_edges which are about to be removed together.
1053                                         Beware: pset_remove kills the iterator.
1054                                 */
1055                                 k_edge->next = kedge_root;
1056                                 kedge_root   = k_edge;
1057                         }
1058                 }
1059
1060                 /* remove all linked k_edges */
1061                 for (; kedge_root; kedge_root = kedge_root->next) {
1062                         pset_remove(epk, kedge_root, HASH_RSS_EDGE(kedge_root));
1063                 }
1064
1065                 /* verify the cbc */
1066                 foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
1067                         int is_killed = 0;
1068
1069                         foreach_pset(cbc->kill_edges, k_edge) {
1070                                 if (k_edge->src == s_irn) {
1071                                         is_killed = 1;
1072                                         pset_break(cbc->kill_edges);
1073                                         break;
1074                                 }
1075                         }
1076
1077                         if (! is_killed) {
1078                                 vrfy_ok = 0;
1079                                 ir_fprintf(stderr, "Warning: parent %+F is not killed in current cbc\n", s_irn);
1080                         }
1081                 }
1082                 assert(vrfy_ok && "Verification of CBC failed");
1083
1084                 /* add the connected bipartite component */
1085                 if (ir_nodeset_size(&cbc->parents) > 0 &&
1086                     ir_nodeset_size(&cbc->children) > 0 &&
1087                         pset_count(cbc->kill_edges) > 0)
1088                         pset_insert(rss->cbc_set, cbc, (unsigned)cbc->nr);
1089                 DBG((rss->dbg, LEVEL_2, "\tbipartite component %d inserted:\n", cbc->nr));
1090                 DEBUG_ONLY(
1091                         if (firm_dbg_get_mask(rss->dbg) & LEVEL_2)
1092                                 debug_print_cbc(rss->dbg, cbc);
1093                 );
1094         }
1095
1096         if (rss->opts->dump_flags & RSS_DUMP_CBC)
1097                 debug_vcg_dump_bipartite(rss);
1098
1099         del_pset(epk);
1100 }
1101
1102 /**
1103  * Select the child with the maximum cost.
1104  */
1105 static child_t *select_child_max_cost(rss_t *rss, ir_nodeset_t *x, ir_nodeset_t *y, child_t *t, cbc_t *cbc)
1106 {
1107         ir_node *child;
1108         ir_nodeset_iterator_t iter;
1109         float   max_cost = -1.0f;
1110
1111         DBG((rss->dbg, LEVEL_2, "\t\tcomputing children costs:\n"));
1112
1113         foreach_ir_nodeset(&cbc->children, child, iter) {
1114                 rss_irn_t  *r_child             = get_rss_irn(rss, child);
1115                 int         num_unkilled_parents = 0;
1116                 int         num_descendants;
1117                 rss_edge_t *k_edge;
1118                 float       cost;
1119
1120                 /* get the number of unkilled parents */
1121                 foreach_pset(cbc->kill_edges, k_edge) {
1122                         if (k_edge->tgt == child && ir_nodeset_contains(x, k_edge->src))
1123                                 ++num_unkilled_parents;
1124                 }
1125
1126                 cost = (float)num_unkilled_parents;
1127
1128                 num_descendants = plist_count(r_child->descendant_list) + ir_nodeset_size(y);
1129
1130                 if (num_descendants > 0)
1131                         cost /= (float)num_descendants;
1132
1133                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F, #desc %d, cost %.3f\n", child, num_descendants, cost));
1134
1135                 if (cost > max_cost) {
1136                         t->irn   = child;
1137                         t->cost  = cost;
1138                         max_cost = cost;
1139                 }
1140         }
1141
1142         return t;
1143 }
1144
1145 /**
1146  * Remove all parents from x which are killed by t_irn.
1147  */
1148 static void remove_covered_parents(rss_t *rss, ir_nodeset_t *x, ir_node *t_irn, cbc_t *cbc)
1149 {
1150         rss_irn_t  *t = get_rss_irn(rss, t_irn);
1151         rss_edge_t *k_edge;
1152
1153         DBG((rss->dbg, LEVEL_2, "\t\tremoving parents covered by %+F:\n", t_irn));
1154
1155         foreach_pset(cbc->kill_edges, k_edge) {
1156                 if (k_edge->tgt == t_irn && ir_nodeset_contains(x, k_edge->src)) {
1157                         ir_nodeset_remove(x, k_edge->src);
1158                         plist_insert_back(t->parent_list, k_edge->src);
1159                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", k_edge->src));
1160                 }
1161         }
1162 }
1163
1164 static void update_cumulated_descendent_values(rss_t *rss, ir_nodeset_t *y, ir_node *t_irn)
1165 {
1166         rss_irn_t *t = get_rss_irn(rss, t_irn);
1167         plist_element_t *el;
1168
1169         DBG((rss->dbg, LEVEL_2, "\t\tupdating cumulated descendant value of %+F:\n", t_irn));
1170
1171         foreach_plist(t->descendant_list, el) {
1172                 ir_nodeset_insert(y, plist_element_get_value(el));
1173                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", plist_element_get_value(el)));
1174         }
1175 }
1176
1177 /**
1178  * Greedy-k: a heuristics for the MMA problem
1179  */
1180 static void compute_killing_function(rss_t *rss)
1181 {
1182         cbc_t *cbc;
1183         struct obstack obst;
1184
1185         obstack_init(&obst);
1186
1187         rss->cbc_set = pset_new_ptr(5);
1188         compute_bipartite_decomposition(rss);
1189
1190         /* for all bipartite components do: */
1191         foreach_pset(rss->cbc_set, cbc) {
1192                 ir_node *p;
1193                 ir_nodeset_t x;
1194                 ir_nodeset_t y;
1195                 ir_nodeset_iterator_t iter;
1196                 child_t **sks    = NEW_ARR_F(child_t *, 20);
1197                 int     cur_len  = 0;
1198                 int     cur_size = 20;
1199                 int     i;
1200
1201                 ir_nodeset_init_size(&x, 10);
1202                 ir_nodeset_init_size(&y, 10);
1203
1204                 DBG((rss->dbg, LEVEL_1, "\tcomputing SKS for cbc %d:\n", cbc->nr));
1205                 DBG((rss->dbg, LEVEL_2, "\t\tinitializing parents X:\n"));
1206
1207                 /* X = S_cb (all parents are initially uncovered) */
1208                 foreach_ir_nodeset(&cbc->parents, p, iter) {
1209                         ir_nodeset_insert(&x, p);
1210                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", p));
1211                 }
1212
1213                 /* while X not empty */
1214                 while (ir_nodeset_size(&x) > 0) {
1215                         child_t *t = OALLOCZ(&obst, child_t);
1216
1217                         t = select_child_max_cost(rss, &x, &y, t, cbc);
1218
1219                         if (cur_len >= cur_size) {
1220                                 ARR_EXTO(child_t *, sks, cur_size * 2);
1221                                 cur_size *= 2;
1222                         }
1223
1224                         DBG((rss->dbg, LEVEL_2, "\t\tinsert child %+F (%.3f) into SKS at pos %d\n", t->irn, t->cost, cur_len));
1225
1226                         sks[cur_len++] = t;
1227                         remove_covered_parents(rss, &x, t->irn, cbc);
1228                         update_cumulated_descendent_values(rss, &y, t->irn);
1229                 }
1230
1231                 ARR_SHRINKLEN(sks, cur_len);
1232
1233                 /* sort SKS in increasing cost order */
1234                 qsort(sks, cur_len, sizeof(sks[0]), cmp_child_costs);
1235
1236                 DBG((rss->dbg, LEVEL_2, "\tprocessing SKS for cbc %d:\n", cbc->nr));
1237
1238                 /* build killing function */
1239                 for (i = cur_len - 1; i >= 0; --i) { /* loop over sks in decreasing cost order */
1240                         child_t         *t = sks[i];
1241                         rss_irn_t       *rt = get_rss_irn(rss, t->irn);
1242                         plist_element_t *p_el;
1243
1244                         DBG((rss->dbg, LEVEL_3, "\t\t\tkiller %+F (%.3f):\n", t->irn, t->cost));
1245
1246                         /* kill all unkilled parents of t */
1247                         foreach_plist(rt->parent_list, p_el) {
1248                                 ir_node    *par = plist_element_get_value(p_el);
1249                                 rss_irn_t *rpar = get_rss_irn(rss, par);
1250
1251                                 if (is_Sink(rpar->killer)) {
1252                                         rpar->killer = t->irn;
1253                                         DBG((rss->dbg, LEVEL_2, "\t\tkill %+F\n", rpar->irn));
1254                                 }
1255                                 else {
1256                                         DBG((rss->dbg, LEVEL_3, "\t\t\tkeeping %+F as killer for %+F\n", rpar->killer, rpar->irn));
1257                                 }
1258                         }
1259                 }
1260
1261                 ir_nodeset_destroy(&x);
1262                 ir_nodeset_destroy(&y);
1263                 DEL_ARR_F(sks);
1264         }
1265
1266         if (rss->opts->dump_flags & RSS_DUMP_KILL)
1267                 debug_vcg_dump_kill(rss);
1268
1269         del_pset(rss->cbc_set);
1270         obstack_free(&obst, NULL);
1271 }
1272
1273 /**
1274  * Adds the edge src -> tgt to the dvg. Checks if reverse edge is already there (asserts).
1275  */
1276 static inline void add_dvg_edge(rss_t *rss, dvg_t *dvg, const ir_node *src, const ir_node *tgt, int have_source)
1277 {
1278         rss_edge_t *dvg_edge;
1279         rss_edge_t key;
1280
1281         if (! have_source)
1282                 ir_nodeset_insert(&dvg->nodes, (ir_node *) src);
1283         else
1284                 assert(ir_nodeset_contains(&dvg->nodes, src) && "Wrong assumption");
1285
1286         ir_nodeset_insert(&dvg->nodes, (ir_node *) tgt);
1287
1288         key.src = (ir_node *) tgt;
1289         key.tgt = (ir_node *) src;
1290         assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1291
1292         key.src = (ir_node *) src;
1293         key.tgt = (ir_node *) tgt;
1294         if (NULL != pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1295                 /* add the edge to the DVG */
1296                 dvg_edge = OALLOC(phase_obst(&rss->ph), rss_edge_t);
1297
1298                 dvg_edge->src  = (ir_node *) src;
1299                 dvg_edge->tgt  = (ir_node *) tgt;
1300                 dvg_edge->next = NULL;
1301
1302                 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", src, tgt));
1303                 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1304         }
1305 }
1306
1307 /**
1308  * Computes the disjoint value DAG (DVG).
1309  * BEWARE: It is not made explicitly clear in the Touati paper,
1310  *         but the DVG is meant to be build from the KILLING DAG
1311  */
1312 static void compute_dvg(rss_t *rss, dvg_t *dvg)
1313 {
1314         plist_element_t *el;
1315
1316         DBG((rss->dbg, LEVEL_1, "\tcomputing DVG:\n"));
1317
1318         foreach_plist(rss->nodes, el) {
1319                 ir_node    *u_irn    = plist_element_get_value(el);
1320                 rss_irn_t  *u        = get_rss_irn(rss, u_irn);
1321                 rss_irn_t  *u_killer = get_rss_irn(rss, u->killer);
1322                 int        i;
1323
1324                 /* TODO: omit nodes only having sink as pkiller and killing no one */
1325
1326                 /* add an edge to killer */
1327                 add_dvg_edge(rss, dvg, u_irn, u->killer, 0);
1328
1329                 if (is_Sink(u->killer))
1330                         continue;
1331
1332                 /* We add an edge to every killer, from where we could be reached. */
1333                 for (i = ARR_LEN_SAFE(u_killer->descendants) - 1; i >= 0; --i) {
1334                         add_dvg_edge(rss, dvg, u_irn, u_killer->descendants[i], 1);
1335                 }
1336
1337 #if 0
1338
1339                 foreach_plist(rss->nodes, el2) {
1340                         ir_node *v_irn = plist_element_get_value(el2);
1341
1342                         /*
1343                                 There is an edge (u, v) in the DVG iff v is a descendant of the killer(u).
1344                         */
1345                         if (BSEARCH_IRN_ARR(v_irn, u_kill->descendants)) {
1346                                 rss_edge_t *dvg_edge = OALLOC(phase_obst(&rss->ph), rss_edge_t);
1347                                 rss_edge_t key;
1348
1349                                 /* insert the user into the DVG and append it to the user list of u */
1350                                 ir_nodeset_insert(&dvg->nodes, v_irn);
1351                                 if (! plist_has_value(u->dvg_user_list, v_irn))
1352                                         plist_insert_back(u->dvg_user_list, v_irn);
1353
1354                                 dvg_edge->src  = u_irn;
1355                                 dvg_edge->tgt  = v_irn;
1356                                 dvg_edge->next = NULL;
1357
1358                                 key.src = v_irn;
1359                                 key.tgt = u_irn;
1360
1361                                 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1362
1363                                 /* add the edge to the DVG */
1364                                 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", u_irn, v_irn));
1365                                 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1366                         }
1367                 }
1368 #endif /* if 0 */
1369         }
1370
1371         if (rss->opts->dump_flags & RSS_DUMP_DVG)
1372                 debug_vcg_dump_dvg(rss, dvg);
1373 }
1374
1375 /**
1376  * Updates the dvg structure when a serialization edge from src -> tgt is added.
1377  */
1378 static void update_dvg(rss_t *rss, dvg_t *dvg, rss_irn_t *src, rss_irn_t *tgt)
1379 {
1380         int i, j, idx;
1381         rss_edge_t *edge;
1382         rss_edge_t **arr = ALLOCAN(rss_edge_t*, pset_count(dvg->edges));
1383
1384         /*
1385                 Add an edge from serialization target to serialization src:
1386                 src cannot be alive with target
1387         */
1388         add_dvg_edge(rss, dvg, tgt->irn, src->irn, 0);
1389
1390         /* Add edges to src's descendants as well, they also getting serialized. */
1391         for (i = ARR_LEN_SAFE(src->descendants) - 1; i >= 0; --i) {
1392                 add_dvg_edge(rss, dvg, tgt->irn, src->descendants[i], 1);
1393         }
1394
1395         /* We also have to add edges from targets predecessors in dvg */
1396         idx = 0;
1397         /* We cannot insert elements into set over which we iterate ... */
1398         foreach_pset(dvg->edges, edge) {
1399                 if (edge->tgt == tgt->irn) {
1400                         arr[idx++] = edge;
1401                 }
1402         }
1403
1404         for (i = 0; i < idx; ++i) {
1405                 edge = arr[i];
1406                 add_dvg_edge(rss, dvg, edge->src, src->irn, 1);
1407                 for (j = ARR_LEN_SAFE(src->descendants) - 1; j >= 0; --j) {
1408                         add_dvg_edge(rss, dvg, edge->src, src->descendants[j], 1);
1409                 }
1410         }
1411 }
1412
1413 #if 0
1414 /**
1415  * Accumulate all descendants for root into list.
1416  */
1417 static void accumulate_dvg_descendant_values(rss_t *rss, rss_irn_t *root, plist_t *list)
1418 {
1419         if (plist_count(root->dvg_user_list) > 0) {
1420                 plist_element_t *el;
1421
1422                 foreach_plist(root->dvg_user_list, el) {
1423                         ir_node   *v_irn = plist_element_get_value(el);
1424                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
1425
1426                         /* add v as descendant */
1427                         if (! plist_has_value(list, v_irn)) {
1428                                 plist_insert_back(list, v_irn);
1429                                 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG descendant %+F\n", v_irn));
1430                         }
1431
1432                         /* accumulate v's descendants */
1433                         accumulate_dvg_descendant_values(rss, v, list);
1434                 }
1435         }
1436 }
1437
1438 /**
1439  * Builds the list of potential killers for each node
1440  * in the given DVG.
1441  * Needs the descendant list for all user as sorted array.
1442  */
1443 static void build_dvg_pkiller_list(rss_t *rss, dvg_t *dvg)
1444 {
1445         ir_nodeset_iterator_t iter;
1446         ir_node *irn;
1447
1448         foreach_nodeset(&dvg->nodes, irn, iter) {
1449                 rss_irn_t       *node = get_rss_irn(rss, irn);
1450                 plist_element_t *el, *el2;
1451
1452                 DBG((rss->dbg, DEBUG_DVG, "\t\tbuilding pkiller list for %+F\n", irn));
1453
1454                 /* check each user */
1455                 foreach_plist(node->dvg_user_list, el) {
1456                         ir_node *u_irn = plist_element_get_value(el);
1457
1458                         /* is the current user u_irn not a descendant of any other user -> pkiller */
1459                         foreach_plist(node->dvg_user_list, el2) {
1460                                 ir_node   *v_irn = plist_element_get_value(el2);
1461                                 rss_irn_t *v     = get_rss_irn(rss, v_irn);
1462
1463                                 if (el != el2                             &&
1464                                         ! BSEARCH_IRN_ARR(u_irn, v->dvg_desc) &&
1465                                         ! plist_has_value(node->dvg_pkiller_list, u_irn))
1466                                 {
1467                                         plist_insert_back(node->dvg_pkiller_list, u_irn);
1468                                         DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG pkiller %+F\n", u_irn));
1469                                 }
1470                         }
1471                 }
1472
1473                 node->dvg_pkiller = build_sorted_array_from_list(node->dvg_pkiller_list, phase_obst(&rss->ph));
1474         }
1475
1476         DEBUG_ONLY(
1477                 if (firm_dbg_get_mask(rss->dbg) & DEBUG_DVG)
1478                         debug_vcg_dump_dvg_pkiller(rss, dvg);
1479         );
1480 }
1481
1482 #endif /* if 0 */
1483
1484 /**
1485  * Compute the maximal antichain of the current DVG.
1486  * This is a reimplementation of the MAXIMAL_ANTI_CHAIN function
1487  * from the DDG library 1.1 (DAG.cpp).
1488  */
1489 static ir_nodeset_t *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration)
1490 {
1491         int         n               = ir_nodeset_size(&dvg->nodes);
1492         int         *assignment     = ALLOCAN(int, n);
1493         int         *assignment_rev = ALLOCAN(int, n);
1494         int         *idx_map        = ALLOCAN(int, n);
1495         hungarian_problem_t *bp;
1496         ir_nodeset_t *values, *temp;
1497         ir_nodeset_iterator_t iter;
1498         ir_node     *u_irn;
1499         int         i, j, cost, cur_chain, res;
1500         rss_edge_t  *dvg_edge;
1501
1502 #define MAP_IDX(irn) bsearch_for_index(get_irn_idx(irn), idx_map,  n,  1)
1503
1504         if (pset_count(dvg->edges) == 0)
1505                 return NULL;
1506
1507         bp = hungarian_new(n, n, HUNGARIAN_MATCH_NORMAL);
1508
1509         /*
1510                 At first, we build an index map for the nodes in the DVG,
1511                 because we cannot use the irn idx therefore as the resulting
1512                 bipartite data structure would have around 1.2GB.
1513                 So we limit the size to the number of nodes we have in the DVG
1514                 and build a sorted index map for their irn indices.
1515         */
1516         i = 0;
1517         foreach_ir_nodeset(&dvg->nodes, u_irn, iter) {
1518                 idx_map[i++] = get_irn_idx(u_irn);
1519         }
1520         qsort(idx_map, n, sizeof(idx_map[0]), cmp_int);
1521
1522         foreach_pset(dvg->edges, dvg_edge) {
1523                 int idx_u = MAP_IDX(dvg_edge->src);
1524                 int idx_v = MAP_IDX(dvg_edge->tgt);
1525
1526                 /* add the entry to the bipartite data structure */
1527                 hungarian_add(bp, idx_u, idx_v, 1);
1528                 DBG((rss->dbg, LEVEL_3, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1529                         idx_u, dvg_edge->src, idx_v, dvg_edge->tgt));
1530         }
1531 #if 0
1532         /*
1533                 Add a bipartite entry for each pair of nodes (u, v), where exists a
1534                 path in the DVG from u to v, ie. connect all descendants(v) to v.
1535                 desc_dvg(v) = accumulated descendants of all z in dvg_user_list(v)
1536         */
1537         foreach_ir_nodeset(&dvg->nodes, u_irn, iter) {
1538                 rss_irn_t *u        = get_rss_irn(rss, u_irn);
1539                 int       idx_u_irn = MAP_IDX(u_irn);
1540
1541                 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG descendants of %+F:\n", u_irn));
1542
1543                 //plist_clear(u->dvg_desc_list);
1544                 //accumulate_dvg_descendant_values(rss, u, u->dvg_desc_list);
1545
1546                 /*
1547                         FIXME: The array is build on the phase obstack and we cannot free the data.
1548                         So the array get as many times allocated as this function is called.
1549                 */
1550
1551                 /* build the sorted array for faster searches */
1552                 //u->dvg_desc = build_sorted_array_from_list(u->dvg_desc_list, phase_obst(&rss->ph));
1553
1554                 DBG((rss->dbg, DEBUG_MAX_AC, "\t\tadding bipartite entries of %+F:\n", u_irn));
1555
1556                 /* add the bipartite entries for all descendants */
1557                 for (i = ARR_LEN_SAFE(u->dvg_desc) - 1; i >= 0; --i) {
1558                         rss_irn_t *desc    = get_rss_irn(rss, u->dvg_desc[i]);
1559                         int       idx_desc = MAP_IDX(u->dvg_desc[i]);
1560
1561                         /* add the entry to the bipartite data structure */
1562                         hungarian_add(bp, idx_u_irn, idx_desc, 1);
1563                         DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1564                                 idx_u_irn, u_irn, idx_desc, u->dvg_desc[i]));
1565
1566                         need_matching = 1;
1567                 }
1568         }
1569 #endif
1570
1571         /* We want maximum cardinality matching */
1572         hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);
1573
1574 #if 0
1575         DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG pkiller:\n"));
1576         /* beware: the following function needs the dvg_desc array */
1577         build_dvg_pkiller_list(rss, dvg);
1578 #endif
1579
1580         DBG((rss->dbg, LEVEL_2, "\t\tcomputing bipartite matching\n"));
1581         /*
1582                 The maximum cardinality bipartite matching gives us the minimal
1583                 chain partition, which corresponds to the maximum anti chains.
1584         */
1585         res = hungarian_solve(bp, assignment, &cost, 1);
1586         assert(res == 0 && "Bipartite matching failed!");
1587
1588         hungarian_free(bp);
1589         memset(assignment_rev, -1, n * sizeof(assignment_rev[0]));
1590
1591         /* build the reverse assignment, ie. foreach i -> j, add j -> i */
1592         for (i = 0; i < n; ++i) {
1593                 if (assignment[i] >= 0) {
1594                         int j = assignment[i];
1595                         assignment_rev[j] = i;
1596                 }
1597         }
1598
1599         DBG((rss->dbg, LEVEL_2, "\t\tgot assignment with cost %d\n", cost));
1600         DBG((rss->dbg, LEVEL_3, "\t\t\tassignment   ---   reverse assignment\n", cost));
1601         for (i = 0; i < n; ++i) {
1602                 DBG((rss->dbg, LEVEL_3, "\t\t\t%3d -> %3d         %3d -> %3d\n", i, assignment[i], i, assignment_rev[i]));
1603         }
1604
1605         values    = XMALLOC(ir_nodeset_t);
1606         ir_nodeset_init_size(values, 10);
1607         cur_chain = 0;
1608         /* Construction of the minimal chain partition */
1609         for (j = 0; j < n; ++j) {
1610                 /* check nodes, which did not occur as target */
1611                 if (assignment_rev[j] == -1) {
1612                         int       xj      = idx_map[j];
1613                         ir_node   *xj_irn = get_idx_irn(rss->irg, xj);
1614                         rss_irn_t *xj_rss = get_rss_irn(rss, xj_irn);
1615                         chain_t   *c      = OALLOC(phase_obst(&rss->ph), chain_t);
1616                         int       source;
1617
1618                         /* there was no source for j -> we have a source of a new chain */
1619                         ir_nodeset_insert(values, xj_irn);
1620
1621                         c->elements = plist_obstack_new(phase_obst(&rss->ph));
1622                         c->nr       = cur_chain++;
1623                         plist_insert_back(c->elements, xj_irn);
1624
1625                         xj_rss->chain = c;
1626
1627                         DBG((rss->dbg, LEVEL_2, "\t\tstarting chain %d:\n", c->nr));
1628                         DBG((rss->dbg, LEVEL_2, "\t\t\t%+F (%d)", xj_irn, j));
1629
1630                         /* follow chain, having j as source */
1631                         source = j;
1632                         while (assignment[source] >= 0) {
1633                                 int       target  = assignment[source];
1634                                 int       irn_idx = idx_map[target];
1635                                 ir_node   *irn    = get_idx_irn(rss->irg, irn_idx);
1636                                 rss_irn_t *node   = get_rss_irn(rss, irn);
1637
1638                                 plist_insert_back(c->elements, irn);
1639                                 node->chain = c;
1640
1641                                 DB((rss->dbg, LEVEL_2, " -> %+F (%d)", irn, target));
1642
1643                                 /* new source = last target */
1644                                 source = target;
1645                         }
1646
1647                         DB((rss->dbg, LEVEL_2, "\n"));
1648                 }
1649         }
1650
1651         /*
1652                 Computing the maximal antichain: Select an element from each
1653                 chain such, such it is parallel with the others.
1654         */
1655         DBG((rss->dbg, LEVEL_1, "\t\tcomputing set of saturation values (MAX AC)\n"));
1656         DBG((rss->dbg, LEVEL_3, "\t\tstarting with:\n"));
1657
1658         DEBUG_ONLY(
1659                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_3)
1660                         dump_nodeset(values, "\t\t\t");
1661         )
1662
1663         temp = NULL;
1664         do {
1665                 /*
1666                         We need an explicit array for the values as
1667                         we cannot iterate multiple times over the same
1668                         set at the same time. :-(((((
1669                         TODO Matze: now we can, so rewrite this...
1670                 */
1671                 int     n         = ir_nodeset_size(values);
1672                 int     i         = 0;
1673                 ir_node **val_arr = NEW_ARR_F(ir_node *, n);
1674
1675                 foreach_ir_nodeset(values, u_irn, iter)
1676                         val_arr[i++] = u_irn;
1677
1678                 if (temp) {
1679                         ir_nodeset_destroy(temp);
1680                         free(temp);
1681                 }
1682
1683                 temp = XMALLOC(ir_nodeset_t);
1684                 ir_nodeset_init_size(temp, 10);
1685
1686                 /* Select all nodes from current value set, having another node in the set as descendant. */
1687                 for (i = 0; i < n; ++i) {
1688                         rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1689
1690                         for (j = 0; j < n; ++j) {
1691                                 if (i != j) {
1692                                         rss_edge_t key;
1693
1694                                         key.src = val_arr[i];
1695                                         key.tgt = val_arr[j];
1696
1697                                         if (pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1698                                                 /* v[j] is descendant of u -> remove u and break */
1699                                                 ir_nodeset_insert(temp, (ir_node *) u->irn);
1700                                                 ir_nodeset_remove(values, u->irn);
1701
1702                                                 DBG((rss->dbg, LEVEL_3, "\t\t\tremoving %+F from values, adding it to temp\n", u->irn));
1703
1704                                                 break;
1705                                         }
1706                                 }
1707                         }
1708                 }
1709
1710                 /* Try to insert the chain predecessor of all selected u's */
1711                 foreach_ir_nodeset(temp, u_irn, iter) {
1712                         rss_irn_t       *u  = get_rss_irn(rss, u_irn);
1713                         chain_t         *c  = u->chain;
1714                         plist_element_t *el = plist_find_value(c->elements, u_irn);
1715
1716                         assert(el && "Missing element in chain!");
1717
1718                         /* If u has predecessor in chain: insert the predecessor */
1719                         if (el == plist_element_get_prev(el)) {
1720                                 ir_nodeset_insert(values, plist_element_get_value(el));
1721                                 DBG((rss->dbg, LEVEL_3, "\t\t\tadding %+F to values\n", plist_element_get_value(el)));
1722                         }
1723                 }
1724
1725
1726                 DEL_ARR_F(val_arr);
1727         } while (ir_nodeset_size(temp) > 0);
1728
1729         DBG((rss->dbg, LEVEL_2, "\t\tfinal set:\n"));
1730         DEBUG_ONLY(
1731                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2) {
1732                         dump_nodeset(values, "\t\t\t");
1733                 }
1734         );
1735
1736         if (rss->opts->dump_flags & RSS_DUMP_MAXAC)
1737                 debug_vcg_dump_pkg(rss, values, iteration);
1738
1739         if (temp != NULL) {
1740                 ir_nodeset_destroy(temp);
1741                 free(temp);
1742         }
1743
1744         return values;
1745
1746 #undef MAP_IDX
1747 }
1748
1749 /**
1750  * Computes the best serialization between two nodes of sat_vals.
1751  */
1752 static serialization_t *compute_best_admissible_serialization(rss_t *rss, ir_nodeset_t *sat_vals, serialization_t *ser, int num_regs)
1753 {
1754         int        n                    = ir_nodeset_size(sat_vals);
1755         int        n_idx                = ARR_LEN_SAFE(rss->idx_map);
1756         int        i                    = 0;
1757         ir_node    **val_arr            = ALLOCAN(ir_node*, n);
1758         bitset_t   *bs_sv               = bitset_alloca(n_idx);
1759         bitset_t   *bs_vdesc            = bitset_alloca(n_idx);
1760         bitset_t   *bs_tmp              = bitset_alloca(n_idx);
1761         bitset_t   *bs_ukilldesc        = bitset_alloca(n_idx);
1762         int        best_benefit         = INT_MAX;
1763         int        best_omega2          = INT_MAX;
1764         int        best_benefit_omega20 = INT_MAX;
1765         int        has_omega1           = 0;
1766         int        j, k;
1767         ir_node    *irn;
1768         ir_nodeset_iterator_t iter;
1769         rss_edge_t min_benefit_edge = {NULL, NULL, NULL};
1770         rss_edge_t min_omega20_edge = {NULL, NULL, NULL};
1771         rss_irn_t  *ser_u_omega1 = NULL, *ser_v_omega1 = NULL;
1772         rss_irn_t  *ser_u_omega20 = NULL, *ser_v_omega20 = NULL;
1773
1774         DBG((rss->dbg, LEVEL_1, "\tcomputing admissible serializations:\n"));
1775
1776         /*
1777                 We need an explicit array for the values as
1778                 we cannot iterate multiple times over the same
1779                 set at the same time. :-(((((
1780         */
1781
1782         foreach_ir_nodeset(sat_vals, irn, iter) {
1783                 val_arr[i++] = irn;
1784                 bitset_set(bs_sv, BLOCK_IDX_MAP(rss, irn));
1785         }
1786
1787         /*
1788                 We build all admissible serializations and remember the best found so far.
1789                 For u in sat_vals:
1790                  For v in sat_val:
1791                    if v in pkiller(u): add edge from v to all other pkiller(u)
1792                    else: for all vv in pkiller(u): add edge from v to vv if there exists no path from vv to v
1793         */
1794
1795 /*
1796         A node is unserializable if:
1797         - it has only one killer and this one is Sink
1798     - it kills no other values
1799         In this case there is no serialization which could
1800         reduce the registerpressure
1801 */
1802 #define IS_UNSERIALIZABLE_NODE(rss_node)                  \
1803         (                                                     \
1804                 (                                                 \
1805                         (plist_count(rss_node->pkiller_list) == 1) && \
1806                         is_Sink(rss_node->killer)                  && \
1807                         (rss_node->kill_count                == 0)    \
1808                 )                            ||                   \
1809                 be_is_Barrier(rss_node->irn) ||                   \
1810                 be_is_Keep(rss_node->irn)                         \
1811         )
1812
1813         /* for all u in sat_vals */
1814         for (i = 0; i < n; ++i) {
1815                 rss_irn_t       *u = get_rss_irn(rss, val_arr[i]);
1816                 plist_element_t *el;
1817
1818                 /* ignore nodes where serialization does not help */
1819                 if (IS_UNSERIALIZABLE_NODE(u)) {
1820                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", u->irn));
1821                         continue;
1822                 }
1823
1824                 /* accumulate all descendants of all pkiller(u) */
1825                 bitset_clear_all(bs_ukilldesc);
1826                 foreach_plist(u->pkiller_list, el) {
1827                         ir_node   *irn  = plist_element_get_value(el);
1828                         rss_irn_t *node = get_rss_irn(rss, irn);
1829
1830                         if (! is_Sink(irn))
1831                                 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, irn));
1832                         else
1833                                 continue;
1834
1835                         for (k = ARR_LEN_SAFE(node->descendants) - 1; k >= 0; --k) {
1836                                 if (! is_Sink(node->descendants[k]))
1837                                         bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, node->descendants[k]));
1838                         }
1839                 }
1840
1841                 /* for all v in sat_vals */
1842                 for (j = 0; j < n; ++j) {
1843                         ir_node   *v_irn   = val_arr[j];
1844                         rss_irn_t *v       = get_rss_irn(rss, v_irn);
1845                         unsigned  v_height = get_irn_height(rss->h, v_irn);
1846                         int       omega1, omega2, is_pkiller;
1847
1848                         /* v cannot be serialized with itself
1849                          * ignore nodes where serialization does not help */
1850                         if (i == j || IS_UNSERIALIZABLE_NODE(v)) {
1851 #ifdef DEBUG_libfirm
1852                                 if (i != j)
1853                                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", v->irn));
1854 #endif
1855                                 continue;
1856                         }
1857
1858                         /* get descendants of v */
1859                         bitset_clear_all(bs_vdesc);
1860                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v_irn));
1861                         for (k = ARR_LEN_SAFE(v->descendants) - 1; k >= 0; --k) {
1862                                 if (! is_Sink(v->descendants[k]))
1863                                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v->descendants[k]));
1864                         }
1865
1866                         /* if v is in pkiller(u) */
1867                         is_pkiller = plist_has_value(u->pkiller_list, v_irn);
1868
1869                         /* for all vv in pkiller(u) */
1870                         foreach_plist(u->pkiller_list, el) {
1871                                 ir_node *vv_irn  = plist_element_get_value(el);
1872                                 int     add_edge;
1873
1874                                 if (is_Sink(vv_irn) || is_cfop(vv_irn))
1875                                         continue;
1876
1877                                 if (is_pkiller)
1878                                         add_edge = vv_irn != v_irn && skip_Proj(vv_irn) != skip_Proj(v_irn);
1879                                 else
1880                                         add_edge = ! heights_reachable_in_block(rss->h, skip_Proj(vv_irn), skip_Proj(v_irn));
1881
1882                                 /*
1883                                         As we add an edge from vv -> v, we have to make sure,
1884                                         that there exists no path from v to vv.
1885                                 */
1886
1887                                 if (add_edge) {
1888                                         unsigned vv_height = get_irn_height(rss->h, vv_irn);
1889                                         unsigned critical_path_cost;
1890                                         unsigned mu1, mu2;
1891
1892                                         /*
1893                                                 mu1 = | descendants(v) cut sat_vals |
1894                                                 the number of saturating values which cannot
1895                                                 be simultaneously alive with u
1896                                         */
1897                                         bitset_copy(bs_tmp, bs_vdesc);
1898                                         mu1 = bitset_popcnt(bitset_and(bs_tmp, bs_sv));
1899
1900                                         /*
1901                                                 mu2 = | accum_desc_all_pkiller(u) without descendants(v) |
1902                                         */
1903                                         if (is_pkiller) {
1904                                                 bitset_copy(bs_tmp, bs_ukilldesc);
1905                                                 mu2 = bitset_popcnt(bitset_andnot(bs_tmp, bs_vdesc));
1906                                         }
1907                                         else {
1908                                                 mu2 = 0;
1909                                         }
1910
1911                                         /* omega1 = mu1 - mu2 */
1912                                         omega1 = mu1 - mu2;
1913
1914                                         if (omega1 != 0)
1915                                                 has_omega1 = 1;
1916
1917                                         /* omega2 = increase of critical path */
1918                                         critical_path_cost =
1919                                                 v_height                        /* longest path from v to sink */
1920                                                 + rss->max_height - vv_height   /* longest path from source to vv */
1921                                                 + 1;                            /* edge */
1922
1923                                         /*
1924                                                 If critical_path_cost > max_height -> the new edge
1925                                                 would increase the longest critical path by the difference.
1926                                         */
1927                                         omega2 = critical_path_cost > rss->max_height ? critical_path_cost - rss->max_height : 0;
1928
1929                                         /* this keeps track of the edge with the best benefit */
1930                                         if (omega1 >= num_regs - n && omega1 < best_benefit) {
1931                                                 min_benefit_edge.src = v_irn;
1932                                                 min_benefit_edge.tgt = vv_irn;
1933
1934                                                 ser_u_omega1 = u;
1935                                                 ser_v_omega1 = v;
1936
1937                                                 best_benefit    = omega1;
1938                                                 ser->new_killer = is_pkiller;
1939                                         }
1940
1941                                         /* this keeps track of the edge with the best omega1 costs where omega2 == 0 */
1942                                         if (omega2 == 0 && omega1 >= num_regs - n && omega1 < best_benefit_omega20) {
1943                                                 min_omega20_edge.src = v_irn;
1944                                                 min_omega20_edge.tgt = vv_irn;
1945
1946                                                 ser_u_omega20 = u;
1947                                                 ser_v_omega20 = v;
1948
1949                                                 best_benefit_omega20 = omega1;
1950                                                 ser->new_killer      = is_pkiller;
1951                                         }
1952
1953                                         best_omega2 = MIN(best_omega2, omega2);
1954
1955                                         DBG((rss->dbg, LEVEL_2, "\t\tfound %+F -> %+F (w1 %d, w2 %d)\n",
1956                                                 v_irn, vv_irn, omega1, omega2));
1957                                 } /* if add_edge */
1958                         } /* for all vv in pkiller(u) */
1959                 } /* for all v in sat_vals */
1960         } /* for all u in sat_vals */
1961
1962         if (! has_omega1)
1963                 return NULL;
1964
1965         if (best_omega2 == 0) {
1966                 ser->u         = ser_u_omega20;
1967                 ser->v         = ser_v_omega20;
1968                 ser->edge->src = min_omega20_edge.src;
1969                 ser->edge->tgt = min_omega20_edge.tgt;
1970                 ser->omega1    = best_benefit_omega20;
1971                 ser->omega2    = best_omega2;
1972         }
1973         else {
1974                 ser->u         = ser_u_omega1;
1975                 ser->v         = ser_v_omega1;
1976                 ser->edge->src = min_benefit_edge.src;
1977                 ser->edge->tgt = min_benefit_edge.tgt;
1978                 ser->omega1    = best_benefit;
1979                 ser->omega2    = best_omega2;
1980         }
1981
1982         return ser;
1983
1984 #undef IS_UNSERIALIZABLE_NODE
1985 }
1986
1987 /**
1988  * Perform the value serialization heuristic and add all
1989  * computed serialization edges as dependencies to the irg.
1990  */
1991 static void perform_value_serialization_heuristic(rss_t *rss)
1992 {
1993         bitset_t *arch_nonign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1994         bitset_t *abi_ign_bs     = bitset_alloca(arch_register_class_n_regs(rss->cls));
1995         unsigned available_regs, iteration;
1996         dvg_t    dvg;
1997         ir_nodeset_t *sat_vals;
1998         pset *ser_set = new_pset(cmp_rss_edges, 20);
1999
2000         /* available_regs = R = |arch_non_ignore_regs cut ~abi_ignore_regs| */
2001         arch_put_non_ignore_regs(rss->cls, arch_nonign_bs);
2002         be_abi_put_ignore_regs(rss->abi, rss->cls, abi_ign_bs);
2003         bitset_andnot(arch_nonign_bs, abi_ign_bs);
2004         available_regs  = bitset_popcnt(arch_nonign_bs);
2005         //num_live = pset_count(rss->live_block);
2006         //available_regs -= num_live < available_regs ? num_live : 0;
2007
2008         DBG((rss->dbg, LEVEL_1, "\n\t#available regs: %d\n\n", available_regs));
2009
2010         /*
2011                 At first we need to compute the disjoint value DAG (DVG = {V, E_dv}).
2012                 V    = set of all nodes we are currently interested in
2013                 E_dv = there is an edge from u to v iff v is a descendant of killer(u), forall u, v in V
2014         */
2015         ir_nodeset_init_size(&dvg.nodes, plist_count(rss->nodes));
2016         dvg.edges = new_pset(cmp_rss_edges, plist_count(rss->nodes) * 5);
2017         compute_dvg(rss, &dvg);
2018
2019         /*
2020                 Then we perform the heuristic serialization algorithm
2021                 on the DVG which gives us all necessary serialization
2022                 edges.
2023         */
2024         DBG((rss->dbg, LEVEL_1, "\tcomputing maximal antichain:\n"));
2025         iteration = 0;
2026         sat_vals  = compute_maximal_antichain(rss, &dvg, iteration++);
2027         while (sat_vals && (ir_nodeset_size(sat_vals) > available_regs)) {
2028                 serialization_t *ser, best_ser;
2029                 rss_edge_t      *edge = OALLOC(phase_obst(&rss->ph), rss_edge_t);
2030                 ir_node         *dep_src, *dep_tgt;
2031
2032                 best_ser.edge = edge;
2033                 ser = compute_best_admissible_serialization(rss, sat_vals, &best_ser, available_regs);
2034
2035                 DBG((rss->dbg, LEVEL_1, "\tcurrent register saturation %d, target %d\n", ir_nodeset_size(sat_vals), available_regs));
2036
2037                 if (! ser) {
2038                         DBG((rss->dbg, LEVEL_1, "\tno RS improving serialization found, breaking at iteration %d\n", iteration));
2039                         break;
2040                 }
2041
2042                 /* Insert the serialization as dependency edge into the irg. */
2043                 DBG((rss->dbg, LEVEL_1, "\tserializing %+F -> %+F with edge %+F -> %+F and cost %d, %d\n",
2044                         ser->u->irn, ser->v->irn, ser->edge->src, ser->edge->tgt, ser->omega1, ser->omega2));
2045
2046                 if (pset_find(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge)))
2047                         ir_printf("WARNING: serialization %+F -> %+F computed twice!\n", ser->edge->src, ser->edge->tgt);
2048
2049
2050                 pset_insert(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge));
2051
2052                 /* update the dvg */
2053                 update_dvg(rss, &dvg, ser->v, ser->u);
2054                 update_dvg(rss, &dvg, get_rss_irn(rss, ser->edge->src), get_rss_irn(rss, ser->edge->tgt));
2055                 if (sat_vals != NULL) {
2056                         ir_nodeset_destroy(sat_vals);
2057                         free(sat_vals);
2058                 }
2059
2060                 dep_src = skip_Proj(ser->edge->src);
2061                 dep_tgt = ser->edge->tgt;
2062                 add_irn_dep(dep_src, dep_tgt);
2063
2064                 /* Update descendants, consumer and pkillers of the target */
2065                 update_node_info(rss, ser->edge->tgt, ser->edge->src);
2066
2067                 /* TODO: try to find a cheaper way for updating height information */
2068                 rss->max_height = heights_recompute_block(rss->h, rss->block);
2069
2070                 /* Recompute the antichain for next serialization */
2071                 DBG((rss->dbg, LEVEL_1, "\tre-computing maximal antichain:\n"));
2072                 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
2073         }
2074
2075         ir_nodeset_destroy(&dvg.nodes);
2076         del_pset(dvg.edges);
2077 }
2078
2079 /**
2080  * Do initial calculations for a block.
2081  */
2082 static void process_block(ir_node *block, void *env)
2083 {
2084         rss_t *rss = env;
2085         int   i, n;
2086         const ir_edge_t *edge;
2087
2088         phase_init(&rss->ph, "rss block preprocessor", rss->irg, PHASE_DEFAULT_GROWTH, init_rss_irn, NULL);
2089
2090         DBG((rss->dbg, LEVEL_1, "preprocessing block %+F\n", block));
2091         rss->block = block;
2092
2093         /* build an index map for all nodes in the current block */
2094         i            = 0;
2095         n            = get_irn_n_edges(block);
2096         NEW_ARR_A(int *, rss->idx_map, n);
2097         foreach_out_edge(block, edge) {
2098                 ir_node *irn      = get_edge_src_irn(edge);
2099                 rss->idx_map[i++] = get_irn_idx(irn);
2100         }
2101         qsort(rss->idx_map, n, sizeof(rss->idx_map[0]), cmp_int);
2102         rss->max_height = heights_recompute_block(rss->h, block);
2103
2104         /* loop over all register classes */
2105         for (i = arch_env_get_n_reg_class(rss->arch_env) - 1; i >= 0; --i) {
2106                 const arch_register_class_t *cls = arch_env_get_reg_class(rss->arch_env, i);
2107
2108                 rss->cls = cls;
2109                 DBG((rss->dbg, LEVEL_1, "register class %s\n", arch_register_class_name(cls)));
2110
2111                 /* Get all live value at end of Block having current register class */
2112                 ir_nodeset_init(&rss->live_block);
2113                 be_liveness_end_of_block(rss->liveness, rss->cls, rss->block, &rss->live_block);
2114
2115                 /* reset the list of interesting nodes */
2116                 plist_clear(rss->nodes);
2117                 plist_insert_back(rss->nodes, _sink);
2118
2119                 /* collect all nodes having a certain register class */
2120                 foreach_out_edge(block, edge) {
2121                         ir_node *irn  = get_edge_src_irn(edge);
2122                         ir_mode *mode = get_irn_mode(irn);
2123
2124                         /*
2125                                 We skip:
2126                                 - mode_T nodes (the projs are asked)
2127                                 - mode_X nodes (control flow nodes are always scheduled last)
2128                                 - Keeps (they are always immediately scheduled)
2129                                 - Phi (same as Keep)
2130                         */
2131                         if (mode == mode_T || mode == mode_X || is_Phi(irn))
2132                                 continue;
2133
2134                         /*
2135                                 In case of a proj, we skip
2136                                 - Barrier (they are a Barrier :)
2137                                 - Start
2138                                 - the Proj itself, as it's scheduled always with it's super node
2139                         */
2140                         if (is_Proj(irn)) {
2141                                 ir_node *pred = get_Proj_pred(irn);
2142                                 if (be_is_Barrier(pred) || be_is_Start(pred))
2143                                         continue;
2144                         }
2145
2146                         /* calculate the descendants and consumer for each node in the block */
2147                         collect_node_info(rss, skip_Proj(irn));
2148
2149                         if (be_is_Keep(irn))
2150                                 continue;
2151
2152                         if (!arch_irn_is_ignore(irn) &&
2153                                         arch_get_irn_reg_class_out(irn) == cls) {
2154                                 plist_insert_back(rss->nodes, skip_Proj(irn));
2155                         }
2156                         //}
2157                 }
2158
2159                 /* compute the potential killing set PK(G) */
2160                 compute_pkill_set(rss);
2161
2162                 /* compute the killing function k* */
2163                 compute_killing_function(rss);
2164
2165                 /*
2166                         Compute the heuristic value serialization and
2167                         add the necessary dependencies to the irg.
2168                 */
2169                 perform_value_serialization_heuristic(rss);
2170
2171                 ir_nodeset_destroy(&rss->live_block);
2172         }
2173
2174         phase_free(&rss->ph);
2175 }
2176
2177 /**
2178  * Register the options.
2179  */
2180 void be_init_schedrss(void)
2181 {
2182         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
2183         lc_opt_entry_t *sched_grp = lc_opt_get_grp(be_grp, "sched");
2184         lc_opt_entry_t *rss_grp = lc_opt_get_grp(sched_grp, "rss");
2185
2186         lc_opt_add_table(rss_grp, rss_option_table);
2187 }
2188
2189 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_schedrss);
2190
2191 /**
2192  * Preprocess the irg for scheduling.
2193  */
2194 void rss_schedule_preparation(be_irg_t *birg)
2195 {
2196         ir_graph *irg = be_get_birg_irg(birg);
2197         rss_t rss;
2198
2199         FIRM_DBG_REGISTER(rss.dbg, "firm.be.sched.rss");
2200
2201         //firm_dbg_set_mask(rss.dbg, LEVEL_1 | LEVEL_2 | LEVEL_3);
2202
2203         init_rss_special_nodes(irg);
2204
2205         rss.irg      = irg;
2206         rss.arch_env = be_get_birg_arch_env(birg);
2207         rss.abi      = birg->abi;
2208         rss.h        = heights_new(irg);
2209         rss.nodes    = plist_new();
2210         rss.opts     = &rss_options;
2211         rss.liveness = be_liveness(irg);
2212         be_liveness_assure_sets(rss.liveness);
2213         irg_block_walk_graph(irg, NULL, process_block, &rss);
2214         heights_free(rss.h);
2215         plist_free(rss.nodes);
2216         be_liveness_free(rss.liveness);
2217
2218         if (birg->main_env->options->dump_flags & DUMP_SCHED)
2219                 be_dump(rss.irg, "-rss", dump_ir_block_graph);
2220 }