Remove the unused parameter const arch_env_t *env from arch_irn_get_flags(), arch_irn...
[libfirm] / ir / be / beschedrss.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Implementation of a register saturating list scheduler.
23  * @author      Christian Wuerdig
24  * @date        29.08.2006
25  * @version     $Id$
26  *
27  * Implementation of a register saturating list scheduler
28  * as described in: Sid-Ahmed-Ali Touati
29  * Register Saturation in Superscalar and VLIW Codes
30  */
31 #ifdef HAVE_CONFIG_H
32 #include "config.h"
33 #endif
34
35 #include <limits.h>
36
37 #include "obst.h"
38 #include "debug.h"
39
40 #include "irgraph_t.h"
41 #include "irnode_t.h"
42 #include "iredges_t.h"
43 #include "ircons_t.h"
44 #include "irphase_t.h"
45 #include "irgwalk.h"
46 #include "irdump.h"
47 #include "irtools.h"
48 #include "irbitset.h"
49 #include "irprintf.h"
50 #include "bipartite.h"
51 #include "hungarian.h"
52 #include "plist.h"
53 #include "array_t.h"
54
55 #include "height.h"
56
57 #include "beabi.h"
58 #include "bemodule.h"
59 #include "benode_t.h"
60 #include "besched_t.h"
61 #include "beirg_t.h"
62
63 #include "lc_opts.h"
64 #include "lc_opts_enum.h"
65
66
67 #define ARR_LEN_SAFE(arr) ((arr) != NULL ? ARR_LEN((arr)) : 0)
68
69 #define HASH_RSS_EDGE(edge) ((get_irn_node_nr((edge)->src) << 16) | (get_irn_node_nr((edge)->tgt) & 0xFFFF))
70 #define BSEARCH_IRN_ARR(val, arr) \
71         bsearch(&(val), (arr), ARR_LEN_SAFE((arr)), sizeof((arr)[0]), cmp_irn_idx)
72
73 #define BLOCK_IDX_MAP(rss, irn) bsearch_for_index(get_irn_idx((irn)), (rss)->idx_map, ARR_LEN_SAFE((rss)->idx_map), 1)
74
75 /* the rss options */
76 typedef struct _rss_opts_t {
77         int dump_flags;
78 } rss_opts_t;
79
80 /* Represents a child with associated costs */
81 typedef struct _child {
82         ir_node *irn;
83         float   cost;
84 } child_t;
85
86 /* We need edges for several purposes. */
87 typedef struct _rss_edge {
88         ir_node *src;
89         ir_node *tgt;
90         void    *next;
91 } rss_edge_t;
92
93 /* Represents a connected bipartite component. */
94 typedef struct _cbc {
95         ir_nodeset_t parents;       /**< = S  a set of value producers */
96         ir_nodeset_t children;      /**< = T  a set of value consumers */
97         pset    *kill_edges;    /**< = E  a set of edges (t in T, s in S) such as each s in S gets killed by at least one t in T */
98         int     nr;             /**< a deterministic index for set insertion (used as hash) */
99 } cbc_t;
100
101 /* Represents a disjoint value DAG. */
102 typedef struct _dvg {
103         ir_nodeset_t nodes;
104         pset    *edges;
105 } dvg_t;
106
107 /* Represents a chain of nodes. */
108 typedef struct _chain {
109         plist_t *elements;   /**< List of chain elements */
110         int     nr;          /**< a deterministic index for set insertion (used as hash) */
111 } chain_t;
112
113 typedef struct _rss_irn {
114         plist_t  *consumer_list;    /**< List of consumers */
115         const ir_node **consumer;   /**< Sorted consumer array (needed for faster access) */
116
117         plist_t  *parent_list;      /**< List of parents */
118         plist_t  *pkiller_list;     /**< List of potential killers */
119
120         plist_t  *descendant_list;  /**< List of descendants */
121         const ir_node **descendants;      /**< Sorted descendant array (needed for faster access) */
122
123         const ir_node  *killer;     /**< The selected unique killer */
124         const ir_node  *irn;        /**< The corresponding firm node to this rss_irn */
125
126         chain_t  *chain;            /**< The chain, this node is associated to */
127
128         unsigned desc_walk;         /**< visited flag for collecting descendants */
129         unsigned kill_count;        /**< number of nodes killed by this one */
130
131         unsigned live_out : 1;      /**< irn has consumers outside of it's block */
132         unsigned visited  : 1;      /**< visited flag for bipartite decomposition */
133         unsigned havecons : 1;      /**< visited flag collect consumer */
134         unsigned handled  : 1;      /**< flag indicating whether or not the list structures have been build */
135         unsigned dumped   : 1;      /**< flag indication whether or not this node was dumped */
136 } rss_irn_t;
137
138 /* Represents a serialization edge with associated costs. */
139 typedef struct _serialization {
140         rss_irn_t  *u;       /* the top node */
141         rss_irn_t  *v;       /* the node about to be serialized after u */
142         rss_edge_t *edge;    /* the edge selected for the serialization */
143         int        omega1;   /* estimated: available regs - RS reduction */
144         int        omega2;   /* increase in critical path length */
145         int        new_killer;
146 } serialization_t;
147
148 typedef struct _rss {
149         ir_phase          ph;              /**< Phase to hold some data */
150         heights_t        *h;              /**< The current height object */
151         ir_graph         *irg;            /**< The irg to preprocess */
152         plist_t          *nodes;          /**< The list of interesting nodes */
153         const arch_env_t *arch_env;       /**< The architecture environment */
154         be_abi_irg_t     *abi;            /**< The abi for this irg */
155         pset             *cbc_set;        /**< A set of connected bipartite components */
156         ir_node          *block;          /**< The current block in progress. */
157         int              *idx_map;        /**< Mapping irn indices to per block indices */
158         unsigned         max_height;      /**< maximum height in the current block */
159         rss_opts_t       *opts;           /**< The options */
160         be_lv_t          *liveness;       /**< The liveness information for this irg */
161         ir_nodeset_t      live_block;     /**< Values alive at end of block */
162         const arch_register_class_t *cls; /**< The current register class */
163         DEBUG_ONLY(firm_dbg_module_t *dbg);
164 } rss_t;
165
166 #define get_rss_irn(rss, irn)  (phase_get_or_set_irn_data(&rss->ph, irn))
167
168 /**
169  * We need some special nodes:
170  * a source and a sink for all live-in and live-out values of a block
171  */
172 enum {
173         iro_rss_Source,
174         iro_rss_Sink,
175         iro_rss_last
176 };
177
178 /** The opcode of the rss_Source node once allocated. */
179 static ir_op *op_rss_Source;
180 /** The opcode of the rss_Sink node once allocated. */
181 static ir_op *op_rss_Sink;
182
183 /** The rss_Source node of the current graph. */
184 static ir_node *_source = NULL;
185 /** The rss_Sink node of the current graph. */
186 static ir_node *_sink   = NULL;
187
188 #define is_Source(irn) ((irn) == _source)
189 #define is_Sink(irn)   ((irn) == _sink)
190
191 enum {
192         RSS_DUMP_NONE  = 0,
193         RSS_DUMP_CBC   = 1 << 0,
194         RSS_DUMP_PKG   = 1 << 1,
195         RSS_DUMP_KILL  = 1 << 2,
196         RSS_DUMP_DVG   = 1 << 3,
197         RSS_DUMP_MAXAC = 1 << 4,
198         RSS_DUMP_ALL   = (RSS_DUMP_MAXAC << 1) - 1,
199 };
200
201 static rss_opts_t rss_options = {
202         RSS_DUMP_NONE,
203 };
204
205 static const lc_opt_enum_int_items_t dump_items[] = {
206         { "none",  RSS_DUMP_NONE  },
207         { "cbc",   RSS_DUMP_CBC   },
208         { "pkg",   RSS_DUMP_PKG   },
209         { "kill",  RSS_DUMP_KILL  },
210         { "dvg",   RSS_DUMP_DVG   },
211         { "maxac", RSS_DUMP_MAXAC },
212         { "all",   RSS_DUMP_ALL   },
213         { NULL, 0 }
214 };
215
216 static lc_opt_enum_int_var_t dump_var = {
217         &rss_options.dump_flags, dump_items
218 };
219
220 static const lc_opt_table_entry_t rss_option_table[] = {
221         LC_OPT_ENT_ENUM_MASK("dump", "dump phases", &dump_var),
222         LC_OPT_LAST
223 };
224
225 /******************************************************************************
226  *  _          _                    __                  _   _
227  * | |        | |                  / _|                | | (_)
228  * | |__   ___| |_ __   ___ _ __  | |_ _   _ _ __   ___| |_ _  ___  _ __  ___
229  * | '_ \ / _ \ | '_ \ / _ \ '__| |  _| | | | '_ \ / __| __| |/ _ \| '_ \/ __|
230  * | | | |  __/ | |_) |  __/ |    | | | |_| | | | | (__| |_| | (_) | | | \__ \
231  * |_| |_|\___|_| .__/ \___|_|    |_|  \__,_|_| |_|\___|\__|_|\___/|_| |_|___/
232  *            | |
233  *            |_|
234  ******************************************************************************/
235
236 /**
237  * Acquire opcodes if needed and create source and sink nodes.
238  */
239 static void init_rss_special_nodes(ir_graph *irg) {
240         ir_node *block;
241
242         if (op_rss_Source == NULL) {
243                 int iro_rss_base = get_next_ir_opcodes(iro_rss_last);
244                 op_rss_Source = new_ir_op(iro_rss_base + iro_rss_Source, "rss_Source", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
245                 op_rss_Sink   = new_ir_op(iro_rss_base + iro_rss_Sink,   "rss_Sink",   op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
246         }
247         block   = get_irg_start_block(irg);
248         _source = new_ir_node(NULL, irg, block, op_rss_Source, mode_ANY, 0, NULL);
249         _sink   = new_ir_node(NULL, irg, block, op_rss_Sink, mode_ANY, 0, NULL);
250 }
251
252 static int cmp_int(const void *a, const void *b) {
253         const int *i1 = a;
254         const int *i2 = b;
255
256         return QSORT_CMP(*i1, *i2);
257 }
258
259 static int cmp_child_costs(const void *a, const void *b) {
260         const child_t *c1 = a;
261         const child_t *c2 = b;
262
263         return QSORT_CMP(c1->cost, c2->cost);
264 }
265
266 static int cmp_irn_idx(const void *a, const void *b) {
267         const ir_node *n1 = *(ir_node **)a;
268         const ir_node *n2 = *(ir_node **)b;
269
270         return QSORT_CMP(get_irn_idx(n1), get_irn_idx(n2));
271 }
272
273 static int cmp_rss_edges(const void *a, const void *b) {
274         const rss_edge_t *e1 = a;
275         const rss_edge_t *e2 = b;
276
277         return (e1->src != e2->src) || (e1->tgt != e2->tgt);
278 }
279
280 static int bsearch_for_index(int key, int *arr, size_t len, int force) {
281         int left = 0;
282         int right = len;
283
284         while (right >= left) {
285                 int idx = (left + right) / 2;
286
287                 if (key < arr[idx])
288                         right = idx - 1;
289                 else if (key > arr[idx])
290                         left = idx + 1;
291                 else
292                         return idx;
293         }
294
295         if (force)
296                 assert(0 && "Something is wrong, key not found.");
297         return -1;
298 }
299
300 static const ir_node **build_sorted_array_from_list(plist_t *irn_list, struct obstack *obst) {
301         plist_element_t *el;
302         int             i   = 0;
303         int             len = plist_count(irn_list);
304         const ir_node   **arr = (const ir_node**)NEW_ARR_D(ir_node*, obst, len);
305
306         /* copy the list into the array */
307         foreach_plist(irn_list, el) {
308                 arr[i++] = plist_element_get_value(el);
309         }
310
311         /* sort the array by node index */
312         /* HACK cast for MSVC */
313         qsort((void*)arr, len, sizeof(arr[0]), cmp_irn_idx);
314
315         return arr;
316 }
317
318 /*****************************************************
319  *      _      _                       _
320  *     | |    | |                     (_)
321  *   __| | ___| |__  _   _  __ _  __ _ _ _ __   __ _
322  *  / _` |/ _ \ '_ \| | | |/ _` |/ _` | | '_ \ / _` |
323  * | (_| |  __/ |_) | |_| | (_| | (_| | | | | | (_| |
324  *  \__,_|\___|_.__/ \__,_|\__, |\__, |_|_| |_|\__, |
325  *                          __/ | __/ |         __/ |
326  *                         |___/ |___/         |___/
327  *
328  *****************************************************/
329
330 #ifdef DEBUG_libfirm
331 static void dump_nodeset(ir_nodeset_t *ns, const char *prefix) {
332         ir_nodeset_iterator_t iter;
333         ir_node *irn;
334
335         ir_nodeset_iterator_init(&iter, ns);
336         while( (irn = ir_nodeset_iterator_next(&iter)) != NULL ) {
337                 ir_fprintf(stderr, "%s%+F\n", prefix, irn);
338         }
339 }
340 #endif
341
342 static void build_file_name(rss_t *rss, const char *suffix, size_t suf_len, char *buf, size_t len) {
343         const char *irg_name;
344
345         memset(buf, 0, len);
346         irg_name = get_entity_name(get_irg_entity(rss->irg));
347         snprintf(buf, len - suf_len, "%s-%s-block-%ld",
348                 irg_name, arch_register_class_name(rss->cls), get_irn_node_nr(rss->block));
349         strcat(buf, suffix);
350 }
351
352 /* Dumps all collected bipartite components of current irg as vcg. */
353 static void debug_vcg_dump_bipartite(rss_t *rss) {
354         cbc_t *cbc;
355         FILE  *f;
356         char  file_name[256];
357         static const char suffix[] = "-RSS-CBC.vcg";
358
359         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
360         f = fopen(file_name, "w");
361
362         ir_fprintf(f, "graph: { title: \"connected bipartite component graph of %+F\"\n", rss->irg);
363         fprintf(f, "display_edge_labels: no\n");
364         fprintf(f, "layoutalgorithm: mindepth\n");
365         fprintf(f, "manhattan_edges: yes\n\n");
366
367         foreach_pset(rss->cbc_set, cbc) {
368                 ir_nodeset_iterator_t iter;
369                 ir_node    *n;
370                 rss_edge_t *ke;
371
372                 fprintf(f, "graph: { titel: \"cbc %d\" label: \"cbc %d\" status:clustered color:yellow\n", cbc->nr, cbc->nr);
373                 foreach_ir_nodeset(&cbc->parents, n, iter) {
374                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
375                 }
376                 foreach_ir_nodeset(&cbc->children, n, iter) {
377                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
378                 }
379                 foreach_pset(cbc->kill_edges, ke) {
380                         ir_fprintf(f, "edge: { sourcename: \"n%d_%d\" targetname: \"n%d_%d\" }\n",
381                                 get_irn_node_nr(ke->src), cbc->nr, get_irn_node_nr(ke->tgt), cbc->nr);
382                 }
383                 fprintf(f, "}\n\n");
384         }
385         fprintf(f, "}\n");
386         fclose(f);
387 }
388
389 /* Dump the computed killing function as vcg. */
390 static void debug_vcg_dump_kill(rss_t *rss) {
391         FILE            *f;
392         char            file_name[256];
393         plist_element_t *el;
394         static const char suffix[] = "-RSS-KILL.vcg";
395
396         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
397         f = fopen(file_name, "w");
398
399         ir_fprintf(f, "graph: { title: \"computed kill graph of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
400         fprintf(f, "display_edge_labels: no\n");
401         fprintf(f, "layoutalgorithm: mindepth\n");
402         fprintf(f, "manhattan_edges: yes\n\n");
403
404         {
405                 /* reset dump_flag */
406                 ir_node *irn;
407                 foreach_phase_irn(&rss->ph, irn) {
408                         rss_irn_t *node = get_rss_irn(rss, irn);
409                         node->dumped = 0;
410                 }
411         }
412
413         /* dump all nodes and their killers */
414         foreach_plist(rss->nodes, el) {
415                 ir_node   *irn     = plist_element_get_value(el);
416                 rss_irn_t *rirn    = get_rss_irn(rss, irn);
417                 rss_irn_t *pk_rirn = get_rss_irn(rss, rirn->killer);
418
419                 if (! rirn->dumped) {
420                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
421                         rirn->dumped = 1;
422                 }
423
424                 if (! pk_rirn->dumped) {
425                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(rirn->killer), rirn->killer);
426                         pk_rirn->dumped = 1;
427                 }
428
429                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
430                         get_irn_node_nr(rirn->killer), get_irn_node_nr(irn));
431         }
432
433         fprintf(f, "}\n");
434         fclose(f);
435 }
436
437 /* Dumps the potential killing DAG (PKG) as vcg. */
438 static void debug_vcg_dump_pkg(rss_t *rss, ir_nodeset_t *max_ac, int iteration) {
439         FILE              *f;
440         char              file_name[256];
441         char              *suffix   = alloca(32);
442         static const char suffix1[] = "-RSS-PKG.vcg";
443         static const char suffix2[] = "-RSS-PKG-MAXAC.vcg";
444         plist_element_t   *el;
445
446         if (! max_ac) {
447                 snprintf(suffix, 32, "%s", suffix1);
448         }
449         else {
450                 snprintf(suffix, 32, "-%02d%s", iteration, suffix2);
451         }
452
453         build_file_name(rss, suffix, strlen(suffix) + 1, file_name, sizeof(file_name));
454         f = fopen(file_name, "w");
455
456         ir_fprintf(f, "graph: { title: \"potential killing DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
457         fprintf(f, "display_edge_labels: no\n");
458         fprintf(f, "layoutalgorithm: mindepth\n");
459         fprintf(f, "manhattan_edges: yes\n\n");
460
461         {
462                 /* reset dump_flag */
463                 ir_node *irn;
464                 foreach_phase_irn(&rss->ph, irn) {
465                         rss_irn_t *node = get_rss_irn(rss, irn);
466                         node->dumped = 0;
467                 }
468         }
469
470         foreach_plist(rss->nodes, el) {
471                 ir_node   *irn  = plist_element_get_value(el);
472                 rss_irn_t *rirn = get_rss_irn(rss, irn);
473                 char      *c1   = "";
474                 plist_element_t *k_el;
475
476                 /* dump selected saturating values in yellow */
477                 if (max_ac && ir_nodeset_contains(max_ac, irn))
478                         c1 = "color:yellow";
479
480                 if (! rirn->dumped) {
481                         if (rirn->chain)
482                                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(irn), irn, rirn->chain->nr, c1);
483                         else
484                                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(irn), irn, c1);
485                         rirn->dumped = 1;
486                 }
487
488                 foreach_plist(rirn->pkiller_list, k_el) {
489                         ir_node   *pkiller = plist_element_get_value(k_el);
490                         rss_irn_t *pk_rirn = get_rss_irn(rss, pkiller);
491                         char      *c2      = "";
492
493                         if (max_ac && ir_nodeset_contains(max_ac, pkiller))
494                                 c2 = "color:yellow";
495
496                         if (! pk_rirn->dumped) {
497                                 if (pk_rirn->chain)
498                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(pkiller), pkiller, pk_rirn->chain->nr, c2);
499                                 else
500                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(pkiller), pkiller, c2);
501                                 pk_rirn->dumped = 1;
502                         }
503                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
504                                 get_irn_node_nr(pkiller), get_irn_node_nr(irn));
505                 }
506         }
507         fprintf(f, "}\n");
508         fclose(f);
509 }
510
511 /* Dumps the disjoint value DAG (DVG) as vcg. */
512 static void debug_vcg_dump_dvg(rss_t *rss, dvg_t *dvg) {
513         static const char suffix[] = "-RSS-DVG.vcg";
514         FILE       *f;
515         char       file_name[256];
516         ir_nodeset_iterator_t iter;
517         ir_node    *irn;
518         rss_edge_t *edge;
519
520         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
521         f = fopen(file_name, "w");
522
523         ir_fprintf(f, "graph: { title: \"disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
524         fprintf(f, "display_edge_labels: no\n");
525         fprintf(f, "layoutalgorithm: mindepth\n");
526         fprintf(f, "manhattan_edges: yes\n\n");
527
528         /* dump all nodes */
529         foreach_ir_nodeset(&dvg->nodes, irn, iter) {
530                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
531         }
532
533         /* dump all edges */
534         foreach_pset(dvg->edges, edge) {
535                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
536                         get_irn_node_nr(edge->src), get_irn_node_nr(edge->tgt));
537         }
538
539         fprintf(f, "}\n");
540         fclose(f);
541 }
542
543 #if 0
544 /* Dumps the PKG(DVG). */
545 static void debug_vcg_dump_dvg_pkiller(rss_t *rss, dvg_t *dvg) {
546         static const char suffix[] = "-RSS-DVG-PKG.vcg";
547         FILE       *f;
548         char       file_name[256];
549         ir_nodeset_iterator_t iter;
550         ir_node    *irn;
551
552         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
553         f = fopen(file_name, "w");
554
555         ir_fprintf(f, "graph: { title: \"PKG of disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
556         fprintf(f, "display_edge_labels: no\n");
557         fprintf(f, "layoutalgorithm: mindepth\n");
558         fprintf(f, "manhattan_edges: yes\n\n");
559
560         /* dump all nodes */
561         foreach_ir_nodeset(&dvg->nodes, irn, iter) {
562                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
563         }
564
565         /* dump all edges */
566         foreach_ir_nodeset(&dvg->nodes, irn, iter) {
567                 rss_irn_t       *node = get_rss_irn(rss, irn);
568                 plist_element_t *el;
569
570                 foreach_plist(node->dvg_pkiller_list, el) {
571                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
572                                 get_irn_node_nr(plist_element_get_value(el)), get_irn_node_nr(irn));
573                 }
574         }
575
576         fprintf(f, "}\n");
577         fclose(f);
578 }
579 #endif /* if 0 */
580
581 /**
582  * In case there is no rss information for irn, initialize it.
583  */
584 static void *init_rss_irn(ir_phase *ph, const ir_node *irn, void *old) {
585         rss_irn_t *res = old ? old : phase_alloc(ph, sizeof(res[0]));
586
587         res->descendant_list = plist_obstack_new(phase_obst(ph));
588         res->descendants     = NULL;
589
590         res->consumer_list   = plist_obstack_new(phase_obst(ph));
591         res->consumer        = NULL;
592
593         res->pkiller_list    = plist_obstack_new(phase_obst(ph));
594
595         res->parent_list     = plist_obstack_new(phase_obst(ph));
596
597         res->killer          = NULL;
598         res->irn             = irn;
599         res->chain           = NULL;
600
601         res->kill_count      = 0;
602         res->desc_walk       = 0;
603         res->live_out        = 0;
604         res->visited         = 0;
605         res->handled         = 0;
606         res->dumped          = 0;
607         res->havecons        = 0;
608
609         return res;
610 }
611
612 /**
613  * Collect all nodes data dependent on current node.
614  */
615 static void collect_descendants(rss_t *rss, rss_irn_t *rirn, ir_node *irn, int *got_sink, unsigned cur_desc_walk) {
616         const ir_edge_t *edge;
617         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
618         ir_node         *block    = rss->block;
619         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
620         int             i;
621
622         if (cur_node->desc_walk >= cur_desc_walk)
623                 return;
624         cur_node->desc_walk = cur_desc_walk;
625
626         /* Stop at Barriers */
627         if (be_is_Barrier(irn))
628                 return;
629
630         /* loop over normal and dependency edges */
631         for (i = 0; i < 2; ++i) {
632                 foreach_out_edge_kind(irn, edge, ekind[i]) {
633                         ir_node *user = get_edge_src_irn(edge);
634
635                         /* skip ignore nodes as they do not really contribute to register pressure */
636                         if (arch_irn_is(user, ignore))
637                                 continue;
638
639                         /*
640                                 (a) mode_X means Jump            -> out_edge
641                                 (b) Phi as user of a normal node -> out-edge
642                         */
643                         if (get_irn_mode(user) == mode_X || is_Phi(user)) {
644                                 if (! *got_sink)
645                                         goto force_sink;
646                                 else
647                                         continue;
648                         }
649
650                         if (is_Proj(user)) {
651                                 //if (arch_get_irn_reg_class(user, -1) == rss->cls)
652                                         collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
653                         }
654                         else {
655                                 /* check if user lives in block and is not a control flow node */
656                                 if (get_nodes_block(user) == block) {
657                                         if (! plist_has_value(rirn->descendant_list, user)) {
658                                                 plist_insert_back(rirn->descendant_list, user);
659                                                 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", user));
660                                         }
661                                         collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
662                                 }
663                                 else if (! *got_sink) {
664 force_sink:
665                                         /* user lives out of block: add sink as descendant if not already done */
666                                         plist_insert_back(rirn->descendant_list, _sink);
667                                         *got_sink = 1;
668                                         DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", _sink));
669                                 }
670                         }
671                 }
672         }
673 }
674
675 /**
676  * Handles a single consumer.
677  */
678 static void collect_single_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *consumer, int *got_sink) {
679         ir_node *block = rss->block;
680
681         assert(! is_Proj(consumer) && "Cannot handle Projs");
682
683         if (! is_Phi(consumer) && ! is_Block(consumer) && get_nodes_block(consumer) == block) {
684                 if (!arch_irn_is(consumer, ignore) &&
685                                 !plist_has_value(rss_irn->consumer_list, consumer)) {
686                         plist_insert_back(rss_irn->consumer_list, consumer);
687                         DBG((rss->dbg, LEVEL_2, "\t\tconsumer %+F\n", consumer));
688                 }
689         }
690         else {
691                 rss_irn->live_out = 1;
692                 DBG((rss->dbg, LEVEL_2, "\t\tlive out %+F", consumer));
693                 if (! *got_sink) {
694                         plist_insert_back(rss_irn->consumer_list, _sink);
695                         *got_sink = 1;
696                         DB((rss->dbg, LEVEL_2, ", %+F added instead", _sink));
697                 }
698                 DB((rss->dbg, LEVEL_2, "\n"));
699         }
700 }
701
702 /**
703  * Collect all nodes consuming the value(s) produced by current node.
704  */
705 static void collect_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *irn, int *got_sink) {
706         const ir_edge_t *edge;
707         int             i;
708         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
709         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
710
711         if (cur_node->havecons)
712                 return;
713         cur_node->havecons = 1;
714
715         for (i = 0; i < 2; ++i) {
716                 foreach_out_edge_kind(irn, edge, ekind[i]) {
717                         ir_node *consumer = get_edge_src_irn(edge);
718
719                         if (is_Proj(consumer)) {
720                                 //if (arch_get_irn_reg_class(consumer, -1) == rss->cls)
721                                         collect_consumer(rss, rss_irn, consumer, got_sink);
722                         }
723                         else
724                                 collect_single_consumer(rss, rss_irn, consumer, got_sink);
725                 }
726         }
727 }
728
729 /**
730  * Collects all consumer and descendant of a irn.
731  */
732 static void collect_node_info(rss_t *rss, ir_node *irn) {
733         static unsigned cur_desc_walk = 0;
734         rss_irn_t       *rss_irn      = get_rss_irn(rss, irn);
735         int             got_sink;
736
737         assert(! is_Proj(irn) && "Cannot handle Projs.");
738
739         /* check if node info is already available */
740         if (rss_irn->handled)
741                 return;
742                 //phase_reinit_single_irn_data(&rss->ph, irn);
743
744         DBG((rss->dbg, LEVEL_1, "\tcomputing consumers of %+F:\n", irn));
745
746         /* collect all consumer */
747         got_sink = 0;
748         collect_consumer(rss, rss_irn, irn, &got_sink);
749
750         /* build sorted consumer array */
751         rss_irn->consumer = build_sorted_array_from_list(rss_irn->consumer_list, phase_obst(&rss->ph));
752
753         DBG((rss->dbg, LEVEL_1, "\tcompute descendants of %+F:\n", irn));
754
755         /* collect descendants */
756         got_sink = 0;
757         collect_descendants(rss, rss_irn, irn, &got_sink, ++cur_desc_walk);
758
759         /* build sorted descendant array */
760         rss_irn->descendants = build_sorted_array_from_list(rss_irn->descendant_list, phase_obst(&rss->ph));
761
762         rss_irn->handled = 1;
763 }
764
765 /**
766  * Checks if v is a potential killer of u.
767  * v is in pkill(u) iff descendants(v) cut consumer(u) is v
768  *
769  * @param rss   The rss object
770  * @param v      The node to check for killer
771  * @param u      The potentially killed value
772  * @return 1 if v is in pkill(u), 0 otherwise
773  */
774 static int is_potential_killer(rss_t *rss, rss_irn_t *v, rss_irn_t *u) {
775         plist_t *list;
776         const ir_node **arr;
777         plist_element_t *el;
778         (void) rss;
779
780         assert(is_Sink(v->irn) || ((plist_count(v->descendant_list) > 0 && v->descendants) || 1));
781         assert(is_Sink(u->irn) || ((plist_count(u->consumer_list)   > 0 && u->consumer)    || 1));
782
783         /* as we loop over the list: loop over the shorter one */
784         if (plist_count(v->descendant_list) > plist_count(u->consumer_list)) {
785                 list = u->consumer_list;
786                 arr  = v->descendants;
787         }
788         else {
789                 list = v->descendant_list;
790                 arr  = u->consumer;
791         }
792
793         /* for each list element: try to find element in array */
794         foreach_plist(list, el) {
795                 ir_node *irn   = plist_element_get_value(el);
796                 ir_node *match = BSEARCH_IRN_ARR(irn, arr);
797
798                 if (match && match != irn)
799                         return 0;
800         }
801
802         return 1;
803 }
804
805 /**
806  * Update descendants, consumer and pkiller list for the given irn.
807  */
808 static void update_node_info(rss_t *rss, ir_node *irn, ir_node *pk_irn) {
809         rss_irn_t *node    = get_rss_irn(rss, irn);
810         rss_irn_t *pkiller = get_rss_irn(rss, pk_irn);
811
812         /* add consumer and rebuild the consumer array */
813         if (! plist_has_value(node->consumer_list, pk_irn)) {
814                 plist_insert_back(node->consumer_list, pk_irn);
815                 node->consumer = build_sorted_array_from_list(node->consumer_list, phase_obst(&rss->ph));
816         }
817
818         /* add pk_irn as descendant, add it's descendants to irn's and rebuild array */
819         if (! plist_has_value(node->descendant_list, pk_irn)) {
820                 plist_element_t *el;
821
822                 plist_insert_back(node->descendant_list, pk_irn);
823
824                 foreach_plist(pkiller->descendant_list, el) {
825                         ir_node *desc = plist_element_get_value(el);
826
827                         if (! plist_has_value(node->descendant_list, desc))
828                                 plist_insert_back(node->descendant_list, desc);
829                 }
830
831                 node->descendants = build_sorted_array_from_list(node->descendant_list, phase_obst(&rss->ph));
832         }
833
834 }
835
836 /**
837  * Compute the potential killing set PK.
838  */
839 static void compute_pkill_set(rss_t *rss) {
840         plist_element_t *u_el, *v_el;
841
842         foreach_plist(rss->nodes, u_el) {
843                 ir_node   *u_irn = plist_element_get_value(u_el);
844                 rss_irn_t *u     = get_rss_irn(rss, u_irn);
845
846                 DBG((rss->dbg, LEVEL_1, "\tcomputing potential killers of %+F:\n", u_irn));
847
848                 /* check each consumer if it is a potential killer  */
849                 foreach_plist(u->consumer_list, v_el) {
850                         ir_node   *v_irn = plist_element_get_value(v_el);
851                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
852
853                         /* check, if v is a potential killer of u */
854                         if (is_potential_killer(rss, v, u)) {
855                                 if (! plist_has_value(u->pkiller_list, v_irn))
856                                         plist_insert_back(u->pkiller_list, v_irn);
857                                 v->kill_count++;
858                                 DBG((rss->dbg, LEVEL_2, "\t\tpotential killer %+F\n", v_irn));
859                         }
860                 }
861
862                 u->killer = _sink;
863         }
864
865         if (rss->opts->dump_flags & RSS_DUMP_PKG)
866                 debug_vcg_dump_pkg(rss, NULL, 0);
867 }
868
869 /**
870  * Build set of killing edges (from values to their potential killers)
871  */
872 static void build_kill_edges(rss_t *rss, pset *epk) {
873         plist_element_t *el, *k_el;
874
875         foreach_plist(rss->nodes, el) {
876                 ir_node    *irn  = plist_element_get_value(el);
877                 rss_irn_t *rirn = get_rss_irn(rss, irn);
878
879                 DBG((rss->dbg, LEVEL_3, "\t\tbuilding kill edges for %+F:\n", irn));
880
881                 foreach_plist(rirn->pkiller_list, k_el) {
882                         ir_node    *pkiller = plist_element_get_value(k_el);
883                         rss_edge_t *ke      = obstack_alloc(phase_obst(&rss->ph), sizeof(*ke));
884
885                         ke->src  = irn;
886                         ke->tgt  = pkiller;
887                         ke->next = NULL;
888
889                         DBG((rss->dbg, LEVEL_3, "\t\t\ttarget %+F\n", pkiller));
890
891                         pset_insert(epk, ke, HASH_RSS_EDGE(ke));
892                 }
893         }
894 }
895
896 #ifdef DEBUG_libfirm
897 /* print the given cbc for debugging purpose */
898 static void debug_print_cbc(firm_dbg_module_t *mod, cbc_t *cbc) {
899         ir_nodeset_iterator_t iter;
900         ir_node    *n;
901         rss_edge_t *ke;
902
903         DBG((mod, LEVEL_3, "\t\tS = set of parents:\n"));
904         foreach_ir_nodeset(&cbc->parents, n, iter) {
905                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
906         }
907         DBG((mod, LEVEL_3, "\t\tT = set of children:\n"));
908         foreach_ir_nodeset(&cbc->children, n, iter) {
909                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
910         }
911         DBG((mod, LEVEL_3, "\t\tE = Edges from producers to consumers\n"));
912         foreach_pset(cbc->kill_edges, ke) {
913                 DBG((mod, LEVEL_3, "\t\t\t%+F -> %+F\n", ke->src, ke->tgt));
914         }
915 }
916 #endif
917
918 /**
919  * Construct the bipartite decomposition.
920  * Sid-Ahmed-Ali Touati, Phd Thesis
921  * Register Pressure in Instruction Level Parallelism, p. 71
922  */
923 static void compute_bipartite_decomposition(rss_t *rss) {
924         pset *epk    = new_pset(cmp_rss_edges, 10);
925         int  cur_num = 0;
926
927         plist_element_t *el;
928
929         DBG((rss->dbg, LEVEL_1, "\tcomputing bipartite decomposition:\n"));
930
931         build_kill_edges(rss, epk);
932
933         foreach_plist(rss->nodes, el) {
934                 ir_node   *u_irn   = plist_element_get_value(el);
935                 rss_irn_t *u       = get_rss_irn(rss, u_irn);
936                 int       p_change = 1;
937                 int       c_change = 1;
938                 int       vrfy_ok  = 1;
939
940                 cbc_t           *cbc;
941                 plist_element_t *el2;
942                 rss_edge_t      *k_edge;
943                 rss_edge_t      *kedge_root = NULL;
944                 ir_node         *t_irn, *s_irn;
945                 ir_nodeset_iterator_t iter;
946
947                 if (u->visited || u_irn == _sink)
948                         continue;
949
950                 DBG((rss->dbg, LEVEL_2, "\t\t%+F choosen:\n", u_irn));
951
952                 cbc     = obstack_alloc(phase_obst(&rss->ph), sizeof(*cbc));
953                 cbc->nr = cur_num++;
954
955                 /* initialize S_cb */
956                 ir_nodeset_init_size(&cbc->parents, 5);
957                 ir_nodeset_insert(&cbc->parents, u_irn);
958                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (init)\n", u_irn));
959
960                 /* E_cb = empty */
961                 cbc->kill_edges = new_pset(cmp_rss_edges, 5);
962
963                 /* each parent gets killed by at least one value from children */
964
965                 /* T_cb = PK_successors(u) */
966                 ir_nodeset_init_size(&cbc->children, 5);
967                 foreach_plist(u->pkiller_list, el2) {
968                         ir_nodeset_insert(&cbc->children, plist_element_get_value(el2));
969                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (init)\n", plist_element_get_value(el2)));
970                 }
971
972                 /*
973                         Now: insert the parents of all children into the parent set
974                         and insert the children of all parents into the children set
975                         until the sets don't change any more
976                 */
977                 while (p_change || c_change) {
978                         ir_nodeset_iterator_t iter;
979                         p_change = c_change = 0;
980
981                         /* accumulate parents */
982                         foreach_ir_nodeset(&cbc->children, t_irn, iter) {
983                                 foreach_pset(epk, k_edge) {
984                                         ir_node *val = k_edge->src;
985
986                                         if (k_edge->tgt == t_irn && ! ir_nodeset_contains(&cbc->parents, val)) {
987                                                 ir_nodeset_insert(&cbc->parents, val);
988                                                 p_change = 1;
989                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (killed by %+F)\n", val, t_irn));
990                                         }
991                                 }
992                         }
993
994                         /* accumulate children */
995                         foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
996                                 foreach_pset(epk, k_edge) {
997                                         ir_node *val = k_edge->tgt;
998
999                                         if (k_edge->src == s_irn  && ! ir_nodeset_contains(&cbc->children, val)) {
1000                                                 ir_nodeset_insert(&cbc->children, val);
1001                                                 c_change = 1;
1002                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (kills %+F)\n", val, s_irn));
1003                                         }
1004                                 }
1005                         }
1006                 }
1007
1008                 /* mark all parent values as visited */
1009                 foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
1010                         rss_irn_t *s = get_rss_irn(rss, s_irn);
1011                         s->visited = 1;
1012                         /* assure bipartite property */
1013 #if 0
1014                         if (ir_nodeset_contains(&cbc->children, s_irn)) {
1015                                 ir_nodeset_remove(&cbc->children, s_irn);
1016                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F removed from to children\n", s_irn));
1017                         }
1018 #endif
1019                 }
1020
1021                 /* update edges */
1022                 foreach_pset(epk, k_edge) {
1023                         if (ir_nodeset_contains(&cbc->parents, k_edge->src) &&
1024                             ir_nodeset_contains(&cbc->children, k_edge->tgt)) {
1025                                 pset_insert(cbc->kill_edges, k_edge, HASH_RSS_EDGE(k_edge));
1026                                 /*
1027                                         Link all k_edges which are about to be removed together.
1028                                         Beware: pset_remove kills the iterator.
1029                                 */
1030                                 k_edge->next = kedge_root;
1031                                 kedge_root   = k_edge;
1032                         }
1033                 }
1034
1035                 /* remove all linked k_edges */
1036                 for (; kedge_root; kedge_root = kedge_root->next) {
1037                         pset_remove(epk, kedge_root, HASH_RSS_EDGE(kedge_root));
1038                 }
1039
1040                 /* verify the cbc */
1041                 foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
1042                         int is_killed = 0;
1043
1044                         foreach_pset(cbc->kill_edges, k_edge) {
1045                                 if (k_edge->src == s_irn) {
1046                                         is_killed = 1;
1047                                         pset_break(cbc->kill_edges);
1048                                         break;
1049                                 }
1050                         }
1051
1052                         if (! is_killed) {
1053                                 vrfy_ok = 0;
1054                                 ir_fprintf(stderr, "Warning: parent %+F is not killed in current cbc\n", s_irn);
1055                         }
1056                 }
1057                 assert(vrfy_ok && "Verification of CBC failed");
1058
1059                 /* add the connected bipartite component */
1060                 if (ir_nodeset_size(&cbc->parents) > 0 &&
1061                     ir_nodeset_size(&cbc->children) > 0 &&
1062                         pset_count(cbc->kill_edges) > 0)
1063                         pset_insert(rss->cbc_set, cbc, (unsigned)cbc->nr);
1064                 DBG((rss->dbg, LEVEL_2, "\tbipartite component %d inserted:\n", cbc->nr));
1065                 DEBUG_ONLY(
1066                         if (firm_dbg_get_mask(rss->dbg) & LEVEL_2)
1067                                 debug_print_cbc(rss->dbg, cbc);
1068                 );
1069         }
1070
1071         if (rss->opts->dump_flags & RSS_DUMP_CBC)
1072                 debug_vcg_dump_bipartite(rss);
1073
1074         del_pset(epk);
1075 }
1076
1077 /**
1078  * Select the child with the maximum cost.
1079  */
1080 static child_t *select_child_max_cost(rss_t *rss, ir_nodeset_t *x, ir_nodeset_t *y, child_t *t, cbc_t *cbc) {
1081         ir_node *child;
1082         ir_nodeset_iterator_t iter;
1083         float   max_cost = -1.0f;
1084
1085         DBG((rss->dbg, LEVEL_2, "\t\tcomputing children costs:\n"));
1086
1087         foreach_ir_nodeset(&cbc->children, child, iter) {
1088                 rss_irn_t  *r_child             = get_rss_irn(rss, child);
1089                 int         num_unkilled_parents = 0;
1090                 int         num_descendants;
1091                 rss_edge_t *k_edge;
1092                 float       cost;
1093
1094                 /* get the number of unkilled parents */
1095                 foreach_pset(cbc->kill_edges, k_edge) {
1096                         if (k_edge->tgt == child && ir_nodeset_contains(x, k_edge->src))
1097                                 ++num_unkilled_parents;
1098                 }
1099
1100                 cost = (float)num_unkilled_parents;
1101
1102                 num_descendants = plist_count(r_child->descendant_list) + ir_nodeset_size(y);
1103
1104                 if (num_descendants > 0)
1105                         cost /= (float)num_descendants;
1106
1107                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F, #desc %d, cost %.3f\n", child, num_descendants, cost));
1108
1109                 if (cost > max_cost) {
1110                         t->irn   = child;
1111                         t->cost  = cost;
1112                         max_cost = cost;
1113                 }
1114         }
1115
1116         return t;
1117 }
1118
1119 /**
1120  * Remove all parents from x which are killed by t_irn.
1121  */
1122 static void remove_covered_parents(rss_t *rss, ir_nodeset_t *x, ir_node *t_irn, cbc_t *cbc) {
1123         rss_irn_t  *t = get_rss_irn(rss, t_irn);
1124         rss_edge_t *k_edge;
1125
1126         DBG((rss->dbg, LEVEL_2, "\t\tremoving parents covered by %+F:\n", t_irn));
1127
1128         foreach_pset(cbc->kill_edges, k_edge) {
1129                 if (k_edge->tgt == t_irn && ir_nodeset_contains(x, k_edge->src)) {
1130                         ir_nodeset_remove(x, k_edge->src);
1131                         plist_insert_back(t->parent_list, k_edge->src);
1132                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", k_edge->src));
1133                 }
1134         }
1135 }
1136
1137 static void update_cumulated_descendent_values(rss_t *rss, ir_nodeset_t *y, ir_node *t_irn) {
1138         rss_irn_t *t = get_rss_irn(rss, t_irn);
1139         plist_element_t *el;
1140
1141         DBG((rss->dbg, LEVEL_2, "\t\tupdating cumulated descendant value of %+F:\n", t_irn));
1142
1143         foreach_plist(t->descendant_list, el) {
1144                 ir_nodeset_insert(y, plist_element_get_value(el));
1145                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", plist_element_get_value(el)));
1146         }
1147 }
1148
1149 /**
1150  * Greedy-k: a heuristics for the MMA problem
1151  */
1152 static void compute_killing_function(rss_t *rss) {
1153         cbc_t *cbc;
1154         struct obstack obst;
1155
1156         obstack_init(&obst);
1157
1158         rss->cbc_set = pset_new_ptr(5);
1159         compute_bipartite_decomposition(rss);
1160
1161         /* for all bipartite components do: */
1162         foreach_pset(rss->cbc_set, cbc) {
1163                 ir_node *p;
1164                 ir_nodeset_t x;
1165                 ir_nodeset_t y;
1166                 ir_nodeset_iterator_t iter;
1167                 child_t **sks    = NEW_ARR_F(child_t *, 20);
1168                 int     cur_len  = 0;
1169                 int     cur_size = 20;
1170                 int     i;
1171
1172                 ir_nodeset_init_size(&x, 10);
1173                 ir_nodeset_init_size(&y, 10);
1174
1175                 DBG((rss->dbg, LEVEL_1, "\tcomputing SKS for cbc %d:\n", cbc->nr));
1176                 DBG((rss->dbg, LEVEL_2, "\t\tinitializing parents X:\n"));
1177
1178                 /* X = S_cb (all parents are initially uncovered) */
1179                 foreach_ir_nodeset(&cbc->parents, p, iter) {
1180                         ir_nodeset_insert(&x, p);
1181                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", p));
1182                 }
1183
1184                 /* while X not empty */
1185                 while (ir_nodeset_size(&x) > 0) {
1186                         child_t *t = obstack_alloc(&obst, sizeof(*t));
1187                         memset(t, 0, sizeof(*t));
1188
1189                         t = select_child_max_cost(rss, &x, &y, t, cbc);
1190
1191                         if (cur_len >= cur_size) {
1192                                 ARR_EXTO(child_t *, sks, cur_size * 2);
1193                                 cur_size *= 2;
1194                         }
1195
1196                         DBG((rss->dbg, LEVEL_2, "\t\tinsert child %+F (%.3f) into SKS at pos %d\n", t->irn, t->cost, cur_len));
1197
1198                         sks[cur_len++] = t;
1199                         remove_covered_parents(rss, &x, t->irn, cbc);
1200                         update_cumulated_descendent_values(rss, &y, t->irn);
1201                 }
1202
1203                 ARR_SHRINKLEN(sks, cur_len);
1204
1205                 /* sort SKS in increasing cost order */
1206                 qsort(sks, cur_len, sizeof(sks[0]), cmp_child_costs);
1207
1208                 DBG((rss->dbg, LEVEL_2, "\tprocessing SKS for cbc %d:\n", cbc->nr));
1209
1210                 /* build killing function */
1211                 for (i = cur_len - 1; i >= 0; --i) { /* loop over sks in decreasing cost order */
1212                         child_t         *t = sks[i];
1213                         rss_irn_t       *rt = get_rss_irn(rss, t->irn);
1214                         plist_element_t *p_el;
1215
1216                         DBG((rss->dbg, LEVEL_3, "\t\t\tkiller %+F (%.3f):\n", t->irn, t->cost));
1217
1218                         /* kill all unkilled parents of t */
1219                         foreach_plist(rt->parent_list, p_el) {
1220                                 ir_node    *par = plist_element_get_value(p_el);
1221                                 rss_irn_t *rpar = get_rss_irn(rss, par);
1222
1223                                 if (is_Sink(rpar->killer)) {
1224                                         rpar->killer = t->irn;
1225                                         DBG((rss->dbg, LEVEL_2, "\t\tkill %+F\n", rpar->irn));
1226                                 }
1227                                 else {
1228                                         DBG((rss->dbg, LEVEL_3, "\t\t\tkeeping %+F as killer for %+F\n", rpar->killer, rpar->irn));
1229                                 }
1230                         }
1231                 }
1232
1233                 ir_nodeset_destroy(&x);
1234                 ir_nodeset_destroy(&y);
1235                 DEL_ARR_F(sks);
1236         }
1237
1238         if (rss->opts->dump_flags & RSS_DUMP_KILL)
1239                 debug_vcg_dump_kill(rss);
1240
1241         del_pset(rss->cbc_set);
1242         obstack_free(&obst, NULL);
1243 }
1244
1245 /**
1246  * Adds the edge src -> tgt to the dvg. Checks if reverse edge is already there (asserts).
1247  */
1248 static INLINE void add_dvg_edge(rss_t *rss, dvg_t *dvg, const ir_node *src, const ir_node *tgt, int have_source) {
1249         rss_edge_t *dvg_edge;
1250         rss_edge_t key;
1251
1252         if (! have_source)
1253                 ir_nodeset_insert(&dvg->nodes, (ir_node *) src);
1254         else
1255                 assert(ir_nodeset_contains(&dvg->nodes, src) && "Wrong assumption");
1256
1257         ir_nodeset_insert(&dvg->nodes, (ir_node *) tgt);
1258
1259         key.src = (ir_node *) tgt;
1260         key.tgt = (ir_node *) src;
1261         assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1262
1263         key.src = (ir_node *) src;
1264         key.tgt = (ir_node *) tgt;
1265         if (NULL != pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1266                 /* add the edge to the DVG */
1267                 dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1268
1269                 dvg_edge->src  = (ir_node *) src;
1270                 dvg_edge->tgt  = (ir_node *) tgt;
1271                 dvg_edge->next = NULL;
1272
1273                 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", src, tgt));
1274                 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1275         }
1276 }
1277
1278 /**
1279  * Computes the disjoint value DAG (DVG).
1280  * BEWARE: It is not made explicitly clear in the Touati paper,
1281  *         but the DVG is meant to be build from the KILLING DAG
1282  */
1283 static void compute_dvg(rss_t *rss, dvg_t *dvg) {
1284         plist_element_t *el;
1285
1286         DBG((rss->dbg, LEVEL_1, "\tcomputing DVG:\n"));
1287
1288         foreach_plist(rss->nodes, el) {
1289                 ir_node    *u_irn    = plist_element_get_value(el);
1290                 rss_irn_t  *u        = get_rss_irn(rss, u_irn);
1291                 rss_irn_t  *u_killer = get_rss_irn(rss, u->killer);
1292                 int        i;
1293
1294                 /* TODO: omit nodes only having sink as pkiller and killing no one */
1295
1296                 /* add an edge to killer */
1297                 add_dvg_edge(rss, dvg, u_irn, u->killer, 0);
1298
1299                 if (is_Sink(u->killer))
1300                         continue;
1301
1302                 /* We add an edge to every killer, from where we could be reached. */
1303                 for (i = ARR_LEN_SAFE(u_killer->descendants) - 1; i >= 0; --i) {
1304                         add_dvg_edge(rss, dvg, u_irn, u_killer->descendants[i], 1);
1305                 }
1306
1307 #if 0
1308
1309                 foreach_plist(rss->nodes, el2) {
1310                         ir_node *v_irn = plist_element_get_value(el2);
1311
1312                         /*
1313                                 There is an edge (u, v) in the DVG iff v is a descendant of the killer(u).
1314                         */
1315                         if (BSEARCH_IRN_ARR(v_irn, u_kill->descendants)) {
1316                                 rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1317                                 rss_edge_t key;
1318
1319                                 /* insert the user into the DVG and append it to the user list of u */
1320                                 ir_nodeset_insert(&dvg->nodes, v_irn);
1321                                 if (! plist_has_value(u->dvg_user_list, v_irn))
1322                                         plist_insert_back(u->dvg_user_list, v_irn);
1323
1324                                 dvg_edge->src  = u_irn;
1325                                 dvg_edge->tgt  = v_irn;
1326                                 dvg_edge->next = NULL;
1327
1328                                 key.src = v_irn;
1329                                 key.tgt = u_irn;
1330
1331                                 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1332
1333                                 /* add the edge to the DVG */
1334                                 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", u_irn, v_irn));
1335                                 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1336                         }
1337                 }
1338 #endif /* if 0 */
1339         }
1340
1341         if (rss->opts->dump_flags & RSS_DUMP_DVG)
1342                 debug_vcg_dump_dvg(rss, dvg);
1343 }
1344
1345 /**
1346  * Updates the dvg structure when a serialization edge from src -> tgt is added.
1347  */
1348 static void update_dvg(rss_t *rss, dvg_t *dvg, rss_irn_t *src, rss_irn_t *tgt) {
1349         int i, j, idx;
1350         rss_edge_t *edge;
1351         rss_edge_t **arr = alloca(pset_count(dvg->edges) * sizeof(arr[0]));
1352
1353         /*
1354                 Add an edge from serialization target to serialization src:
1355                 src cannot be alive with target
1356         */
1357         add_dvg_edge(rss, dvg, tgt->irn, src->irn, 0);
1358
1359         /* Add edges to src's descendants as well, they also getting serialized. */
1360         for (i = ARR_LEN_SAFE(src->descendants) - 1; i >= 0; --i) {
1361                 add_dvg_edge(rss, dvg, tgt->irn, src->descendants[i], 1);
1362         }
1363
1364         /* We also have to add edges from targets predecessors in dvg */
1365         idx = 0;
1366         /* We cannot insert elements into set over which we iterate ... */
1367         foreach_pset(dvg->edges, edge) {
1368                 if (edge->tgt == tgt->irn) {
1369                         arr[idx++] = edge;
1370                 }
1371         }
1372
1373         for (i = 0; i < idx; ++i) {
1374                 edge = arr[i];
1375                 add_dvg_edge(rss, dvg, edge->src, src->irn, 1);
1376                 for (j = ARR_LEN_SAFE(src->descendants) - 1; j >= 0; --j) {
1377                         add_dvg_edge(rss, dvg, edge->src, src->descendants[j], 1);
1378                 }
1379         }
1380 }
1381
1382 #if 0
1383 /**
1384  * Accumulate all descendants for root into list.
1385  */
1386 static void accumulate_dvg_descendant_values(rss_t *rss, rss_irn_t *root, plist_t *list) {
1387         if (plist_count(root->dvg_user_list) > 0) {
1388                 plist_element_t *el;
1389
1390                 foreach_plist(root->dvg_user_list, el) {
1391                         ir_node   *v_irn = plist_element_get_value(el);
1392                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
1393
1394                         /* add v as descendant */
1395                         if (! plist_has_value(list, v_irn)) {
1396                                 plist_insert_back(list, v_irn);
1397                                 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG descendant %+F\n", v_irn));
1398                         }
1399
1400                         /* accumulate v's descendants */
1401                         accumulate_dvg_descendant_values(rss, v, list);
1402                 }
1403         }
1404 }
1405
1406 /**
1407  * Builds the list of potential killers for each node
1408  * in the given DVG.
1409  * Needs the descendant list for all user as sorted array.
1410  */
1411 static void build_dvg_pkiller_list(rss_t *rss, dvg_t *dvg) {
1412         ir_nodeset_iterator_t iter;
1413         ir_node *irn;
1414
1415         foreach_nodeset(&dvg->nodes, irn, iter) {
1416                 rss_irn_t       *node = get_rss_irn(rss, irn);
1417                 plist_element_t *el, *el2;
1418
1419                 DBG((rss->dbg, DEBUG_DVG, "\t\tbuilding pkiller list for %+F\n", irn));
1420
1421                 /* check each user */
1422                 foreach_plist(node->dvg_user_list, el) {
1423                         ir_node *u_irn = plist_element_get_value(el);
1424
1425                         /* is the current user u_irn not a descendant of any other user -> pkiller */
1426                         foreach_plist(node->dvg_user_list, el2) {
1427                                 ir_node   *v_irn = plist_element_get_value(el2);
1428                                 rss_irn_t *v     = get_rss_irn(rss, v_irn);
1429
1430                                 if (el != el2                             &&
1431                                         ! BSEARCH_IRN_ARR(u_irn, v->dvg_desc) &&
1432                                         ! plist_has_value(node->dvg_pkiller_list, u_irn))
1433                                 {
1434                                         plist_insert_back(node->dvg_pkiller_list, u_irn);
1435                                         DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG pkiller %+F\n", u_irn));
1436                                 }
1437                         }
1438                 }
1439
1440                 node->dvg_pkiller = build_sorted_array_from_list(node->dvg_pkiller_list, phase_obst(&rss->ph));
1441         }
1442
1443         DEBUG_ONLY(
1444                 if (firm_dbg_get_mask(rss->dbg) & DEBUG_DVG)
1445                         debug_vcg_dump_dvg_pkiller(rss, dvg);
1446         );
1447 }
1448
1449 #endif /* if 0 */
1450
1451 /**
1452  * Compute the maximal antichain of the current DVG.
1453  * This is a reimplementation of the MAXIMAL_ANTI_CHAIN function
1454  * from the DDG library 1.1 (DAG.cpp).
1455  */
1456 static ir_nodeset_t *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration) {
1457         int         n               = ir_nodeset_size(&dvg->nodes);
1458         int         *assignment     = alloca(n * sizeof(assignment[0]));
1459         int         *assignment_rev = alloca(n * sizeof(assignment_rev[0]));
1460         int         *idx_map        = alloca(n * sizeof(idx_map[0]));
1461         hungarian_problem_t *bp;
1462         ir_nodeset_t *values, *temp;
1463         ir_nodeset_iterator_t iter;
1464         ir_node     *u_irn;
1465         int         i, j, cost, cur_chain, res;
1466         rss_edge_t  *dvg_edge;
1467
1468 #define MAP_IDX(irn) bsearch_for_index(get_irn_idx(irn), idx_map,  n,  1)
1469
1470         if (pset_count(dvg->edges) == 0)
1471                 return NULL;
1472
1473         bp = hungarian_new(n, n, 1, HUNGARIAN_MATCH_NORMAL);
1474
1475         /*
1476                 At first, we build an index map for the nodes in the DVG,
1477                 because we cannot use the irn idx therefore as the resulting
1478                 bipartite data structure would have around 1.2GB.
1479                 So we limit the size to the number of nodes we have in the DVG
1480                 and build a sorted index map for their irn indices.
1481         */
1482         i = 0;
1483         foreach_ir_nodeset(&dvg->nodes, u_irn, iter) {
1484                 idx_map[i++] = get_irn_idx(u_irn);
1485         }
1486         qsort(idx_map, n, sizeof(idx_map[0]), cmp_int);
1487
1488         foreach_pset(dvg->edges, dvg_edge) {
1489                 int idx_u = MAP_IDX(dvg_edge->src);
1490                 int idx_v = MAP_IDX(dvg_edge->tgt);
1491
1492                 /* add the entry to the bipartite data structure */
1493                 hungarian_add(bp, idx_u, idx_v, 1);
1494                 DBG((rss->dbg, LEVEL_3, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1495                         idx_u, dvg_edge->src, idx_v, dvg_edge->tgt));
1496         }
1497 #if 0
1498         /*
1499                 Add a bipartite entry for each pair of nodes (u, v), where exists a
1500                 path in the DVG from u to v, ie. connect all descendants(v) to v.
1501                 desc_dvg(v) = accumulated descendants of all z in dvg_user_list(v)
1502         */
1503         foreach_ir_nodeset(&dvg->nodes, u_irn, iter) {
1504                 rss_irn_t *u        = get_rss_irn(rss, u_irn);
1505                 int       idx_u_irn = MAP_IDX(u_irn);
1506
1507                 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG descendants of %+F:\n", u_irn));
1508
1509                 //plist_clear(u->dvg_desc_list);
1510                 //accumulate_dvg_descendant_values(rss, u, u->dvg_desc_list);
1511
1512                 /*
1513                         FIXME: The array is build on the phase obstack and we cannot free the data.
1514                         So the array get as many times allocated as this function is called.
1515                 */
1516
1517                 /* build the sorted array for faster searches */
1518                 //u->dvg_desc = build_sorted_array_from_list(u->dvg_desc_list, phase_obst(&rss->ph));
1519
1520                 DBG((rss->dbg, DEBUG_MAX_AC, "\t\tadding bipartite entries of %+F:\n", u_irn));
1521
1522                 /* add the bipartite entries for all descendants */
1523                 for (i = ARR_LEN_SAFE(u->dvg_desc) - 1; i >= 0; --i) {
1524                         rss_irn_t *desc    = get_rss_irn(rss, u->dvg_desc[i]);
1525                         int       idx_desc = MAP_IDX(u->dvg_desc[i]);
1526
1527                         /* add the entry to the bipartite data structure */
1528                         hungarian_add(bp, idx_u_irn, idx_desc, 1);
1529                         DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1530                                 idx_u_irn, u_irn, idx_desc, u->dvg_desc[i]));
1531
1532                         need_matching = 1;
1533                 }
1534         }
1535 #endif
1536
1537         /* We want maximum cardinality matching */
1538         hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);
1539
1540 #if 0
1541         DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG pkiller:\n"));
1542         /* beware: the following function needs the dvg_desc array */
1543         build_dvg_pkiller_list(rss, dvg);
1544 #endif
1545
1546         DBG((rss->dbg, LEVEL_2, "\t\tcomputing bipartite matching\n"));
1547         /*
1548                 The maximum cardinality bipartite matching gives us the minimal
1549                 chain partition, which corresponds to the maximum anti chains.
1550         */
1551         res = hungarian_solve(bp, assignment, &cost, 1);
1552         assert(res == 0 && "Bipartite matching failed!");
1553
1554         hungarian_free(bp);
1555         memset(assignment_rev, -1, n * sizeof(assignment_rev[0]));
1556
1557         /* build the reverse assignment, ie. foreach i -> j, add j -> i */
1558         for (i = 0; i < n; ++i) {
1559                 if (assignment[i] >= 0) {
1560                         int j = assignment[i];
1561                         assignment_rev[j] = i;
1562                 }
1563         }
1564
1565         DBG((rss->dbg, LEVEL_2, "\t\tgot assignment with cost %d\n", cost));
1566         DBG((rss->dbg, LEVEL_3, "\t\t\tassignment   ---   reverse assignment\n", cost));
1567         for (i = 0; i < n; ++i) {
1568                 DBG((rss->dbg, LEVEL_3, "\t\t\t%3d -> %3d         %3d -> %3d\n", i, assignment[i], i, assignment_rev[i]));
1569         }
1570
1571         values    = XMALLOC(ir_nodeset_t);
1572         ir_nodeset_init_size(values, 10);
1573         cur_chain = 0;
1574         /* Construction of the minimal chain partition */
1575         for (j = 0; j < n; ++j) {
1576                 /* check nodes, which did not occur as target */
1577                 if (assignment_rev[j] == -1) {
1578                         int       xj      = idx_map[j];
1579                         ir_node   *xj_irn = get_idx_irn(rss->irg, xj);
1580                         rss_irn_t *xj_rss = get_rss_irn(rss, xj_irn);
1581                         chain_t   *c      = obstack_alloc(phase_obst(&rss->ph), sizeof(*c));
1582                         int       source;
1583
1584                         /* there was no source for j -> we have a source of a new chain */
1585                         ir_nodeset_insert(values, xj_irn);
1586
1587                         c->elements = plist_obstack_new(phase_obst(&rss->ph));
1588                         c->nr       = cur_chain++;
1589                         plist_insert_back(c->elements, xj_irn);
1590
1591                         xj_rss->chain = c;
1592
1593                         DBG((rss->dbg, LEVEL_2, "\t\tstarting chain %d:\n", c->nr));
1594                         DBG((rss->dbg, LEVEL_2, "\t\t\t%+F (%d)", xj_irn, j));
1595
1596                         /* follow chain, having j as source */
1597                         source = j;
1598                         while (assignment[source] >= 0) {
1599                                 int       target  = assignment[source];
1600                                 int       irn_idx = idx_map[target];
1601                                 ir_node   *irn    = get_idx_irn(rss->irg, irn_idx);
1602                                 rss_irn_t *node   = get_rss_irn(rss, irn);
1603
1604                                 plist_insert_back(c->elements, irn);
1605                                 node->chain = c;
1606
1607                                 DB((rss->dbg, LEVEL_2, " -> %+F (%d)", irn, target));
1608
1609                                 /* new source = last target */
1610                                 source = target;
1611                         }
1612
1613                         DB((rss->dbg, LEVEL_2, "\n"));
1614                 }
1615         }
1616
1617         /*
1618                 Computing the maximal antichain: Select an element from each
1619                 chain such, such it is parallel with the others.
1620         */
1621         DBG((rss->dbg, LEVEL_1, "\t\tcomputing set of saturation values (MAX AC)\n"));
1622         DBG((rss->dbg, LEVEL_3, "\t\tstarting with:\n"));
1623
1624         DEBUG_ONLY(
1625                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_3)
1626                         dump_nodeset(values, "\t\t\t");
1627         )
1628
1629         temp = NULL;
1630         do {
1631                 /*
1632                         We need an explicit array for the values as
1633                         we cannot iterate multiple times over the same
1634                         set at the same time. :-(((((
1635                         TODO Matze: now we can, so rewrite this...
1636                 */
1637                 int     n         = ir_nodeset_size(values);
1638                 int     i         = 0;
1639                 ir_node **val_arr = NEW_ARR_F(ir_node *, n);
1640
1641                 foreach_ir_nodeset(values, u_irn, iter)
1642                         val_arr[i++] = u_irn;
1643
1644                 if (temp) {
1645                         ir_nodeset_destroy(temp);
1646                         free(temp);
1647                 }
1648
1649                 temp = XMALLOC(ir_nodeset_t);
1650                 ir_nodeset_init_size(temp, 10);
1651
1652                 /* Select all nodes from current value set, having another node in the set as descendant. */
1653                 for (i = 0; i < n; ++i) {
1654                         rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1655
1656                         for (j = 0; j < n; ++j) {
1657                                 if (i != j) {
1658                                         rss_edge_t key;
1659
1660                                         key.src = val_arr[i];
1661                                         key.tgt = val_arr[j];
1662
1663                                         if (pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1664                                                 /* v[j] is descendant of u -> remove u and break */
1665                                                 ir_nodeset_insert(temp, (ir_node *) u->irn);
1666                                                 ir_nodeset_remove(values, u->irn);
1667
1668                                                 DBG((rss->dbg, LEVEL_3, "\t\t\tremoving %+F from values, adding it to temp\n", u->irn));
1669
1670                                                 break;
1671                                         }
1672                                 }
1673                         }
1674                 }
1675
1676                 /* Try to insert the chain predecessor of all selected u's */
1677                 foreach_ir_nodeset(temp, u_irn, iter) {
1678                         rss_irn_t       *u  = get_rss_irn(rss, u_irn);
1679                         chain_t         *c  = u->chain;
1680                         plist_element_t *el = plist_find_value(c->elements, u_irn);
1681
1682                         assert(el && "Missing element in chain!");
1683
1684                         /* If u has predecessor in chain: insert the predecessor */
1685                         if (el == plist_element_get_prev(el)) {
1686                                 ir_nodeset_insert(values, plist_element_get_value(el));
1687                                 DBG((rss->dbg, LEVEL_3, "\t\t\tadding %+F to values\n", plist_element_get_value(el)));
1688                         }
1689                 }
1690
1691
1692                 DEL_ARR_F(val_arr);
1693         } while (ir_nodeset_size(temp) > 0);
1694
1695         DBG((rss->dbg, LEVEL_2, "\t\tfinal set:\n"));
1696         DEBUG_ONLY(
1697                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2) {
1698                         dump_nodeset(values, "\t\t\t");
1699                 }
1700         );
1701
1702         if (rss->opts->dump_flags & RSS_DUMP_MAXAC)
1703                 debug_vcg_dump_pkg(rss, values, iteration);
1704
1705         if(temp != NULL) {
1706                 ir_nodeset_destroy(temp);
1707                 free(temp);
1708         }
1709
1710         return values;
1711
1712 #undef MAP_IDX
1713 }
1714
1715 /**
1716  * Computes the best serialization between two nodes of sat_vals.
1717  */
1718 static serialization_t *compute_best_admissible_serialization(rss_t *rss, ir_nodeset_t *sat_vals, serialization_t *ser, int num_regs) {
1719         int        n                    = ir_nodeset_size(sat_vals);
1720         int        n_idx                = ARR_LEN_SAFE(rss->idx_map);
1721         int        i                    = 0;
1722         ir_node    **val_arr            = alloca(n * sizeof(val_arr[0]));
1723         bitset_t   *bs_sv               = bitset_alloca(n_idx);
1724         bitset_t   *bs_vdesc            = bitset_alloca(n_idx);
1725         bitset_t   *bs_tmp              = bitset_alloca(n_idx);
1726         bitset_t   *bs_ukilldesc        = bitset_alloca(n_idx);
1727         int        best_benefit         = INT_MAX;
1728         int        best_omega2          = INT_MAX;
1729         int        best_benefit_omega20 = INT_MAX;
1730         int        has_omega1           = 0;
1731         int        j, k;
1732         ir_node    *irn;
1733         ir_nodeset_iterator_t iter;
1734         rss_edge_t min_benefit_edge = {NULL, NULL, NULL};
1735         rss_edge_t min_omega20_edge = {NULL, NULL, NULL};
1736         rss_irn_t  *ser_u_omega1 = NULL, *ser_v_omega1 = NULL;
1737         rss_irn_t  *ser_u_omega20 = NULL, *ser_v_omega20 = NULL;
1738
1739         DBG((rss->dbg, LEVEL_1, "\tcomputing admissible serializations:\n"));
1740
1741         /*
1742                 We need an explicit array for the values as
1743                 we cannot iterate multiple times over the same
1744                 set at the same time. :-(((((
1745         */
1746
1747         foreach_ir_nodeset(sat_vals, irn, iter) {
1748                 val_arr[i++] = irn;
1749                 bitset_set(bs_sv, BLOCK_IDX_MAP(rss, irn));
1750         }
1751
1752         /*
1753                 We build all admissible serializations and remember the best found so far.
1754                 For u in sat_vals:
1755                  For v in sat_val:
1756                    if v in pkiller(u): add edge from v to all other pkiller(u)
1757                    else: for all vv in pkiller(u): add edge from v to vv if there exists no path from vv to v
1758         */
1759
1760 /*
1761         A node is unserializable if:
1762         - it has only one killer and this one is Sink
1763     - it kills no other values
1764         In this case there is no serialization which could
1765         reduce the registerpressure
1766 */
1767 #define IS_UNSERIALIZABLE_NODE(rss_node)                  \
1768         (                                                     \
1769                 (                                                 \
1770                         (plist_count(rss_node->pkiller_list) == 1) && \
1771                         is_Sink(rss_node->killer)                  && \
1772                         (rss_node->kill_count                == 0)    \
1773                 )                            ||                   \
1774                 be_is_Barrier(rss_node->irn) ||                   \
1775                 be_is_Keep(rss_node->irn)                         \
1776         )
1777
1778         /* for all u in sat_vals */
1779         for (i = 0; i < n; ++i) {
1780                 rss_irn_t       *u = get_rss_irn(rss, val_arr[i]);
1781                 plist_element_t *el;
1782
1783                 /* ignore nodes where serialization does not help */
1784                 if (IS_UNSERIALIZABLE_NODE(u)) {
1785                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", u->irn));
1786                         continue;
1787                 }
1788
1789                 /* accumulate all descendants of all pkiller(u) */
1790                 bitset_clear_all(bs_ukilldesc);
1791                 foreach_plist(u->pkiller_list, el) {
1792                         ir_node   *irn  = plist_element_get_value(el);
1793                         rss_irn_t *node = get_rss_irn(rss, irn);
1794
1795                         if (! is_Sink(irn))
1796                                 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, irn));
1797                         else
1798                                 continue;
1799
1800                         for (k = ARR_LEN_SAFE(node->descendants) - 1; k >= 0; --k) {
1801                                 if (! is_Sink(node->descendants[k]))
1802                                         bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, node->descendants[k]));
1803                         }
1804                 }
1805
1806                 /* for all v in sat_vals */
1807                 for (j = 0; j < n; ++j) {
1808                         ir_node   *v_irn   = val_arr[j];
1809                         rss_irn_t *v       = get_rss_irn(rss, v_irn);
1810                         unsigned  v_height = get_irn_height(rss->h, v_irn);
1811                         int       omega1, omega2, is_pkiller;
1812
1813                         /* v cannot be serialized with itself
1814                          * ignore nodes where serialization does not help */
1815                         if (i == j || IS_UNSERIALIZABLE_NODE(v)) {
1816 #ifdef DEBUG_libfirm
1817                                 if (i != j)
1818                                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", v->irn));
1819 #endif
1820                                 continue;
1821                         }
1822
1823                         /* get descendants of v */
1824                         bitset_clear_all(bs_vdesc);
1825                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v_irn));
1826                         for (k = ARR_LEN_SAFE(v->descendants) - 1; k >= 0; --k) {
1827                                 if (! is_Sink(v->descendants[k]))
1828                                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v->descendants[k]));
1829                         }
1830
1831                         /* if v is in pkiller(u) */
1832                         is_pkiller = plist_has_value(u->pkiller_list, v_irn);
1833
1834                         /* for all vv in pkiller(u) */
1835                         foreach_plist(u->pkiller_list, el) {
1836                                 ir_node *vv_irn  = plist_element_get_value(el);
1837                                 int     add_edge;
1838
1839                                 if (is_Sink(vv_irn) || is_cfop(vv_irn))
1840                                         continue;
1841
1842                                 if (is_pkiller)
1843                                         add_edge = vv_irn != v_irn && skip_Proj(vv_irn) != skip_Proj(v_irn);
1844                                 else
1845                                         add_edge = ! heights_reachable_in_block(rss->h, skip_Proj(vv_irn), skip_Proj(v_irn));
1846
1847                                 /*
1848                                         As we add an edge from vv -> v, we have to make sure,
1849                                         that there exists no path from v to vv.
1850                                 */
1851
1852                                 if (add_edge) {
1853                                         unsigned vv_height = get_irn_height(rss->h, vv_irn);
1854                                         unsigned critical_path_cost;
1855                                         unsigned mu1, mu2;
1856
1857                                         /*
1858                                                 mu1 = | descendants(v) cut sat_vals |
1859                                                 the number of saturating values which cannot
1860                                                 be simultaneously alive with u
1861                                         */
1862                                         bitset_copy(bs_tmp, bs_vdesc);
1863                                         mu1 = bitset_popcnt(bitset_and(bs_tmp, bs_sv));
1864
1865                                         /*
1866                                                 mu2 = | accum_desc_all_pkiller(u) without descendants(v) |
1867                                         */
1868                                         if (is_pkiller) {
1869                                                 bitset_copy(bs_tmp, bs_ukilldesc);
1870                                                 mu2 = bitset_popcnt(bitset_andnot(bs_tmp, bs_vdesc));
1871                                         }
1872                                         else {
1873                                                 mu2 = 0;
1874                                         }
1875
1876                                         /* omega1 = mu1 - mu2 */
1877                                         omega1 = mu1 - mu2;
1878
1879                                         if (omega1 != 0)
1880                                                 has_omega1 = 1;
1881
1882                                         /* omega2 = increase of critical path */
1883                                         critical_path_cost =
1884                                                 v_height                        /* longest path from v to sink */
1885                                                 + rss->max_height - vv_height   /* longest path from source to vv */
1886                                                 + 1;                            /* edge */
1887
1888                                         /*
1889                                                 If critical_path_cost > max_height -> the new edge
1890                                                 would increase the longest critical path by the difference.
1891                                         */
1892                                         omega2 = critical_path_cost > rss->max_height ? critical_path_cost - rss->max_height : 0;
1893
1894                                         /* this keeps track of the edge with the best benefit */
1895                                         if (omega1 >= num_regs - n && omega1 < best_benefit) {
1896                                                 min_benefit_edge.src = v_irn;
1897                                                 min_benefit_edge.tgt = vv_irn;
1898
1899                                                 ser_u_omega1 = u;
1900                                                 ser_v_omega1 = v;
1901
1902                                                 best_benefit    = omega1;
1903                                                 ser->new_killer = is_pkiller;
1904                                         }
1905
1906                                         /* this keeps track of the edge with the best omega1 costs where omega2 == 0 */
1907                                         if (omega2 == 0 && omega1 >= num_regs - n && omega1 < best_benefit_omega20) {
1908                                                 min_omega20_edge.src = v_irn;
1909                                                 min_omega20_edge.tgt = vv_irn;
1910
1911                                                 ser_u_omega20 = u;
1912                                                 ser_v_omega20 = v;
1913
1914                                                 best_benefit_omega20 = omega1;
1915                                                 ser->new_killer      = is_pkiller;
1916                                         }
1917
1918                                         best_omega2 = MIN(best_omega2, omega2);
1919
1920                                         DBG((rss->dbg, LEVEL_2, "\t\tfound %+F -> %+F (w1 %d, w2 %d)\n",
1921                                                 v_irn, vv_irn, omega1, omega2));
1922                                 } /* if add_edge */
1923                         } /* for all vv in pkiller(u) */
1924                 } /* for all v in sat_vals */
1925         } /* for all u in sat_vals */
1926
1927         if (! has_omega1)
1928                 return NULL;
1929
1930         if (best_omega2 == 0) {
1931                 ser->u         = ser_u_omega20;
1932                 ser->v         = ser_v_omega20;
1933                 ser->edge->src = min_omega20_edge.src;
1934                 ser->edge->tgt = min_omega20_edge.tgt;
1935                 ser->omega1    = best_benefit_omega20;
1936                 ser->omega2    = best_omega2;
1937         }
1938         else {
1939                 ser->u         = ser_u_omega1;
1940                 ser->v         = ser_v_omega1;
1941                 ser->edge->src = min_benefit_edge.src;
1942                 ser->edge->tgt = min_benefit_edge.tgt;
1943                 ser->omega1    = best_benefit;
1944                 ser->omega2    = best_omega2;
1945         }
1946
1947         return ser;
1948
1949 #undef IS_UNSERIALIZABLE_NODE
1950 }
1951
1952 /**
1953  * Perform the value serialization heuristic and add all
1954  * computed serialization edges as dependencies to the irg.
1955  */
1956 static void perform_value_serialization_heuristic(rss_t *rss) {
1957         bitset_t *arch_nonign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1958         bitset_t *abi_ign_bs     = bitset_alloca(arch_register_class_n_regs(rss->cls));
1959         unsigned available_regs, iteration;
1960         dvg_t    dvg;
1961         ir_nodeset_t *sat_vals;
1962         pset *ser_set = new_pset(cmp_rss_edges, 20);
1963
1964         /* available_regs = R = |arch_non_ignore_regs cut ~abi_ignore_regs| */
1965         arch_put_non_ignore_regs(rss->cls, arch_nonign_bs);
1966         be_abi_put_ignore_regs(rss->abi, rss->cls, abi_ign_bs);
1967         bitset_andnot(arch_nonign_bs, abi_ign_bs);
1968         available_regs  = bitset_popcnt(arch_nonign_bs);
1969         //num_live = pset_count(rss->live_block);
1970         //available_regs -= num_live < available_regs ? num_live : 0;
1971
1972         DBG((rss->dbg, LEVEL_1, "\n\t#available regs: %d\n\n", available_regs));
1973
1974         /*
1975                 At first we need to compute the disjoint value DAG (DVG = {V, E_dv}).
1976                 V    = set of all nodes we are currently interested in
1977                 E_dv = there is an edge from u to v iff v is a descendant of killer(u), forall u, v in V
1978         */
1979         ir_nodeset_init_size(&dvg.nodes, plist_count(rss->nodes));
1980         dvg.edges = new_pset(cmp_rss_edges, plist_count(rss->nodes) * 5);
1981         compute_dvg(rss, &dvg);
1982
1983         /*
1984                 Then we perform the heuristic serialization algorithm
1985                 on the DVG which gives us all necessary serialization
1986                 edges.
1987         */
1988         DBG((rss->dbg, LEVEL_1, "\tcomputing maximal antichain:\n"));
1989         iteration = 0;
1990         sat_vals  = compute_maximal_antichain(rss, &dvg, iteration++);
1991         while (sat_vals && (ir_nodeset_size(sat_vals) > available_regs)) {
1992                 serialization_t *ser, best_ser;
1993                 rss_edge_t      *edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*edge));
1994                 ir_node         *dep_src, *dep_tgt;
1995
1996                 best_ser.edge = edge;
1997                 ser = compute_best_admissible_serialization(rss, sat_vals, &best_ser, available_regs);
1998
1999                 DBG((rss->dbg, LEVEL_1, "\tcurrent register saturation %d, target %d\n", ir_nodeset_size(sat_vals), available_regs));
2000
2001                 if (! ser) {
2002                         DBG((rss->dbg, LEVEL_1, "\tno RS improving serialization found, breaking at iteration %d\n", iteration));
2003                         break;
2004                 }
2005
2006                 /* Insert the serialization as dependency edge into the irg. */
2007                 DBG((rss->dbg, LEVEL_1, "\tserializing %+F -> %+F with edge %+F -> %+F and cost %d, %d\n",
2008                         ser->u->irn, ser->v->irn, ser->edge->src, ser->edge->tgt, ser->omega1, ser->omega2));
2009
2010                 if (pset_find(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge)))
2011                         ir_printf("WARNING: serialization %+F -> %+F computed twice!\n", ser->edge->src, ser->edge->tgt);
2012
2013
2014                 pset_insert(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge));
2015
2016                 /* update the dvg */
2017                 update_dvg(rss, &dvg, ser->v, ser->u);
2018                 update_dvg(rss, &dvg, get_rss_irn(rss, ser->edge->src), get_rss_irn(rss, ser->edge->tgt));
2019                 if(sat_vals != NULL) {
2020                         ir_nodeset_destroy(sat_vals);
2021                         free(sat_vals);
2022                 }
2023
2024                 dep_src = skip_Proj(ser->edge->src);
2025                 dep_tgt = ser->edge->tgt;
2026                 add_irn_dep(dep_src, dep_tgt);
2027
2028                 /* Update descendants, consumer and pkillers of the target */
2029                 update_node_info(rss, ser->edge->tgt, ser->edge->src);
2030
2031                 /* TODO: try to find a cheaper way for updating height information */
2032                 rss->max_height = heights_recompute_block(rss->h, rss->block);
2033
2034                 /* Recompute the antichain for next serialization */
2035                 DBG((rss->dbg, LEVEL_1, "\tre-computing maximal antichain:\n"));
2036                 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
2037         }
2038
2039         ir_nodeset_destroy(&dvg.nodes);
2040         del_pset(dvg.edges);
2041 }
2042
2043 /**
2044  * Do initial calculations for a block.
2045  */
2046 static void process_block(ir_node *block, void *env) {
2047         rss_t *rss = env;
2048         int   i, n;
2049         const ir_edge_t *edge;
2050
2051         phase_init(&rss->ph, "rss block preprocessor", rss->irg, PHASE_DEFAULT_GROWTH, init_rss_irn, NULL);
2052
2053         DBG((rss->dbg, LEVEL_1, "preprocessing block %+F\n", block));
2054         rss->block = block;
2055
2056         /* build an index map for all nodes in the current block */
2057         i            = 0;
2058         n            = get_irn_n_edges(block);
2059         NEW_ARR_A(int *, rss->idx_map, n);
2060         foreach_out_edge(block, edge) {
2061                 ir_node *irn      = get_edge_src_irn(edge);
2062                 rss->idx_map[i++] = get_irn_idx(irn);
2063         }
2064         qsort(rss->idx_map, n, sizeof(rss->idx_map[0]), cmp_int);
2065         rss->max_height = heights_recompute_block(rss->h, block);
2066
2067         /* loop over all register classes */
2068         for (i = arch_env_get_n_reg_class(rss->arch_env) - 1; i >= 0; --i) {
2069                 const arch_register_class_t *cls = arch_env_get_reg_class(rss->arch_env, i);
2070
2071                 rss->cls = cls;
2072                 DBG((rss->dbg, LEVEL_1, "register class %s\n", arch_register_class_name(cls)));
2073
2074                 /* Get all live value at end of Block having current register class */
2075                 ir_nodeset_init(&rss->live_block);
2076                 be_liveness_end_of_block(rss->liveness, rss->arch_env, rss->cls, rss->block, &rss->live_block);
2077
2078                 /* reset the list of interesting nodes */
2079                 plist_clear(rss->nodes);
2080                 plist_insert_back(rss->nodes, _sink);
2081
2082                 /* collect all nodes having a certain register class */
2083                 foreach_out_edge(block, edge) {
2084                         ir_node *irn  = get_edge_src_irn(edge);
2085                         ir_mode *mode = get_irn_mode(irn);
2086
2087                         /*
2088                                 We skip:
2089                                 - mode_T nodes (the projs are asked)
2090                                 - mode_X nodes (control flow nodes are always scheduled last)
2091                                 - Keeps (they are always immediately scheduled)
2092                                 - Phi (same as Keep)
2093                         */
2094                         if (mode == mode_T || mode == mode_X || is_Phi(irn))
2095                                 continue;
2096
2097                         /*
2098                                 In case of a proj, we skip
2099                                 - Barrier (they are a Barrier :)
2100                                 - Start
2101                                 - the Proj itself, as it's scheduled always with it's super node
2102                         */
2103                         if (is_Proj(irn)) {
2104                                 ir_node *pred = get_Proj_pred(irn);
2105                                 if (be_is_Barrier(pred) || is_Start(pred))
2106                                         continue;
2107                         }
2108
2109                         /* calculate the descendants and consumer for each node in the block */
2110                         collect_node_info(rss, skip_Proj(irn));
2111
2112                         if (be_is_Keep(irn))
2113                                 continue;
2114
2115                         if (!arch_irn_is(irn, ignore) &&
2116                                         arch_get_irn_reg_class(irn, -1) == cls) {
2117                                 plist_insert_back(rss->nodes, skip_Proj(irn));
2118                         }
2119                         //}
2120                 }
2121
2122                 /* compute the potential killing set PK(G) */
2123                 compute_pkill_set(rss);
2124
2125                 /* compute the killing function k* */
2126                 compute_killing_function(rss);
2127
2128                 /*
2129                         Compute the heuristic value serialization and
2130                         add the necessary dependencies to the irg.
2131                 */
2132                 perform_value_serialization_heuristic(rss);
2133
2134                 ir_nodeset_destroy(&rss->live_block);
2135         }
2136
2137         phase_free(&rss->ph);
2138 }
2139
2140 /**
2141  * Register the options.
2142  */
2143 void be_init_schedrss(void) {
2144         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
2145         lc_opt_entry_t *sched_grp = lc_opt_get_grp(be_grp, "sched");
2146         lc_opt_entry_t *rss_grp = lc_opt_get_grp(sched_grp, "rss");
2147
2148         lc_opt_add_table(rss_grp, rss_option_table);
2149 }
2150
2151 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_schedrss);
2152
2153 /**
2154  * Preprocess the irg for scheduling.
2155  */
2156 void rss_schedule_preparation(be_irg_t *birg) {
2157         ir_graph *irg = be_get_birg_irg(birg);
2158         rss_t rss;
2159
2160         FIRM_DBG_REGISTER(rss.dbg, "firm.be.sched.rss");
2161
2162         //firm_dbg_set_mask(rss.dbg, LEVEL_1 | LEVEL_2 | LEVEL_3);
2163
2164         init_rss_special_nodes(irg);
2165
2166         rss.irg      = irg;
2167         rss.arch_env = be_get_birg_arch_env(birg);
2168         rss.abi      = birg->abi;
2169         rss.h        = heights_new(irg);
2170         rss.nodes    = plist_new();
2171         rss.opts     = &rss_options;
2172         rss.liveness = be_liveness(birg);
2173         be_liveness_assure_sets(rss.liveness);
2174         irg_block_walk_graph(irg, NULL, process_block, &rss);
2175         heights_free(rss.h);
2176         plist_free(rss.nodes);
2177         be_liveness_free(rss.liveness);
2178
2179         if (birg->main_env->options->dump_flags & DUMP_SCHED)
2180                 be_dump(rss.irg, "-rss", dump_ir_block_graph);
2181 }