578a4b38aec824905305e92c4908fb1f85c3f4ab
[libfirm] / ir / be / beschedrss.c
1 /**
2  * Implementation of a register saturating list scheduler
3  * as described in: Sid-Ahmed-Ali Touati
4  * Register Saturation in Superscalar and VLIW Codes
5  *
6  * @license This file protected by GPL -  GNU GENERAL PUBLIC LICENSE.
7  * @author  Christian Wuerdig
8  * @date    29.08.2006
9  * @cvs-id  $Id$
10  */
11 #ifdef HAVE_CONFIG_H
12 #include "config.h"
13 #endif
14
15 #include <limits.h>
16
17 #include "obst.h"
18 #include "debug.h"
19
20 #include "irgraph_t.h"
21 #include "irnode_t.h"
22 #include "iredges_t.h"
23 #include "ircons_t.h"
24 #include "irphase_t.h"
25 #include "irgwalk.h"
26 #include "irdump.h"
27 #include "irtools.h"
28 #include "irbitset.h"
29 #include "irprintf.h"
30 #include "bipartite.h"
31 #include "hungarian.h"
32 #include "plist.h"
33
34 #include "height.h"
35
36 #include "beabi.h"
37 #include "benode_t.h"
38 #include "besched_t.h"
39
40 #ifdef WITH_LIBCORE
41 #include <libcore/lc_opts.h>
42 #include <libcore/lc_opts_enum.h>
43 #endif /* WITH_LIBCORE */
44
45
46 #define ARR_LEN_SAFE(arr) ((arr) != NULL ? ARR_LEN((arr)) : 0)
47
48 #define HASH_RSS_EDGE(edge) ((get_irn_node_nr((edge)->src) << 16) | (get_irn_node_nr((edge)->tgt) & 0xFFFF))
49 #define BSEARCH_IRN_ARR(val, arr) \
50         bsearch(&(val), (arr), ARR_LEN_SAFE((arr)), sizeof((arr)[0]), cmp_irn_idx)
51
52 #define BLOCK_IDX_MAP(rss, irn) bsearch_for_index(get_irn_idx((irn)), (rss)->idx_map, ARR_LEN_SAFE((rss)->idx_map), 1)
53
54 /* the rss options */
55 typedef struct _rss_opts_t {
56         int dump_flags;
57 } rss_opts_t;
58
59 /* Represents a child with associated costs */
60 typedef struct _child {
61         ir_node *irn;
62         float   cost;
63 } child_t;
64
65 /* We need edges for several purposes. */
66 typedef struct _rss_edge {
67         ir_node *src;
68         ir_node *tgt;
69         void    *next;
70 } rss_edge_t;
71
72 /* Represents a connected bipartite component. */
73 typedef struct _cbc {
74         nodeset *parents;       /**< = S  a set of value producers */
75         nodeset *children;      /**< = T  a set of value consumers */
76         pset    *kill_edges;    /**< = E  a set of edges (t in T, s in S) such as each s in S gets killed by at least one t in T */
77         int     nr;             /**< a deterministic index for set insertion (used as hash) */
78 } cbc_t;
79
80 /* Represents a disjoint value DAG. */
81 typedef struct _dvg {
82         nodeset *nodes;
83         pset    *edges;
84 } dvg_t;
85
86 /* Represents a chain of nodes. */
87 typedef struct _chain {
88         plist_t *elements;   /**< List of chain elements */
89         int     nr;          /**< a deterministic index for set insertion (used as hash) */
90 } chain_t;
91
92 typedef struct _rss_irn {
93         plist_t  *consumer_list;    /**< List of consumers */
94         ir_node **consumer;         /**< Sorted consumer array (needed for faster access) */
95
96         plist_t  *parent_list;      /**< List of parents */
97         plist_t  *pkiller_list;     /**< List of potential killers */
98
99         plist_t  *descendant_list;  /**< List of descendants */
100         ir_node **descendants;      /**< Sorted descendant array (needed for faster access) */
101
102         ir_node  *killer;           /**< The selected unique killer */
103         ir_node  *irn;              /**< The corresponding firm node to this rss_irn */
104
105         chain_t  *chain;            /**< The chain, this node is associated to */
106
107         unsigned desc_walk;         /**< visited flag for collecting descendants */
108         unsigned kill_count;        /**< number of nodes killed by this one */
109
110         unsigned live_out : 1;      /**< irn has consumers outside of it's block */
111         unsigned visited  : 1;      /**< visited flag for bipartite decomposition */
112         unsigned havecons : 1;      /**< visited flag collect consumer */
113         unsigned handled  : 1;      /**< flag indicating whether or not the list structures have been build */
114         unsigned dumped   : 1;      /**< flag indication whether or not this node was dumped */
115 } rss_irn_t;
116
117 /* Represents a serialization edge with associated costs. */
118 typedef struct _serialization {
119         rss_irn_t  *u;       /* the top node */
120         rss_irn_t  *v;       /* the node about to be serialized after u */
121         rss_edge_t *edge;    /* the edge selected for the serialization */
122         int        omega1;   /* estimated: available regs - RS reduction */
123         int        omega2;   /* increase in critical path length */
124         int        new_killer;
125 } serialization_t;
126
127 typedef struct _rss {
128         phase_t          ph;              /**< Phase to hold some data */
129         heights_t        *h;              /**< The current height object */
130         ir_graph         *irg;            /**< The irg to preprocess */
131         plist_t          *nodes;          /**< The list of interesting nodes */
132         const arch_env_t *arch_env;       /**< The architecture environment */
133         be_abi_irg_t     *abi;            /**< The abi for this irg */
134         pset             *cbc_set;        /**< A set of connected bipartite components */
135         ir_node          *block;          /**< The current block in progress. */
136         int              *idx_map;        /**< Mapping irn indices to per block indices */
137         unsigned         max_height;      /**< maximum height in the current block */
138         rss_opts_t       *opts;           /**< The options */
139         be_lv_t          *liveness;       /**< The liveness information for this irg */
140         pset             *live_block;     /**< Values alive at end of block */
141         const arch_register_class_t *cls; /**< The current register class */
142         DEBUG_ONLY(firm_dbg_module_t *dbg);
143 } rss_t;
144
145 #define get_rss_irn(rss, irn)  (phase_get_or_set_irn_data(&rss->ph, irn))
146
147 /**
148  * We need some special nodes:
149  * a source and a sink for all live-in and live-out values of a block
150  */
151
152 enum {
153         iro_rss_Source,
154         iro_rss_Sink,
155         iro_rss_last
156 };
157
158 static ir_node *_source = NULL;
159 static ir_node *_sink   = NULL;
160
161 #define is_Source(irn) ((irn) == _source)
162 #define is_Sink(irn)   ((irn) == _sink)
163
164 enum {
165         RSS_DUMP_NONE  = 0,
166         RSS_DUMP_CBC   = 1 << 0,
167         RSS_DUMP_PKG   = 1 << 1,
168         RSS_DUMP_KILL  = 1 << 2,
169         RSS_DUMP_DVG   = 1 << 3,
170         RSS_DUMP_MAXAC = 1 << 4,
171         RSS_DUMP_ALL   = (RSS_DUMP_MAXAC << 1) - 1,
172 };
173
174 static rss_opts_t rss_options = {
175         RSS_DUMP_NONE,
176 };
177
178 #ifdef WITH_LIBCORE
179 static const lc_opt_enum_int_items_t dump_items[] = {
180         { "none",  RSS_DUMP_NONE  },
181         { "cbc",   RSS_DUMP_CBC   },
182         { "pkg",   RSS_DUMP_PKG   },
183         { "kill",  RSS_DUMP_KILL  },
184         { "dvg",   RSS_DUMP_DVG   },
185         { "maxac", RSS_DUMP_MAXAC },
186         { "all",   RSS_DUMP_ALL   },
187         { NULL, 0 }
188 };
189
190 static lc_opt_enum_int_var_t dump_var = {
191         &rss_options.dump_flags, dump_items
192 };
193
194 static const lc_opt_table_entry_t rss_option_table[] = {
195         LC_OPT_ENT_ENUM_MASK("dump", "dump phases", &dump_var),
196         { NULL }
197 };
198 #endif /* WITH_LIBCORE */
199
200 /******************************************************************************
201  *  _          _                    __                  _   _
202  * | |        | |                  / _|                | | (_)
203  * | |__   ___| |_ __   ___ _ __  | |_ _   _ _ __   ___| |_ _  ___  _ __  ___
204  * | '_ \ / _ \ | '_ \ / _ \ '__| |  _| | | | '_ \ / __| __| |/ _ \| '_ \/ __|
205  * | | | |  __/ | |_) |  __/ |    | | | |_| | | | | (__| |_| | (_) | | | \__ \
206  * |_| |_|\___|_| .__/ \___|_|    |_|  \__,_|_| |_|\___|\__|_|\___/|_| |_|___/
207  *            | |
208  *            |_|
209  ******************************************************************************/
210
211 /**
212  * Acquire opcodes and create source and sink nodes.
213  */
214 static void init_rss_special_nodes(ir_graph *irg) {
215         ir_node *block         = get_irg_start_block(irg);
216         int     iro_rss_base   = get_next_ir_opcodes(iro_rss_last);
217         ir_op   *op_rss_Source = new_ir_op(iro_rss_base + iro_rss_Source, "rss_Source", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
218         ir_op   *op_rss_Sink   = new_ir_op(iro_rss_base + iro_rss_Sink,   "rss_Sink",   op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
219         _source                = new_ir_node(NULL, irg, block, op_rss_Source, mode_ANY, 0, NULL);
220         _sink                  = new_ir_node(NULL, irg, block, op_rss_Sink, mode_ANY, 0, NULL);
221 }
222
223 static int cmp_int(const void *a, const void *b) {
224         const int *i1 = a;
225         const int *i2 = b;
226
227         return QSORT_CMP(*i1, *i2);
228 }
229
230 static int cmp_child_costs(const void *a, const void *b) {
231         const child_t *c1 = a;
232         const child_t *c2 = b;
233
234         return QSORT_CMP(c1->cost, c2->cost);
235 }
236
237 static int cmp_irn_idx(const void *a, const void *b) {
238         const ir_node *n1 = *(ir_node **)a;
239         const ir_node *n2 = *(ir_node **)b;
240
241         return QSORT_CMP(get_irn_idx(n1), get_irn_idx(n2));
242 }
243
244 static int cmp_rss_edges(const void *a, const void *b) {
245         const rss_edge_t *e1 = a;
246         const rss_edge_t *e2 = b;
247
248         return (e1->src != e2->src) || (e1->tgt != e2->tgt);
249 }
250
251 static int bsearch_for_index(int key, int *arr, size_t len, int force) {
252         int left = 0;
253         int right = len;
254
255         while (right >= left) {
256                 int idx = (left + right) / 2;
257
258                 if (key < arr[idx])
259                         right = idx - 1;
260                 else if (key > arr[idx])
261                         left = idx + 1;
262                 else
263                         return idx;
264         }
265
266         if (force)
267                 assert(0 && "Something is wrong, key not found.");
268         return -1;
269 }
270
271 static ir_node **build_sorted_array_from_list(plist_t *irn_list, struct obstack *obst) {
272         plist_element_t *el;
273         int     i     = 0;
274         int     len   = plist_count(irn_list);
275         ir_node **arr = NEW_ARR_D(ir_node *, obst, len);
276
277         /* copy the list into the array */
278         foreach_plist(irn_list, el) {
279                 arr[i++] = plist_element_get_value(el);
280         }
281
282         /* sort the array by node index */
283         qsort(arr, len, sizeof(arr[0]), cmp_irn_idx);
284
285         return arr;
286 }
287
288 /*****************************************************
289  *      _      _                       _
290  *     | |    | |                     (_)
291  *   __| | ___| |__  _   _  __ _  __ _ _ _ __   __ _
292  *  / _` |/ _ \ '_ \| | | |/ _` |/ _` | | '_ \ / _` |
293  * | (_| |  __/ |_) | |_| | (_| | (_| | | | | | (_| |
294  *  \__,_|\___|_.__/ \__,_|\__, |\__, |_|_| |_|\__, |
295  *                          __/ | __/ |         __/ |
296  *                         |___/ |___/         |___/
297  *
298  *****************************************************/
299
300 static void dump_nodeset(nodeset *ns, const char *prefix) {
301         ir_node *irn;
302         foreach_nodeset(ns, irn) {
303                 ir_fprintf(stderr, "%s%+F\n", prefix, irn);
304         }
305 }
306
307 static void build_file_name(rss_t *rss, const char *suffix, size_t suf_len, char *buf, size_t len) {
308         const char *irg_name;
309
310         memset(buf, 0, len);
311         irg_name = get_entity_name(get_irg_entity(rss->irg));
312         snprintf(buf, len - suf_len, "%s-%s-block-%ld",
313                 irg_name, arch_register_class_name(rss->cls), get_irn_node_nr(rss->block));
314         strcat(buf, suffix);
315 }
316
317 /* Dumps all collected bipartite components of current irg as vcg. */
318 static void debug_vcg_dump_bipartite(rss_t *rss) {
319         cbc_t *cbc;
320         FILE  *f;
321         char  file_name[256];
322         static const char suffix[] = "-RSS-CBC.vcg";
323
324         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
325         f = fopen(file_name, "w");
326
327         ir_fprintf(f, "graph: { title: \"connected bipartite component graph of %+F\"\n", rss->irg);
328         fprintf(f, "display_edge_labels: no\n");
329         fprintf(f, "layoutalgorithm: mindepth\n");
330         fprintf(f, "manhattan_edges: yes\n\n");
331
332         foreach_pset(rss->cbc_set, cbc) {
333                 ir_node    *n;
334                 rss_edge_t *ke;
335
336                 fprintf(f, "graph: { titel: \"cbc %d\" label: \"cbc %d\" status:clustered color:yellow\n", cbc->nr, cbc->nr);
337                 foreach_nodeset(cbc->parents, n) {
338                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
339                 }
340                 foreach_nodeset(cbc->children, n) {
341                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
342                 }
343                 foreach_pset(cbc->kill_edges, ke) {
344                         ir_fprintf(f, "edge: { sourcename: \"n%d_%d\" targetname: \"n%d_%d\" }\n",
345                                 get_irn_node_nr(ke->src), cbc->nr, get_irn_node_nr(ke->tgt), cbc->nr);
346                 }
347                 fprintf(f, "}\n\n");
348         }
349         fprintf(f, "}\n");
350         fclose(f);
351 }
352
353 /* Dump the computed killing function as vcg. */
354 static void debug_vcg_dump_kill(rss_t *rss) {
355         FILE            *f;
356         char            file_name[256];
357         plist_element_t *el;
358         static const char suffix[] = "-RSS-KILL.vcg";
359
360         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
361         f = fopen(file_name, "w");
362
363         ir_fprintf(f, "graph: { title: \"computed kill graph of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
364         fprintf(f, "display_edge_labels: no\n");
365         fprintf(f, "layoutalgorithm: mindepth\n");
366         fprintf(f, "manhattan_edges: yes\n\n");
367
368         /* first: reset dumped flag of all nodes */
369         foreach_plist(rss->nodes, el) {
370                 ir_node   *irn  = plist_element_get_value(el);
371                 rss_irn_t *rirn = get_rss_irn(rss, irn);
372                 rirn->dumped = 0;
373         }
374
375         /* dump all nodes and their killers */
376         foreach_plist(rss->nodes, el) {
377                 ir_node   *irn     = plist_element_get_value(el);
378                 rss_irn_t *rirn    = get_rss_irn(rss, irn);
379                 rss_irn_t *pk_rirn = get_rss_irn(rss, rirn->killer);
380
381                 if (! rirn->dumped) {
382                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
383                         rirn->dumped = 1;
384                 }
385
386                 if (! pk_rirn->dumped) {
387                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(rirn->killer), rirn->killer);
388                         pk_rirn->dumped = 1;
389                 }
390
391                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
392                         get_irn_node_nr(rirn->killer), get_irn_node_nr(irn));
393         }
394
395         fprintf(f, "}\n");
396         fclose(f);
397 }
398
399 /* Dumps the potential killing DAG (PKG) as vcg. */
400 static void debug_vcg_dump_pkg(rss_t *rss, nodeset *max_ac, int iteration) {
401         FILE              *f;
402         char              file_name[256];
403         char              *suffix   = alloca(32);
404         static const char suffix1[] = "-RSS-PKG.vcg";
405         static const char suffix2[] = "-RSS-PKG-MAXAC.vcg";
406         plist_element_t   *el;
407
408         if (! max_ac) {
409                 snprintf(suffix, 32, "%s", suffix1);
410         }
411         else {
412                 snprintf(suffix, 32, "-%02d%s", iteration, suffix2);
413         }
414
415         build_file_name(rss, suffix, strlen(suffix) + 1, file_name, sizeof(file_name));
416         f = fopen(file_name, "w");
417
418         ir_fprintf(f, "graph: { title: \"potential killing DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
419         fprintf(f, "display_edge_labels: no\n");
420         fprintf(f, "layoutalgorithm: mindepth\n");
421         fprintf(f, "manhattan_edges: yes\n\n");
422
423         foreach_plist(rss->nodes, el) {
424                 ir_node   *irn  = plist_element_get_value(el);
425                 rss_irn_t *rirn = get_rss_irn(rss, irn);
426                 char      *c1   = "";
427                 plist_element_t *k_el;
428
429                 /* dump selected saturating values in yellow */
430                 if (max_ac && nodeset_find(max_ac, irn))
431                         c1 = "color:yellow";
432
433                 if (rirn->chain)
434                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(irn), irn, rirn->chain->nr, c1);
435                 else
436                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(irn), irn, c1);
437                 rirn->dumped = 1;
438
439                 foreach_plist(rirn->pkiller_list, k_el) {
440                         ir_node   *pkiller = plist_element_get_value(k_el);
441                         rss_irn_t *pk_rirn = get_rss_irn(rss, pkiller);
442                         char      *c2      = "";
443
444                         if (max_ac && nodeset_find(max_ac, pkiller))
445                                 c2 = "color:yellow";
446
447                         if (! pk_rirn->dumped) {
448                                 if (pk_rirn->chain)
449                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(pkiller), pkiller, pk_rirn->chain->nr, c2);
450                                 else
451                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(pkiller), pkiller, c2);
452                                 pk_rirn->dumped = 1;
453                         }
454                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
455                                 get_irn_node_nr(pkiller), get_irn_node_nr(irn));
456                 }
457         }
458         fprintf(f, "}\n");
459         fclose(f);
460 }
461
462 /* Dumps the disjoint value DAG (DVG) as vcg. */
463 static void debug_vcg_dump_dvg(rss_t *rss, dvg_t *dvg) {
464         static const char suffix[] = "-RSS-DVG.vcg";
465         FILE       *f;
466         char       file_name[256];
467         ir_node    *irn;
468         rss_edge_t *edge;
469
470         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
471         f = fopen(file_name, "w");
472
473         ir_fprintf(f, "graph: { title: \"disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
474         fprintf(f, "display_edge_labels: no\n");
475         fprintf(f, "layoutalgorithm: mindepth\n");
476         fprintf(f, "manhattan_edges: yes\n\n");
477
478         /* dump all nodes */
479         foreach_nodeset(dvg->nodes, irn) {
480                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
481         }
482
483         /* dump all edges */
484         foreach_pset(dvg->edges, edge) {
485                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
486                         get_irn_node_nr(edge->src), get_irn_node_nr(edge->tgt));
487         }
488
489         fprintf(f, "}\n");
490         fclose(f);
491 }
492
493 #if 0
494 /* Dumps the PKG(DVG). */
495 static void debug_vcg_dump_dvg_pkiller(rss_t *rss, dvg_t *dvg) {
496         static const char suffix[] = "-RSS-DVG-PKG.vcg";
497         FILE       *f;
498         char       file_name[256];
499         ir_node    *irn;
500
501         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
502         f = fopen(file_name, "w");
503
504         ir_fprintf(f, "graph: { title: \"PKG of disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
505         fprintf(f, "display_edge_labels: no\n");
506         fprintf(f, "layoutalgorithm: mindepth\n");
507         fprintf(f, "manhattan_edges: yes\n\n");
508
509         /* dump all nodes */
510         foreach_nodeset(dvg->nodes, irn) {
511                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
512         }
513
514         /* dump all edges */
515         foreach_nodeset(dvg->nodes, irn) {
516                 rss_irn_t       *node = get_rss_irn(rss, irn);
517                 plist_element_t *el;
518
519                 foreach_plist(node->dvg_pkiller_list, el) {
520                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
521                                 get_irn_node_nr(plist_element_get_value(el)), get_irn_node_nr(irn));
522                 }
523         }
524
525         fprintf(f, "}\n");
526         fclose(f);
527 }
528 #endif /* if 0 */
529
530 /**
531  * In case there is no rss information for irn, initialize it.
532  */
533 static void *init_rss_irn(phase_t *ph, ir_node *irn, void *old) {
534         rss_irn_t *res = old ? old : phase_alloc(ph, sizeof(res[0]));
535
536         res->descendant_list = plist_obstack_new(phase_obst(ph));
537         res->descendants     = NULL;
538
539         res->consumer_list   = plist_obstack_new(phase_obst(ph));
540         res->consumer        = NULL;
541
542         res->pkiller_list    = plist_obstack_new(phase_obst(ph));
543
544         res->parent_list     = plist_obstack_new(phase_obst(ph));
545
546         res->killer          = NULL;
547         res->irn             = irn;
548         res->chain           = NULL;
549
550         res->kill_count      = 0;
551         res->desc_walk       = 0;
552         res->live_out        = 0;
553         res->visited         = 0;
554         res->handled         = 0;
555         res->dumped          = 0;
556         res->havecons        = 0;
557
558         return res;
559 }
560
561 /**
562  * Collect all nodes data dependent on current node.
563  */
564 static void collect_descendants(rss_t *rss, rss_irn_t *rirn, ir_node *irn, int *got_sink, unsigned cur_desc_walk) {
565         const ir_edge_t *edge;
566         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
567         ir_node         *block    = rss->block;
568         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
569         int             i;
570
571         if (cur_node->desc_walk >= cur_desc_walk)
572                 return;
573         cur_node->desc_walk = cur_desc_walk;
574
575         /* Stop at Barriers */
576         if (be_is_Barrier(irn))
577                 return;
578
579         /* loop over normal and dependency edges */
580         for (i = 0; i < 2; ++i) {
581                 foreach_out_edge_kind(irn, edge, ekind[i]) {
582                         ir_node *user = get_edge_src_irn(edge);
583
584                         /* skip ignore nodes as they do not really contribute to register pressure */
585                         if (arch_irn_is(rss->arch_env, user, ignore))
586                                 continue;
587
588                         if (get_irn_mode(user) == mode_X) {
589                                 if (! *got_sink)
590                                         goto force_sink;
591                                 else
592                                         continue;
593                         }
594
595                         if (is_Proj(user)) {
596
597                                 //if (arch_get_irn_reg_class(rss->arch_env, user, -1) == rss->cls)
598                                         collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
599                         }
600                         else {
601                                 /* check if user lives in block and is not a control flow node */
602                                 if (get_nodes_block(user) == block) {
603                                         if (! plist_has_value(rirn->descendant_list, user)) {
604                                                 plist_insert_back(rirn->descendant_list, user);
605                                                 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", user));
606                                         }
607                                         collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
608                                 }
609                                 else if (! *got_sink) {
610 force_sink:
611                                         /* user lives out of block: add sink as descendant if not already done */
612                                         plist_insert_back(rirn->descendant_list, _sink);
613                                         *got_sink = 1;
614                                         DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", _sink));
615                                 }
616                         }
617                 }
618         }
619 }
620
621 /**
622  * Handles a single consumer.
623  */
624 static void collect_single_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *consumer, int *got_sink) {
625         ir_node *block = rss->block;
626
627         assert(! is_Proj(consumer) && "Cannot handle Projs");
628
629         if (! is_Block(consumer) && get_nodes_block(consumer) == block) {
630                 if (! arch_irn_is(rss->arch_env, consumer, ignore) && ! plist_has_value(rss_irn->consumer_list, consumer)) {
631                         plist_insert_back(rss_irn->consumer_list, consumer);
632                         DBG((rss->dbg, LEVEL_2, "\t\tconsumer %+F\n", consumer));
633                 }
634         }
635         else {
636                 rss_irn->live_out = 1;
637                 DBG((rss->dbg, LEVEL_2, "\t\tlive out %+F", consumer));
638                 if (! *got_sink) {
639                         plist_insert_back(rss_irn->consumer_list, _sink);
640                         *got_sink = 1;
641                         DB((rss->dbg, LEVEL_2, ", %+F added instead", _sink));
642                 }
643                 DB((rss->dbg, LEVEL_2, "\n"));
644         }
645 }
646
647 /**
648  * Collect all nodes consuming the value(s) produced by current node.
649  */
650 static void collect_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *irn, int *got_sink) {
651         const ir_edge_t *edge;
652         int             i;
653         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
654         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
655
656         if (cur_node->havecons)
657                 return;
658         cur_node->havecons = 1;
659
660         for (i = 0; i < 2; ++i) {
661                 foreach_out_edge_kind(irn, edge, ekind[i]) {
662                         ir_node *consumer = get_edge_src_irn(edge);
663
664                         if (is_Proj(consumer)) {
665                                 //if (arch_get_irn_reg_class(rss->arch_env, consumer, -1) == rss->cls)
666                                         collect_consumer(rss, rss_irn, consumer, got_sink);
667                         }
668                         else
669                                 collect_single_consumer(rss, rss_irn, consumer, got_sink);
670                 }
671         }
672 }
673
674 /**
675  * Collects all consumer and descendant of a irn.
676  */
677 static void collect_node_info(rss_t *rss, ir_node *irn) {
678         static unsigned cur_desc_walk = 0;
679         rss_irn_t       *rss_irn      = get_rss_irn(rss, irn);
680         int             got_sink;
681
682         assert(! is_Proj(irn) && "Cannot handle Projs.");
683
684         /* check if node info is already available */
685         if (rss_irn->handled)
686                 return;
687                 //phase_reinit_single_irn_data(&rss->ph, irn);
688
689         DBG((rss->dbg, LEVEL_1, "\tcomputing consumers of %+F:\n", irn));
690
691         /* collect all consumer */
692         got_sink = 0;
693         collect_consumer(rss, rss_irn, irn, &got_sink);
694
695         /* build sorted consumer array */
696         rss_irn->consumer = build_sorted_array_from_list(rss_irn->consumer_list, phase_obst(&rss->ph));
697
698         DBG((rss->dbg, LEVEL_1, "\tcompute descendants of %+F:\n", irn));
699
700         /* collect descendants */
701         got_sink = 0;
702         collect_descendants(rss, rss_irn, irn, &got_sink, ++cur_desc_walk);
703
704         /* build sorted descendant array */
705         rss_irn->descendants = build_sorted_array_from_list(rss_irn->descendant_list, phase_obst(&rss->ph));
706
707         rss_irn->handled = 1;
708 }
709
710 /**
711  * Checks if v is a potential killer of u.
712  * v is in pkill(u) iff descendants(v) cut consumer(u) is v
713  *
714  * @param rss   The rss object
715  * @param v      The node to check for killer
716  * @param u      The potentially killed value
717  * @return 1 if v is in pkill(u), 0 otherwise
718  */
719 static int is_potential_killer(rss_t *rss, rss_irn_t *v, rss_irn_t *u) {
720         plist_t *list;
721         ir_node **arr;
722         plist_element_t *el;
723
724         assert(is_Sink(v->irn) || ((plist_count(v->descendant_list) > 0 && v->descendants) || 1));
725         assert(is_Sink(u->irn) || ((plist_count(u->consumer_list)   > 0 && u->consumer)    || 1));
726
727         /* as we loop over the list: loop over the shorter one */
728         if (plist_count(v->descendant_list) > plist_count(u->consumer_list)) {
729                 list = u->consumer_list;
730                 arr  = v->descendants;
731         }
732         else {
733                 list = v->descendant_list;
734                 arr  = u->consumer;
735         }
736
737         /* for each list element: try to find element in array */
738         foreach_plist(list, el) {
739                 ir_node *irn   = plist_element_get_value(el);
740                 ir_node *match = BSEARCH_IRN_ARR(irn, arr);
741
742                 if (match && match != irn)
743                         return 0;
744         }
745
746         return 1;
747 }
748
749 /**
750  * Update descendants, consumer and pkiller list for the given irn.
751  */
752 static void update_node_info(rss_t *rss, ir_node *irn, ir_node *pk_irn) {
753         rss_irn_t *node    = get_rss_irn(rss, irn);
754         rss_irn_t *pkiller = get_rss_irn(rss, pk_irn);
755
756         /* add consumer and rebuild the consumer array */
757         if (! plist_has_value(node->consumer_list, pk_irn)) {
758                 plist_insert_back(node->consumer_list, pk_irn);
759                 node->consumer = build_sorted_array_from_list(node->consumer_list, phase_obst(&rss->ph));
760         }
761
762         /* add pk_irn as descendant, add it's descendants to irn's and rebuild array */
763         if (! plist_has_value(node->descendant_list, pk_irn)) {
764                 plist_element_t *el;
765
766                 plist_insert_back(node->descendant_list, pk_irn);
767
768                 foreach_plist(pkiller->descendant_list, el) {
769                         ir_node *desc = plist_element_get_value(el);
770
771                         if (! plist_has_value(node->descendant_list, desc))
772                                 plist_insert_back(node->descendant_list, desc);
773                 }
774
775                 node->descendants = build_sorted_array_from_list(node->descendant_list, phase_obst(&rss->ph));
776         }
777
778 }
779
780 /**
781  * Compute the potential killing set PK.
782  */
783 static void compute_pkill_set(rss_t *rss) {
784         plist_element_t *u_el, *v_el;
785
786         foreach_plist(rss->nodes, u_el) {
787                 ir_node   *u_irn = plist_element_get_value(u_el);
788                 rss_irn_t *u     = get_rss_irn(rss, u_irn);
789
790                 DBG((rss->dbg, LEVEL_1, "\tcomputing potential killers of %+F:\n", u_irn));
791
792                 /* check each consumer if it is a potential killer  */
793                 foreach_plist(u->consumer_list, v_el) {
794                         ir_node   *v_irn = plist_element_get_value(v_el);
795                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
796
797                         /* check, if v is a potential killer of u */
798                         if (is_potential_killer(rss, v, u)) {
799                                 if (! plist_has_value(u->pkiller_list, v_irn))
800                                         plist_insert_back(u->pkiller_list, v_irn);
801                                 v->kill_count++;
802                                 DBG((rss->dbg, LEVEL_2, "\t\tpotential killer %+F\n", v_irn));
803                         }
804                 }
805
806                 u->killer = _sink;
807         }
808
809         if (rss->opts->dump_flags & RSS_DUMP_PKG)
810                 debug_vcg_dump_pkg(rss, NULL, 0);
811 }
812
813 /**
814  * Build set of killing edges (from values to their potential killers)
815  */
816 static void build_kill_edges(rss_t *rss, pset *epk) {
817         plist_element_t *el, *k_el;
818
819         foreach_plist(rss->nodes, el) {
820                 ir_node    *irn  = plist_element_get_value(el);
821                 rss_irn_t *rirn = get_rss_irn(rss, irn);
822
823                 DBG((rss->dbg, LEVEL_3, "\t\tbuilding kill edges for %+F:\n", irn));
824
825                 foreach_plist(rirn->pkiller_list, k_el) {
826                         ir_node    *pkiller = plist_element_get_value(k_el);
827                         rss_edge_t *ke      = obstack_alloc(phase_obst(&rss->ph), sizeof(*ke));
828
829                         ke->src  = irn;
830                         ke->tgt  = pkiller;
831                         ke->next = NULL;
832
833                         DBG((rss->dbg, LEVEL_3, "\t\t\ttarget %+F\n", pkiller));
834
835                         pset_insert(epk, ke, HASH_RSS_EDGE(ke));
836                 }
837         }
838 }
839
840 /* print the given cbc for debugging purpose */
841 static void debug_print_cbc(firm_dbg_module_t *mod, cbc_t *cbc) {
842         ir_node    *n;
843         rss_edge_t *ke;
844
845         DBG((mod, LEVEL_3, "\t\tS = set of parents:\n"));
846         foreach_nodeset(cbc->parents, n) {
847                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
848         }
849         DBG((mod, LEVEL_3, "\t\tT = set of children:\n"));
850         foreach_nodeset(cbc->children, n) {
851                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
852         }
853         DBG((mod, LEVEL_3, "\t\tE = Edges from producers to consumers\n"));
854         foreach_pset(cbc->kill_edges, ke) {
855                 DBG((mod, LEVEL_3, "\t\t\t%+F -> %+F\n", ke->src, ke->tgt));
856         }
857 }
858
859 /**
860  * Construct the bipartite decomposition.
861  * Sid-Ahmed-Ali Touati, Phd Thesis
862  * Register Pressure in Instruction Level Parallelism, p. 71
863  */
864 static void compute_bipartite_decomposition(rss_t *rss) {
865         pset *epk    = new_pset(cmp_rss_edges, 10);
866         int  cur_num = 0;
867
868         plist_element_t *el;
869
870         DBG((rss->dbg, LEVEL_1, "\tcomputing bipartite decomposition:\n"));
871
872         build_kill_edges(rss, epk);
873
874         foreach_plist(rss->nodes, el) {
875                 ir_node   *u_irn   = plist_element_get_value(el);
876                 rss_irn_t *u       = get_rss_irn(rss, u_irn);
877                 int       p_change = 1;
878                 int       c_change = 1;
879
880                 cbc_t           *cbc;
881                 plist_element_t *el2;
882                 rss_edge_t      *k_edge;
883                 rss_edge_t      *kedge_root = NULL;
884                 ir_node         *t_irn, *s_irn;
885
886                 if (u->visited || u_irn == _sink)
887                         continue;
888
889                 DBG((rss->dbg, LEVEL_2, "\t\t%+F choosen:\n", u_irn));
890
891                 cbc     = obstack_alloc(phase_obst(&rss->ph), sizeof(*cbc));
892                 cbc->nr = cur_num++;
893
894                 /* initialize S_cb */
895                 cbc->parents = new_nodeset(5);
896                 nodeset_insert(cbc->parents, u_irn);
897                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (init)\n", u_irn));
898
899                 /* E_cb = empty */
900                 cbc->kill_edges = new_pset(cmp_rss_edges, 5);
901
902                 /* each parent gets killed by at least one value from children */
903
904                 /* T_cb = PK_successors(u) */
905                 cbc->children = new_nodeset(5);
906                 foreach_plist(u->pkiller_list, el2) {
907                         nodeset_insert(cbc->children, plist_element_get_value(el2));
908                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (init)\n", plist_element_get_value(el2)));
909                 }
910
911                 /*
912                         Now: insert the parents of all children into the parent set
913                         and insert the children of all parents into the children set
914                         until the sets don't change any more
915                 */
916                 while (p_change || c_change) {
917                         p_change = c_change = 0;
918
919                         /* accumulate parents */
920                         foreach_nodeset(cbc->children, t_irn) {
921                                 foreach_pset(epk, k_edge) {
922                                         ir_node *val = k_edge->src;
923
924                                         if (k_edge->tgt == t_irn && ! nodeset_find(cbc->parents, val)) {
925                                                 nodeset_insert(cbc->parents, val);
926                                                 p_change = 1;
927                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents\n", val));
928                                         }
929                                 }
930                         }
931
932                         /* accumulate children */
933                         foreach_nodeset(cbc->parents, s_irn) {
934                                 foreach_pset(epk, k_edge) {
935                                         ir_node *val = k_edge->tgt;
936
937                                         if (k_edge->src == s_irn  && ! nodeset_find(cbc->children, val)) {
938                                                 nodeset_insert(cbc->children, val);
939                                                 c_change = 1;
940                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children\n", val));
941                                         }
942                                 }
943                         }
944                 }
945
946                 /* mark all parent values as visited */
947                 foreach_nodeset(cbc->parents, s_irn) {
948                         rss_irn_t *s = get_rss_irn(rss, s_irn);
949                         s->visited = 1;
950                         /* assure bipartite property */
951                         if (nodeset_find(cbc->children, s_irn)) {
952                                 nodeset_remove(cbc->children, s_irn);
953                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F removed from to children\n", s_irn));
954                         }
955                 }
956
957                 /* update edges */
958                 foreach_pset(epk, k_edge) {
959                         if (nodeset_find(cbc->parents, k_edge->src) && nodeset_find(cbc->children, k_edge->tgt)) {
960                                 pset_insert(cbc->kill_edges, k_edge, HASH_RSS_EDGE(k_edge));
961                                 /*
962                                         Link all k_edges which are about to be removed together.
963                                         Beware: pset_remove kills the iterator.
964                                 */
965                                 k_edge->next = kedge_root;
966                                 kedge_root   = k_edge;
967                         }
968                 }
969
970                 /* remove all linked k_edges */
971                 for (; kedge_root; kedge_root = kedge_root->next) {
972                         pset_remove(epk, kedge_root, HASH_RSS_EDGE(kedge_root));
973                 }
974
975                 /* add the connected bipartite component */
976                 if (nodeset_count(cbc->parents) > 0 && nodeset_count(cbc->children) > 0 && pset_count(cbc->kill_edges) > 0)
977                         pset_insert(rss->cbc_set, cbc, (unsigned)cbc->nr);
978                 DBG((rss->dbg, LEVEL_2, "\tbipartite component %d inserted:\n", cbc->nr));
979                 DEBUG_ONLY(
980                         if (firm_dbg_get_mask(rss->dbg) & LEVEL_2)
981                                 debug_print_cbc(rss->dbg, cbc);
982                 );
983         }
984
985         if (rss->opts->dump_flags & RSS_DUMP_CBC)
986                 debug_vcg_dump_bipartite(rss);
987
988         del_pset(epk);
989 }
990
991 /**
992  * Select the child with the maximum cost.
993  */
994 static child_t *select_child_max_cost(rss_t *rss, nodeset *x, nodeset *y, child_t *t, cbc_t *cbc) {
995         ir_node *child;
996         float   max_cost = -1.0f;
997
998         DBG((rss->dbg, LEVEL_2, "\t\tcomputing children costs:\n"));
999
1000         foreach_nodeset(cbc->children, child) {
1001                 rss_irn_t  *r_child             = get_rss_irn(rss, child);
1002                 int         num_unkilled_parents = 0;
1003                 int         num_descendants;
1004                 rss_edge_t *k_edge;
1005                 float       cost;
1006
1007                 /* get the number of unkilled parents */
1008                 foreach_pset(cbc->kill_edges, k_edge) {
1009                         if (k_edge->tgt == child && nodeset_find(x, k_edge->src))
1010                                 ++num_unkilled_parents;
1011                 }
1012
1013                 cost = (float)num_unkilled_parents;
1014
1015                 num_descendants = plist_count(r_child->descendant_list) + nodeset_count(y);
1016
1017                 if (num_descendants > 0)
1018                         cost /= (float)num_descendants;
1019
1020                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F, #desc %d, cost %.3f\n", child, num_descendants, cost));
1021
1022                 if (cost > max_cost) {
1023                         t->irn   = child;
1024                         t->cost  = cost;
1025                         max_cost = cost;
1026                 }
1027         }
1028
1029         return t;
1030 }
1031
1032 /**
1033  * Remove all parents from x which are killed by t_irn.
1034  */
1035 static void remove_covered_parents(rss_t *rss, nodeset *x, ir_node *t_irn, cbc_t *cbc) {
1036         rss_irn_t  *t = get_rss_irn(rss, t_irn);
1037         rss_edge_t *k_edge;
1038
1039         DBG((rss->dbg, LEVEL_2, "\t\tremoving parents covered by %+F:\n", t_irn));
1040
1041         foreach_pset(cbc->kill_edges, k_edge) {
1042                 if (k_edge->tgt == t_irn && nodeset_find(x, k_edge->src)) {
1043                         nodeset_remove(x, k_edge->src);
1044                         plist_insert_back(t->parent_list, k_edge->src);
1045                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", k_edge->src));
1046                 }
1047         }
1048 }
1049
1050 static void update_cumulated_descendent_values(rss_t *rss, nodeset *y, ir_node *t_irn) {
1051         rss_irn_t *t = get_rss_irn(rss, t_irn);
1052         plist_element_t *el;
1053
1054         DBG((rss->dbg, LEVEL_2, "\t\tupdating cumulated descendant value of %+F:\n", t_irn));
1055
1056         foreach_plist(t->descendant_list, el) {
1057                 nodeset_insert(y, plist_element_get_value(el));
1058                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", plist_element_get_value(el)));
1059         }
1060 }
1061
1062 /**
1063  * Greedy-k: a heuristics for the MMA problem
1064  */
1065 static void compute_killing_function(rss_t *rss) {
1066         cbc_t *cbc;
1067         struct obstack obst;
1068
1069         obstack_init(&obst);
1070
1071         rss->cbc_set = pset_new_ptr(5);
1072         compute_bipartite_decomposition(rss);
1073
1074         /* for all bipartite components do: */
1075         foreach_pset(rss->cbc_set, cbc) {
1076                 ir_node *p;
1077                 nodeset *x       = new_nodeset(10);
1078                 nodeset *y       = new_nodeset(10);
1079                 child_t **sks    = NEW_ARR_F(child_t *, 20);
1080                 int     cur_len  = 0;
1081                 int     cur_size = 20;
1082                 int     i;
1083
1084                 DBG((rss->dbg, LEVEL_1, "\tcomputing SKS for cbc %d:\n", cbc->nr));
1085                 DBG((rss->dbg, LEVEL_2, "\t\tinitializing parents X:\n"));
1086
1087                 /* X = S_cb (all parents are initially uncovered) */
1088                 foreach_nodeset(cbc->parents, p) {
1089                         nodeset_insert(x, p);
1090                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", p));
1091                 }
1092
1093                 /* while X not empty */
1094                 while (nodeset_count(x) > 0) {
1095                         child_t *t = obstack_alloc(&obst, sizeof(*t));
1096                         memset(t, 0, sizeof(*t));
1097
1098                         t = select_child_max_cost(rss, x, y, t, cbc);
1099
1100                         if (cur_len >= cur_size) {
1101                                 ARR_EXTO(child_t *, sks, cur_size * 2);
1102                                 cur_size *= 2;
1103                         }
1104
1105                         DBG((rss->dbg, LEVEL_2, "\t\tinsert child %+F (%.3f) into SKS at pos %d\n", t->irn, t->cost, cur_len));
1106
1107                         sks[cur_len++] = t;
1108                         remove_covered_parents(rss, x, t->irn, cbc);
1109                         update_cumulated_descendent_values(rss, y, t->irn);
1110                 }
1111
1112                 ARR_SHRINKLEN(sks, cur_len);
1113
1114                 /* sort SKS in increasing cost order */
1115                 qsort(sks, cur_len, sizeof(sks[0]), cmp_child_costs);
1116
1117                 DBG((rss->dbg, LEVEL_2, "\tprocessing SKS for cbc %d:\n", cbc->nr));
1118
1119                 /* build killing function */
1120                 for (i = cur_len - 1; i >= 0; --i) { /* loop over sks in decreasing cost order */
1121                         child_t         *t = sks[i];
1122                         rss_irn_t       *rt = get_rss_irn(rss, t->irn);
1123                         plist_element_t *p_el;
1124
1125                         DBG((rss->dbg, LEVEL_3, "\t\t\tkiller %+F (%.3f):\n", t->irn, t->cost));
1126
1127                         /* kill all unkilled parents of t */
1128                         foreach_plist(rt->parent_list, p_el) {
1129                                 ir_node    *par = plist_element_get_value(p_el);
1130                                 rss_irn_t *rpar = get_rss_irn(rss, par);
1131
1132                                 if (is_Sink(rpar->killer)) {
1133                                         rpar->killer = t->irn;
1134                                         DBG((rss->dbg, LEVEL_2, "\t\tkill %+F\n", rpar->irn));
1135                                 }
1136                                 else {
1137                                         DBG((rss->dbg, LEVEL_3, "\t\t\tkeeping %+F as killer for %+F\n", rpar->killer, rpar->irn));
1138                                 }
1139                         }
1140                 }
1141
1142                 del_nodeset(x);
1143                 del_nodeset(y);
1144                 DEL_ARR_F(sks);
1145         }
1146
1147         if (rss->opts->dump_flags & RSS_DUMP_KILL)
1148                 debug_vcg_dump_kill(rss);
1149
1150         del_pset(rss->cbc_set);
1151         obstack_free(&obst, NULL);
1152 }
1153
1154 /**
1155  * Adds the edge src -> tgt to the dvg. Checks if reverse edge is already there (asserts).
1156  */
1157 static INLINE void add_dvg_edge(rss_t *rss, dvg_t *dvg, ir_node *src, ir_node *tgt, int have_source) {
1158         rss_edge_t *dvg_edge;
1159         rss_edge_t key;
1160
1161         if (! have_source)
1162                 nodeset_insert(dvg->nodes, src);
1163         else
1164                 assert(nodeset_find(dvg->nodes, src) != NULL && "Wrong assumption");
1165
1166         nodeset_insert(dvg->nodes, tgt);
1167
1168         key.src = tgt;
1169         key.tgt = src;
1170         assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1171
1172         key.src = src;
1173         key.tgt = tgt;
1174         if (NULL != pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1175                 /* add the edge to the DVG */
1176                 dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1177
1178                 dvg_edge->src  = src;
1179                 dvg_edge->tgt  = tgt;
1180                 dvg_edge->next = NULL;
1181
1182                 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", src, tgt));
1183                 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1184         }
1185 }
1186
1187 /**
1188  * Computes the disjoint value DAG (DVG).
1189  * BEWARE: It is not made explicitly clear in the Touati paper,
1190  *         but the DVG is meant to be build from the KILLING DAG
1191  */
1192 static void compute_dvg(rss_t *rss, dvg_t *dvg) {
1193         plist_element_t *el;
1194
1195         DBG((rss->dbg, LEVEL_1, "\tcomputing DVG:\n"));
1196
1197         foreach_plist(rss->nodes, el) {
1198                 ir_node    *u_irn    = plist_element_get_value(el);
1199                 rss_irn_t  *u        = get_rss_irn(rss, u_irn);
1200                 rss_irn_t  *u_killer = get_rss_irn(rss, u->killer);
1201                 int        i;
1202
1203                 /* TODO: omit nodes only having sink as pkiller and killing no one */
1204
1205                 /* add an edge to killer */
1206                 add_dvg_edge(rss, dvg, u_irn, u->killer, 0);
1207
1208                 if (is_Sink(u->killer))
1209                         continue;
1210
1211                 /* We add an edge to every killer, from where we could be reached. */
1212                 for (i = ARR_LEN_SAFE(u_killer->descendants) - 1; i >= 0; --i) {
1213                         add_dvg_edge(rss, dvg, u_irn, u_killer->descendants[i], 1);
1214                 }
1215
1216 #if 0
1217
1218                 foreach_plist(rss->nodes, el2) {
1219                         ir_node *v_irn = plist_element_get_value(el2);
1220
1221                         /*
1222                                 There is an edge (u, v) in the DVG iff v is a descendant of the killer(u).
1223                         */
1224                         if (BSEARCH_IRN_ARR(v_irn, u_kill->descendants)) {
1225                                 rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1226                                 rss_edge_t key;
1227
1228                                 /* insert the user into the DVG and append it to the user list of u */
1229                                 nodeset_insert(dvg->nodes, v_irn);
1230                                 if (! plist_has_value(u->dvg_user_list, v_irn))
1231                                         plist_insert_back(u->dvg_user_list, v_irn);
1232
1233                                 dvg_edge->src  = u_irn;
1234                                 dvg_edge->tgt  = v_irn;
1235                                 dvg_edge->next = NULL;
1236
1237                                 key.src = v_irn;
1238                                 key.tgt = u_irn;
1239
1240                                 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1241
1242                                 /* add the edge to the DVG */
1243                                 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", u_irn, v_irn));
1244                                 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1245                         }
1246                 }
1247 #endif /* if 0 */
1248         }
1249
1250         if (rss->opts->dump_flags & RSS_DUMP_DVG)
1251                 debug_vcg_dump_dvg(rss, dvg);
1252 }
1253
1254 /**
1255  * Updates the dvg structure when a serialization edge from src -> tgt is added.
1256  */
1257 static void update_dvg(rss_t *rss, dvg_t *dvg, rss_irn_t *src, rss_irn_t *tgt) {
1258         int i, j, idx;
1259         rss_edge_t *edge;
1260         rss_edge_t **arr = alloca(pset_count(dvg->edges) * sizeof(arr[0]));
1261
1262         /*
1263                 Add an edge from serialization target to serialization src:
1264                 src cannot be alive with target
1265         */
1266         add_dvg_edge(rss, dvg, tgt->irn, src->irn, 0);
1267
1268         /* Add edges to src's descendants as well, they also getting serialized. */
1269         for (i = ARR_LEN_SAFE(src->descendants) - 1; i >= 0; --i) {
1270                 add_dvg_edge(rss, dvg, tgt->irn, src->descendants[i], 1);
1271         }
1272
1273         /* We also have to add edges from targets predecessors in dvg */
1274         idx = 0;
1275         /* We cannot insert elements into set over which we iterate ... */
1276         foreach_pset(dvg->edges, edge) {
1277                 if (edge->tgt == tgt->irn) {
1278                         arr[idx++] = edge;
1279                 }
1280         }
1281
1282         for (i = 0; i < idx; ++i) {
1283                 edge = arr[i];
1284                 add_dvg_edge(rss, dvg, edge->src, src->irn, 1);
1285                 for (j = ARR_LEN_SAFE(src->descendants) - 1; j >= 0; --j) {
1286                         add_dvg_edge(rss, dvg, edge->src, src->descendants[j], 1);
1287                 }
1288         }
1289 }
1290
1291 #if 0
1292 /**
1293  * Accumulate all descendants for root into list.
1294  */
1295 static void accumulate_dvg_descendant_values(rss_t *rss, rss_irn_t *root, plist_t *list) {
1296         if (plist_count(root->dvg_user_list) > 0) {
1297                 plist_element_t *el;
1298
1299                 foreach_plist(root->dvg_user_list, el) {
1300                         ir_node   *v_irn = plist_element_get_value(el);
1301                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
1302
1303                         /* add v as descendant */
1304                         if (! plist_has_value(list, v_irn)) {
1305                                 plist_insert_back(list, v_irn);
1306                                 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG descendant %+F\n", v_irn));
1307                         }
1308
1309                         /* accumulate v's descendants */
1310                         accumulate_dvg_descendant_values(rss, v, list);
1311                 }
1312         }
1313 }
1314
1315 /**
1316  * Builds the list of potential killers for each node
1317  * in the given DVG.
1318  * Needs the descendant list for all user as sorted array.
1319  */
1320 static void build_dvg_pkiller_list(rss_t *rss, dvg_t *dvg) {
1321         ir_node *irn;
1322
1323         foreach_nodeset(dvg->nodes, irn) {
1324                 rss_irn_t       *node = get_rss_irn(rss, irn);
1325                 plist_element_t *el, *el2;
1326
1327                 DBG((rss->dbg, DEBUG_DVG, "\t\tbuilding pkiller list for %+F\n", irn));
1328
1329                 /* check each user */
1330                 foreach_plist(node->dvg_user_list, el) {
1331                         ir_node *u_irn = plist_element_get_value(el);
1332
1333                         /* is the current user u_irn not a descendant of any other user -> pkiller */
1334                         foreach_plist(node->dvg_user_list, el2) {
1335                                 ir_node   *v_irn = plist_element_get_value(el2);
1336                                 rss_irn_t *v     = get_rss_irn(rss, v_irn);
1337
1338                                 if (el != el2                             &&
1339                                         ! BSEARCH_IRN_ARR(u_irn, v->dvg_desc) &&
1340                                         ! plist_has_value(node->dvg_pkiller_list, u_irn))
1341                                 {
1342                                         plist_insert_back(node->dvg_pkiller_list, u_irn);
1343                                         DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG pkiller %+F\n", u_irn));
1344                                 }
1345                         }
1346                 }
1347
1348                 node->dvg_pkiller = build_sorted_array_from_list(node->dvg_pkiller_list, phase_obst(&rss->ph));
1349         }
1350
1351         DEBUG_ONLY(
1352                 if (firm_dbg_get_mask(rss->dbg) & DEBUG_DVG)
1353                         debug_vcg_dump_dvg_pkiller(rss, dvg);
1354         );
1355 }
1356
1357 #endif /* if 0 */
1358
1359 /**
1360  * Compute the maximal antichain of the current DVG.
1361  * This is a reimplementation of the MAXIMAL_ANTI_CHAIN function
1362  * from the DDG library 1.1 (DAG.cpp).
1363  */
1364 static nodeset *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration) {
1365         int         n               = nodeset_count(dvg->nodes);
1366         int         *assignment     = alloca(n * sizeof(assignment[0]));
1367         int         *assignment_rev = alloca(n * sizeof(assignment_rev[0]));
1368         int         *idx_map        = alloca(n * sizeof(idx_map[0]));
1369         hungarian_problem_t *bp;
1370         nodeset     *values, *temp;
1371         ir_node     *u_irn;
1372         int         i, j, cost, cur_chain, res;
1373         rss_edge_t  *dvg_edge;
1374
1375 #define MAP_IDX(irn) bsearch_for_index(get_irn_idx(irn), idx_map,  n,  1)
1376
1377         if (pset_count(dvg->edges) == 0)
1378                 return NULL;
1379
1380         bp = hungarian_new(n, n, 1, HUNGARIAN_MATCH_NORMAL);
1381
1382         /*
1383                 At first, we build an index map for the nodes in the DVG,
1384                 because we cannot use the irn idx therefore as the resulting
1385                 bipartite data structure would have around 1.2GB.
1386                 So we limit the size to the number of nodes we have in the DVG
1387                 and build a sorted index map for their irn indices.
1388         */
1389         i = 0;
1390         foreach_nodeset(dvg->nodes, u_irn) {
1391                 idx_map[i++] = get_irn_idx(u_irn);
1392         }
1393         qsort(idx_map, n, sizeof(idx_map[0]), cmp_int);
1394
1395         foreach_pset(dvg->edges, dvg_edge) {
1396                 int idx_u = MAP_IDX(dvg_edge->src);
1397                 int idx_v = MAP_IDX(dvg_edge->tgt);
1398
1399                 /* add the entry to the bipartite data structure */
1400                 hungarian_add(bp, idx_u, idx_v, 1);
1401                 DBG((rss->dbg, LEVEL_3, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1402                         idx_u, dvg_edge->src, idx_v, dvg_edge->tgt));
1403         }
1404 #if 0
1405         /*
1406                 Add a bipartite entry for each pair of nodes (u, v), where exists a
1407                 path in the DVG from u to v, ie. connect all descendants(v) to v.
1408                 desc_dvg(v) = accumulated descendants of all z in dvg_user_list(v)
1409         */
1410         foreach_nodeset(dvg->nodes, u_irn) {
1411                 rss_irn_t *u        = get_rss_irn(rss, u_irn);
1412                 int       idx_u_irn = MAP_IDX(u_irn);
1413
1414                 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG descendants of %+F:\n", u_irn));
1415
1416                 //plist_clear(u->dvg_desc_list);
1417                 //accumulate_dvg_descendant_values(rss, u, u->dvg_desc_list);
1418
1419                 /*
1420                         FIXME: The array is build on the phase obstack and we cannot free the data.
1421                         So the array get as many times allocated as this function is called.
1422                 */
1423
1424                 /* build the sorted array for faster searches */
1425                 //u->dvg_desc = build_sorted_array_from_list(u->dvg_desc_list, phase_obst(&rss->ph));
1426
1427                 DBG((rss->dbg, DEBUG_MAX_AC, "\t\tadding bipartite entries of %+F:\n", u_irn));
1428
1429                 /* add the bipartite entries for all descendants */
1430                 for (i = ARR_LEN_SAFE(u->dvg_desc) - 1; i >= 0; --i) {
1431                         rss_irn_t *desc    = get_rss_irn(rss, u->dvg_desc[i]);
1432                         int       idx_desc = MAP_IDX(u->dvg_desc[i]);
1433
1434                         /* add the entry to the bipartite data structure */
1435                         hungarian_add(bp, idx_u_irn, idx_desc, 1);
1436                         DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1437                                 idx_u_irn, u_irn, idx_desc, u->dvg_desc[i]));
1438
1439                         need_matching = 1;
1440                 }
1441         }
1442 #endif
1443
1444         /* We want maximum cardinality matching */
1445         hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);
1446
1447 #if 0
1448         DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG pkiller:\n"));
1449         /* beware: the following function needs the dvg_desc array */
1450         build_dvg_pkiller_list(rss, dvg);
1451 #endif
1452
1453         DBG((rss->dbg, LEVEL_2, "\t\tcomputing bipartite matching\n"));
1454         /*
1455                 The maximum cardinality bipartite matching gives us the minimal
1456                 chain partition, which corresponds to the maximum anti chains.
1457         */
1458         res = hungarian_solve(bp, assignment, &cost, 1);
1459         assert(res == 0 && "Bipartite matching failed!");
1460
1461         hungarian_free(bp);
1462         memset(assignment_rev, -1, n * sizeof(assignment_rev[0]));
1463
1464         /* build the reverse assignment, ie. foreach i -> j, add j -> i */
1465         for (i = 0; i < n; ++i) {
1466                 if (assignment[i] >= 0) {
1467                         int j = assignment[i];
1468                         assignment_rev[j] = i;
1469                 }
1470         }
1471
1472         DBG((rss->dbg, LEVEL_2, "\t\tgot assignment with cost %d\n", cost));
1473         DBG((rss->dbg, LEVEL_3, "\t\t\tassignment   ---   reverse assignment\n", cost));
1474         for (i = 0; i < n; ++i) {
1475                 DBG((rss->dbg, LEVEL_3, "\t\t\t%3d -> %3d         %3d -> %3d\n", i, assignment[i], i, assignment_rev[i]));
1476         }
1477
1478         values    = new_nodeset(10);
1479         cur_chain = 0;
1480         /* Construction of the minimal chain partition */
1481         for (j = 0; j < n; ++j) {
1482                 /* check nodes, which did not occur as target */
1483                 if (assignment_rev[j] == -1) {
1484                         int       xj      = idx_map[j];
1485                         ir_node   *xj_irn = get_idx_irn(rss->irg, xj);
1486                         rss_irn_t *xj_rss = get_rss_irn(rss, xj_irn);
1487                         chain_t   *c      = obstack_alloc(phase_obst(&rss->ph), sizeof(*c));
1488                         int       source;
1489
1490                         /* there was no source for j -> we have a source of a new chain */
1491                         nodeset_insert(values, xj_irn);
1492
1493                         c->elements = plist_obstack_new(phase_obst(&rss->ph));
1494                         c->nr       = cur_chain++;
1495                         plist_insert_back(c->elements, xj_irn);
1496
1497                         xj_rss->chain = c;
1498
1499                         DBG((rss->dbg, LEVEL_2, "\t\tstarting chain %d:\n", c->nr));
1500                         DBG((rss->dbg, LEVEL_2, "\t\t\t%+F (%d)", xj_irn, j));
1501
1502                         /* follow chain, having j as source */
1503                         source = j;
1504                         while (assignment[source] >= 0) {
1505                                 int       target  = assignment[source];
1506                                 int       irn_idx = idx_map[target];
1507                                 ir_node   *irn    = get_idx_irn(rss->irg, irn_idx);
1508                                 rss_irn_t *node   = get_rss_irn(rss, irn);
1509
1510                                 plist_insert_back(c->elements, irn);
1511                                 node->chain = c;
1512
1513                                 DB((rss->dbg, LEVEL_2, " -> %+F (%d)", irn, target));
1514
1515                                 /* new source = last target */
1516                                 source = target;
1517                         }
1518
1519                         DB((rss->dbg, LEVEL_2, "\n"));
1520                 }
1521         }
1522
1523         /*
1524                 Computing the maximal antichain: Select an element from each
1525                 chain such, such it is parallel with the others.
1526         */
1527         DBG((rss->dbg, LEVEL_1, "\t\tcomputing set of saturation values (MAX AC)\n"));
1528         DBG((rss->dbg, LEVEL_3, "\t\tstarting with:\n"));
1529
1530         DEBUG_ONLY(
1531                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_3)
1532                         dump_nodeset(values, "\t\t\t");
1533         )
1534
1535         temp = NULL;
1536         do {
1537                 /*
1538                         We need an explicit array for the values as
1539                         we cannot iterate multiple times over the same
1540                         set at the same time. :-(((((
1541                 */
1542                 int     n         = nodeset_count(values);
1543                 int     i         = 0;
1544                 ir_node **val_arr = NEW_ARR_F(ir_node *, n);
1545
1546                 foreach_nodeset(values, u_irn)
1547                         val_arr[i++] = u_irn;
1548
1549                 if (temp)
1550                         del_nodeset(temp);
1551
1552                 temp = new_nodeset(10);
1553
1554                 /* Select all nodes from current value set, having another node in the set as descendant. */
1555                 for (i = 0; i < n; ++i) {
1556                         rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1557
1558                         for (j = 0; j < n; ++j) {
1559                                 if (i != j) {
1560                                         rss_edge_t key;
1561
1562                                         key.src = val_arr[i];
1563                                         key.tgt = val_arr[j];
1564
1565                                         if (pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1566                                                 /* v[j] is descendant of u -> remove u and break */
1567                                                 nodeset_insert(temp, u->irn);
1568                                                 nodeset_remove(values, u->irn);
1569
1570                                                 DBG((rss->dbg, LEVEL_3, "\t\t\tremoving %+F from values, adding it to temp\n", u->irn));
1571
1572                                                 break;
1573                                         }
1574                                 }
1575                         }
1576                 }
1577
1578                 /* Try to insert the chain predecessor of all selected u's */
1579                 foreach_nodeset(temp, u_irn) {
1580                         rss_irn_t       *u  = get_rss_irn(rss, u_irn);
1581                         chain_t         *c  = u->chain;
1582                         plist_element_t *el = plist_find_value(c->elements, u_irn);
1583
1584                         assert(el && "Missing element in chain!");
1585
1586                         /* If u has predecessor in chain: insert the predecessor */
1587                         if (el == plist_element_get_prev(el)) {
1588                                 nodeset_insert(values, plist_element_get_value(el));
1589                                 DBG((rss->dbg, LEVEL_3, "\t\t\tadding %+F to values\n", plist_element_get_value(el)));
1590                         }
1591                 }
1592
1593
1594                 DEL_ARR_F(val_arr);
1595         } while (nodeset_count(temp) > 0);
1596
1597         DBG((rss->dbg, LEVEL_2, "\t\tfinal set:\n"));
1598         DEBUG_ONLY(
1599                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2) {
1600                         dump_nodeset(values, "\t\t\t");
1601                 }
1602         );
1603
1604         if (rss->opts->dump_flags & RSS_DUMP_MAXAC)
1605                 debug_vcg_dump_pkg(rss, values, iteration);
1606
1607         del_nodeset(temp);
1608
1609         return values;
1610
1611 #undef MAP_IDX
1612 }
1613
1614 /**
1615  * Computes the best serialization between two nodes of sat_vals.
1616  */
1617 static serialization_t *compute_best_admissible_serialization(rss_t *rss, nodeset *sat_vals, serialization_t *ser, int num_regs) {
1618         int        n                    = nodeset_count(sat_vals);
1619         int        n_idx                = ARR_LEN_SAFE(rss->idx_map);
1620         int        i                    = 0;
1621         ir_node    **val_arr            = alloca(n * sizeof(val_arr[0]));
1622         bitset_t   *bs_sv               = bitset_alloca(n_idx);
1623         bitset_t   *bs_vdesc            = bitset_alloca(n_idx);
1624         bitset_t   *bs_tmp              = bitset_alloca(n_idx);
1625         bitset_t   *bs_ukilldesc        = bitset_alloca(n_idx);
1626         int        best_benefit         = INT_MAX;
1627         int        best_omega2          = INT_MAX;
1628         int        best_benefit_omega20 = INT_MAX;
1629         int        has_omega1           = 0;
1630         int        j, k;
1631         ir_node    *irn;
1632         rss_edge_t min_benefit_edge;
1633         rss_edge_t min_omega20_edge;
1634         rss_irn_t  *ser_u_omega1, *ser_v_omega1;
1635         rss_irn_t  *ser_u_omega20, *ser_v_omega20;
1636
1637         DBG((rss->dbg, LEVEL_1, "\tcomputing admissible serializations:\n"));
1638
1639         /*
1640                 We need an explicit array for the values as
1641                 we cannot iterate multiple times over the same
1642                 set at the same time. :-(((((
1643         */
1644
1645         foreach_nodeset(sat_vals, irn) {
1646                 val_arr[i++] = irn;
1647                 bitset_set(bs_sv, BLOCK_IDX_MAP(rss, irn));
1648         }
1649
1650         /*
1651                 We build all admissible serializations and remember the best found so far.
1652                 For u in sat_vals:
1653                  For v in sat_val:
1654                    if v in pkiller(u): add edge from v to all other pkiller(u)
1655                    else: for all vv in pkiller(u): add edge from v to vv if there exists no path from vv to v
1656         */
1657
1658 /*
1659         A node is unserializable if:
1660         - it has only one killer and this one is Sink
1661     - it kills no other values
1662         In this case there is no serialization which could
1663         reduce the registerpressure
1664 */
1665 #define IS_UNSERIALIZABLE_NODE(rss_node)                  \
1666         (                                                     \
1667                 (                                                 \
1668                         (plist_count(rss_node->pkiller_list) == 1) && \
1669                         is_Sink(rss_node->killer)                  && \
1670                         (rss_node->kill_count                == 0)    \
1671                 )                            ||                   \
1672                 be_is_Barrier(rss_node->irn) ||                   \
1673                 be_is_Keep(rss_node->irn)                         \
1674         )
1675
1676         /* for all u in sat_vals */
1677         for (i = 0; i < n; ++i) {
1678                 rss_irn_t       *u = get_rss_irn(rss, val_arr[i]);
1679                 plist_element_t *el;
1680
1681                 /* ignore nodes where serialization does not help */
1682                 if (IS_UNSERIALIZABLE_NODE(u)) {
1683                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", u->irn));
1684                         continue;
1685                 }
1686
1687                 /* accumulate all descendants of all pkiller(u) */
1688                 bitset_clear_all(bs_ukilldesc);
1689                 foreach_plist(u->pkiller_list, el) {
1690                         ir_node   *irn  = plist_element_get_value(el);
1691                         rss_irn_t *node = get_rss_irn(rss, irn);
1692
1693                         if (! is_Sink(irn))
1694                                 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, irn));
1695                         else
1696                                 continue;
1697
1698                         for (k = ARR_LEN_SAFE(node->descendants) - 1; k >= 0; --k) {
1699                                 if (! is_Sink(node->descendants[k]))
1700                                         bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, node->descendants[k]));
1701                         }
1702                 }
1703
1704                 /* for all v in sat_vals */
1705                 for (j = 0; j < n; ++j) {
1706                         ir_node   *v_irn   = val_arr[j];
1707                         rss_irn_t *v       = get_rss_irn(rss, v_irn);
1708                         unsigned  v_height = get_irn_height(rss->h, v_irn);
1709                         int       omega1, omega2, is_pkiller;
1710
1711                         /* v cannot be serialized with itself
1712                          * ignore nodes where serialization does not help */
1713                         if (i == j || IS_UNSERIALIZABLE_NODE(v)) {
1714                                 if (i != j)
1715                                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", v->irn));
1716                                 continue;
1717                         }
1718
1719                         /* get descendants of v */
1720                         bitset_clear_all(bs_vdesc);
1721                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v_irn));
1722                         for (k = ARR_LEN_SAFE(v->descendants) - 1; k >= 0; --k) {
1723                                 if (! is_Sink(v->descendants[k]))
1724                                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v->descendants[k]));
1725                         }
1726
1727                         /* if v is in pkiller(u) */
1728                         is_pkiller = plist_has_value(u->pkiller_list, v_irn);
1729
1730                         /* for all vv in pkiller(u) */
1731                         foreach_plist(u->pkiller_list, el) {
1732                                 ir_node *vv_irn  = plist_element_get_value(el);
1733                                 int     add_edge;
1734
1735                                 if (is_Sink(vv_irn) || is_cfop(vv_irn))
1736                                         continue;
1737
1738                                 if (is_pkiller)
1739                                         add_edge = vv_irn != v_irn && skip_Proj(vv_irn) != skip_Proj(v_irn);
1740                                 else
1741                                         add_edge = ! heights_reachable_in_block(rss->h, skip_Proj(vv_irn), skip_Proj(v_irn));
1742
1743                                 /*
1744                                         As we add an edge from vv -> v, we have to make sure,
1745                                         that there exists no path from v to vv.
1746                                 */
1747
1748                                 if (add_edge) {
1749                                         int vv_height = get_irn_height(rss->h, vv_irn);
1750                                         int mu1, mu2, critical_path_cost;
1751
1752                                         /*
1753                                                 mu1 = | descendants(v) cut sat_vals |
1754                                                 the number of saturating values which cannot
1755                                                 be simultaneously alive with u
1756                                         */
1757                                         bitset_copy(bs_tmp, bs_vdesc);
1758                                         mu1 = bitset_popcnt(bitset_and(bs_tmp, bs_sv));
1759
1760                                         /*
1761                                                 mu2 = | accum_desc_all_pkiller(u) without descendants(v) |
1762                                         */
1763                                         if (is_pkiller) {
1764                                                 bitset_copy(bs_tmp, bs_ukilldesc);
1765                                                 mu2 = bitset_popcnt(bitset_andnot(bs_tmp, bs_vdesc));
1766                                         }
1767                                         else {
1768                                                 mu2 = 0;
1769                                         }
1770
1771                                         /* omega1 = mu1 - mu2 */
1772                                         omega1 = mu1 - mu2;
1773
1774                                         if (omega1 != 0)
1775                                                 has_omega1 = 1;
1776
1777                                         /* omega2 = increase of critical path */
1778                                         critical_path_cost =
1779                                                 v_height                        /* longest path from v to sink */
1780                                                 + rss->max_height - vv_height   /* longest path from source to vv */
1781                                                 + 1;                            /* edge */
1782
1783                                         /*
1784                                                 If critical_path_cost > max_height -> the new edge
1785                                                 would increase the longest critical path by the difference.
1786                                         */
1787                                         omega2 = critical_path_cost > rss->max_height ? critical_path_cost - rss->max_height : 0;
1788
1789                                         /* this keeps track of the edge with the best benefit */
1790                                         if (omega1 >= num_regs - n && omega1 < best_benefit) {
1791                                                 min_benefit_edge.src = v_irn;
1792                                                 min_benefit_edge.tgt = vv_irn;
1793
1794                                                 ser_u_omega1 = u;
1795                                                 ser_v_omega1 = v;
1796
1797                                                 best_benefit    = omega1;
1798                                                 ser->new_killer = is_pkiller;
1799                                         }
1800
1801                                         /* this keeps track of the edge with the best omega1 costs where omega2 == 0 */
1802                                         if (omega2 == 0 && omega1 >= num_regs - n && omega1 < best_benefit_omega20) {
1803                                                 min_omega20_edge.src = v_irn;
1804                                                 min_omega20_edge.tgt = vv_irn;
1805
1806                                                 ser_u_omega20 = u;
1807                                                 ser_v_omega20 = v;
1808
1809                                                 best_benefit_omega20 = omega1;
1810                                                 ser->new_killer      = is_pkiller;
1811                                         }
1812
1813                                         best_omega2 = MIN(best_omega2, omega2);
1814
1815                                         DBG((rss->dbg, LEVEL_2, "\t\tfound %+F -> %+F (w1 %d, w2 %d)\n",
1816                                                 v_irn, vv_irn, omega1, omega2));
1817                                 } /* if add_edge */
1818                         } /* for all vv in pkiller(u) */
1819                 } /* for all v in sat_vals */
1820         } /* for all u in sat_vals */
1821
1822         if (! has_omega1)
1823                 return NULL;
1824
1825         if (best_omega2 == 0) {
1826                 ser->u         = ser_u_omega20;
1827                 ser->v         = ser_v_omega20;
1828                 ser->edge->src = min_omega20_edge.src;
1829                 ser->edge->tgt = min_omega20_edge.tgt;
1830                 ser->omega1    = best_benefit_omega20;
1831                 ser->omega2    = best_omega2;
1832         }
1833         else {
1834                 ser->u         = ser_u_omega1;
1835                 ser->v         = ser_v_omega1;
1836                 ser->edge->src = min_benefit_edge.src;
1837                 ser->edge->tgt = min_benefit_edge.tgt;
1838                 ser->omega1    = best_benefit;
1839                 ser->omega2    = best_omega2;
1840         }
1841
1842         return ser;
1843
1844 #undef IS_UNSERIALIZABLE_NODE
1845 }
1846
1847 /**
1848  * Perform the value serialization heuristic and add all
1849  * computed serialization edges as dependencies to the irg.
1850  */
1851 static void perform_value_serialization_heuristic(rss_t *rss) {
1852         bitset_t *arch_nonign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1853         bitset_t *abi_ign_bs     = bitset_alloca(arch_register_class_n_regs(rss->cls));
1854         int      available_regs, iteration, num_live;
1855         dvg_t    dvg;
1856         nodeset  *sat_vals;
1857         pset *ser_set = new_pset(cmp_rss_edges, 20);
1858
1859         /* available_regs = R = |arch_non_ignore_regs cut ~abi_ignore_regs| */
1860         arch_put_non_ignore_regs(rss->arch_env, rss->cls, arch_nonign_bs);
1861         be_abi_put_ignore_regs(rss->abi, rss->cls, abi_ign_bs);
1862         bitset_andnot(arch_nonign_bs, abi_ign_bs);
1863         available_regs  = bitset_popcnt(arch_nonign_bs);
1864         //um_live = pset_count(rss->live_block);
1865         //available_regs -= num_live < available_regs ? num_live : 0;
1866
1867         DBG((rss->dbg, LEVEL_1, "\n\t#available regs: %d\n\n", available_regs));
1868
1869         /*
1870                 At first we need to compute the disjoint value DAG (DVG = {V, E_dv}).
1871                 V    = set of all nodes we are currently interested in
1872                 E_dv = there is an edge from u to v iff v is a descendant of killer(u), forall u, v in V
1873         */
1874         dvg.nodes = new_nodeset(plist_count(rss->nodes));
1875         dvg.edges = new_pset(cmp_rss_edges, plist_count(rss->nodes) * 5);
1876         compute_dvg(rss, &dvg);
1877
1878         /*
1879                 Then we perform the heuristic serialization algorithm
1880                 on the DVG which gives us all necessary serialization
1881                 edges.
1882         */
1883         DBG((rss->dbg, LEVEL_1, "\tcomputing maximal antichain:\n"));
1884         iteration = 0;
1885         sat_vals  = compute_maximal_antichain(rss, &dvg, iteration++);
1886         while (sat_vals && (nodeset_count(sat_vals) > available_regs)) {
1887                 serialization_t *ser, best_ser;
1888                 rss_edge_t      *edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*edge));
1889                 ir_node         *dep_src, *dep_tgt;
1890
1891                 best_ser.edge = edge;
1892                 ser = compute_best_admissible_serialization(rss, sat_vals, &best_ser, available_regs);
1893
1894                 DBG((rss->dbg, LEVEL_1, "\tcurrent register saturation %d, target %d\n", nodeset_count(sat_vals), available_regs));
1895
1896                 if (! ser) {
1897                         DBG((rss->dbg, LEVEL_1, "\tno RS improving serialization found, breaking at iteration %d\n", iteration));
1898                         break;
1899                 }
1900
1901                 /* Insert the serialization as dependency edge into the irg. */
1902                 DBG((rss->dbg, LEVEL_1, "\tserializing %+F -> %+F with edge %+F -> %+F and cost %d, %d\n",
1903                         ser->u->irn, ser->v->irn, ser->edge->src, ser->edge->tgt, ser->omega1, ser->omega2));
1904
1905                 if (pset_find(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge)))
1906                         ir_printf("WARNING: serialization %+F -> %+F computed twice!\n", ser->edge->src, ser->edge->tgt);
1907
1908
1909                 pset_insert(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge));
1910
1911                 /* update the dvg */
1912                 update_dvg(rss, &dvg, ser->v, ser->u);
1913                 update_dvg(rss, &dvg, get_rss_irn(rss, ser->edge->src), get_rss_irn(rss, ser->edge->tgt));
1914                 del_nodeset(sat_vals);
1915
1916                 dep_src = skip_Proj(ser->edge->src);
1917                 dep_tgt = ser->edge->tgt;
1918                 add_irn_dep(dep_src, dep_tgt);
1919
1920                 /* Update descendants, consumer and pkillers of the target */
1921                 update_node_info(rss, ser->edge->tgt, ser->edge->src);
1922
1923                 /* TODO: try to find a cheaper way for updating height information */
1924                 rss->max_height = heights_recompute_block(rss->h, rss->block);
1925
1926                 /* Recompute the antichain for next serialization */
1927                 DBG((rss->dbg, LEVEL_1, "\tre-computing maximal antichain:\n"));
1928                 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
1929         }
1930
1931         del_nodeset(dvg.nodes);
1932         del_pset(dvg.edges);
1933 }
1934
1935 /**
1936  * Do initial calculations for a block.
1937  */
1938 static void process_block(ir_node *block, void *env) {
1939         rss_t *rss = env;
1940         int   i, n;
1941         const ir_edge_t *edge;
1942
1943         phase_init(&rss->ph, "rss block preprocessor", rss->irg, PHASE_DEFAULT_GROWTH, init_rss_irn);
1944
1945         DBG((rss->dbg, LEVEL_1, "preprocessing block %+F\n", block));
1946         rss->block = block;
1947
1948         /* build an index map for all nodes in the current block */
1949         i            = 0;
1950         n            = get_irn_n_edges(block);
1951         NEW_ARR_A(int *, rss->idx_map, n);
1952         foreach_out_edge(block, edge) {
1953                 ir_node *irn      = get_edge_src_irn(edge);
1954                 rss->idx_map[i++] = get_irn_idx(irn);
1955         }
1956         qsort(rss->idx_map, n, sizeof(rss->idx_map[0]), cmp_int);
1957         rss->max_height = heights_recompute_block(rss->h, block);
1958
1959         /* loop over all register classes */
1960         for (i = arch_isa_get_n_reg_class(rss->arch_env->isa) - 1; i >= 0; --i) {
1961                 const arch_register_class_t *cls = arch_isa_get_reg_class(rss->arch_env->isa, i);
1962
1963                 rss->cls = cls;
1964                 DBG((rss->dbg, LEVEL_1, "register class %s\n", arch_register_class_name(cls)));
1965
1966                 /* Get all live value at end of Block having current register class */
1967                 rss->live_block = pset_new_ptr(10);
1968                 be_liveness_end_of_block(rss->liveness, rss->arch_env, rss->cls, rss->block, rss->live_block);
1969
1970                 /* reset the list of interesting nodes */
1971                 plist_clear(rss->nodes);
1972                 plist_insert_back(rss->nodes, _sink);
1973
1974                 /* collect all nodes having a certain register class */
1975                 foreach_out_edge(block, edge) {
1976                         ir_node *irn  = get_edge_src_irn(edge);
1977                         ir_mode *mode = get_irn_mode(irn);
1978
1979                         /*
1980                                 We skip:
1981                                 - mode_T nodes (the projs are asked)
1982                                 - mode_X nodes (control flow nodes are always scheduled last)
1983                                 - Keeps (they are always immediately scheduled)
1984                                 - Phi (same as Keep)
1985                         */
1986                         if (mode == mode_T || mode == mode_X || is_Phi(irn))
1987                                 continue;
1988
1989                         /*
1990                                 In case of a proj, we skip
1991                                 - Barrier (they are a Barrier :)
1992                                 - Start
1993                                 - the Proj itself, as it's scheduled always with it's super node
1994                         */
1995                         if (is_Proj(irn)) {
1996                                 ir_node *pred = get_Proj_pred(irn);
1997                                 if (be_is_Barrier(pred) || is_Start(pred))
1998                                         continue;
1999                         }
2000
2001                         /* calculate the descendants and consumer for each node in the block */
2002                         collect_node_info(rss, skip_Proj(irn));
2003
2004                         if (be_is_Keep(irn))
2005                                 continue;
2006
2007                         if (! arch_irn_is(rss->arch_env, irn, ignore) && arch_get_irn_reg_class(rss->arch_env, irn, -1) == cls) {
2008                                 plist_insert_back(rss->nodes, skip_Proj(irn));
2009                         }
2010                         //}
2011                 }
2012
2013                 /* compute the potential killing set PK(G) */
2014                 compute_pkill_set(rss);
2015
2016                 /* compute the killing function k* */
2017                 compute_killing_function(rss);
2018
2019                 /*
2020                         Compute the heuristic value serialization and
2021                         add the necessary dependencies to the irg.
2022                 */
2023                 perform_value_serialization_heuristic(rss);
2024
2025                 del_pset(rss->live_block);
2026         }
2027
2028         phase_free(&rss->ph);
2029 }
2030
2031 #ifdef WITH_LIBCORE
2032 /**
2033  * Register the options.
2034  */
2035 void rss_register_options(lc_opt_entry_t *grp) {
2036         static int     run_once = 0;
2037         lc_opt_entry_t *rss_grp;
2038
2039         if (! run_once) {
2040                 run_once = 1;
2041                 rss_grp  = lc_opt_get_grp(grp, "rss");
2042
2043                 lc_opt_add_table(rss_grp, rss_option_table);
2044         }
2045 }
2046 #endif /* WITH_LIBCORE */
2047
2048 /**
2049  * Preprocess the irg for scheduling.
2050  */
2051 void rss_schedule_preparation(const be_irg_t *birg) {
2052         rss_t rss;
2053
2054         FIRM_DBG_REGISTER(rss.dbg, "firm.be.sched.rss");
2055
2056         //firm_dbg_set_mask(rss.dbg, LEVEL_1 | LEVEL_2 | LEVEL_3);
2057
2058         init_rss_special_nodes(birg->irg);
2059
2060         rss.irg      = birg->irg;
2061         rss.arch_env = birg->main_env->arch_env;
2062         rss.abi      = birg->abi;
2063         rss.h        = heights_new(birg->irg);
2064         rss.nodes    = plist_new();
2065         rss.opts     = &rss_options;
2066         rss.liveness = be_liveness(birg->irg);
2067         irg_block_walk_graph(birg->irg, NULL, process_block, &rss);
2068         heights_free(rss.h);
2069         plist_free(rss.nodes);
2070         be_liveness_free(rss.liveness);
2071
2072         if (birg->main_env->options->dump_flags & DUMP_SCHED)
2073                 be_dump(rss.irg, "-rss", dump_ir_block_graph);
2074 }