800841e544dcfbaa242d55db47da392e34d5f356
[libfirm] / ir / be / beschedrss.c
1 /**
2  * Implementation of a register saturating list scheduler
3  * as described in: Sid-Ahmed-Ali Touati
4  * Register Saturation in Superscalar and VLIW Codes
5  *
6  * @license This file protected by GPL -  GNU GENERAL PUBLIC LICENSE.
7  * @author  Christian Wuerdig
8  * @date    29.08.2006
9  * @cvs-id  $Id$
10  */
11 #ifdef HAVE_CONFIG_H
12 #include "config.h"
13 #endif
14
15 #include <limits.h>
16
17 #include "obst.h"
18 #include "debug.h"
19
20 #include "irgraph_t.h"
21 #include "irnode_t.h"
22 #include "iredges_t.h"
23 #include "ircons_t.h"
24 #include "irphase_t.h"
25 #include "irgwalk.h"
26 #include "irdump.h"
27 #include "irtools.h"
28 #include "irbitset.h"
29 #include "irprintf.h"
30 #include "bipartite.h"
31 #include "hungarian.h"
32 #include "plist.h"
33
34 #include "height.h"
35
36 #include "beabi.h"
37 #include "benode_t.h"
38 #include "besched_t.h"
39
40 #ifdef WITH_LIBCORE
41 #include <libcore/lc_opts.h>
42 #include <libcore/lc_opts_enum.h>
43 #endif /* WITH_LIBCORE */
44
45
46 #define ARR_LEN_SAFE(arr) ((arr) != NULL ? ARR_LEN((arr)) : 0)
47
48 #define HASH_RSS_EDGE(edge) ((get_irn_node_nr((edge)->src) << 16) | (get_irn_node_nr((edge)->tgt) & 0xFFFF))
49 #define BSEARCH_IRN_ARR(val, arr) \
50         bsearch(&(val), (arr), ARR_LEN_SAFE((arr)), sizeof((arr)[0]), cmp_irn_idx)
51
52 #define BLOCK_IDX_MAP(rss, irn) bsearch_for_index(get_irn_idx((irn)), (rss)->idx_map, ARR_LEN_SAFE((rss)->idx_map), 1)
53
54 /* the rss options */
55 typedef struct _rss_opts_t {
56         int dump_flags;
57 } rss_opts_t;
58
59 /* Represents a child with associated costs */
60 typedef struct _child {
61         ir_node *irn;
62         float   cost;
63 } child_t;
64
65 /* We need edges for several purposes. */
66 typedef struct _rss_edge {
67         ir_node *src;
68         ir_node *tgt;
69         void    *next;
70 } rss_edge_t;
71
72 /* Represents a connected bipartite component. */
73 typedef struct _cbc {
74         nodeset *parents;       /**< = S  a set of value producers */
75         nodeset *children;      /**< = T  a set of value consumers */
76         pset    *kill_edges;    /**< = E  a set of edges (t in T, s in S) such as each s in S gets killed by at least one t in T */
77         int     nr;             /**< a deterministic index for set insertion (used as hash) */
78 } cbc_t;
79
80 /* Represents a disjoint value DAG. */
81 typedef struct _dvg {
82         nodeset *nodes;
83         pset    *edges;
84 } dvg_t;
85
86 /* Represents a chain of nodes. */
87 typedef struct _chain {
88         plist_t *elements;   /**< List of chain elements */
89         int     nr;          /**< a deterministic index for set insertion (used as hash) */
90 } chain_t;
91
92 typedef struct _rss_irn {
93         plist_t  *consumer_list;    /**< List of consumers */
94         ir_node **consumer;         /**< Sorted consumer array (needed for faster access) */
95
96         plist_t  *parent_list;      /**< List of parents */
97         ir_node **parents;          /**< Sorted parent array (needed for faster access) */
98
99         plist_t  *descendant_list;  /**< List of descendants */
100         ir_node **descendants;      /**< Sorted descendant array (needed for faster access) */
101
102         plist_t  *pkiller_list;     /**< List of potential killers */
103         ir_node **pkillers;         /**< Sorted pkiller array (needed for faster access) */
104
105 #if 0
106         plist_t  *dvg_desc_list;    /**< List of all descendants in the DVG */
107         ir_node **dvg_desc;         /**< Sorted dvg descendant array (needed for faster access) */
108
109         plist_t  *dvg_pkiller_list; /**< List of potential killers in the DVG */
110         ir_node **dvg_pkiller;      /**< Sorted dvg pkiller array (needed for faster access) */
111
112         plist_t  *dvg_user_list;    /**< List of users in the disjoint value DAG DVG */
113 #endif
114         plist_t  *kill_value_list;  /**< List of values getting potentially killed by this node */
115
116         ir_node  *killer;           /**< The selected unique killer */
117         ir_node  *irn;              /**< The corresponding firm node to this rss_irn */
118
119         chain_t  *chain;            /**< The chain, this node is associated to */
120
121         unsigned desc_walk;         /**< visited flag for collecting descendants */
122
123         unsigned live_out : 1;      /**< irn has consumers outside of it's block */
124         unsigned visited  : 1;      /**< visited flag for bipartite decomposition */
125         unsigned havecons : 1;      /**< visited flag collect consumer */
126         unsigned handled  : 1;      /**< flag indicating whether or not the list structures have been build */
127         unsigned dumped   : 1;      /**< flag indication whether or not this node was dumped */
128 } rss_irn_t;
129
130 /* Represents a serialization edge with associated costs. */
131 typedef struct _serialization {
132         rss_irn_t  *u;       /* the top node */
133         rss_irn_t  *v;       /* the node about to be serialized after u */
134         rss_edge_t *edge;    /* the edge selected for the serialization */
135         int        omega1;   /* estimated: available regs - RS reduction */
136         int        omega2;   /* increase in critical path length */
137         int        new_killer;
138 } serialization_t;
139
140 typedef struct _rss {
141         phase_t          ph;              /**< Phase to hold some data */
142         heights_t        *h;              /**< The current height object */
143         ir_graph         *irg;            /**< The irg to preprocess */
144         plist_t          *nodes;          /**< The list of interesting nodes */
145         const arch_env_t *arch_env;       /**< The architecture environment */
146         be_abi_irg_t     *abi;            /**< The abi for this irg */
147         pset             *cbc_set;        /**< A set of connected bipartite components */
148         ir_node          *block;          /**< The current block in progress. */
149         int              *idx_map;        /**< Mapping irn indices to per block indices */
150         unsigned         max_height;      /**< maximum height in the current block */
151         rss_opts_t       *opts;           /**< The options */
152         const arch_register_class_t *cls; /**< The current register class */
153         DEBUG_ONLY(firm_dbg_module_t *dbg);
154 } rss_t;
155
156 #define get_rss_irn(rss, irn)  (phase_get_or_set_irn_data(&rss->ph, irn))
157
158 /**
159  * We need some special nodes:
160  * a source and a sink for all live-in and live-out values of a block
161  */
162
163 enum {
164         iro_rss_Source,
165         iro_rss_Sink,
166         iro_rss_last
167 };
168
169 static ir_node *_source = NULL;
170 static ir_node *_sink   = NULL;
171
172 #define is_Source(irn) ((irn) == _source)
173 #define is_Sink(irn)   ((irn) == _sink)
174
175 enum {
176         RSS_DUMP_NONE  = 0,
177         RSS_DUMP_CBC   = 1 << 0,
178         RSS_DUMP_PKG   = 1 << 1,
179         RSS_DUMP_KILL  = 1 << 2,
180         RSS_DUMP_DVG   = 1 << 3,
181         RSS_DUMP_MAXAC = 1 << 4,
182         RSS_DUMP_ALL   = (RSS_DUMP_MAXAC << 1) - 1,
183 };
184
185 static rss_opts_t rss_options = {
186         RSS_DUMP_NONE,
187 };
188
189 #ifdef WITH_LIBCORE
190 static const lc_opt_enum_int_items_t dump_items[] = {
191         { "none",  RSS_DUMP_NONE  },
192         { "cbc",   RSS_DUMP_CBC   },
193         { "pkg",   RSS_DUMP_PKG   },
194         { "kill",  RSS_DUMP_KILL  },
195         { "dvg",   RSS_DUMP_DVG   },
196         { "maxac", RSS_DUMP_MAXAC },
197         { "all",   RSS_DUMP_ALL   },
198         { NULL, 0 }
199 };
200
201 static lc_opt_enum_int_var_t dump_var = {
202         &rss_options.dump_flags, dump_items
203 };
204
205 static const lc_opt_table_entry_t rss_option_table[] = {
206         LC_OPT_ENT_ENUM_MASK("dump", "dump phases (none, cbc, pkg, kill, dvg, maxac, all)", &dump_var),
207         { NULL }
208 };
209 #endif /* WITH_LIBCORE */
210
211 /******************************************************************************
212  *  _          _                    __                  _   _
213  * | |        | |                  / _|                | | (_)
214  * | |__   ___| |_ __   ___ _ __  | |_ _   _ _ __   ___| |_ _  ___  _ __  ___
215  * | '_ \ / _ \ | '_ \ / _ \ '__| |  _| | | | '_ \ / __| __| |/ _ \| '_ \/ __|
216  * | | | |  __/ | |_) |  __/ |    | | | |_| | | | | (__| |_| | (_) | | | \__ \
217  * |_| |_|\___|_| .__/ \___|_|    |_|  \__,_|_| |_|\___|\__|_|\___/|_| |_|___/
218  *            | |
219  *            |_|
220  ******************************************************************************/
221
222 /**
223  * Acquire opcodes and create source and sink nodes.
224  */
225 static void init_rss_special_nodes(ir_graph *irg) {
226         ir_node *block         = get_irg_start_block(irg);
227         int     iro_rss_base   = get_next_ir_opcodes(iro_rss_last);
228         ir_op   *op_rss_Source = new_ir_op(iro_rss_base + iro_rss_Source, "rss_Source", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
229         ir_op   *op_rss_Sink   = new_ir_op(iro_rss_base + iro_rss_Sink,   "rss_Sink",   op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
230         _source                = new_ir_node(NULL, irg, block, op_rss_Source, mode_ANY, 0, NULL);
231         _sink                  = new_ir_node(NULL, irg, block, op_rss_Sink, mode_ANY, 0, NULL);
232 }
233
234 static int cmp_int(const void *a, const void *b) {
235         const int *i1 = a;
236         const int *i2 = b;
237
238         return QSORT_CMP(*i1, *i2);
239 }
240
241 static int cmp_child_costs(const void *a, const void *b) {
242         const child_t *c1 = a;
243         const child_t *c2 = b;
244
245         return QSORT_CMP(c1->cost, c2->cost);
246 }
247
248 static int cmp_irn_idx(const void *a, const void *b) {
249         const ir_node *n1 = *(ir_node **)a;
250         const ir_node *n2 = *(ir_node **)b;
251
252         return QSORT_CMP(get_irn_idx(n1), get_irn_idx(n2));
253 }
254
255 static int cmp_rss_edges(const void *a, const void *b) {
256         const rss_edge_t *e1 = a;
257         const rss_edge_t *e2 = b;
258
259         return (e1->src != e2->src) || (e1->tgt != e2->tgt);
260 }
261
262 static int bsearch_for_index(int key, int *arr, size_t len, int force) {
263         int left = 0;
264         int right = len;
265
266         while (right >= left) {
267                 int idx = (left + right) / 2;
268
269                 if (key < arr[idx])
270                         right = idx - 1;
271                 else if (key > arr[idx])
272                         left = idx + 1;
273                 else
274                         return idx;
275         }
276
277         if (force)
278                 assert(0 && "Something is wrong, key not found.");
279         return -1;
280 }
281
282 static ir_node **build_sorted_array_from_list(plist_t *irn_list, struct obstack *obst) {
283         plist_element_t *el;
284         int     i     = 0;
285         int     len   = plist_count(irn_list);
286         ir_node **arr = NEW_ARR_D(ir_node *, obst, len);
287
288         /* copy the list into the array */
289         foreach_plist(irn_list, el) {
290                 arr[i++] = plist_element_get_value(el);
291         }
292
293         /* sort the array by node index */
294         qsort(arr, len, sizeof(arr[0]), cmp_irn_idx);
295
296         return arr;
297 }
298
299 /*****************************************************
300  *      _      _                       _
301  *     | |    | |                     (_)
302  *   __| | ___| |__  _   _  __ _  __ _ _ _ __   __ _
303  *  / _` |/ _ \ '_ \| | | |/ _` |/ _` | | '_ \ / _` |
304  * | (_| |  __/ |_) | |_| | (_| | (_| | | | | | (_| |
305  *  \__,_|\___|_.__/ \__,_|\__, |\__, |_|_| |_|\__, |
306  *                          __/ | __/ |         __/ |
307  *                         |___/ |___/         |___/
308  *
309  *****************************************************/
310
311 static void dump_nodeset(nodeset *ns, const char *prefix) {
312         ir_node *irn;
313         foreach_nodeset(ns, irn) {
314                 ir_fprintf(stderr, "%s%+F\n", prefix, irn);
315         }
316 }
317
318 static void build_file_name(rss_t *rss, const char *suffix, size_t suf_len, char *buf, size_t len) {
319         const char *irg_name;
320
321         memset(buf, 0, len);
322         irg_name = get_entity_name(get_irg_entity(rss->irg));
323         snprintf(buf, len - suf_len, "%s-%s-block-%ld",
324                 irg_name, arch_register_class_name(rss->cls), get_irn_node_nr(rss->block));
325         strcat(buf, suffix);
326 }
327
328 /* Dumps all collected bipartite components of current irg as vcg. */
329 static void debug_vcg_dump_bipartite(rss_t *rss) {
330         cbc_t *cbc;
331         FILE  *f;
332         char  file_name[256];
333         static const char suffix[] = "-RSS-CBC.vcg";
334
335         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
336         f = fopen(file_name, "w");
337
338         ir_fprintf(f, "graph: { title: \"connected bipartite component graph of %+F\"\n", rss->irg);
339         fprintf(f, "display_edge_labels: no\n");
340         fprintf(f, "layoutalgorithm: mindepth\n");
341         fprintf(f, "manhattan_edges: yes\n\n");
342
343         foreach_pset(rss->cbc_set, cbc) {
344                 ir_node    *n;
345                 rss_edge_t *ke;
346
347                 fprintf(f, "graph: { titel: \"cbc %d\" label: \"cbc %d\" status:clustered color:yellow\n", cbc->nr, cbc->nr);
348                 foreach_nodeset(cbc->parents, n) {
349                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
350                 }
351                 foreach_nodeset(cbc->children, n) {
352                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
353                 }
354                 foreach_pset(cbc->kill_edges, ke) {
355                         ir_fprintf(f, "edge: { sourcename: \"n%d_%d\" targetname: \"n%d_%d\" }\n",
356                                 get_irn_node_nr(ke->src), cbc->nr, get_irn_node_nr(ke->tgt), cbc->nr);
357                 }
358                 fprintf(f, "}\n\n");
359         }
360         fprintf(f, "}\n");
361         fclose(f);
362 }
363
364 /* Dump the computed killing function as vcg. */
365 static void debug_vcg_dump_kill(rss_t *rss) {
366         FILE            *f;
367         char            file_name[256];
368         plist_element_t *el;
369         static const char suffix[] = "-RSS-KILL.vcg";
370
371         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
372         f = fopen(file_name, "w");
373
374         ir_fprintf(f, "graph: { title: \"computed kill graph of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
375         fprintf(f, "display_edge_labels: no\n");
376         fprintf(f, "layoutalgorithm: mindepth\n");
377         fprintf(f, "manhattan_edges: yes\n\n");
378
379         /* first: reset dumped flag of all nodes */
380         foreach_plist(rss->nodes, el) {
381                 ir_node   *irn  = plist_element_get_value(el);
382                 rss_irn_t *rirn = get_rss_irn(rss, irn);
383                 rirn->dumped = 0;
384         }
385
386         /* dump all nodes and their killers */
387         foreach_plist(rss->nodes, el) {
388                 ir_node   *irn     = plist_element_get_value(el);
389                 rss_irn_t *rirn    = get_rss_irn(rss, irn);
390                 rss_irn_t *pk_rirn = get_rss_irn(rss, rirn->killer);
391
392                 if (! rirn->dumped) {
393                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
394                         rirn->dumped = 1;
395                 }
396
397                 if (! pk_rirn->dumped) {
398                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(rirn->killer), rirn->killer);
399                         pk_rirn->dumped = 1;
400                 }
401
402                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
403                         get_irn_node_nr(rirn->killer), get_irn_node_nr(irn));
404         }
405
406         fprintf(f, "}\n");
407         fclose(f);
408 }
409
410 /* Dumps the potential killing DAG (PKG) as vcg. */
411 static void debug_vcg_dump_pkg(rss_t *rss, nodeset *max_ac, int iteration) {
412         FILE              *f;
413         char              file_name[256];
414         char              *suffix   = alloca(32);
415         static const char suffix1[] = "-RSS-PKG.vcg";
416         static const char suffix2[] = "-RSS-PKG-MAXAC.vcg";
417         plist_element_t   *el;
418
419         if (! max_ac) {
420                 snprintf(suffix, 32, "%s", suffix1);
421         }
422         else {
423                 snprintf(suffix, 32, "-%02d%s", iteration, suffix2);
424         }
425
426         build_file_name(rss, suffix, strlen(suffix) + 1, file_name, sizeof(file_name));
427         f = fopen(file_name, "w");
428
429         ir_fprintf(f, "graph: { title: \"potential killing DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
430         fprintf(f, "display_edge_labels: no\n");
431         fprintf(f, "layoutalgorithm: mindepth\n");
432         fprintf(f, "manhattan_edges: yes\n\n");
433
434         foreach_plist(rss->nodes, el) {
435                 ir_node   *irn  = plist_element_get_value(el);
436                 rss_irn_t *rirn = get_rss_irn(rss, irn);
437                 char      *c1   = "";
438                 plist_element_t *k_el;
439
440                 /* dump selected saturating values in yellow */
441                 if (max_ac && nodeset_find(max_ac, irn))
442                         c1 = "color:yellow";
443
444                 if (rirn->chain)
445                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(irn), irn, rirn->chain->nr, c1);
446                 else
447                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(irn), irn, c1);
448                 rirn->dumped = 1;
449
450                 foreach_plist(rirn->pkiller_list, k_el) {
451                         ir_node   *pkiller = plist_element_get_value(k_el);
452                         rss_irn_t *pk_rirn = get_rss_irn(rss, pkiller);
453                         char      *c2      = "";
454
455                         if (max_ac && nodeset_find(max_ac, pkiller))
456                                 c2 = "color:yellow";
457
458                         if (! pk_rirn->dumped) {
459                                 if (pk_rirn->chain)
460                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(pkiller), pkiller, pk_rirn->chain->nr, c2);
461                                 else
462                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(pkiller), pkiller, c2);
463                                 pk_rirn->dumped = 1;
464                         }
465                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
466                                 get_irn_node_nr(pkiller), get_irn_node_nr(irn));
467                 }
468         }
469         fprintf(f, "}\n");
470         fclose(f);
471 }
472
473 /* Dumps the disjoint value DAG (DVG) as vcg. */
474 static void debug_vcg_dump_dvg(rss_t *rss, dvg_t *dvg) {
475         static const char suffix[] = "-RSS-DVG.vcg";
476         FILE       *f;
477         char       file_name[256];
478         ir_node    *irn;
479         rss_edge_t *edge;
480
481         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
482         f = fopen(file_name, "w");
483
484         ir_fprintf(f, "graph: { title: \"disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
485         fprintf(f, "display_edge_labels: no\n");
486         fprintf(f, "layoutalgorithm: mindepth\n");
487         fprintf(f, "manhattan_edges: yes\n\n");
488
489         /* dump all nodes */
490         foreach_nodeset(dvg->nodes, irn) {
491                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
492         }
493
494         /* dump all edges */
495         foreach_pset(dvg->edges, edge) {
496                 rss_irn_t *src = get_rss_irn(rss, edge->src);
497                 rss_irn_t *tgt = get_rss_irn(rss, edge->tgt);
498
499                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
500                         get_irn_node_nr(edge->src), get_irn_node_nr(edge->tgt));
501         }
502
503         fprintf(f, "}\n");
504         fclose(f);
505 }
506
507 #if 0
508 /* Dumps the PKG(DVG). */
509 static void debug_vcg_dump_dvg_pkiller(rss_t *rss, dvg_t *dvg) {
510         static const char suffix[] = "-RSS-DVG-PKG.vcg";
511         FILE       *f;
512         char       file_name[256];
513         ir_node    *irn;
514
515         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
516         f = fopen(file_name, "w");
517
518         ir_fprintf(f, "graph: { title: \"PKG of disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
519         fprintf(f, "display_edge_labels: no\n");
520         fprintf(f, "layoutalgorithm: mindepth\n");
521         fprintf(f, "manhattan_edges: yes\n\n");
522
523         /* dump all nodes */
524         foreach_nodeset(dvg->nodes, irn) {
525                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
526         }
527
528         /* dump all edges */
529         foreach_nodeset(dvg->nodes, irn) {
530                 rss_irn_t       *node = get_rss_irn(rss, irn);
531                 plist_element_t *el;
532
533                 foreach_plist(node->dvg_pkiller_list, el) {
534                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
535                                 get_irn_node_nr(plist_element_get_value(el)), get_irn_node_nr(irn));
536                 }
537         }
538
539         fprintf(f, "}\n");
540         fclose(f);
541 }
542 #endif /* if 0 */
543
544 /**
545  * In case there is no rss information for irn, initialize it.
546  */
547 static void *init_rss_irn(phase_t *ph, ir_node *irn, void *old) {
548         rss_irn_t *res = old ? old : phase_alloc(ph, sizeof(res[0]));
549
550         res->descendant_list  = plist_obstack_new(phase_obst(ph));
551         res->descendants      = NULL;
552
553         res->consumer_list    = plist_obstack_new(phase_obst(ph));
554         res->consumer         = NULL;
555
556         res->pkiller_list     = plist_obstack_new(phase_obst(ph));
557         res->pkillers         = NULL;
558
559         res->parent_list      = plist_obstack_new(phase_obst(ph));
560         res->parents          = NULL;
561
562         //res->dvg_desc_list    = plist_obstack_new(phase_obst(ph));
563         //res->dvg_desc         = NULL;
564
565         res->kill_value_list  = plist_obstack_new(phase_obst(ph));
566         //res->dvg_user_list    = plist_obstack_new(phase_obst(ph));
567         //res->dvg_pkiller_list = plist_obstack_new(phase_obst(ph));
568
569         res->killer           = NULL;
570         res->irn              = irn;
571         res->chain            = NULL;
572
573         res->desc_walk        = 0;
574         res->live_out         = 0;
575         res->visited          = 0;
576         res->handled          = 0;
577         res->dumped           = 0;
578         res->havecons         = 0;
579
580         return res;
581 }
582
583 /**
584  * Collect all nodes data dependent on current node.
585  */
586 static void collect_descendants(rss_t *rss, rss_irn_t *rirn, ir_node *irn, int *got_sink, unsigned cur_desc_walk) {
587         const ir_edge_t *edge;
588         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
589         ir_node         *block    = rss->block;
590         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
591         int             i;
592
593         if (cur_node->desc_walk >= cur_desc_walk)
594                 return;
595         cur_node->desc_walk = cur_desc_walk;
596
597         /* Stop at Barriers and Keeps */
598         if (be_is_Barrier(irn) || be_is_Keep(irn))
599                 return;
600
601         /* loop over normal and dependency edges */
602         for (i = 0; i < 2; ++i) {
603                 foreach_out_edge_kind(irn, edge, ekind[i]) {
604                         ir_node *user = get_edge_src_irn(edge);
605
606                         /* skip ignore nodes as they do not really contribute to register pressure */
607                         if (arch_irn_is(rss->arch_env, user, ignore))
608                                 continue;
609
610                         if (is_Proj(user)) {
611                                 if (get_irn_mode(user) != mode_X && arch_get_irn_reg_class(rss->arch_env, user, -1) == rss->cls)
612                                         collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
613                         }
614                         else {
615                                 /* check if user lives in block and is not a control flow node */
616                                 if (get_nodes_block(user) == block) {
617                                         if (! plist_has_value(rirn->descendant_list, user)) {
618                                                 plist_insert_back(rirn->descendant_list, user);
619                                                 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", user));
620                                         }
621                                         collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
622                                 }
623                                 else if (! *got_sink) {
624                                         /* user lives out of block: add sink as descendant if not already done */
625                                         plist_insert_back(rirn->descendant_list, _sink);
626                                         *got_sink = 1;
627                                         DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", _sink));
628                                 }
629                         }
630                 }
631         }
632 }
633
634 /**
635  * Handles a single consumer.
636  */
637 static void collect_single_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *consumer, int *got_sink) {
638         ir_node        *block   = rss->block;
639         ir_edge_kind_t ekind[2] = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
640
641         assert(! is_Proj(consumer) && "Cannot handle Projs");
642
643         if (get_nodes_block(consumer) == block) {
644                 if (! arch_irn_is(rss->arch_env, consumer, ignore)) {
645                         plist_insert_back(rss_irn->consumer_list, consumer);
646                         DBG((rss->dbg, LEVEL_2, "\t\tconsumer %+F\n", consumer));
647                 }
648         }
649         else {
650                 rss_irn->live_out = 1;
651                 DBG((rss->dbg, LEVEL_2, "\t\tlive out %+F", consumer));
652                 if (! *got_sink) {
653                         plist_insert_back(rss_irn->consumer_list, _sink);
654                         *got_sink = 1;
655                         DB((rss->dbg, LEVEL_2, ", %+F added instead", _sink));
656                 }
657                 DB((rss->dbg, LEVEL_2, "\n"));
658         }
659 }
660
661 /**
662  * Collect all nodes consuming the value(s) produced by current node.
663  */
664 static void collect_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *irn, int *got_sink) {
665         const ir_edge_t *edge;
666         int             i;
667         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
668         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
669
670         if (cur_node->havecons)
671                 return;
672         cur_node->havecons = 1;
673
674         for (i = 0; i < 2; ++i) {
675                 foreach_out_edge_kind(irn, edge, ekind[i]) {
676                         ir_node *consumer = get_edge_src_irn(edge);
677
678                         if (is_Proj(consumer)) {
679                                 if (arch_get_irn_reg_class(rss->arch_env, consumer, -1) == rss->cls)
680                                         collect_consumer(rss, rss_irn, consumer, got_sink);
681                         }
682                         else
683                                 collect_single_consumer(rss, rss_irn, consumer, got_sink);
684                 }
685         }
686 }
687
688 #if 0
689 /**
690  * We need to build the consumer and descendant list for _source.
691  */
692 static void collect_node_info_source(rss_t *rss) {
693         const ir_edge_t *edge;
694         rss_irn_t       *rirn  = get_rss_irn(rss, _source);
695         ir_node         *block = rss->block;
696
697         if (rirn->handled)
698                 return;
699
700         foreach_out_edge(block, edge) {
701                 ir_node *irn = get_edge_src_irn(edge);
702                 int     i;
703
704                 for (i = get_irn_arity(n) - 1; i >= 0; --i) {
705
706                 }
707         }
708 }
709
710 static void reset_node_info(rss_irn_t *rss_irn) {
711         /* Beware: array data resides on phase obstack, so it gets removed automatically */
712
713         plist_clear(rss_irn->consumer_list);
714         rss_irn->consumer = NULL;
715
716         plist_clear(rss_irn->parent_list);
717         rss_irn->parents = NULL;
718
719         plist_clear(rss_irn->descendant_list);
720         rss_irn->descendants = NULL;
721
722         plist_clear(rss_irn->pkiller_list);
723         rss_irn->pkillers = NULL;
724
725         plist_clear(rss_irn->kill_value_list);
726
727         rss_irn->killer   = NULL;
728         rss_irn->live_out = 0;
729         rss_irn->visited  = 0;
730         rss_irn->handled  = 0;
731 }
732 #endif /* if 0 */
733
734 /**
735  * Collects all consumer and descendant of a irn.
736  */
737 static void collect_node_info(rss_t *rss, ir_node *irn) {
738         static unsigned cur_desc_walk = 0;
739         rss_irn_t       *rss_irn      = get_rss_irn(rss, irn);
740         int             got_sink;
741
742         assert(! is_Proj(irn) && "Cannot handle Projs.");
743
744         /* check if node info is already available */
745         if (rss_irn->handled)
746                 phase_reinit_single_irn_data(&rss->ph, irn);
747
748         DBG((rss->dbg, LEVEL_1, "\tcomputing consumers of %+F:\n", irn));
749
750         /* collect all consumer */
751         got_sink = 0;
752         collect_consumer(rss, rss_irn, irn, &got_sink);
753
754         /* build sorted consumer array */
755         rss_irn->consumer = build_sorted_array_from_list(rss_irn->consumer_list, phase_obst(&rss->ph));
756
757         DBG((rss->dbg, LEVEL_1, "\tcompute descendants of %+F:\n", irn));
758
759         /* collect descendants */
760         got_sink = 0;
761         collect_descendants(rss, rss_irn, irn, &got_sink, ++cur_desc_walk);
762
763         /* build sorted descendant array */
764         rss_irn->descendants = build_sorted_array_from_list(rss_irn->descendant_list, phase_obst(&rss->ph));
765
766         rss_irn->handled = 1;
767 }
768
769 /**
770  * Checks if v is a potential killer of u.
771  * v is in pkill(u) iff descendants(v) cut consumer(u) is v
772  *
773  * @param rss   The rss object
774  * @param v      The node to check for killer
775  * @param u      The potentially killed value
776  * @return 1 if v is in pkill(u), 0 otherwise
777  */
778 static int is_potential_killer(rss_t *rss, rss_irn_t *v, rss_irn_t *u) {
779         plist_t *list;
780         ir_node **arr;
781         plist_element_t *el;
782
783         assert(is_Sink(v->irn) || ((plist_count(v->descendant_list) > 0 && v->descendants) || 1));
784         assert(is_Sink(u->irn) || ((plist_count(u->consumer_list)   > 0 && u->consumer)    || 1));
785
786         /* as we loop over the list: loop over the shorter one */
787         if (plist_count(v->descendant_list) > plist_count(u->consumer_list)) {
788                 list = u->consumer_list;
789                 arr  = v->descendants;
790         }
791         else {
792                 list = v->descendant_list;
793                 arr  = u->consumer;
794         }
795
796         /* for each list element: try to find element in array */
797         foreach_plist(list, el) {
798                 ir_node *irn   = plist_element_get_value(el);
799                 ir_node *match = BSEARCH_IRN_ARR(irn, arr);
800
801                 if (match && match != irn)
802                         return 0;
803         }
804
805         return 1;
806 }
807
808 /**
809  * Update descendants, consumer and pkiller list for the given irn.
810  */
811 static void update_node_info(rss_t *rss, ir_node *irn, ir_node *pk_irn) {
812         rss_irn_t *node    = get_rss_irn(rss, irn);
813         rss_irn_t *pkiller = get_rss_irn(rss, pk_irn);
814
815         /* add consumer and rebuild the consumer array */
816         if (! plist_has_value(node->consumer_list, pk_irn)) {
817                 plist_insert_back(node->consumer_list, pk_irn);
818                 node->consumer = build_sorted_array_from_list(node->consumer_list, phase_obst(&rss->ph));
819         }
820
821         /* add pk_irn as descendant, add it's descendants to irn's and rebuild array */
822         if (! plist_has_value(node->descendant_list, pk_irn)) {
823                 plist_element_t *el;
824
825                 plist_insert_back(node->descendant_list, pk_irn);
826
827                 foreach_plist(pkiller->descendant_list, el) {
828                         ir_node *desc = plist_element_get_value(el);
829
830                         if (! plist_has_value(node->descendant_list, desc))
831                                 plist_insert_back(node->descendant_list, desc);
832                 }
833
834                 node->descendants = build_sorted_array_from_list(node->descendant_list, phase_obst(&rss->ph));
835         }
836
837 #if 0
838         /* check, if pkiller is a potential killer of irn */
839         if (is_potential_killer(rss, pkiller, node)) {
840                 if (! plist_has_value(node->pkiller_list, pk_irn))
841                         plist_insert_back(node->pkiller_list, pk_irn);
842                 if (! plist_has_value(pkiller->kill_value_list, irn))
843                         plist_insert_back(pkiller->kill_value_list, irn);
844                 DBG((rss->dbg, LEVEL_1, "\tpotential killer %+F added to %+F\n", pk_irn, irn));
845         }
846 #endif
847 }
848
849 /**
850  * Compute the potential killing set PK.
851  */
852 static void compute_pkill_set(rss_t *rss) {
853         plist_element_t *u_el, *v_el;
854
855         foreach_plist(rss->nodes, u_el) {
856                 ir_node   *u_irn = plist_element_get_value(u_el);
857                 rss_irn_t *u     = get_rss_irn(rss, u_irn);
858
859                 DBG((rss->dbg, LEVEL_1, "\tcomputing potential killers of %+F:\n", u_irn));
860
861                 /* check each consumer if it is a potential killer  */
862                 foreach_plist(u->consumer_list, v_el) {
863                         ir_node   *v_irn = plist_element_get_value(v_el);
864                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
865
866                         /* check, if v is a potential killer of u */
867                         if (is_potential_killer(rss, v, u)) {
868                                 if (! plist_has_value(u->pkiller_list, v_irn))
869                                         plist_insert_back(u->pkiller_list, v_irn);
870                                 if (! plist_has_value(v->kill_value_list, u_irn))
871                                         plist_insert_back(v->kill_value_list, u_irn);
872                                 DBG((rss->dbg, LEVEL_2, "\t\tpotential killer %+F\n", v_irn));
873                         }
874                 }
875
876                 u->killer = _sink;
877         }
878
879         if (rss->opts->dump_flags & RSS_DUMP_PKG)
880                 debug_vcg_dump_pkg(rss, NULL, 0);
881 }
882
883 /**
884  * Build set of killing edges (from values to their potential killers)
885  */
886 static void build_kill_edges(rss_t *rss, pset *epk) {
887         plist_element_t *el, *k_el;
888
889         foreach_plist(rss->nodes, el) {
890                 ir_node    *irn  = plist_element_get_value(el);
891                 rss_irn_t *rirn = get_rss_irn(rss, irn);
892
893                 DBG((rss->dbg, LEVEL_3, "\t\tbuilding kill edges for %+F:\n", irn));
894
895                 foreach_plist(rirn->pkiller_list, k_el) {
896                         ir_node    *pkiller = plist_element_get_value(k_el);
897                         rss_edge_t *ke      = obstack_alloc(phase_obst(&rss->ph), sizeof(*ke));
898
899                         ke->src  = irn;
900                         ke->tgt  = pkiller;
901                         ke->next = NULL;
902
903                         DBG((rss->dbg, LEVEL_3, "\t\t\ttarget %+F\n", pkiller));
904
905                         pset_insert(epk, ke, HASH_RSS_EDGE(ke));
906                 }
907         }
908 }
909
910 /* print the given cbc for debugging purpose */
911 static void debug_print_cbc(firm_dbg_module_t *mod, cbc_t *cbc) {
912         ir_node    *n;
913         rss_edge_t *ke;
914
915         DBG((mod, LEVEL_3, "\t\tS = set of parents:\n"));
916         foreach_nodeset(cbc->parents, n) {
917                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
918         }
919         DBG((mod, LEVEL_3, "\t\tT = set of children:\n"));
920         foreach_nodeset(cbc->children, n) {
921                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
922         }
923         DBG((mod, LEVEL_3, "\t\tE = Edges from producers to consumers\n"));
924         foreach_pset(cbc->kill_edges, ke) {
925                 DBG((mod, LEVEL_3, "\t\t\t%+F -> %+F\n", ke->src, ke->tgt));
926         }
927 }
928
929 /**
930  * Construct the bipartite decomposition.
931  * Sid-Ahmed-Ali Touati, Phd Thesis
932  * Register Pressure in Instruction Level Parallelism, p. 71
933  */
934 static void compute_bipartite_decomposition(rss_t *rss) {
935         pset *epk    = new_pset(cmp_rss_edges, 10);
936         int  cur_num = 0;
937
938         plist_element_t *el;
939
940         DBG((rss->dbg, LEVEL_1, "\tcomputing bipartite decomposition:\n"));
941
942         build_kill_edges(rss, epk);
943
944         foreach_plist(rss->nodes, el) {
945                 ir_node   *u_irn   = plist_element_get_value(el);
946                 rss_irn_t *u       = get_rss_irn(rss, u_irn);
947                 int       p_change = 1;
948                 int       c_change = 1;
949
950                 cbc_t           *cbc;
951                 plist_element_t *el2;
952                 rss_edge_t      *k_edge;
953                 rss_edge_t      *kedge_root = NULL;
954                 ir_node         *t_irn, *s_irn;
955
956                 if (u->visited || u_irn == _sink)
957                         continue;
958
959                 DBG((rss->dbg, LEVEL_2, "\t\t%+F choosen:\n", u_irn));
960
961                 cbc     = obstack_alloc(phase_obst(&rss->ph), sizeof(*cbc));
962                 cbc->nr = cur_num++;
963
964                 /* initialize S_cb */
965                 cbc->parents = new_nodeset(5);
966                 nodeset_insert(cbc->parents, u_irn);
967                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (init)\n", u_irn));
968
969                 /* E_cb = empty */
970                 cbc->kill_edges = new_pset(cmp_rss_edges, 5);
971
972                 /* each parent gets killed by at least one value from children */
973
974                 /* T_cb = PK_successors(u) */
975                 cbc->children = new_nodeset(5);
976                 foreach_plist(u->pkiller_list, el2) {
977                         nodeset_insert(cbc->children, plist_element_get_value(el2));
978                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (init)\n", plist_element_get_value(el2)));
979                 }
980
981                 /*
982                         Now: insert the parents of all children into the parent set
983                         and insert the children of all parents into the children set
984                         until the sets don't change any more
985                 */
986                 while (p_change || c_change) {
987                         p_change = c_change = 0;
988
989                         /* accumulate parents */
990                         foreach_nodeset(cbc->children, t_irn) {
991                                 rss_irn_t *t = get_rss_irn(rss, t_irn);
992                                 plist_element_t *kvl_el;
993
994                                 foreach_pset(epk, k_edge) {
995                                         ir_node *val = k_edge->src;
996
997                                         if (k_edge->tgt == t_irn && ! nodeset_find(cbc->parents, k_edge->src)) {
998                                                 nodeset_insert(cbc->parents, val);
999                                                 p_change = 1;
1000                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents\n", val));
1001                                         }
1002                                 }
1003                         }
1004
1005                         /* accumulate children */
1006                         foreach_nodeset(cbc->parents, s_irn) {
1007                                 rss_irn_t *s = get_rss_irn(rss, s_irn);
1008                                 plist_element_t *pkl_el;
1009
1010                                 foreach_plist(s->pkiller_list, pkl_el) {
1011                                         ir_node *val = plist_element_get_value(pkl_el);
1012
1013                                         if (! nodeset_find(cbc->children, val)) {
1014                                                 nodeset_insert(cbc->children, val);
1015                                                 c_change = 1;
1016                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children\n", val));
1017                                         }
1018                                 }
1019                         }
1020                 }
1021
1022                 /* mark all parent values as visited */
1023                 foreach_nodeset(cbc->parents, s_irn) {
1024                         rss_irn_t *s = get_rss_irn(rss, s_irn);
1025                         s->visited = 1;
1026                         /* assure bipartite property */
1027                         if (nodeset_find(cbc->children, s_irn)) {
1028                                 nodeset_remove(cbc->children, s_irn);
1029                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F removed from to children\n", s_irn));
1030                         }
1031                 }
1032
1033                 /* update edges */
1034                 foreach_pset(epk, k_edge) {
1035                         if (nodeset_find(cbc->parents, k_edge->src) && nodeset_find(cbc->children, k_edge->tgt)) {
1036                                 pset_insert(cbc->kill_edges, k_edge, HASH_RSS_EDGE(k_edge));
1037                                 /*
1038                                         Link all k_edges which are about to be removed together.
1039                                         Beware: pset_remove kills the iterator.
1040                                 */
1041                                 k_edge->next = kedge_root;
1042                                 kedge_root   = k_edge;
1043                         }
1044                 }
1045
1046                 /* remove all linked k_edges */
1047                 for (; kedge_root; kedge_root = kedge_root->next) {
1048                         pset_remove(epk, kedge_root, HASH_RSS_EDGE(kedge_root));
1049                 }
1050
1051                 /* add the connected bipartite component */
1052                 if (nodeset_count(cbc->parents) > 0 && nodeset_count(cbc->children) > 0 && pset_count(cbc->kill_edges) > 0)
1053                         pset_insert(rss->cbc_set, cbc, (unsigned)cbc->nr);
1054                 DBG((rss->dbg, LEVEL_2, "\tbipartite component %d inserted:\n", cbc->nr));
1055                 DEBUG_ONLY(
1056                         if (firm_dbg_get_mask(rss->dbg) & LEVEL_2)
1057                                 debug_print_cbc(rss->dbg, cbc);
1058                 );
1059         }
1060
1061         if (rss->opts->dump_flags & RSS_DUMP_CBC)
1062                 debug_vcg_dump_bipartite(rss);
1063
1064         del_pset(epk);
1065 }
1066
1067 /**
1068  * Select the child with the maximum cost.
1069  */
1070 static child_t *select_child_max_cost(rss_t *rss, nodeset *x, nodeset *y, child_t *t, cbc_t *cbc) {
1071         ir_node *child;
1072         float   max_cost = -1.0f;
1073
1074         DBG((rss->dbg, LEVEL_2, "\t\tcomputing children costs:\n"));
1075
1076         foreach_nodeset(cbc->children, child) {
1077                 rss_irn_t  *r_child             = get_rss_irn(rss, child);
1078                 int         num_unkilled_parents = 0;
1079                 int         num_descendants;
1080                 rss_edge_t *k_edge;
1081                 float       cost;
1082
1083                 /* get the number of unkilled parents */
1084                 foreach_pset(cbc->kill_edges, k_edge) {
1085                         if (k_edge->tgt == child && nodeset_find(x, k_edge->src))
1086                                 ++num_unkilled_parents;
1087                 }
1088
1089                 cost = (float)num_unkilled_parents;
1090
1091                 num_descendants = plist_count(r_child->descendant_list) + nodeset_count(y);
1092
1093                 if (num_descendants > 0)
1094                         cost /= (float)num_descendants;
1095
1096                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F, #desc %d, cost %.3f\n", child, num_descendants, cost));
1097
1098                 if (cost > max_cost) {
1099                         t->irn   = child;
1100                         t->cost  = cost;
1101                         max_cost = cost;
1102                 }
1103         }
1104
1105         return t;
1106 }
1107
1108 /**
1109  * Remove all parents from x which are killed by t_irn.
1110  */
1111 static void remove_covered_parents(rss_t *rss, nodeset *x, ir_node *t_irn, cbc_t *cbc) {
1112         rss_irn_t  *t = get_rss_irn(rss, t_irn);
1113         rss_edge_t *k_edge;
1114
1115         DBG((rss->dbg, LEVEL_2, "\t\tremoving parents covered by %+F:\n", t_irn));
1116
1117         foreach_pset(cbc->kill_edges, k_edge) {
1118                 if (k_edge->tgt == t_irn && nodeset_find(x, k_edge->src)) {
1119                         nodeset_remove(x, k_edge->src);
1120                         plist_insert_back(t->parent_list, k_edge->src);
1121                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", k_edge->src));
1122                 }
1123         }
1124 }
1125
1126 static void update_cumulated_descendent_values(rss_t *rss, nodeset *y, ir_node *t_irn) {
1127         rss_irn_t *t = get_rss_irn(rss, t_irn);
1128         plist_element_t *el;
1129
1130         DBG((rss->dbg, LEVEL_2, "\t\tupdating cumulated descendant value of %+F:\n", t_irn));
1131
1132         foreach_plist(t->descendant_list, el) {
1133                 nodeset_insert(y, plist_element_get_value(el));
1134                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", plist_element_get_value(el)));
1135         }
1136 }
1137
1138 /**
1139  * Greedy-k: a heuristics for the MMA problem
1140  */
1141 static void compute_killing_function(rss_t *rss) {
1142         cbc_t *cbc;
1143         struct obstack obst;
1144
1145         obstack_init(&obst);
1146
1147         rss->cbc_set = pset_new_ptr(5);
1148         compute_bipartite_decomposition(rss);
1149
1150         /* for all bipartite components do: */
1151         foreach_pset(rss->cbc_set, cbc) {
1152                 ir_node *p;
1153                 nodeset *x       = new_nodeset(10);
1154                 nodeset *y       = new_nodeset(10);
1155                 child_t **sks    = NEW_ARR_F(child_t *, 20);
1156                 int     cur_len  = 0;
1157                 int     cur_size = 20;
1158                 int     i;
1159
1160                 DBG((rss->dbg, LEVEL_1, "\tcomputing SKS for cbc %d:\n", cbc->nr));
1161                 DBG((rss->dbg, LEVEL_2, "\t\tinitializing parents X:\n"));
1162
1163                 /* X = S_cb (all parents are initially uncovered) */
1164                 foreach_nodeset(cbc->parents, p) {
1165                         nodeset_insert(x, p);
1166                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", p));
1167                 }
1168
1169                 /* while X not empty */
1170                 while (nodeset_count(x) > 0) {
1171                         child_t *t = obstack_alloc(&obst, sizeof(*t));
1172                         memset(t, 0, sizeof(*t));
1173
1174                         t = select_child_max_cost(rss, x, y, t, cbc);
1175
1176                         if (cur_len >= cur_size) {
1177                                 ARR_EXTO(child_t *, sks, cur_size * 2);
1178                                 cur_size *= 2;
1179                         }
1180
1181                         DBG((rss->dbg, LEVEL_2, "\t\tinsert child %+F (%.3f) into SKS at pos %d\n", t->irn, t->cost, cur_len));
1182
1183                         sks[cur_len++] = t;
1184                         remove_covered_parents(rss, x, t->irn, cbc);
1185                         update_cumulated_descendent_values(rss, y, t->irn);
1186                 }
1187
1188                 ARR_SHRINKLEN(sks, cur_len);
1189
1190                 /* sort SKS in increasing cost order */
1191                 qsort(sks, cur_len, sizeof(sks[0]), cmp_child_costs);
1192
1193                 DBG((rss->dbg, LEVEL_2, "\tprocessing SKS for cbc %d:\n", cbc->nr));
1194
1195                 /* build killing function */
1196                 for (i = cur_len - 1; i >= 0; --i) { /* loop over sks in decreasing cost order */
1197                         child_t         *t = sks[i];
1198                         rss_irn_t       *rt = get_rss_irn(rss, t->irn);
1199                         plist_element_t *p_el;
1200
1201                         DBG((rss->dbg, LEVEL_3, "\t\t\tkiller %+F (%.3f):\n", t->irn, t->cost));
1202
1203                         /* kill all unkilled parents of t */
1204                         foreach_plist(rt->parent_list, p_el) {
1205                                 ir_node    *par = plist_element_get_value(p_el);
1206                                 rss_irn_t *rpar = get_rss_irn(rss, par);
1207
1208                                 if (is_Sink(rpar->killer)) {
1209                                         rpar->killer = t->irn;
1210                                         DBG((rss->dbg, LEVEL_2, "\t\tkill %+F\n", rpar->irn));
1211                                 }
1212                                 else {
1213                                         DBG((rss->dbg, LEVEL_3, "\t\t\tkeeping %+F as killer for %+F\n", rpar->killer, rpar->irn));
1214                                 }
1215                         }
1216                 }
1217
1218                 del_nodeset(x);
1219                 del_nodeset(y);
1220                 DEL_ARR_F(sks);
1221         }
1222
1223         if (rss->opts->dump_flags & RSS_DUMP_KILL)
1224                 debug_vcg_dump_kill(rss);
1225
1226         del_pset(rss->cbc_set);
1227         obstack_free(&obst, NULL);
1228 }
1229
1230 /**
1231  * Adds the edge src -> tgt to the dvg. Checks if reverse edge is already there (asserts).
1232  */
1233 static INLINE void add_dvg_edge(rss_t *rss, dvg_t *dvg, ir_node *src, ir_node *tgt, int have_source) {
1234         rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1235         rss_edge_t key;
1236
1237         if (! have_source)
1238                 nodeset_insert(dvg->nodes, src);
1239         else
1240                 assert(nodeset_find(dvg->nodes, src) != NULL && "Wrong assumption");
1241
1242         nodeset_insert(dvg->nodes, tgt);
1243
1244         /* add an edge to our killer */
1245         dvg_edge->src  = src;
1246         dvg_edge->tgt  = tgt;
1247         dvg_edge->next = NULL;
1248
1249         key.src = tgt;
1250         key.tgt = src;
1251         assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1252
1253         /* add the edge to the DVG */
1254         DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", src, tgt));
1255         pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1256 }
1257
1258 /**
1259  * Computes the disjoint value DAG (DVG).
1260  * BEWARE: It is not made explicitly clear in the Touati paper,
1261  *         but the DVG is meant to be build from the KILLING DAG
1262  */
1263 static void compute_dvg(rss_t *rss, dvg_t *dvg) {
1264         plist_element_t *el;
1265
1266         DBG((rss->dbg, LEVEL_1, "\tcomputing DVG:\n"));
1267
1268         foreach_plist(rss->nodes, el) {
1269                 ir_node    *u_irn    = plist_element_get_value(el);
1270                 rss_irn_t  *u        = get_rss_irn(rss, u_irn);
1271                 rss_irn_t  *u_killer = get_rss_irn(rss, u->killer);
1272                 int        i;
1273
1274                 /* TODO: omit nodes only having sink as pkiller and killing no one */
1275
1276                 /* add an edge to killer */
1277                 add_dvg_edge(rss, dvg, u_irn, u->killer, 0);
1278
1279                 if (is_Sink(u->killer))
1280                         continue;
1281
1282                 /* We add an edge to every killer, from where we could be reached. */
1283                 for (i = ARR_LEN_SAFE(u_killer->descendants) - 1; i >= 0; --i) {
1284                         add_dvg_edge(rss, dvg, u_irn, u_killer->descendants[i], 1);
1285                 }
1286
1287 #if 0
1288
1289                 foreach_plist(rss->nodes, el2) {
1290                         ir_node *v_irn = plist_element_get_value(el2);
1291
1292                         /*
1293                                 There is an edge (u, v) in the DVG iff v is a descendant of the killer(u).
1294                         */
1295                         if (BSEARCH_IRN_ARR(v_irn, u_kill->descendants)) {
1296                                 rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1297                                 rss_edge_t key;
1298
1299                                 /* insert the user into the DVG and append it to the user list of u */
1300                                 nodeset_insert(dvg->nodes, v_irn);
1301                                 if (! plist_has_value(u->dvg_user_list, v_irn))
1302                                         plist_insert_back(u->dvg_user_list, v_irn);
1303
1304                                 dvg_edge->src  = u_irn;
1305                                 dvg_edge->tgt  = v_irn;
1306                                 dvg_edge->next = NULL;
1307
1308                                 key.src = v_irn;
1309                                 key.tgt = u_irn;
1310
1311                                 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1312
1313                                 /* add the edge to the DVG */
1314                                 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", u_irn, v_irn));
1315                                 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1316                         }
1317                 }
1318 #endif /* if 0 */
1319         }
1320
1321         if (rss->opts->dump_flags & RSS_DUMP_DVG)
1322                 debug_vcg_dump_dvg(rss, dvg);
1323 }
1324
1325 /**
1326  * Updates the dvg structure when a serialization edge from src -> tgt is added.
1327  */
1328 static void update_dvg(rss_t *rss, dvg_t *dvg, rss_irn_t *src, rss_irn_t *tgt) {
1329         int i;
1330
1331         add_dvg_edge(rss, dvg, tgt->irn, src->irn, 0);
1332
1333         for (i = ARR_LEN_SAFE(src->descendants) - 1; i >= 0; --i) {
1334                 add_dvg_edge(rss, dvg, tgt->irn, src->descendants[i], 1);
1335         }
1336 }
1337
1338 #if 0
1339 /**
1340  * Accumulate all descendants for root into list.
1341  */
1342 static void accumulate_dvg_descendant_values(rss_t *rss, rss_irn_t *root, plist_t *list) {
1343         if (plist_count(root->dvg_user_list) > 0) {
1344                 plist_element_t *el;
1345
1346                 foreach_plist(root->dvg_user_list, el) {
1347                         ir_node   *v_irn = plist_element_get_value(el);
1348                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
1349
1350                         /* add v as descendant */
1351                         if (! plist_has_value(list, v_irn)) {
1352                                 plist_insert_back(list, v_irn);
1353                                 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG descendant %+F\n", v_irn));
1354                         }
1355
1356                         /* accumulate v's descendants */
1357                         accumulate_dvg_descendant_values(rss, v, list);
1358                 }
1359         }
1360 }
1361
1362 /**
1363  * Builds the list of potential killers for each node
1364  * in the given DVG.
1365  * Needs the descendant list for all user as sorted array.
1366  */
1367 static void build_dvg_pkiller_list(rss_t *rss, dvg_t *dvg) {
1368         ir_node *irn;
1369
1370         foreach_nodeset(dvg->nodes, irn) {
1371                 rss_irn_t       *node = get_rss_irn(rss, irn);
1372                 plist_element_t *el, *el2;
1373
1374                 DBG((rss->dbg, DEBUG_DVG, "\t\tbuilding pkiller list for %+F\n", irn));
1375
1376                 /* check each user */
1377                 foreach_plist(node->dvg_user_list, el) {
1378                         ir_node *u_irn = plist_element_get_value(el);
1379
1380                         /* is the current user u_irn not a descendant of any other user -> pkiller */
1381                         foreach_plist(node->dvg_user_list, el2) {
1382                                 ir_node   *v_irn = plist_element_get_value(el2);
1383                                 rss_irn_t *v     = get_rss_irn(rss, v_irn);
1384
1385                                 if (el != el2                             &&
1386                                         ! BSEARCH_IRN_ARR(u_irn, v->dvg_desc) &&
1387                                         ! plist_has_value(node->dvg_pkiller_list, u_irn))
1388                                 {
1389                                         plist_insert_back(node->dvg_pkiller_list, u_irn);
1390                                         DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG pkiller %+F\n", u_irn));
1391                                 }
1392                         }
1393                 }
1394
1395                 node->dvg_pkiller = build_sorted_array_from_list(node->dvg_pkiller_list, phase_obst(&rss->ph));
1396         }
1397
1398         DEBUG_ONLY(
1399                 if (firm_dbg_get_mask(rss->dbg) & DEBUG_DVG)
1400                         debug_vcg_dump_dvg_pkiller(rss, dvg);
1401         );
1402 }
1403
1404 #endif /* if 0 */
1405
1406 /**
1407  * Compute the maximal antichain of the current DVG.
1408  * This is a reimplementation of the MAXIMAL_ANTI_CHAIN function
1409  * from the DDG library 1.1 (DAG.cpp).
1410  */
1411 static nodeset *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration) {
1412         int         n               = nodeset_count(dvg->nodes);
1413         int         *assignment     = alloca(n * sizeof(assignment[0]));
1414         int         *assignment_rev = alloca(n * sizeof(assignment_rev[0]));
1415         int         *idx_map        = alloca(n * sizeof(idx_map[0]));
1416         hungarian_problem_t *bp;
1417         nodeset     *values, *temp;
1418         ir_node     *u_irn;
1419         int         i, j, cost, cur_chain, res;
1420         rss_edge_t  *dvg_edge;
1421
1422 #define MAP_IDX(irn) bsearch_for_index(get_irn_idx(irn), idx_map,  n,  1)
1423
1424         if (pset_count(dvg->edges) == 0)
1425                 return NULL;
1426
1427         bp = hungarian_new(n, n, 1, HUNGARIAN_MATCH_NORMAL);
1428
1429         /*
1430                 At first, we build an index map for the nodes in the DVG,
1431                 because we cannot use the irn idx therefore as the resulting
1432                 bipartite data structure would have around 1.2GB.
1433                 So we limit the size to the number of nodes we have in the DVG
1434                 and build a sorted index map for their irn indices.
1435         */
1436         i = 0;
1437         foreach_nodeset(dvg->nodes, u_irn) {
1438                 idx_map[i++] = get_irn_idx(u_irn);
1439         }
1440         qsort(idx_map, n, sizeof(idx_map[0]), cmp_int);
1441
1442         foreach_pset(dvg->edges, dvg_edge) {
1443                 int idx_u = MAP_IDX(dvg_edge->src);
1444                 int idx_v = MAP_IDX(dvg_edge->tgt);
1445
1446                 /* add the entry to the bipartite data structure */
1447                 hungarian_add(bp, idx_u, idx_v, 1);
1448                 DBG((rss->dbg, LEVEL_3, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1449                         idx_u, dvg_edge->src, idx_v, dvg_edge->tgt));
1450         }
1451 #if 0
1452         /*
1453                 Add a bipartite entry for each pair of nodes (u, v), where exists a
1454                 path in the DVG from u to v, ie. connect all descendants(v) to v.
1455                 desc_dvg(v) = accumulated descendants of all z in dvg_user_list(v)
1456         */
1457         foreach_nodeset(dvg->nodes, u_irn) {
1458                 rss_irn_t *u        = get_rss_irn(rss, u_irn);
1459                 int       idx_u_irn = MAP_IDX(u_irn);
1460
1461                 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG descendants of %+F:\n", u_irn));
1462
1463                 //plist_clear(u->dvg_desc_list);
1464                 //accumulate_dvg_descendant_values(rss, u, u->dvg_desc_list);
1465
1466                 /*
1467                         FIXME: The array is build on the phase obstack and we cannot free the data.
1468                         So the array get as many times allocated as this function is called.
1469                 */
1470
1471                 /* build the sorted array for faster searches */
1472                 //u->dvg_desc = build_sorted_array_from_list(u->dvg_desc_list, phase_obst(&rss->ph));
1473
1474                 DBG((rss->dbg, DEBUG_MAX_AC, "\t\tadding bipartite entries of %+F:\n", u_irn));
1475
1476                 /* add the bipartite entries for all descendants */
1477                 for (i = ARR_LEN_SAFE(u->dvg_desc) - 1; i >= 0; --i) {
1478                         rss_irn_t *desc    = get_rss_irn(rss, u->dvg_desc[i]);
1479                         int       idx_desc = MAP_IDX(u->dvg_desc[i]);
1480
1481                         /* add the entry to the bipartite data structure */
1482                         hungarian_add(bp, idx_u_irn, idx_desc, 1);
1483                         DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1484                                 idx_u_irn, u_irn, idx_desc, u->dvg_desc[i]));
1485
1486                         need_matching = 1;
1487                 }
1488         }
1489 #endif
1490
1491         /* We want maximum cardinality matching */
1492         hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);
1493
1494 #if 0
1495         DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG pkiller:\n"));
1496         /* beware: the following function needs the dvg_desc array */
1497         build_dvg_pkiller_list(rss, dvg);
1498 #endif
1499
1500         DBG((rss->dbg, LEVEL_2, "\t\tcomputing bipartite matching\n"));
1501         /*
1502                 The maximum cardinality bipartite matching gives us the minimal
1503                 chain partition, which corresponds to the maximum anti chains.
1504         */
1505         res = hungarian_solve(bp, assignment, &cost, 1);
1506         assert(res == 0 && "Bipartite matching failed!");
1507
1508         hungarian_free(bp);
1509         memset(assignment_rev, -1, n * sizeof(assignment_rev[0]));
1510
1511         /* build the reverse assignment, ie. foreach i -> j, add j -> i */
1512         for (i = 0; i < n; ++i) {
1513                 if (assignment[i] >= 0) {
1514                         int j = assignment[i];
1515                         assignment_rev[j] = i;
1516                 }
1517         }
1518
1519         DBG((rss->dbg, LEVEL_2, "\t\tgot assignment with cost %d\n", cost));
1520         DBG((rss->dbg, LEVEL_3, "\t\t\tassignment   ---   reverse assignment\n", cost));
1521         for (i = 0; i < n; ++i) {
1522                 DBG((rss->dbg, LEVEL_3, "\t\t\t%3d -> %3d         %3d -> %3d\n", i, assignment[i], i, assignment_rev[i]));
1523         }
1524
1525         values    = new_nodeset(10);
1526         cur_chain = 0;
1527         /* Construction of the minimal chain partition */
1528         for (j = 0; j < n; ++j) {
1529                 /* check nodes, which did not occur as target */
1530                 if (assignment_rev[j] == -1) {
1531                         int       xj      = idx_map[j];
1532                         ir_node   *xj_irn = get_idx_irn(rss->irg, xj);
1533                         rss_irn_t *xj_rss = get_rss_irn(rss, xj_irn);
1534                         chain_t   *c      = obstack_alloc(phase_obst(&rss->ph), sizeof(*c));
1535                         int       source;
1536
1537                         /* there was no source for j -> we have a source of a new chain */
1538                         nodeset_insert(values, xj_irn);
1539
1540                         c->elements = plist_obstack_new(phase_obst(&rss->ph));
1541                         c->nr       = cur_chain++;
1542                         plist_insert_back(c->elements, xj_irn);
1543
1544                         xj_rss->chain = c;
1545
1546                         DBG((rss->dbg, LEVEL_2, "\t\tstarting chain %d:\n", c->nr));
1547                         DBG((rss->dbg, LEVEL_2, "\t\t\t%+F (%d)", xj_irn, j));
1548
1549                         /* follow chain, having j as source */
1550                         source = j;
1551                         while (assignment[source] >= 0) {
1552                                 int       target  = assignment[source];
1553                                 int       irn_idx = idx_map[target];
1554                                 ir_node   *irn    = get_idx_irn(rss->irg, irn_idx);
1555                                 rss_irn_t *node   = get_rss_irn(rss, irn);
1556
1557                                 plist_insert_back(c->elements, irn);
1558                                 node->chain = c;
1559
1560                                 DB((rss->dbg, LEVEL_2, " -> %+F (%d)", irn, target));
1561
1562                                 /* new source = last target */
1563                                 source = target;
1564                         }
1565
1566                         DB((rss->dbg, LEVEL_2, "\n"));
1567                 }
1568         }
1569
1570         /*
1571                 Computing the maximal antichain: Select an element from each
1572                 chain such, such it is parallel with the others.
1573         */
1574         DBG((rss->dbg, LEVEL_1, "\t\tcomputing set of saturation values (MAX AC)\n"));
1575         DBG((rss->dbg, LEVEL_3, "\t\tstarting with:\n"));
1576
1577         DEBUG_ONLY(
1578                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_3)
1579                         dump_nodeset(values, "\t\t\t");
1580         )
1581
1582         temp = NULL;
1583         do {
1584                 /*
1585                         We need an explicit array for the values as
1586                         we cannot iterate multiple times over the same
1587                         set at the same time. :-(((((
1588                 */
1589                 int     n         = nodeset_count(values);
1590                 int     i         = 0;
1591                 ir_node **val_arr = NEW_ARR_F(ir_node *, n);
1592
1593                 foreach_nodeset(values, u_irn)
1594                         val_arr[i++] = u_irn;
1595
1596                 if (temp)
1597                         del_nodeset(temp);
1598
1599                 temp = new_nodeset(10);
1600
1601                 /* Select all nodes from current value set, having another node in the set as descendant. */
1602                 for (i = 0; i < n; ++i) {
1603                         rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1604
1605                         for (j = 0; j < n; ++j) {
1606                                 if (i != j) {
1607                                         rss_edge_t key;
1608
1609                                         key.src = val_arr[i];
1610                                         key.tgt = val_arr[j];
1611
1612                                         if (pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1613                                                 /* v[j] is descendant of u -> remove u and break */
1614                                                 nodeset_insert(temp, u->irn);
1615                                                 nodeset_remove(values, u->irn);
1616
1617                                                 DBG((rss->dbg, LEVEL_3, "\t\t\tremoving %+F from values, adding it to temp\n", u->irn));
1618
1619                                                 break;
1620                                         }
1621                                 }
1622                         }
1623                 }
1624
1625                 /* Try to insert the chain predecessor of all selected u's */
1626                 foreach_nodeset(temp, u_irn) {
1627                         rss_irn_t       *u  = get_rss_irn(rss, u_irn);
1628                         chain_t         *c  = u->chain;
1629                         plist_element_t *el = plist_find_value(c->elements, u_irn);
1630
1631                         assert(el && "Missing element in chain!");
1632
1633                         /* If u has predecessor in chain: insert the predecessor */
1634                         if (el = plist_element_get_prev(el)) {
1635                                 nodeset_insert(values, plist_element_get_value(el));
1636                                 DBG((rss->dbg, LEVEL_3, "\t\t\tadding %+F to values\n", plist_element_get_value(el)));
1637                         }
1638                 }
1639
1640
1641                 DEL_ARR_F(val_arr);
1642         } while (nodeset_count(temp) > 0);
1643
1644         DBG((rss->dbg, LEVEL_2, "\t\tfinal set:\n"));
1645         DEBUG_ONLY(
1646                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2) {
1647                         dump_nodeset(values, "\t\t\t");
1648                 }
1649         );
1650
1651         if (rss->opts->dump_flags & RSS_DUMP_MAXAC)
1652                 debug_vcg_dump_pkg(rss, values, iteration);
1653
1654         del_nodeset(temp);
1655
1656         return values;
1657
1658 #undef MAP_IDX
1659 }
1660
1661 /**
1662  * Computes the best serialization between two nodes of sat_vals.
1663  */
1664 static serialization_t *compute_best_admissible_serialization(rss_t *rss, nodeset *sat_vals, serialization_t *ser, int num_regs) {
1665         int        n                    = nodeset_count(sat_vals);
1666         int        n_idx                = ARR_LEN_SAFE(rss->idx_map);
1667         int        i                    = 0;
1668         ir_node    **val_arr            = alloca(n * sizeof(val_arr[0]));
1669         bitset_t   *bs_sv               = bitset_alloca(n_idx);
1670         bitset_t   *bs_vdesc            = bitset_alloca(n_idx);
1671         bitset_t   *bs_tmp              = bitset_alloca(n_idx);
1672         bitset_t   *bs_ukilldesc        = bitset_alloca(n_idx);
1673         int        best_benefit         = INT_MAX;
1674         int        best_omega2          = INT_MAX;
1675         int        best_benefit_omega20 = INT_MAX;
1676         int        has_omega1           = 0;
1677         int        j, k;
1678         ir_node    *irn;
1679         rss_edge_t min_benefit_edge;
1680         rss_edge_t min_omega20_edge;
1681         rss_irn_t  *ser_u_omega1, *ser_v_omega1;
1682         rss_irn_t  *ser_u_omega20, *ser_v_omega20;
1683
1684         DBG((rss->dbg, LEVEL_1, "\tcomputing admissible serializations:\n"));
1685
1686         /*
1687                 We need an explicit array for the values as
1688                 we cannot iterate multiple times over the same
1689                 set at the same time. :-(((((
1690         */
1691
1692         foreach_nodeset(sat_vals, irn) {
1693                 val_arr[i++] = irn;
1694                 bitset_set(bs_sv, BLOCK_IDX_MAP(rss, irn));
1695         }
1696
1697         /*
1698                 We build all admissible serializations and remember the best found so far.
1699                 For u in sat_vals:
1700                  For v in sat_val:
1701                    if v in pkiller(u): add edge from v to all other pkiller(u)
1702                    else: for all vv in pkiller(u): add edge from v to vv if there exists no path from vv to v
1703         */
1704
1705 /*
1706         A node is unserializable if:
1707         - it has only one killer and this one is Sink
1708     - it kills no other values
1709         In this case there is no serialization which could
1710         reduce the registerpressure
1711 */
1712 #define IS_UNSERIALIZABLE_NODE(rss_node)           \
1713         ((plist_count(rss_node->pkiller_list) == 1) && \
1714         is_Sink(rss_node->killer)                   && \
1715         (plist_count(rss_node->kill_value_list) == 0))
1716
1717         /* for all u in sat_vals */
1718         for (i = 0; i < n; ++i) {
1719                 rss_irn_t       *u       = get_rss_irn(rss, val_arr[i]);
1720                 int             u_height = get_irn_height(rss->h, val_arr[i]);
1721                 plist_element_t *el;
1722
1723                 /* ignore nodes where serialization does not help */
1724                 if (IS_UNSERIALIZABLE_NODE(u)) {
1725                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", u->irn));
1726                         continue;
1727                 }
1728
1729                 /* accumulate all descendants of all pkiller(u) */
1730                 bitset_clear_all(bs_ukilldesc);
1731                 foreach_plist(u->pkiller_list, el) {
1732                         ir_node   *irn  = plist_element_get_value(el);
1733                         rss_irn_t *node = get_rss_irn(rss, irn);
1734
1735                         if (! is_Sink(irn))
1736                                 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, irn));
1737                         else
1738                                 continue;
1739
1740                         for (k = ARR_LEN_SAFE(node->descendants) - 1; k >= 0; --k) {
1741                                 if (! is_Sink(node->descendants[k]))
1742                                         bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, node->descendants[k]));
1743                         }
1744                 }
1745
1746                 /* for all v in sat_vals */
1747                 for (j = 0; j < n; ++j) {
1748                         ir_node   *v_irn   = val_arr[j];
1749                         rss_irn_t *v       = get_rss_irn(rss, v_irn);
1750                         unsigned  v_height = get_irn_height(rss->h, v_irn);
1751                         int       omega1, omega2, is_pkiller;
1752
1753                         /* v cannot be serialized with itself
1754                          * ignore nodes where serialization does not help */
1755                         if (i == j || IS_UNSERIALIZABLE_NODE(v)) {
1756                                 if (i != j)
1757                                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", v->irn));
1758                                 continue;
1759                         }
1760
1761                         /* get descendants of v */
1762                         bitset_clear_all(bs_vdesc);
1763                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v_irn));
1764                         for (k = ARR_LEN_SAFE(v->descendants) - 1; k >= 0; --k) {
1765                                 if (! is_Sink(v->descendants[k]))
1766                                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v->descendants[k]));
1767                         }
1768
1769                         /* if v is in pkiller(u) */
1770                         is_pkiller = plist_has_value(u->pkiller_list, v_irn);
1771
1772                         /* for all vv in pkiller(u) */
1773                         foreach_plist(u->pkiller_list, el) {
1774                                 ir_node *vv_irn  = plist_element_get_value(el);
1775                                 int     add_edge;
1776
1777                                 if (is_Sink(vv_irn))
1778                                         continue;
1779
1780                                 if (is_pkiller)
1781                                         add_edge = vv_irn != v_irn && skip_Proj(vv_irn) != skip_Proj(v_irn);
1782                                 else
1783                                         add_edge = ! heights_reachable_in_block(rss->h, skip_Proj(vv_irn), skip_Proj(v_irn));
1784
1785                                 /*
1786                                         As we add an edge from vv -> v, we have to make sure,
1787                                         that there exists no path from v to vv.
1788                                 */
1789
1790                                 if (add_edge) {
1791                                         int vv_height = get_irn_height(rss->h, vv_irn);
1792                                         int mu1, mu2, critical_path_cost;
1793
1794                                         /*
1795                                                 mu1 = | descendants(v) cut sat_vals |
1796                                                 the number of saturating values which cannot
1797                                                 be simultaneously alive with u
1798                                         */
1799                                         bitset_copy(bs_tmp, bs_vdesc);
1800                                         mu1 = bitset_popcnt(bitset_and(bs_tmp, bs_sv));
1801
1802                                         /*
1803                                                 mu2 = | accum_desc_all_pkiller(u) without descendants(v) |
1804                                         */
1805                                         if (is_pkiller) {
1806                                                 bitset_copy(bs_tmp, bs_ukilldesc);
1807                                                 mu2 = bitset_popcnt(bitset_andnot(bs_tmp, bs_vdesc));
1808                                         }
1809                                         else {
1810                                                 mu2 = 0;
1811                                         }
1812
1813                                         /* omega1 = mu1 - mu2 */
1814                                         omega1 = mu1 - mu2;
1815
1816                                         if (omega1 != 0)
1817                                                 has_omega1 = 1;
1818
1819                                         /* omega2 = increase of critical path */
1820                                         critical_path_cost =
1821                                                 v_height                        /* longest path from v to sink */
1822                                                 + rss->max_height - vv_height   /* longest path from source to vv */
1823                                                 + 1;                            /* edge */
1824
1825                                         /*
1826                                                 If critical_path_cost > max_height -> the new edge
1827                                                 would increase the longest critical path by the difference.
1828                                         */
1829                                         omega2 = critical_path_cost > rss->max_height ? critical_path_cost - rss->max_height : 0;
1830
1831                                         /* this keeps track of the edge with the best benefit */
1832                                         if (omega1 >= num_regs - n && omega1 < best_benefit) {
1833                                                 min_benefit_edge.src = v_irn;
1834                                                 min_benefit_edge.tgt = vv_irn;
1835
1836                                                 ser_u_omega1 = u;
1837                                                 ser_v_omega1 = v;
1838
1839                                                 best_benefit    = omega1;
1840                                                 ser->new_killer = is_pkiller;
1841                                         }
1842
1843                                         /* this keeps track of the edge with the best omega1 costs where omega2 == 0 */
1844                                         if (omega2 == 0 && omega1 >= num_regs - n && omega1 < best_benefit_omega20) {
1845                                                 min_omega20_edge.src = v_irn;
1846                                                 min_omega20_edge.tgt = vv_irn;
1847
1848                                                 ser_u_omega20 = u;
1849                                                 ser_v_omega20 = v;
1850
1851                                                 best_benefit_omega20 = omega1;
1852                                                 ser->new_killer      = is_pkiller;
1853                                         }
1854
1855                                         best_omega2 = MIN(best_omega2, omega2);
1856
1857                                         DBG((rss->dbg, LEVEL_2, "\t\tfound %+F -> %+F (w1 %d, w2 %d)\n",
1858                                                 v_irn, vv_irn, omega1, omega2));
1859                                 } /* if add_edge */
1860                         } /* for all vv in pkiller(u) */
1861                 } /* for all v in sat_vals */
1862         } /* for all u in sat_vals */
1863
1864         if (! has_omega1)
1865                 return NULL;
1866
1867         if (best_omega2 == 0) {
1868                 ser->u         = ser_u_omega20;
1869                 ser->v         = ser_v_omega20;
1870                 ser->edge->src = min_omega20_edge.src;
1871                 ser->edge->tgt = min_omega20_edge.tgt;
1872                 ser->omega1    = best_benefit_omega20;
1873                 ser->omega2    = best_omega2;
1874         }
1875         else {
1876                 ser->u         = ser_u_omega1;
1877                 ser->v         = ser_v_omega1;
1878                 ser->edge->src = min_benefit_edge.src;
1879                 ser->edge->tgt = min_benefit_edge.tgt;
1880                 ser->omega1    = best_benefit;
1881                 ser->omega2    = best_omega2;
1882         }
1883
1884         return ser;
1885
1886 #undef IS_UNSERIALIZABLE_NODE
1887 }
1888
1889 /**
1890  * Perform the value serialization heuristic and add all
1891  * computed serialization edges as dependencies to the irg.
1892  */
1893 static void perform_value_serialization_heuristic(rss_t *rss) {
1894         bitset_t *arch_nonign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1895         bitset_t *abi_ign_bs     = bitset_alloca(arch_register_class_n_regs(rss->cls));
1896         int      available_regs, iteration;
1897         dvg_t    dvg;
1898         nodeset  *sat_vals;
1899         pset *ser_set = new_pset(cmp_rss_edges, 20);
1900
1901         /* available_regs = R = |arch_non_ignore_regs cut ~abi_ignore_regs| */
1902         arch_put_non_ignore_regs(rss->arch_env, rss->cls, arch_nonign_bs);
1903         be_abi_put_ignore_regs(rss->abi, rss->cls, abi_ign_bs);
1904         bitset_andnot(arch_nonign_bs, abi_ign_bs);
1905         available_regs = bitset_popcnt(arch_nonign_bs);
1906
1907         DBG((rss->dbg, LEVEL_1, "\n\t#available regs: %d\n\n", available_regs));
1908
1909         /*
1910                 At first we need to compute the disjoint value DAG (DVG = {V, E_dv}).
1911                 V    = set of all nodes we are currently interested in
1912                 E_dv = there is an edge from u to v iff v is a descendant of killer(u), forall u, v in V
1913         */
1914         dvg.nodes = new_nodeset(plist_count(rss->nodes));
1915         dvg.edges = new_pset(cmp_rss_edges, plist_count(rss->nodes) * 5);
1916         compute_dvg(rss, &dvg);
1917
1918         /*
1919                 Then we perform the heuristic serialization algorithm
1920                 on the DVG which gives us all necessary serialization
1921                 edges.
1922         */
1923         DBG((rss->dbg, LEVEL_1, "\tcomputing maximal antichain:\n"));
1924         iteration = 0;
1925         sat_vals  = compute_maximal_antichain(rss, &dvg, iteration++);
1926         while (sat_vals && (nodeset_count(sat_vals) > available_regs)) {
1927                 serialization_t *ser, best_ser;
1928                 rss_edge_t      *edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*edge));
1929                 rss_irn_t       *tgt, *src;
1930                 ir_node         *dep_src, *dep_tgt;
1931
1932                 best_ser.edge = edge;
1933                 ser = compute_best_admissible_serialization(rss, sat_vals, &best_ser, available_regs);
1934
1935                 DBG((rss->dbg, LEVEL_1, "\tcurrent register saturation %d, target %d\n", nodeset_count(sat_vals), available_regs));
1936
1937                 if (! ser) {
1938                         DBG((rss->dbg, LEVEL_1, "\tno RS improving serialization found, breaking at iteration %d\n", iteration));
1939                         break;
1940                 }
1941
1942                 /* Insert the serialization as dependency edge into the irg. */
1943                 DBG((rss->dbg, LEVEL_1, "\tserializing %+F -> %+F with edge %+F -> %+F and cost %d, %d\n",
1944                         ser->u->irn, ser->v->irn, ser->edge->src, ser->edge->tgt, ser->omega1, ser->omega2));
1945
1946                 if (pset_find(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge)))
1947                         ir_printf("WARNING: serialization %+F -> %+F computed twice!\n", ser->edge->src, ser->edge->tgt);
1948
1949
1950                 pset_insert(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge));
1951
1952                 /* update the dvg */
1953                 update_dvg(rss, &dvg, ser->v, ser->u);
1954                 update_dvg(rss, &dvg, get_rss_irn(rss, ser->edge->src), get_rss_irn(rss, ser->edge->tgt));
1955                 del_nodeset(sat_vals);
1956
1957                 dep_src = skip_Proj(ser->edge->src);
1958                 dep_tgt = ser->edge->tgt;
1959                 add_irn_dep(dep_src, dep_tgt);
1960
1961                 /* Update descendants, consumer and pkillers of the target */
1962                 update_node_info(rss, ser->edge->tgt, ser->edge->src);
1963
1964                 /* TODO: try to find a cheaper way for updating height information */
1965                 rss->max_height = heights_recompute_block(rss->h, rss->block);
1966
1967                 /* Recompute the antichain for next serialization */
1968                 DBG((rss->dbg, LEVEL_1, "\tre-computing maximal antichain:\n"));
1969                 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
1970         }
1971
1972         del_nodeset(dvg.nodes);
1973         del_pset(dvg.edges);
1974 }
1975
1976 /**
1977  * Do initial calculations for a block.
1978  */
1979 static void process_block(ir_node *block, void *env) {
1980         rss_t *rss = env;
1981         int   i, n;
1982         const ir_edge_t *edge;
1983
1984         phase_init(&rss->ph, "rss block preprocessor", rss->irg, PHASE_DEFAULT_GROWTH, init_rss_irn);
1985
1986         DBG((rss->dbg, LEVEL_1, "preprocessing block %+F\n", block));
1987         rss->block = block;
1988
1989         /* build an index map for all nodes in the current block */
1990         i            = 0;
1991         n            = get_irn_n_edges(block);
1992         NEW_ARR_A(int *, rss->idx_map, n);
1993         foreach_out_edge(block, edge) {
1994                 ir_node *irn      = get_edge_src_irn(edge);
1995                 rss->idx_map[i++] = get_irn_idx(irn);
1996         }
1997         qsort(rss->idx_map, n, sizeof(rss->idx_map[0]), cmp_int);
1998         rss->max_height = heights_recompute_block(rss->h, block);
1999
2000         /* loop over all register classes */
2001         for (i = arch_isa_get_n_reg_class(rss->arch_env->isa) - 1; i >= 0; --i) {
2002                 const arch_register_class_t *cls = arch_isa_get_reg_class(rss->arch_env->isa, i);
2003
2004                 rss->cls = cls;
2005                 DBG((rss->dbg, LEVEL_1, "register class %s\n", arch_register_class_name(cls)));
2006
2007                 /* reset the list of interesting nodes */
2008                 plist_clear(rss->nodes);
2009                 plist_insert_back(rss->nodes, _sink);
2010
2011                 /* collect all nodes having a certain register class */
2012                 foreach_out_edge(block, edge) {
2013                         ir_node *irn = get_edge_src_irn(edge);
2014
2015                         /*
2016                                 We skip:
2017                                 - mode_T nodes (the projs are asked)
2018                                 - mode_X nodes (control flow nodes are always scheduled last)
2019                                 - Keeps (they are always immediately scheduled)
2020                                 - Phi (same as Keep)
2021                         */
2022                         if (get_irn_mode(irn) == mode_T || be_is_Keep(irn) || is_Phi(irn))
2023                                 continue;
2024
2025                         /*
2026                                 In case of a proj, we skip
2027                                 - Barrier (they are a Barrier :)
2028                                 - Start
2029                                 - the Proj itself, as it's scheduled always with it's super node
2030                         */
2031                         if (is_Proj(irn)) {
2032                                 ir_node *pred = get_Proj_pred(irn);
2033                                 if (be_is_Barrier(pred) || is_Start(pred))
2034                                         continue;
2035                         }
2036
2037                         if (! arch_irn_is(rss->arch_env, irn, ignore) && arch_get_irn_reg_class(rss->arch_env, irn, -1) == cls) {
2038                                 plist_insert_back(rss->nodes, skip_Proj(irn));
2039
2040                                 /* calculate the descendants and consumer for each node in the block */
2041                                 collect_node_info(rss, skip_Proj(irn));
2042                         }
2043                 }
2044
2045                 /* compute the potential killing set PK(G) */
2046                 compute_pkill_set(rss);
2047
2048                 /* compute the killing function k* */
2049                 compute_killing_function(rss);
2050
2051                 /*
2052                         Compute the heuristic value serialization and
2053                         add the necessary dependencies to the irg.
2054                 */
2055                 perform_value_serialization_heuristic(rss);
2056         }
2057
2058         phase_free(&rss->ph);
2059 }
2060
2061 #ifdef WITH_LIBCORE
2062 /**
2063  * Register the options.
2064  */
2065 void rss_register_options(lc_opt_entry_t *grp) {
2066         static int     run_once = 0;
2067         lc_opt_entry_t *rss_grp;
2068
2069         if (! run_once) {
2070                 run_once = 1;
2071                 rss_grp  = lc_opt_get_grp(grp, "rss");
2072
2073                 lc_opt_add_table(rss_grp, rss_option_table);
2074         }
2075 }
2076 #endif /* WITH_LIBCORE */
2077
2078 /**
2079  * Preprocess the irg for scheduling.
2080  */
2081 void rss_schedule_preparation(const be_irg_t *birg) {
2082         rss_t rss;
2083
2084         FIRM_DBG_REGISTER(rss.dbg, "firm.be.sched.rss");
2085
2086         //firm_dbg_set_mask(rss.dbg, LEVEL_1 | LEVEL_2 | LEVEL_3);
2087
2088         init_rss_special_nodes(birg->irg);
2089
2090         rss.irg      = birg->irg;
2091         rss.arch_env = birg->main_env->arch_env;
2092         rss.abi      = birg->abi;
2093         rss.h        = heights_new(birg->irg);
2094         rss.nodes    = plist_new();
2095         rss.opts     = &rss_options;
2096         irg_block_walk_graph(birg->irg, NULL, process_block, &rss);
2097         heights_free(rss.h);
2098         plist_free(rss.nodes);
2099
2100         if (birg->main_env->options->dump_flags & DUMP_SCHED)
2101                 be_dump(rss.irg, "-rss", dump_ir_block_graph);
2102 }