166b7298cdd1ae3fbcfee58573a1599e62e01154
[libfirm] / ir / be / beschedrss.c
1 /**
2  * Implementation of a register saturating list scheduler
3  * as described in: Sid-Ahmed-Ali Touati
4  * Register Saturation in Superscalar and VLIW Codes
5  *
6  * @license This file protected by GPL -  GNU GENERAL PUBLIC LICENSE.
7  * @author  Christian Wuerdig
8  * @date    29.08.2006
9  * @cvs-id  $Id$
10  */
11 #ifdef HAVE_CONFIG_H
12 #include "config.h"
13 #endif
14
15 #include <limits.h>
16
17 #include "obst.h"
18 #include "debug.h"
19
20 #include "irgraph_t.h"
21 #include "irnode_t.h"
22 #include "iredges_t.h"
23 #include "ircons_t.h"
24 #include "irphase_t.h"
25 #include "irgwalk.h"
26 #include "irdump.h"
27 #include "irtools.h"
28 #include "irbitset.h"
29 #include "irprintf.h"
30 #include "bipartite.h"
31 #include "hungarian.h"
32 #include "plist.h"
33
34 #include "height.h"
35
36 #include "beabi.h"
37 #include "benode_t.h"
38 #include "besched_t.h"
39
40 #ifdef WITH_LIBCORE
41 #include <libcore/lc_opts.h>
42 #include <libcore/lc_opts_enum.h>
43 #endif /* WITH_LIBCORE */
44
45
46 #define ARR_LEN_SAFE(arr) ((arr) != NULL ? ARR_LEN((arr)) : 0)
47
48 #define HASH_RSS_EDGE(edge) ((get_irn_node_nr((edge)->src) << 16) | (get_irn_node_nr((edge)->tgt) & 0xFFFF))
49 #define BSEARCH_IRN_ARR(val, arr) \
50         bsearch(&(val), (arr), ARR_LEN_SAFE((arr)), sizeof((arr)[0]), cmp_irn_idx)
51
52 #define BLOCK_IDX_MAP(rss, irn) bsearch_for_index(get_irn_idx((irn)), (rss)->idx_map, ARR_LEN_SAFE((rss)->idx_map), 1)
53
54 /* the rss options */
55 typedef struct _rss_opts_t {
56         int dump_flags;
57 } rss_opts_t;
58
59 /* Represents a child with associated costs */
60 typedef struct _child {
61         ir_node *irn;
62         float   cost;
63 } child_t;
64
65 /* We need edges for several purposes. */
66 typedef struct _rss_edge {
67         ir_node *src;
68         ir_node *tgt;
69         void    *next;
70 } rss_edge_t;
71
72 /* Represents a connected bipartite component. */
73 typedef struct _cbc {
74         nodeset *parents;       /**< = S  a set of value producers */
75         nodeset *children;      /**< = T  a set of value consumers */
76         pset    *kill_edges;    /**< = E  a set of edges (t in T, s in S) such as each s in S gets killed by at least one t in T */
77         int     nr;             /**< a deterministic index for set insertion (used as hash) */
78 } cbc_t;
79
80 /* Represents a disjoint value DAG. */
81 typedef struct _dvg {
82         nodeset *nodes;
83         pset    *edges;
84 } dvg_t;
85
86 /* Represents a chain of nodes. */
87 typedef struct _chain {
88         plist_t *elements;   /**< List of chain elements */
89         int     nr;          /**< a deterministic index for set insertion (used as hash) */
90 } chain_t;
91
92 typedef struct _rss_irn {
93         plist_t  *consumer_list;    /**< List of consumers */
94         ir_node **consumer;         /**< Sorted consumer array (needed for faster access) */
95
96         plist_t  *parent_list;      /**< List of parents */
97         ir_node **parents;          /**< Sorted parent array (needed for faster access) */
98
99         plist_t  *descendant_list;  /**< List of descendants */
100         ir_node **descendants;      /**< Sorted descendant array (needed for faster access) */
101
102         plist_t  *pkiller_list;     /**< List of potential killers */
103         ir_node **pkillers;         /**< Sorted pkiller array (needed for faster access) */
104
105 #if 0
106         plist_t  *dvg_desc_list;    /**< List of all descendants in the DVG */
107         ir_node **dvg_desc;         /**< Sorted dvg descendant array (needed for faster access) */
108
109         plist_t  *dvg_pkiller_list; /**< List of potential killers in the DVG */
110         ir_node **dvg_pkiller;      /**< Sorted dvg pkiller array (needed for faster access) */
111
112         plist_t  *dvg_user_list;    /**< List of users in the disjoint value DAG DVG */
113 #endif
114         plist_t  *kill_value_list;  /**< List of values getting potentially killed by this node */
115
116         ir_node  *killer;           /**< The selected unique killer */
117         ir_node  *irn;              /**< The corresponding firm node to this rss_irn */
118
119         chain_t  *chain;            /**< The chain, this node is associated to */
120
121         unsigned desc_walk;         /**< visited flag for collecting descendants */
122
123         unsigned live_out : 1;      /**< irn has consumers outside of it's block */
124         unsigned visited  : 1;      /**< visited flag for bipartite decomposition */
125         unsigned havecons : 1;      /**< visited flag collect consumer */
126         unsigned handled  : 1;      /**< flag indicating whether or not the list structures have been build */
127         unsigned dumped   : 1;      /**< flag indication whether or not this node was dumped */
128 } rss_irn_t;
129
130 /* Represents a serialization edge with associated costs. */
131 typedef struct _serialization {
132         rss_irn_t  *u;       /* the top node */
133         rss_irn_t  *v;       /* the node about to be serialized after u */
134         rss_edge_t *edge;    /* the edge selected for the serialization */
135         int        omega1;   /* estimated: available regs - RS reduction */
136         int        omega2;   /* increase in critical path length */
137         int        new_killer;
138 } serialization_t;
139
140 typedef struct _rss {
141         phase_t          ph;              /**< Phase to hold some data */
142         heights_t        *h;              /**< The current height object */
143         ir_graph         *irg;            /**< The irg to preprocess */
144         plist_t          *nodes;          /**< The list of interesting nodes */
145         const arch_env_t *arch_env;       /**< The architecture environment */
146         be_abi_irg_t     *abi;            /**< The abi for this irg */
147         pset             *cbc_set;        /**< A set of connected bipartite components */
148         ir_node          *block;          /**< The current block in progress. */
149         int              *idx_map;        /**< Mapping irn indices to per block indices */
150         unsigned         max_height;      /**< maximum height in the current block */
151         rss_opts_t       *opts;           /**< The options */
152         const arch_register_class_t *cls; /**< The current register class */
153         DEBUG_ONLY(firm_dbg_module_t *dbg);
154 } rss_t;
155
156 #define get_rss_irn(rss, irn)  (phase_get_or_set_irn_data(&rss->ph, irn))
157
158 /**
159  * We need some special nodes:
160  * a source and a sink for all live-in and live-out values of a block
161  */
162
163 enum {
164         iro_rss_Source,
165         iro_rss_Sink,
166         iro_rss_last
167 };
168
169 static ir_node *_source = NULL;
170 static ir_node *_sink   = NULL;
171
172 #define is_Source(irn) ((irn) == _source)
173 #define is_Sink(irn)   ((irn) == _sink)
174
175 enum {
176         RSS_DUMP_NONE  = 0,
177         RSS_DUMP_CBC   = 1 << 0,
178         RSS_DUMP_PKG   = 1 << 1,
179         RSS_DUMP_KILL  = 1 << 2,
180         RSS_DUMP_DVG   = 1 << 3,
181         RSS_DUMP_MAXAC = 1 << 4,
182         RSS_DUMP_ALL   = (RSS_DUMP_MAXAC << 1) - 1,
183 };
184
185 static rss_opts_t rss_options = {
186         RSS_DUMP_NONE,
187 };
188
189 #ifdef WITH_LIBCORE
190 static const lc_opt_enum_int_items_t dump_items[] = {
191         { "none",  RSS_DUMP_NONE  },
192         { "cbc",   RSS_DUMP_CBC   },
193         { "pkg",   RSS_DUMP_PKG   },
194         { "kill",  RSS_DUMP_KILL  },
195         { "dvg",   RSS_DUMP_DVG   },
196         { "maxac", RSS_DUMP_MAXAC },
197         { "all",   RSS_DUMP_ALL   },
198         { NULL, 0 }
199 };
200
201 static lc_opt_enum_int_var_t dump_var = {
202         &rss_options.dump_flags, dump_items
203 };
204
205 static const lc_opt_table_entry_t rss_option_table[] = {
206         LC_OPT_ENT_ENUM_MASK("dump", "dump phases (none, cbc, pkg, kill, dvg, maxac, all)", &dump_var),
207         { NULL }
208 };
209 #endif /* WITH_LIBCORE */
210
211 /******************************************************************************
212  *  _          _                    __                  _   _
213  * | |        | |                  / _|                | | (_)
214  * | |__   ___| |_ __   ___ _ __  | |_ _   _ _ __   ___| |_ _  ___  _ __  ___
215  * | '_ \ / _ \ | '_ \ / _ \ '__| |  _| | | | '_ \ / __| __| |/ _ \| '_ \/ __|
216  * | | | |  __/ | |_) |  __/ |    | | | |_| | | | | (__| |_| | (_) | | | \__ \
217  * |_| |_|\___|_| .__/ \___|_|    |_|  \__,_|_| |_|\___|\__|_|\___/|_| |_|___/
218  *            | |
219  *            |_|
220  ******************************************************************************/
221
222 /**
223  * Acquire opcodes and create source and sink nodes.
224  */
225 static void init_rss_special_nodes(ir_graph *irg) {
226         ir_node *block         = get_irg_start_block(irg);
227         int     iro_rss_base   = get_next_ir_opcodes(iro_rss_last);
228         ir_op   *op_rss_Source = new_ir_op(iro_rss_base + iro_rss_Source, "rss_Source", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
229         ir_op   *op_rss_Sink   = new_ir_op(iro_rss_base + iro_rss_Sink,   "rss_Sink",   op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
230         _source                = new_ir_node(NULL, irg, block, op_rss_Source, mode_ANY, 0, NULL);
231         _sink                  = new_ir_node(NULL, irg, block, op_rss_Sink, mode_ANY, 0, NULL);
232 }
233
234 static int cmp_int(const void *a, const void *b) {
235         const int *i1 = a;
236         const int *i2 = b;
237
238         return QSORT_CMP(*i1, *i2);
239 }
240
241 static int cmp_child_costs(const void *a, const void *b) {
242         const child_t *c1 = a;
243         const child_t *c2 = b;
244
245         return QSORT_CMP(c1->cost, c2->cost);
246 }
247
248 static int cmp_irn_idx(const void *a, const void *b) {
249         const ir_node *n1 = *(ir_node **)a;
250         const ir_node *n2 = *(ir_node **)b;
251
252         return QSORT_CMP(get_irn_idx(n1), get_irn_idx(n2));
253 }
254
255 static int cmp_rss_edges(const void *a, const void *b) {
256         const rss_edge_t *e1 = a;
257         const rss_edge_t *e2 = b;
258
259         return (e1->src != e2->src) || (e1->tgt != e2->tgt);
260 }
261
262 static int bsearch_for_index(int key, int *arr, size_t len, int force) {
263         int left = 0;
264         int right = len;
265
266         while (right >= left) {
267                 int idx = (left + right) / 2;
268
269                 if (key < arr[idx])
270                         right = idx - 1;
271                 else if (key > arr[idx])
272                         left = idx + 1;
273                 else
274                         return idx;
275         }
276
277         if (force)
278                 assert(0 && "Something is wrong, key not found.");
279         return -1;
280 }
281
282 static ir_node **build_sorted_array_from_list(plist_t *irn_list, struct obstack *obst) {
283         plist_element_t *el;
284         int     i     = 0;
285         int     len   = plist_count(irn_list);
286         ir_node **arr = NEW_ARR_D(ir_node *, obst, len);
287
288         /* copy the list into the array */
289         foreach_plist(irn_list, el) {
290                 arr[i++] = plist_element_get_value(el);
291         }
292
293         /* sort the array by node index */
294         qsort(arr, len, sizeof(arr[0]), cmp_irn_idx);
295
296         return arr;
297 }
298
299 /*****************************************************
300  *      _      _                       _
301  *     | |    | |                     (_)
302  *   __| | ___| |__  _   _  __ _  __ _ _ _ __   __ _
303  *  / _` |/ _ \ '_ \| | | |/ _` |/ _` | | '_ \ / _` |
304  * | (_| |  __/ |_) | |_| | (_| | (_| | | | | | (_| |
305  *  \__,_|\___|_.__/ \__,_|\__, |\__, |_|_| |_|\__, |
306  *                          __/ | __/ |         __/ |
307  *                         |___/ |___/         |___/
308  *
309  *****************************************************/
310
311 static void dump_nodeset(nodeset *ns, const char *prefix) {
312         ir_node *irn;
313         foreach_nodeset(ns, irn) {
314                 ir_fprintf(stderr, "%s%+F\n", prefix, irn);
315         }
316 }
317
318 static void build_file_name(rss_t *rss, const char *suffix, size_t suf_len, char *buf, size_t len) {
319         const char *irg_name;
320
321         memset(buf, 0, len);
322         irg_name = get_entity_name(get_irg_entity(rss->irg));
323         snprintf(buf, len - suf_len, "%s-%s-block-%ld",
324                 irg_name, arch_register_class_name(rss->cls), get_irn_node_nr(rss->block));
325         strcat(buf, suffix);
326 }
327
328 /* Dumps all collected bipartite components of current irg as vcg. */
329 static void debug_vcg_dump_bipartite(rss_t *rss) {
330         cbc_t *cbc;
331         FILE  *f;
332         char  file_name[256];
333         static const char suffix[] = "-RSS-CBC.vcg";
334
335         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
336         f = fopen(file_name, "w");
337
338         ir_fprintf(f, "graph: { title: \"connected bipartite component graph of %+F\"\n", rss->irg);
339         fprintf(f, "display_edge_labels: no\n");
340         fprintf(f, "layoutalgorithm: mindepth\n");
341         fprintf(f, "manhattan_edges: yes\n\n");
342
343         foreach_pset(rss->cbc_set, cbc) {
344                 ir_node    *n;
345                 rss_edge_t *ke;
346
347                 fprintf(f, "graph: { titel: \"cbc %d\" label: \"cbc %d\" status:clustered color:yellow\n", cbc->nr, cbc->nr);
348                 foreach_nodeset(cbc->parents, n) {
349                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
350                 }
351                 foreach_nodeset(cbc->children, n) {
352                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
353                 }
354                 foreach_pset(cbc->kill_edges, ke) {
355                         ir_fprintf(f, "edge: { sourcename: \"n%d_%d\" targetname: \"n%d_%d\" }\n",
356                                 get_irn_node_nr(ke->src), cbc->nr, get_irn_node_nr(ke->tgt), cbc->nr);
357                 }
358                 fprintf(f, "}\n\n");
359         }
360         fprintf(f, "}\n");
361         fclose(f);
362 }
363
364 /* Dump the computed killing function as vcg. */
365 static void debug_vcg_dump_kill(rss_t *rss) {
366         FILE            *f;
367         char            file_name[256];
368         plist_element_t *el;
369         static const char suffix[] = "-RSS-KILL.vcg";
370
371         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
372         f = fopen(file_name, "w");
373
374         ir_fprintf(f, "graph: { title: \"computed kill graph of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
375         fprintf(f, "display_edge_labels: no\n");
376         fprintf(f, "layoutalgorithm: mindepth\n");
377         fprintf(f, "manhattan_edges: yes\n\n");
378
379         /* first: reset dumped flag of all nodes */
380         foreach_plist(rss->nodes, el) {
381                 ir_node   *irn  = plist_element_get_value(el);
382                 rss_irn_t *rirn = get_rss_irn(rss, irn);
383                 rirn->dumped = 0;
384         }
385
386         /* dump all nodes and their killers */
387         foreach_plist(rss->nodes, el) {
388                 ir_node   *irn     = plist_element_get_value(el);
389                 rss_irn_t *rirn    = get_rss_irn(rss, irn);
390                 rss_irn_t *pk_rirn = get_rss_irn(rss, rirn->killer);
391
392                 if (! rirn->dumped) {
393                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
394                         rirn->dumped = 1;
395                 }
396
397                 if (! pk_rirn->dumped) {
398                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(rirn->killer), rirn->killer);
399                         pk_rirn->dumped = 1;
400                 }
401
402                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
403                         get_irn_node_nr(rirn->killer), get_irn_node_nr(irn));
404         }
405
406         fprintf(f, "}\n");
407         fclose(f);
408 }
409
410 /* Dumps the potential killing DAG (PKG) as vcg. */
411 static void debug_vcg_dump_pkg(rss_t *rss, nodeset *max_ac, int iteration) {
412         FILE              *f;
413         char              file_name[256];
414         char              *suffix   = alloca(32);
415         static const char suffix1[] = "-RSS-PKG.vcg";
416         static const char suffix2[] = "-RSS-PKG-MAXAC.vcg";
417         plist_element_t   *el;
418
419         if (! max_ac) {
420                 snprintf(suffix, 32, "%s", suffix1);
421         }
422         else {
423                 snprintf(suffix, 32, "-%02d%s", iteration, suffix2);
424         }
425
426         build_file_name(rss, suffix, strlen(suffix) + 1, file_name, sizeof(file_name));
427         f = fopen(file_name, "w");
428
429         ir_fprintf(f, "graph: { title: \"potential killing DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
430         fprintf(f, "display_edge_labels: no\n");
431         fprintf(f, "layoutalgorithm: mindepth\n");
432         fprintf(f, "manhattan_edges: yes\n\n");
433
434         foreach_plist(rss->nodes, el) {
435                 ir_node   *irn  = plist_element_get_value(el);
436                 rss_irn_t *rirn = get_rss_irn(rss, irn);
437                 char      *c1   = "";
438                 plist_element_t *k_el;
439
440                 /* dump selected saturating values in yellow */
441                 if (max_ac && nodeset_find(max_ac, irn))
442                         c1 = "color:yellow";
443
444                 if (rirn->chain)
445                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(irn), irn, rirn->chain->nr, c1);
446                 else
447                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(irn), irn, c1);
448                 rirn->dumped = 1;
449
450                 foreach_plist(rirn->pkiller_list, k_el) {
451                         ir_node   *pkiller = plist_element_get_value(k_el);
452                         rss_irn_t *pk_rirn = get_rss_irn(rss, pkiller);
453                         char      *c2      = "";
454
455                         if (max_ac && nodeset_find(max_ac, pkiller))
456                                 c2 = "color:yellow";
457
458                         if (! pk_rirn->dumped) {
459                                 if (pk_rirn->chain)
460                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(pkiller), pkiller, pk_rirn->chain->nr, c2);
461                                 else
462                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(pkiller), pkiller, c2);
463                                 pk_rirn->dumped = 1;
464                         }
465                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
466                                 get_irn_node_nr(pkiller), get_irn_node_nr(irn));
467                 }
468         }
469         fprintf(f, "}\n");
470         fclose(f);
471 }
472
473 /* Dumps the disjoint value DAG (DVG) as vcg. */
474 static void debug_vcg_dump_dvg(rss_t *rss, dvg_t *dvg) {
475         static const char suffix[] = "-RSS-DVG.vcg";
476         FILE       *f;
477         char       file_name[256];
478         ir_node    *irn;
479         rss_edge_t *edge;
480
481         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
482         f = fopen(file_name, "w");
483
484         ir_fprintf(f, "graph: { title: \"disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
485         fprintf(f, "display_edge_labels: no\n");
486         fprintf(f, "layoutalgorithm: mindepth\n");
487         fprintf(f, "manhattan_edges: yes\n\n");
488
489         /* dump all nodes */
490         foreach_nodeset(dvg->nodes, irn) {
491                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
492         }
493
494         /* dump all edges */
495         foreach_pset(dvg->edges, edge) {
496                 rss_irn_t *src = get_rss_irn(rss, edge->src);
497                 rss_irn_t *tgt = get_rss_irn(rss, edge->tgt);
498
499                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
500                         get_irn_node_nr(edge->src), get_irn_node_nr(edge->tgt));
501         }
502
503         fprintf(f, "}\n");
504         fclose(f);
505 }
506
507 #if 0
508 /* Dumps the PKG(DVG). */
509 static void debug_vcg_dump_dvg_pkiller(rss_t *rss, dvg_t *dvg) {
510         static const char suffix[] = "-RSS-DVG-PKG.vcg";
511         FILE       *f;
512         char       file_name[256];
513         ir_node    *irn;
514
515         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
516         f = fopen(file_name, "w");
517
518         ir_fprintf(f, "graph: { title: \"PKG of disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
519         fprintf(f, "display_edge_labels: no\n");
520         fprintf(f, "layoutalgorithm: mindepth\n");
521         fprintf(f, "manhattan_edges: yes\n\n");
522
523         /* dump all nodes */
524         foreach_nodeset(dvg->nodes, irn) {
525                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
526         }
527
528         /* dump all edges */
529         foreach_nodeset(dvg->nodes, irn) {
530                 rss_irn_t       *node = get_rss_irn(rss, irn);
531                 plist_element_t *el;
532
533                 foreach_plist(node->dvg_pkiller_list, el) {
534                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
535                                 get_irn_node_nr(plist_element_get_value(el)), get_irn_node_nr(irn));
536                 }
537         }
538
539         fprintf(f, "}\n");
540         fclose(f);
541 }
542 #endif /* if 0 */
543
544 /**
545  * In case there is no rss information for irn, initialize it.
546  */
547 static void *init_rss_irn(phase_t *ph, ir_node *irn, void *old) {
548         rss_irn_t *res = old ? old : phase_alloc(ph, sizeof(res[0]));
549
550         res->descendant_list  = plist_obstack_new(phase_obst(ph));
551         res->descendants      = NULL;
552
553         res->consumer_list    = plist_obstack_new(phase_obst(ph));
554         res->consumer         = NULL;
555
556         res->pkiller_list     = plist_obstack_new(phase_obst(ph));
557         res->pkillers         = NULL;
558
559         res->parent_list      = plist_obstack_new(phase_obst(ph));
560         res->parents          = NULL;
561
562         //res->dvg_desc_list    = plist_obstack_new(phase_obst(ph));
563         //res->dvg_desc         = NULL;
564
565         res->kill_value_list  = plist_obstack_new(phase_obst(ph));
566         //res->dvg_user_list    = plist_obstack_new(phase_obst(ph));
567         //res->dvg_pkiller_list = plist_obstack_new(phase_obst(ph));
568
569         res->killer           = NULL;
570         res->irn              = irn;
571         res->chain            = NULL;
572
573         res->desc_walk        = 0;
574         res->live_out         = 0;
575         res->visited          = 0;
576         res->handled          = 0;
577         res->dumped           = 0;
578         res->havecons         = 0;
579
580         return res;
581 }
582
583 /**
584  * Collect all nodes data dependent on current node.
585  */
586 static void collect_descendants(rss_t *rss, rss_irn_t *rirn, ir_node *irn, int *got_sink, unsigned cur_desc_walk) {
587         const ir_edge_t *edge;
588         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
589         ir_node         *block    = rss->block;
590         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
591         int             i;
592
593         if (cur_node->desc_walk >= cur_desc_walk)
594                 return;
595         cur_node->desc_walk = cur_desc_walk;
596
597         /* Stop at Barriers and Keeps */
598         if (be_is_Barrier(irn) || be_is_Keep(irn))
599                 return;
600
601         /* loop over normal and dependency edges */
602         for (i = 0; i < 2; ++i) {
603                 foreach_out_edge_kind(irn, edge, ekind[i]) {
604                         ir_node *user = get_edge_src_irn(edge);
605
606                         /* skip ignore nodes as they do not really contribute to register pressure */
607                         if (arch_irn_is(rss->arch_env, user, ignore))
608                                 continue;
609
610                         if (is_Proj(user)) {
611                                 if (get_irn_mode(user) != mode_X && arch_get_irn_reg_class(rss->arch_env, user, -1) == rss->cls)
612                                         collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
613                         }
614                         else {
615                                 /* check if user lives in block and is not a control flow node */
616                                 if (get_nodes_block(user) == block) {
617                                         if (! plist_has_value(rirn->descendant_list, user)) {
618                                                 plist_insert_back(rirn->descendant_list, user);
619                                                 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", user));
620                                         }
621                                         collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
622                                 }
623                                 else if (! *got_sink) {
624                                         /* user lives out of block: add sink as descendant if not already done */
625                                         plist_insert_back(rirn->descendant_list, _sink);
626                                         *got_sink = 1;
627                                         DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", _sink));
628                                 }
629                         }
630                 }
631         }
632 }
633
634 /**
635  * Handles a single consumer.
636  */
637 static void collect_single_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *consumer, int *got_sink) {
638         ir_node        *block   = rss->block;
639         ir_edge_kind_t ekind[2] = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
640
641         assert(! is_Proj(consumer) && "Cannot handle Projs");
642
643         if (get_nodes_block(consumer) == block) {
644                 if (! arch_irn_is(rss->arch_env, consumer, ignore)) {
645                         plist_insert_back(rss_irn->consumer_list, consumer);
646                         DBG((rss->dbg, LEVEL_2, "\t\tconsumer %+F\n", consumer));
647                 }
648         }
649         else {
650                 rss_irn->live_out = 1;
651                 DBG((rss->dbg, LEVEL_2, "\t\tlive out %+F", consumer));
652                 if (! *got_sink) {
653                         plist_insert_back(rss_irn->consumer_list, _sink);
654                         *got_sink = 1;
655                         DB((rss->dbg, LEVEL_2, ", %+F added instead", _sink));
656                 }
657                 DB((rss->dbg, LEVEL_2, "\n"));
658         }
659 }
660
661 /**
662  * Collect all nodes consuming the value(s) produced by current node.
663  */
664 static void collect_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *irn, int *got_sink) {
665         const ir_edge_t *edge;
666         int             i;
667         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
668         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
669
670         if (cur_node->havecons)
671                 return;
672         cur_node->havecons = 1;
673
674         for (i = 0; i < 2; ++i) {
675                 foreach_out_edge_kind(irn, edge, ekind[i]) {
676                         ir_node *consumer = get_edge_src_irn(edge);
677
678                         if (is_Proj(consumer)) {
679                                 if (arch_get_irn_reg_class(rss->arch_env, consumer, -1) == rss->cls)
680                                         collect_consumer(rss, rss_irn, consumer, got_sink);
681                         }
682                         else
683                                 collect_single_consumer(rss, rss_irn, consumer, got_sink);
684                 }
685         }
686 }
687
688 #if 0
689 /**
690  * We need to build the consumer and descendant list for _source.
691  */
692 static void collect_node_info_source(rss_t *rss) {
693         const ir_edge_t *edge;
694         rss_irn_t       *rirn  = get_rss_irn(rss, _source);
695         ir_node         *block = rss->block;
696
697         if (rirn->handled)
698                 return;
699
700         foreach_out_edge(block, edge) {
701                 ir_node *irn = get_edge_src_irn(edge);
702                 int     i;
703
704                 for (i = get_irn_arity(n) - 1; i >= 0; --i) {
705
706                 }
707         }
708 }
709
710 static void reset_node_info(rss_irn_t *rss_irn) {
711         /* Beware: array data resides on phase obstack, so it gets removed automatically */
712
713         plist_clear(rss_irn->consumer_list);
714         rss_irn->consumer = NULL;
715
716         plist_clear(rss_irn->parent_list);
717         rss_irn->parents = NULL;
718
719         plist_clear(rss_irn->descendant_list);
720         rss_irn->descendants = NULL;
721
722         plist_clear(rss_irn->pkiller_list);
723         rss_irn->pkillers = NULL;
724
725         plist_clear(rss_irn->kill_value_list);
726
727         rss_irn->killer   = NULL;
728         rss_irn->live_out = 0;
729         rss_irn->visited  = 0;
730         rss_irn->handled  = 0;
731 }
732 #endif /* if 0 */
733
734 /**
735  * Collects all consumer and descendant of a irn.
736  */
737 static void collect_node_info(rss_t *rss, ir_node *irn) {
738         static unsigned cur_desc_walk = 0;
739         rss_irn_t       *rss_irn      = get_rss_irn(rss, irn);
740         int             got_sink;
741
742         assert(! is_Proj(irn) && "Cannot handle Projs.");
743
744         /* check if node info is already available */
745         if (rss_irn->handled)
746                 phase_reinit_single_irn_data(&rss->ph, irn);
747
748         DBG((rss->dbg, LEVEL_1, "\tcomputing consumers of %+F:\n", irn));
749
750         /* collect all consumer */
751         got_sink = 0;
752         collect_consumer(rss, rss_irn, irn, &got_sink);
753
754         /* build sorted consumer array */
755         rss_irn->consumer = build_sorted_array_from_list(rss_irn->consumer_list, phase_obst(&rss->ph));
756
757         DBG((rss->dbg, LEVEL_1, "\tcompute descendants of %+F:\n", irn));
758
759         /* collect descendants */
760         got_sink = 0;
761         collect_descendants(rss, rss_irn, irn, &got_sink, ++cur_desc_walk);
762
763         /* build sorted descendant array */
764         rss_irn->descendants = build_sorted_array_from_list(rss_irn->descendant_list, phase_obst(&rss->ph));
765
766         rss_irn->handled = 1;
767 }
768
769 /**
770  * Checks if v is a potential killer of u.
771  * v is in pkill(u) iff descendants(v) cut consumer(u) is v
772  *
773  * @param rss   The rss object
774  * @param v      The node to check for killer
775  * @param u      The potentially killed value
776  * @return 1 if v is in pkill(u), 0 otherwise
777  */
778 static int is_potential_killer(rss_t *rss, rss_irn_t *v, rss_irn_t *u) {
779         plist_t *list;
780         ir_node **arr;
781         plist_element_t *el;
782
783         assert(is_Sink(v->irn) || ((plist_count(v->descendant_list) > 0 && v->descendants) || 1));
784         assert(is_Sink(u->irn) || ((plist_count(u->consumer_list)   > 0 && u->consumer)    || 1));
785
786         /* as we loop over the list: loop over the shorter one */
787         if (plist_count(v->descendant_list) > plist_count(u->consumer_list)) {
788                 list = u->consumer_list;
789                 arr  = v->descendants;
790         }
791         else {
792                 list = v->descendant_list;
793                 arr  = u->consumer;
794         }
795
796         /* for each list element: try to find element in array */
797         foreach_plist(list, el) {
798                 ir_node *irn   = plist_element_get_value(el);
799                 ir_node *match = BSEARCH_IRN_ARR(irn, arr);
800
801                 if (match && match != irn)
802                         return 0;
803         }
804
805         return 1;
806 }
807
808 /**
809  * Update descendants, consumer and pkiller list for the given irn.
810  */
811 static void update_node_info(rss_t *rss, ir_node *irn, ir_node *pk_irn) {
812         rss_irn_t *node    = get_rss_irn(rss, irn);
813         rss_irn_t *pkiller = get_rss_irn(rss, pk_irn);
814
815         /* add consumer and rebuild the consumer array */
816         if (! plist_has_value(node->consumer_list, pk_irn)) {
817                 plist_insert_back(node->consumer_list, pk_irn);
818                 node->consumer = build_sorted_array_from_list(node->consumer_list, phase_obst(&rss->ph));
819         }
820
821         /* add pk_irn as descendant, add it's descendants to irn's and rebuild array */
822         if (! plist_has_value(node->descendant_list, pk_irn)) {
823                 plist_element_t *el;
824
825                 plist_insert_back(node->descendant_list, pk_irn);
826
827                 foreach_plist(pkiller->descendant_list, el) {
828                         ir_node *desc = plist_element_get_value(el);
829
830                         if (! plist_has_value(node->descendant_list, desc))
831                                 plist_insert_back(node->descendant_list, desc);
832                 }
833
834                 node->descendants = build_sorted_array_from_list(node->descendant_list, phase_obst(&rss->ph));
835         }
836
837 #if 0
838         /* check, if pkiller is a potential killer of irn */
839         if (is_potential_killer(rss, pkiller, node)) {
840                 if (! plist_has_value(node->pkiller_list, pk_irn))
841                         plist_insert_back(node->pkiller_list, pk_irn);
842                 if (! plist_has_value(pkiller->kill_value_list, irn))
843                         plist_insert_back(pkiller->kill_value_list, irn);
844                 DBG((rss->dbg, LEVEL_1, "\tpotential killer %+F added to %+F\n", pk_irn, irn));
845         }
846 #endif
847 }
848
849 /**
850  * Compute the potential killing set PK.
851  */
852 static void compute_pkill_set(rss_t *rss) {
853         plist_element_t *u_el, *v_el;
854
855         foreach_plist(rss->nodes, u_el) {
856                 ir_node   *u_irn = plist_element_get_value(u_el);
857                 rss_irn_t *u     = get_rss_irn(rss, u_irn);
858
859                 DBG((rss->dbg, LEVEL_1, "\tcomputing potential killers of %+F:\n", u_irn));
860
861                 /* check each consumer if it is a potential killer  */
862                 foreach_plist(u->consumer_list, v_el) {
863                         ir_node   *v_irn = plist_element_get_value(v_el);
864                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
865
866                         /* check, if v is a potential killer of u */
867                         if (is_potential_killer(rss, v, u)) {
868                                 if (! plist_has_value(u->pkiller_list, v_irn))
869                                         plist_insert_back(u->pkiller_list, v_irn);
870                                 if (! plist_has_value(v->kill_value_list, u_irn))
871                                         plist_insert_back(v->kill_value_list, u_irn);
872                                 DBG((rss->dbg, LEVEL_2, "\t\tpotential killer %+F\n", v_irn));
873                         }
874                 }
875
876                 u->killer = _sink;
877         }
878
879         if (rss->opts->dump_flags & RSS_DUMP_PKG)
880                 debug_vcg_dump_pkg(rss, NULL, 0);
881 }
882
883 /**
884  * Build set of killing edges (from values to their potential killers)
885  */
886 static void build_kill_edges(rss_t *rss, pset *epk) {
887         plist_element_t *el, *k_el;
888
889         foreach_plist(rss->nodes, el) {
890                 ir_node    *irn  = plist_element_get_value(el);
891                 rss_irn_t *rirn = get_rss_irn(rss, irn);
892
893                 DBG((rss->dbg, LEVEL_3, "\t\tbuilding kill edges for %+F:\n", irn));
894
895                 foreach_plist(rirn->pkiller_list, k_el) {
896                         ir_node    *pkiller = plist_element_get_value(k_el);
897                         rss_edge_t *ke      = obstack_alloc(phase_obst(&rss->ph), sizeof(*ke));
898
899                         ke->src  = irn;
900                         ke->tgt  = pkiller;
901                         ke->next = NULL;
902
903                         DBG((rss->dbg, LEVEL_3, "\t\t\ttarget %+F\n", pkiller));
904
905                         pset_insert(epk, ke, HASH_RSS_EDGE(ke));
906                 }
907         }
908 }
909
910 /* print the given cbc for debugging purpose */
911 static void debug_print_cbc(firm_dbg_module_t *mod, cbc_t *cbc) {
912         ir_node    *n;
913         rss_edge_t *ke;
914
915         DBG((mod, LEVEL_3, "\t\tS = set of parents:\n"));
916         foreach_nodeset(cbc->parents, n) {
917                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
918         }
919         DBG((mod, LEVEL_3, "\t\tT = set of children:\n"));
920         foreach_nodeset(cbc->children, n) {
921                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
922         }
923         DBG((mod, LEVEL_3, "\t\tE = Edges from producers to consumers\n"));
924         foreach_pset(cbc->kill_edges, ke) {
925                 DBG((mod, LEVEL_3, "\t\t\t%+F -> %+F\n", ke->src, ke->tgt));
926         }
927 }
928
929 /**
930  * Construct the bipartite decomposition.
931  * Sid-Ahmed-Ali Touati, Phd Thesis
932  * Register Pressure in Instruction Level Parallelism, p. 71
933  */
934 static void compute_bipartite_decomposition(rss_t *rss) {
935         pset *epk    = new_pset(cmp_rss_edges, 10);
936         int  cur_num = 0;
937
938         plist_element_t *el;
939
940         DBG((rss->dbg, LEVEL_1, "\tcomputing bipartite decomposition:\n"));
941
942         build_kill_edges(rss, epk);
943
944         foreach_plist(rss->nodes, el) {
945                 ir_node   *u_irn   = plist_element_get_value(el);
946                 rss_irn_t *u       = get_rss_irn(rss, u_irn);
947                 int       p_change = 1;
948                 int       c_change = 1;
949
950                 cbc_t           *cbc;
951                 plist_element_t *el2;
952                 rss_edge_t      *k_edge;
953                 rss_edge_t      *kedge_root = NULL;
954                 ir_node         *t_irn, *s_irn;
955
956                 if (u->visited || u_irn == _sink)
957                         continue;
958
959                 DBG((rss->dbg, LEVEL_2, "\t\t%+F choosen:\n", u_irn));
960
961                 cbc     = obstack_alloc(phase_obst(&rss->ph), sizeof(*cbc));
962                 cbc->nr = cur_num++;
963
964                 /* initialize S_cb */
965                 cbc->parents = new_nodeset(5);
966                 nodeset_insert(cbc->parents, u_irn);
967                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (init)\n", u_irn));
968
969                 /* E_cb = empty */
970                 cbc->kill_edges = new_pset(cmp_rss_edges, 5);
971
972                 /* each parent gets killed by at least one value from children */
973
974                 /* T_cb = PK_successors(u) */
975                 cbc->children = new_nodeset(5);
976                 foreach_plist(u->pkiller_list, el2) {
977                         nodeset_insert(cbc->children, plist_element_get_value(el2));
978                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (init)\n", plist_element_get_value(el2)));
979                 }
980
981                 /*
982                         Now: insert the parents of all children into the parent set
983                         and insert the children of all parents into the children set
984                         until the sets don't change any more
985                 */
986                 while (p_change || c_change) {
987                         p_change = c_change = 0;
988
989                         /* accumulate parents */
990                         foreach_nodeset(cbc->children, t_irn) {
991                                 rss_irn_t *t = get_rss_irn(rss, t_irn);
992                                 plist_element_t *kvl_el;
993
994                                 foreach_pset(epk, k_edge) {
995                                         ir_node *val = k_edge->src;
996
997                                         if (k_edge->src == t_irn && ! nodeset_find(cbc->parents, k_edge->src)) {
998                                                 nodeset_insert(cbc->parents, val);
999                                                 p_change = 1;
1000                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents\n", val));
1001                                         }
1002                                 }
1003                         }
1004
1005                         /* accumulate children */
1006                         foreach_nodeset(cbc->parents, s_irn) {
1007                                 rss_irn_t *s = get_rss_irn(rss, s_irn);
1008                                 plist_element_t *pkl_el;
1009
1010                                 foreach_plist(s->pkiller_list, pkl_el) {
1011                                         ir_node *val = plist_element_get_value(pkl_el);
1012
1013                                         if (! nodeset_find(cbc->children, val)) {
1014                                                 nodeset_insert(cbc->children, val);
1015                                                 c_change = 1;
1016                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children\n", val));
1017                                         }
1018                                 }
1019                         }
1020                 }
1021
1022                 /* mark all parent values as visited */
1023                 foreach_nodeset(cbc->parents, s_irn) {
1024                         rss_irn_t *s = get_rss_irn(rss, s_irn);
1025                         s->visited = 1;
1026                         /* assure bipartite property */
1027                         if (nodeset_find(cbc->children, s_irn)) {
1028                                 nodeset_remove(cbc->children, s_irn);
1029                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F removed from to children\n", s_irn));
1030                         }
1031                 }
1032
1033                 /* update edges */
1034                 foreach_pset(epk, k_edge) {
1035                         if (nodeset_find(cbc->parents, k_edge->src) && nodeset_find(cbc->children, k_edge->tgt)) {
1036                                 pset_insert(cbc->kill_edges, k_edge, HASH_RSS_EDGE(k_edge));
1037                                 /*
1038                                         Link all k_edges which are about to be removed together.
1039                                         Beware: pset_remove kills the iterator.
1040                                 */
1041                                 k_edge->next = kedge_root;
1042                                 kedge_root   = k_edge;
1043                         }
1044                 }
1045
1046                 /* remove all linked k_edges */
1047                 for (; kedge_root; kedge_root = kedge_root->next) {
1048                         pset_remove(epk, kedge_root, HASH_RSS_EDGE(kedge_root));
1049                 }
1050
1051                 /* add the connected bipartite component */
1052                 if (nodeset_count(cbc->parents) > 0 && nodeset_count(cbc->children) > 0 && pset_count(cbc->kill_edges) > 0)
1053                         pset_insert(rss->cbc_set, cbc, (unsigned)cbc->nr);
1054                 DBG((rss->dbg, LEVEL_2, "\tbipartite component %d inserted:\n", cbc->nr));
1055                 DEBUG_ONLY(
1056                         if (firm_dbg_get_mask(rss->dbg) & LEVEL_2)
1057                                 debug_print_cbc(rss->dbg, cbc);
1058                 );
1059         }
1060
1061         if (rss->opts->dump_flags & RSS_DUMP_CBC)
1062                 debug_vcg_dump_bipartite(rss);
1063
1064         del_pset(epk);
1065 }
1066
1067 /**
1068  * Select the child with the maximum cost.
1069  */
1070 static child_t *select_child_max_cost(rss_t *rss, nodeset *x, nodeset *y, child_t *t, cbc_t *cbc) {
1071         ir_node *child;
1072         float   max_cost = -1.0f;
1073
1074         DBG((rss->dbg, LEVEL_2, "\t\tcomputing children costs:\n"));
1075
1076         foreach_nodeset(cbc->children, child) {
1077                 rss_irn_t  *r_child             = get_rss_irn(rss, child);
1078                 int         num_unkilled_parents = 0;
1079                 int         num_descendants;
1080                 rss_edge_t *k_edge;
1081                 float       cost;
1082
1083                 /* get the number of unkilled parents */
1084                 foreach_pset(cbc->kill_edges, k_edge) {
1085                         if (k_edge->tgt == child && nodeset_find(x, k_edge->src))
1086                                 ++num_unkilled_parents;
1087                 }
1088
1089                 cost = (float)num_unkilled_parents;
1090
1091                 num_descendants = plist_count(r_child->descendant_list) + nodeset_count(y);
1092
1093                 if (num_descendants > 0)
1094                         cost /= (float)num_descendants;
1095
1096                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F, #desc %d, cost %.3f\n", child, num_descendants, cost));
1097
1098                 if (cost > max_cost) {
1099                         t->irn   = child;
1100                         t->cost  = cost;
1101                         max_cost = cost;
1102                 }
1103         }
1104
1105         return t;
1106 }
1107
1108 /**
1109  * Remove all parents from x which are killed by t_irn.
1110  */
1111 static void remove_covered_parents(rss_t *rss, nodeset *x, ir_node *t_irn, cbc_t *cbc) {
1112         rss_irn_t  *t = get_rss_irn(rss, t_irn);
1113         rss_edge_t *k_edge;
1114
1115         DBG((rss->dbg, LEVEL_2, "\t\tremoving parents covered by %+F:\n", t_irn));
1116
1117         foreach_pset(cbc->kill_edges, k_edge) {
1118                 if (k_edge->tgt == t_irn && nodeset_find(x, k_edge->src)) {
1119                         nodeset_remove(x, k_edge->src);
1120                         plist_insert_back(t->parent_list, k_edge->src);
1121                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", k_edge->src));
1122                 }
1123         }
1124 }
1125
1126 static void update_cumulated_descendent_values(rss_t *rss, nodeset *y, ir_node *t_irn) {
1127         rss_irn_t *t = get_rss_irn(rss, t_irn);
1128         plist_element_t *el;
1129
1130         DBG((rss->dbg, LEVEL_2, "\t\tupdating cumulated descendant value of %+F:\n", t_irn));
1131
1132         foreach_plist(t->descendant_list, el) {
1133                 nodeset_insert(y, plist_element_get_value(el));
1134                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", plist_element_get_value(el)));
1135         }
1136 }
1137
1138 /**
1139  * Greedy-k: a heuristics for the MMA problem
1140  */
1141 static void compute_killing_function(rss_t *rss) {
1142         cbc_t *cbc;
1143         struct obstack obst;
1144
1145         obstack_init(&obst);
1146
1147         rss->cbc_set = pset_new_ptr(5);
1148         compute_bipartite_decomposition(rss);
1149
1150         /* for all bipartite components do: */
1151         foreach_pset(rss->cbc_set, cbc) {
1152                 ir_node *p;
1153                 nodeset *x       = new_nodeset(10);
1154                 nodeset *y       = new_nodeset(10);
1155                 child_t **sks    = NEW_ARR_F(child_t *, 20);
1156                 int     cur_len  = 0;
1157                 int     cur_size = 20;
1158                 int     i;
1159
1160                 DBG((rss->dbg, LEVEL_1, "\tcomputing SKS for cbc %d:\n", cbc->nr));
1161                 DBG((rss->dbg, LEVEL_2, "\t\tinitializing parents X:\n"));
1162
1163                 /* X = S_cb (all parents are initially uncovered) */
1164                 foreach_nodeset(cbc->parents, p) {
1165                         nodeset_insert(x, p);
1166                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", p));
1167                 }
1168
1169                 /* while X not empty */
1170                 while (nodeset_count(x) > 0) {
1171                         child_t *t = obstack_alloc(&obst, sizeof(*t));
1172                         memset(t, 0, sizeof(*t));
1173
1174                         t = select_child_max_cost(rss, x, y, t, cbc);
1175
1176                         if (cur_len >= cur_size) {
1177                                 ARR_EXTO(child_t *, sks, cur_size * 2);
1178                                 cur_size *= 2;
1179                         }
1180
1181                         DBG((rss->dbg, LEVEL_2, "\t\tinsert child %+F (%.3f) into SKS at pos %d\n", t->irn, t->cost, cur_len));
1182
1183                         sks[cur_len++] = t;
1184                         remove_covered_parents(rss, x, t->irn, cbc);
1185                         update_cumulated_descendent_values(rss, y, t->irn);
1186                 }
1187
1188                 ARR_SHRINKLEN(sks, cur_len);
1189
1190                 /* sort SKS in increasing cost order */
1191                 qsort(sks, cur_len, sizeof(sks[0]), cmp_child_costs);
1192
1193                 DBG((rss->dbg, LEVEL_2, "\tprocessing SKS for cbc %d:\n", cbc->nr));
1194
1195                 /* build killing function */
1196                 for (i = cur_len - 1; i >= 0; --i) { /* loop over sks in decreasing cost order */
1197                         child_t         *t = sks[i];
1198                         rss_irn_t       *rt = get_rss_irn(rss, t->irn);
1199                         plist_element_t *p_el;
1200
1201                         DBG((rss->dbg, LEVEL_3, "\t\t\tkiller %+F (%.3f):\n", t->irn, t->cost));
1202
1203                         /* kill all unkilled parents of t */
1204                         foreach_plist(rt->parent_list, p_el) {
1205                                 ir_node    *par = plist_element_get_value(p_el);
1206                                 rss_irn_t *rpar = get_rss_irn(rss, par);
1207
1208                                 if (is_Sink(rpar->killer)) {
1209                                         rpar->killer = t->irn;
1210                                         DBG((rss->dbg, LEVEL_2, "\t\tkill %+F\n", rpar->irn));
1211                                 }
1212                                 else {
1213                                         DBG((rss->dbg, LEVEL_3, "\t\t\tkeeping %+F as killer for %+F\n", rpar->killer, rpar->irn));
1214                                 }
1215                         }
1216                 }
1217
1218                 del_nodeset(x);
1219                 del_nodeset(y);
1220                 DEL_ARR_F(sks);
1221         }
1222
1223         if (rss->opts->dump_flags & RSS_DUMP_KILL)
1224                 debug_vcg_dump_kill(rss);
1225
1226         del_pset(rss->cbc_set);
1227         obstack_free(&obst, NULL);
1228 }
1229
1230 /**
1231  * Adds the edge src -> tgt to the dvg. Checks if reverse edge is already there (asserts).
1232  */
1233 static INLINE void add_dvg_edge(rss_t *rss, dvg_t *dvg, ir_node *src, ir_node *tgt, int have_source) {
1234         rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1235         rss_edge_t key;
1236
1237         if (! have_source)
1238                 nodeset_insert(dvg->nodes, src);
1239
1240         nodeset_insert(dvg->nodes, tgt);
1241
1242         /* add an edge to our killer */
1243         dvg_edge->src  = src;
1244         dvg_edge->tgt  = tgt;
1245         dvg_edge->next = NULL;
1246
1247         key.src = tgt;
1248         key.tgt = src;
1249         assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1250
1251         /* add the edge to the DVG */
1252         DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", src, tgt));
1253         pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1254 }
1255
1256 /**
1257  * Computes the disjoint value DAG (DVG).
1258  * BEWARE: It is not made explicitly clear in the Touati paper,
1259  *         but the DVG is meant to be build from the KILLING DAG
1260  */
1261 static void compute_dvg(rss_t *rss, dvg_t *dvg) {
1262         plist_element_t *el;
1263
1264         DBG((rss->dbg, LEVEL_1, "\tcomputing DVG:\n"));
1265
1266         foreach_plist(rss->nodes, el) {
1267                 ir_node    *u_irn    = plist_element_get_value(el);
1268                 rss_irn_t  *u        = get_rss_irn(rss, u_irn);
1269                 rss_irn_t  *u_killer = get_rss_irn(rss, u->killer);
1270                 int        i;
1271
1272                 /* TODO: omit nodes only having sink as pkiller and killing no one */
1273
1274                 /* add an edge to killer */
1275                 add_dvg_edge(rss, dvg, u_irn, u->killer, 0);
1276
1277                 if (is_Sink(u->killer))
1278                         continue;
1279
1280                 /* We add an edge to every killer, from where we could be reached. */
1281                 for (i = ARR_LEN_SAFE(u_killer->descendants) - 1; i >= 0; --i) {
1282                         add_dvg_edge(rss, dvg, u_irn, u_killer->descendants[i], 1);
1283                 }
1284
1285 #if 0
1286
1287                 foreach_plist(rss->nodes, el2) {
1288                         ir_node *v_irn = plist_element_get_value(el2);
1289
1290                         /*
1291                                 There is an edge (u, v) in the DVG iff v is a descendant of the killer(u).
1292                         */
1293                         if (BSEARCH_IRN_ARR(v_irn, u_kill->descendants)) {
1294                                 rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1295                                 rss_edge_t key;
1296
1297                                 /* insert the user into the DVG and append it to the user list of u */
1298                                 nodeset_insert(dvg->nodes, v_irn);
1299                                 if (! plist_has_value(u->dvg_user_list, v_irn))
1300                                         plist_insert_back(u->dvg_user_list, v_irn);
1301
1302                                 dvg_edge->src  = u_irn;
1303                                 dvg_edge->tgt  = v_irn;
1304                                 dvg_edge->next = NULL;
1305
1306                                 key.src = v_irn;
1307                                 key.tgt = u_irn;
1308
1309                                 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1310
1311                                 /* add the edge to the DVG */
1312                                 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", u_irn, v_irn));
1313                                 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1314                         }
1315                 }
1316 #endif /* if 0 */
1317         }
1318
1319         if (rss->opts->dump_flags & RSS_DUMP_DVG)
1320                 debug_vcg_dump_dvg(rss, dvg);
1321 }
1322
1323 /**
1324  * Updates the dvg structure when a serialization edge from src -> tgt is added.
1325  */
1326 static void update_dvg(rss_t *rss, dvg_t *dvg, rss_irn_t *src, rss_irn_t *tgt) {
1327         int i;
1328
1329         add_dvg_edge(rss, dvg, tgt->irn, src->irn, 1);
1330
1331         for (i = ARR_LEN_SAFE(src->descendants) - 1; i >= 0; --i) {
1332                 add_dvg_edge(rss, dvg, tgt->irn, src->descendants[i], 1);
1333         }
1334 }
1335
1336 #if 0
1337 /**
1338  * Accumulate all descendants for root into list.
1339  */
1340 static void accumulate_dvg_descendant_values(rss_t *rss, rss_irn_t *root, plist_t *list) {
1341         if (plist_count(root->dvg_user_list) > 0) {
1342                 plist_element_t *el;
1343
1344                 foreach_plist(root->dvg_user_list, el) {
1345                         ir_node   *v_irn = plist_element_get_value(el);
1346                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
1347
1348                         /* add v as descendant */
1349                         if (! plist_has_value(list, v_irn)) {
1350                                 plist_insert_back(list, v_irn);
1351                                 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG descendant %+F\n", v_irn));
1352                         }
1353
1354                         /* accumulate v's descendants */
1355                         accumulate_dvg_descendant_values(rss, v, list);
1356                 }
1357         }
1358 }
1359
1360 /**
1361  * Builds the list of potential killers for each node
1362  * in the given DVG.
1363  * Needs the descendant list for all user as sorted array.
1364  */
1365 static void build_dvg_pkiller_list(rss_t *rss, dvg_t *dvg) {
1366         ir_node *irn;
1367
1368         foreach_nodeset(dvg->nodes, irn) {
1369                 rss_irn_t       *node = get_rss_irn(rss, irn);
1370                 plist_element_t *el, *el2;
1371
1372                 DBG((rss->dbg, DEBUG_DVG, "\t\tbuilding pkiller list for %+F\n", irn));
1373
1374                 /* check each user */
1375                 foreach_plist(node->dvg_user_list, el) {
1376                         ir_node *u_irn = plist_element_get_value(el);
1377
1378                         /* is the current user u_irn not a descendant of any other user -> pkiller */
1379                         foreach_plist(node->dvg_user_list, el2) {
1380                                 ir_node   *v_irn = plist_element_get_value(el2);
1381                                 rss_irn_t *v     = get_rss_irn(rss, v_irn);
1382
1383                                 if (el != el2                             &&
1384                                         ! BSEARCH_IRN_ARR(u_irn, v->dvg_desc) &&
1385                                         ! plist_has_value(node->dvg_pkiller_list, u_irn))
1386                                 {
1387                                         plist_insert_back(node->dvg_pkiller_list, u_irn);
1388                                         DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG pkiller %+F\n", u_irn));
1389                                 }
1390                         }
1391                 }
1392
1393                 node->dvg_pkiller = build_sorted_array_from_list(node->dvg_pkiller_list, phase_obst(&rss->ph));
1394         }
1395
1396         DEBUG_ONLY(
1397                 if (firm_dbg_get_mask(rss->dbg) & DEBUG_DVG)
1398                         debug_vcg_dump_dvg_pkiller(rss, dvg);
1399         );
1400 }
1401
1402 #endif /* if 0 */
1403
1404 /**
1405  * Compute the maximal antichain of the current DVG.
1406  * This is a reimplementation of the MAXIMAL_ANTI_CHAIN function
1407  * from the DDG library 1.1 (DAG.cpp).
1408  */
1409 static nodeset *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration) {
1410         int         n               = nodeset_count(dvg->nodes);
1411         int         *assignment     = alloca(n * sizeof(assignment[0]));
1412         int         *assignment_rev = alloca(n * sizeof(assignment_rev[0]));
1413         int         *idx_map        = alloca(n * sizeof(idx_map[0]));
1414         hungarian_problem_t *bp;
1415         nodeset     *values, *temp;
1416         ir_node     *u_irn;
1417         int         i, j, cost, cur_chain, res;
1418         rss_edge_t  *dvg_edge;
1419
1420 #define MAP_IDX(irn) bsearch_for_index(get_irn_idx(irn), idx_map,  n,  1)
1421
1422         if (pset_count(dvg->edges) == 0)
1423                 return NULL;
1424
1425         bp = hungarian_new(n, n, 1, HUNGARIAN_MATCH_NORMAL);
1426
1427         /*
1428                 At first, we build an index map for the nodes in the DVG,
1429                 because we cannot use the irn idx therefore as the resulting
1430                 bipartite data structure would have around 1.2GB.
1431                 So we limit the size to the number of nodes we have in the DVG
1432                 and build a sorted index map for their irn indices.
1433         */
1434         i = 0;
1435         foreach_nodeset(dvg->nodes, u_irn) {
1436                 idx_map[i++] = get_irn_idx(u_irn);
1437         }
1438         qsort(idx_map, n, sizeof(idx_map[0]), cmp_int);
1439
1440         foreach_pset(dvg->edges, dvg_edge) {
1441                 int idx_u = MAP_IDX(dvg_edge->src);
1442                 int idx_v = MAP_IDX(dvg_edge->tgt);
1443
1444                 /* add the entry to the bipartite data structure */
1445                 hungarian_add(bp, idx_u, idx_v, 1);
1446                 DBG((rss->dbg, LEVEL_3, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1447                         idx_u, dvg_edge->src, idx_v, dvg_edge->tgt));
1448         }
1449 #if 0
1450         /*
1451                 Add a bipartite entry for each pair of nodes (u, v), where exists a
1452                 path in the DVG from u to v, ie. connect all descendants(v) to v.
1453                 desc_dvg(v) = accumulated descendants of all z in dvg_user_list(v)
1454         */
1455         foreach_nodeset(dvg->nodes, u_irn) {
1456                 rss_irn_t *u        = get_rss_irn(rss, u_irn);
1457                 int       idx_u_irn = MAP_IDX(u_irn);
1458
1459                 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG descendants of %+F:\n", u_irn));
1460
1461                 //plist_clear(u->dvg_desc_list);
1462                 //accumulate_dvg_descendant_values(rss, u, u->dvg_desc_list);
1463
1464                 /*
1465                         FIXME: The array is build on the phase obstack and we cannot free the data.
1466                         So the array get as many times allocated as this function is called.
1467                 */
1468
1469                 /* build the sorted array for faster searches */
1470                 //u->dvg_desc = build_sorted_array_from_list(u->dvg_desc_list, phase_obst(&rss->ph));
1471
1472                 DBG((rss->dbg, DEBUG_MAX_AC, "\t\tadding bipartite entries of %+F:\n", u_irn));
1473
1474                 /* add the bipartite entries for all descendants */
1475                 for (i = ARR_LEN_SAFE(u->dvg_desc) - 1; i >= 0; --i) {
1476                         rss_irn_t *desc    = get_rss_irn(rss, u->dvg_desc[i]);
1477                         int       idx_desc = MAP_IDX(u->dvg_desc[i]);
1478
1479                         /* add the entry to the bipartite data structure */
1480                         hungarian_add(bp, idx_u_irn, idx_desc, 1);
1481                         DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1482                                 idx_u_irn, u_irn, idx_desc, u->dvg_desc[i]));
1483
1484                         need_matching = 1;
1485                 }
1486         }
1487 #endif
1488
1489         /* We want maximum cardinality matching */
1490         hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);
1491
1492 #if 0
1493         DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG pkiller:\n"));
1494         /* beware: the following function needs the dvg_desc array */
1495         build_dvg_pkiller_list(rss, dvg);
1496 #endif
1497
1498         DBG((rss->dbg, LEVEL_2, "\t\tcomputing bipartite matching\n"));
1499         /*
1500                 The maximum cardinality bipartite matching gives us the minimal
1501                 chain partition, which corresponds to the maximum anti chains.
1502         */
1503         res = hungarian_solve(bp, assignment, &cost, 1);
1504         assert(res == 0 && "Bipartite matching failed!");
1505
1506         hungarian_free(bp);
1507         memset(assignment_rev, -1, n * sizeof(assignment_rev[0]));
1508
1509         /* build the reverse assignment, ie. foreach i -> j, add j -> i */
1510         for (i = 0; i < n; ++i) {
1511                 if (assignment[i] >= 0) {
1512                         int j = assignment[i];
1513                         assignment_rev[j] = i;
1514                 }
1515         }
1516
1517         DBG((rss->dbg, LEVEL_2, "\t\tgot assignment with cost %d\n", cost));
1518         DBG((rss->dbg, LEVEL_3, "\t\t\tassignment   ---   reverse assignment\n", cost));
1519         for (i = 0; i < n; ++i) {
1520                 DBG((rss->dbg, LEVEL_3, "\t\t\t%3d -> %3d         %3d -> %3d\n", i, assignment[i], i, assignment_rev[i]));
1521         }
1522
1523         values    = new_nodeset(10);
1524         cur_chain = 0;
1525         /* Construction of the minimal chain partition */
1526         for (j = 0; j < n; ++j) {
1527                 /* check nodes, which did not occur as target */
1528                 if (assignment_rev[j] == -1) {
1529                         int       xj      = idx_map[j];
1530                         ir_node   *xj_irn = get_idx_irn(rss->irg, xj);
1531                         rss_irn_t *xj_rss = get_rss_irn(rss, xj_irn);
1532                         chain_t   *c      = obstack_alloc(phase_obst(&rss->ph), sizeof(*c));
1533                         int       source;
1534
1535                         /* there was no source for j -> we have a source of a new chain */
1536                         nodeset_insert(values, xj_irn);
1537
1538                         c->elements = plist_obstack_new(phase_obst(&rss->ph));
1539                         c->nr       = cur_chain++;
1540                         plist_insert_back(c->elements, xj_irn);
1541
1542                         xj_rss->chain = c;
1543
1544                         DBG((rss->dbg, LEVEL_2, "\t\tstarting chain %d:\n", c->nr));
1545                         DBG((rss->dbg, LEVEL_2, "\t\t\t%+F (%d)", xj_irn, j));
1546
1547                         /* follow chain, having j as source */
1548                         source = j;
1549                         while (assignment[source] >= 0) {
1550                                 int       target  = assignment[source];
1551                                 int       irn_idx = idx_map[target];
1552                                 ir_node   *irn    = get_idx_irn(rss->irg, irn_idx);
1553                                 rss_irn_t *node   = get_rss_irn(rss, irn);
1554
1555                                 plist_insert_back(c->elements, irn);
1556                                 node->chain = c;
1557
1558                                 DB((rss->dbg, LEVEL_2, " -> %+F (%d)", irn, target));
1559
1560                                 /* new source = last target */
1561                                 source = target;
1562                         }
1563
1564                         DB((rss->dbg, LEVEL_2, "\n"));
1565                 }
1566         }
1567
1568         /*
1569                 Computing the maximal antichain: Select an element from each
1570                 chain such, such it is parallel with the others.
1571         */
1572         DBG((rss->dbg, LEVEL_1, "\t\tcomputing set of saturation values (MAX AC)\n"));
1573         DBG((rss->dbg, LEVEL_3, "\t\tstarting with:\n"));
1574
1575         DEBUG_ONLY(
1576                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_3)
1577                         dump_nodeset(values, "\t\t\t");
1578         )
1579
1580         temp = NULL;
1581         do {
1582                 /*
1583                         We need an explicit array for the values as
1584                         we cannot iterate multiple times over the same
1585                         set at the same time. :-(((((
1586                 */
1587                 int     n         = nodeset_count(values);
1588                 int     i         = 0;
1589                 ir_node **val_arr = NEW_ARR_F(ir_node *, n);
1590
1591                 foreach_nodeset(values, u_irn)
1592                         val_arr[i++] = u_irn;
1593
1594                 if (temp)
1595                         del_nodeset(temp);
1596
1597                 temp = new_nodeset(10);
1598
1599                 /* Select all nodes from current value set, having another node in the set as descendant. */
1600                 for (i = 0; i < n; ++i) {
1601                         rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1602
1603                         for (j = 0; j < n; ++j) {
1604                                 if (i != j) {
1605                                         rss_edge_t key;
1606
1607                                         key.src = val_arr[i];
1608                                         key.tgt = val_arr[j];
1609
1610                                         if (pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1611                                                 /* v[j] is descendant of u -> remove u and break */
1612                                                 nodeset_insert(temp, u->irn);
1613                                                 nodeset_remove(values, u->irn);
1614
1615                                                 DBG((rss->dbg, LEVEL_3, "\t\t\tremoving %+F from values, adding it to temp\n", u->irn));
1616
1617                                                 break;
1618                                         }
1619                                 }
1620                         }
1621                 }
1622
1623                 /* Try to insert the chain predecessor of all selected u's */
1624                 foreach_nodeset(temp, u_irn) {
1625                         rss_irn_t       *u  = get_rss_irn(rss, u_irn);
1626                         chain_t         *c  = u->chain;
1627                         plist_element_t *el = plist_find_value(c->elements, u_irn);
1628
1629                         assert(el && "Missing element in chain!");
1630
1631                         /* If u has predecessor in chain: insert the predecessor */
1632                         if (el = plist_element_get_prev(el)) {
1633                                 nodeset_insert(values, plist_element_get_value(el));
1634                                 DBG((rss->dbg, LEVEL_3, "\t\t\tadding %+F to values\n", plist_element_get_value(el)));
1635                         }
1636                 }
1637
1638
1639                 DEL_ARR_F(val_arr);
1640         } while (nodeset_count(temp) > 0);
1641
1642         DBG((rss->dbg, LEVEL_2, "\t\tfinal set:\n"));
1643         DEBUG_ONLY(
1644                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2) {
1645                         dump_nodeset(values, "\t\t\t");
1646                 }
1647         );
1648
1649         if (rss->opts->dump_flags & RSS_DUMP_MAXAC)
1650                 debug_vcg_dump_pkg(rss, values, iteration);
1651
1652         del_nodeset(temp);
1653
1654         return values;
1655
1656 #undef MAP_IDX
1657 }
1658
1659 /**
1660  * Computes the best serialization between two nodes of sat_vals.
1661  */
1662 static serialization_t *compute_best_admissible_serialization(rss_t *rss, nodeset *sat_vals, serialization_t *ser, int num_regs) {
1663         int        n                    = nodeset_count(sat_vals);
1664         int        n_idx                = ARR_LEN_SAFE(rss->idx_map);
1665         int        i                    = 0;
1666         ir_node    **val_arr            = alloca(n * sizeof(val_arr[0]));
1667         bitset_t   *bs_sv               = bitset_alloca(n_idx);
1668         bitset_t   *bs_vdesc            = bitset_alloca(n_idx);
1669         bitset_t   *bs_tmp              = bitset_alloca(n_idx);
1670         bitset_t   *bs_ukilldesc        = bitset_alloca(n_idx);
1671         int        best_benefit         = INT_MAX;
1672         int        best_omega2          = INT_MAX;
1673         int        best_benefit_omega20 = INT_MAX;
1674         int        has_omega1           = 0;
1675         int        j, k;
1676         ir_node    *irn;
1677         rss_edge_t min_benefit_edge;
1678         rss_edge_t min_omega20_edge;
1679         rss_irn_t  *ser_u_omega1, *ser_v_omega1;
1680         rss_irn_t  *ser_u_omega20, *ser_v_omega20;
1681
1682         DBG((rss->dbg, LEVEL_1, "\tcomputing admissible serializations:\n"));
1683
1684         /*
1685                 We need an explicit array for the values as
1686                 we cannot iterate multiple times over the same
1687                 set at the same time. :-(((((
1688         */
1689
1690         foreach_nodeset(sat_vals, irn) {
1691                 val_arr[i++] = irn;
1692                 bitset_set(bs_sv, BLOCK_IDX_MAP(rss, irn));
1693         }
1694
1695         /*
1696                 We build all admissible serializations and remember the best found so far.
1697                 For u in sat_vals:
1698                  For v in sat_val:
1699                    if v in pkiller(u): add edge from v to all other pkiller(u)
1700                    else: for all vv in pkiller(u): add edge from v to vv if there exists no path from vv to v
1701         */
1702
1703 /*
1704         A node is unserializable if:
1705         - it has only one killer and this one is Sink
1706     - it kills no other values
1707         In this case there is no serialization which could
1708         reduce the registerpressure
1709 */
1710 #define IS_UNSERIALIZABLE_NODE(rss_node)           \
1711         ((plist_count(rss_node->pkiller_list) == 1) && \
1712         is_Sink(rss_node->killer)                   && \
1713         (plist_count(rss_node->kill_value_list) == 0))
1714
1715         /* for all u in sat_vals */
1716         for (i = 0; i < n; ++i) {
1717                 rss_irn_t       *u       = get_rss_irn(rss, val_arr[i]);
1718                 int             u_height = get_irn_height(rss->h, val_arr[i]);
1719                 plist_element_t *el;
1720
1721                 /* ignore nodes where serialization does not help */
1722                 if (IS_UNSERIALIZABLE_NODE(u)) {
1723                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", u->irn));
1724                         continue;
1725                 }
1726
1727                 /* accumulate all descendants of all pkiller(u) */
1728                 bitset_clear_all(bs_ukilldesc);
1729                 foreach_plist(u->pkiller_list, el) {
1730                         ir_node   *irn  = plist_element_get_value(el);
1731                         rss_irn_t *node = get_rss_irn(rss, irn);
1732
1733                         if (! is_Sink(irn))
1734                                 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, irn));
1735                         else
1736                                 continue;
1737
1738                         for (k = ARR_LEN_SAFE(node->descendants) - 1; k >= 0; --k) {
1739                                 if (! is_Sink(node->descendants[k]))
1740                                         bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, node->descendants[k]));
1741                         }
1742                 }
1743
1744                 /* for all v in sat_vals */
1745                 for (j = 0; j < n; ++j) {
1746                         ir_node   *v_irn   = val_arr[j];
1747                         rss_irn_t *v       = get_rss_irn(rss, v_irn);
1748                         unsigned  v_height = get_irn_height(rss->h, v_irn);
1749                         int       omega1, omega2, is_pkiller;
1750
1751                         /* v cannot be serialized with itself
1752                          * ignore nodes where serialization does not help */
1753                         if (i == j || IS_UNSERIALIZABLE_NODE(v)) {
1754                                 if (i != j)
1755                                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", v->irn));
1756                                 continue;
1757                         }
1758
1759                         /* get descendants of v */
1760                         bitset_clear_all(bs_vdesc);
1761                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v_irn));
1762                         for (k = ARR_LEN_SAFE(v->descendants) - 1; k >= 0; --k) {
1763                                 if (! is_Sink(v->descendants[k]))
1764                                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v->descendants[k]));
1765                         }
1766
1767                         /* if v is in pkiller(u) */
1768                         is_pkiller = plist_has_value(u->pkiller_list, v_irn);
1769
1770                         /* for all vv in pkiller(u) */
1771                         foreach_plist(u->pkiller_list, el) {
1772                                 ir_node *vv_irn  = plist_element_get_value(el);
1773                                 int     add_edge;
1774
1775                                 if (is_Sink(vv_irn))
1776                                         continue;
1777
1778                                 if (is_pkiller)
1779                                         add_edge = vv_irn != v_irn && skip_Proj(vv_irn) != skip_Proj(v_irn);
1780                                 else
1781                                         add_edge = ! heights_reachable_in_block(rss->h, skip_Proj(vv_irn), skip_Proj(v_irn));
1782
1783                                 /*
1784                                         As we add an edge from vv -> v, we have to make sure,
1785                                         that there exists no path from v to vv.
1786                                 */
1787
1788                                 if (add_edge) {
1789                                         int vv_height = get_irn_height(rss->h, vv_irn);
1790                                         int mu1, mu2, critical_path_cost;
1791
1792                                         /*
1793                                                 mu1 = | descendants(v) cut sat_vals |
1794                                                 the number of saturating values which cannot
1795                                                 be simultaneously alive with u
1796                                         */
1797                                         bitset_copy(bs_tmp, bs_vdesc);
1798                                         mu1 = bitset_popcnt(bitset_and(bs_tmp, bs_sv));
1799
1800                                         /*
1801                                                 mu2 = | accum_desc_all_pkiller(u) without descendants(v) |
1802                                         */
1803                                         if (is_pkiller) {
1804                                                 bitset_copy(bs_tmp, bs_ukilldesc);
1805                                                 mu2 = bitset_popcnt(bitset_andnot(bs_tmp, bs_vdesc));
1806                                         }
1807                                         else {
1808                                                 mu2 = 0;
1809                                         }
1810
1811                                         /* omega1 = mu1 - mu2 */
1812                                         omega1 = mu1 - mu2;
1813
1814                                         if (omega1 != 0)
1815                                                 has_omega1 = 1;
1816
1817                                         /* omega2 = increase of critical path */
1818                                         critical_path_cost =
1819                                                 v_height                        /* longest path from v to sink */
1820                                                 + rss->max_height - vv_height   /* longest path from source to vv */
1821                                                 + 1;                            /* edge */
1822
1823                                         /*
1824                                                 If critical_path_cost > max_height -> the new edge
1825                                                 would increase the longest critical path by the difference.
1826                                         */
1827                                         omega2 = critical_path_cost > rss->max_height ? critical_path_cost - rss->max_height : 0;
1828
1829                                         /* this keeps track of the edge with the best benefit */
1830                                         if (omega1 >= num_regs - n && omega1 < best_benefit) {
1831                                                 min_benefit_edge.src = v_irn;
1832                                                 min_benefit_edge.tgt = vv_irn;
1833
1834                                                 ser_u_omega1 = u;
1835                                                 ser_v_omega1 = v;
1836
1837                                                 best_benefit    = omega1;
1838                                                 ser->new_killer = is_pkiller;
1839                                         }
1840
1841                                         /* this keeps track of the edge with the best omega1 costs where omega2 == 0 */
1842                                         if (omega2 == 0 && omega1 >= num_regs - n && omega1 < best_benefit_omega20) {
1843                                                 min_omega20_edge.src = v_irn;
1844                                                 min_omega20_edge.tgt = vv_irn;
1845
1846                                                 ser_u_omega20 = u;
1847                                                 ser_v_omega20 = v;
1848
1849                                                 best_benefit_omega20 = omega1;
1850                                                 ser->new_killer      = is_pkiller;
1851                                         }
1852
1853                                         best_omega2 = MIN(best_omega2, omega2);
1854
1855                                         DBG((rss->dbg, LEVEL_2, "\t\tfound %+F -> %+F (w1 %d, w2 %d)\n",
1856                                                 v_irn, vv_irn, omega1, omega2));
1857                                 } /* if add_edge */
1858                         } /* for all vv in pkiller(u) */
1859                 } /* for all v in sat_vals */
1860         } /* for all u in sat_vals */
1861
1862         if (! has_omega1)
1863                 return NULL;
1864
1865         if (best_omega2 == 0) {
1866                 ser->u         = ser_u_omega20;
1867                 ser->v         = ser_v_omega20;
1868                 ser->edge->src = min_omega20_edge.src;
1869                 ser->edge->tgt = min_omega20_edge.tgt;
1870                 ser->omega1    = best_benefit_omega20;
1871                 ser->omega2    = best_omega2;
1872         }
1873         else {
1874                 ser->u         = ser_u_omega1;
1875                 ser->v         = ser_v_omega1;
1876                 ser->edge->src = min_benefit_edge.src;
1877                 ser->edge->tgt = min_benefit_edge.tgt;
1878                 ser->omega1    = best_benefit;
1879                 ser->omega2    = best_omega2;
1880         }
1881
1882         return ser;
1883
1884 #undef IS_UNSERIALIZABLE_NODE
1885 }
1886
1887 /**
1888  * Perform the value serialization heuristic and add all
1889  * computed serialization edges as dependencies to the irg.
1890  */
1891 static void perform_value_serialization_heuristic(rss_t *rss) {
1892         bitset_t *arch_nonign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1893         bitset_t *abi_ign_bs     = bitset_alloca(arch_register_class_n_regs(rss->cls));
1894         int      available_regs, iteration;
1895         dvg_t    dvg;
1896         nodeset  *sat_vals;
1897         pset *ser_set = new_pset(cmp_rss_edges, 20);
1898
1899         /* available_regs = R = |arch_non_ignore_regs cut ~abi_ignore_regs| */
1900         arch_put_non_ignore_regs(rss->arch_env, rss->cls, arch_nonign_bs);
1901         be_abi_put_ignore_regs(rss->abi, rss->cls, abi_ign_bs);
1902         bitset_andnot(arch_nonign_bs, abi_ign_bs);
1903         available_regs = bitset_popcnt(arch_nonign_bs);
1904
1905         DBG((rss->dbg, LEVEL_1, "\n\t#available regs: %d\n\n", available_regs));
1906
1907         /*
1908                 At first we need to compute the disjoint value DAG (DVG = {V, E_dv}).
1909                 V    = set of all nodes we are currently interested in
1910                 E_dv = there is an edge from u to v iff v is a descendant of killer(u), forall u, v in V
1911         */
1912         dvg.nodes = new_nodeset(plist_count(rss->nodes));
1913         dvg.edges = new_pset(cmp_rss_edges, plist_count(rss->nodes) * 5);
1914         compute_dvg(rss, &dvg);
1915
1916         /*
1917                 Then we perform the heuristic serialization algorithm
1918                 on the DVG which gives us all necessary serialization
1919                 edges.
1920         */
1921         DBG((rss->dbg, LEVEL_1, "\tcomputing maximal antichain:\n"));
1922         iteration = 0;
1923         sat_vals  = compute_maximal_antichain(rss, &dvg, iteration++);
1924         while (sat_vals && (nodeset_count(sat_vals) > available_regs)) {
1925                 serialization_t *ser, best_ser;
1926                 rss_edge_t      *edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*edge));
1927                 rss_irn_t       *tgt, *src;
1928                 ir_node         *dep_src, *dep_tgt;
1929
1930                 best_ser.edge = edge;
1931                 ser = compute_best_admissible_serialization(rss, sat_vals, &best_ser, available_regs);
1932
1933                 DBG((rss->dbg, LEVEL_1, "\tcurrent register saturation %d, target %d\n", nodeset_count(sat_vals), available_regs));
1934
1935                 if (! ser) {
1936                         DBG((rss->dbg, LEVEL_1, "\tno RS improving serialization found, breaking at iteration %d\n", iteration));
1937                         break;
1938                 }
1939
1940                 /* Insert the serialization as dependency edge into the irg. */
1941                 DBG((rss->dbg, LEVEL_1, "\tserializing %+F -> %+F with edge %+F -> %+F and cost %d, %d\n",
1942                         ser->u->irn, ser->v->irn, ser->edge->src, ser->edge->tgt, ser->omega1, ser->omega2));
1943
1944                 if (pset_find(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge)))
1945                         ir_printf("WARNING: serialization %+F -> %+F computed twice!\n", ser->edge->src, ser->edge->tgt);
1946
1947
1948                 pset_insert(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge));
1949
1950                 /* update the dvg */
1951                 update_dvg(rss, &dvg, ser->v, ser->u);
1952                 update_dvg(rss, &dvg, get_rss_irn(rss, ser->edge->src), get_rss_irn(rss, ser->edge->tgt));
1953                 del_nodeset(sat_vals);
1954
1955                 dep_src = skip_Proj(ser->edge->src);
1956                 dep_tgt = ser->edge->tgt;
1957                 add_irn_dep(dep_src, dep_tgt);
1958
1959                 /* Update descendants, consumer and pkillers of the target */
1960                 update_node_info(rss, ser->edge->tgt, ser->edge->src);
1961
1962                 /* TODO: try to find a cheaper way for updating height information */
1963                 rss->max_height = heights_recompute_block(rss->h, rss->block);
1964
1965                 /* Recompute the antichain for next serialization */
1966                 DBG((rss->dbg, LEVEL_1, "\tre-computing maximal antichain:\n"));
1967                 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
1968         }
1969
1970         del_nodeset(dvg.nodes);
1971         del_pset(dvg.edges);
1972 }
1973
1974 /**
1975  * Do initial calculations for a block.
1976  */
1977 static void process_block(ir_node *block, void *env) {
1978         rss_t *rss = env;
1979         int   i, n;
1980         const ir_edge_t *edge;
1981
1982         phase_init(&rss->ph, "rss block preprocessor", rss->irg, PHASE_DEFAULT_GROWTH, init_rss_irn);
1983
1984         DBG((rss->dbg, LEVEL_1, "preprocessing block %+F\n", block));
1985         rss->block = block;
1986
1987         /* build an index map for all nodes in the current block */
1988         i            = 0;
1989         n            = get_irn_n_edges(block);
1990         NEW_ARR_A(int *, rss->idx_map, n);
1991         foreach_out_edge(block, edge) {
1992                 ir_node *irn      = get_edge_src_irn(edge);
1993                 rss->idx_map[i++] = get_irn_idx(irn);
1994         }
1995         qsort(rss->idx_map, n, sizeof(rss->idx_map[0]), cmp_int);
1996         rss->max_height = heights_recompute_block(rss->h, block);
1997
1998         /* loop over all register classes */
1999         for (i = arch_isa_get_n_reg_class(rss->arch_env->isa) - 1; i >= 0; --i) {
2000                 const arch_register_class_t *cls = arch_isa_get_reg_class(rss->arch_env->isa, i);
2001
2002                 rss->cls = cls;
2003                 DBG((rss->dbg, LEVEL_1, "register class %s\n", arch_register_class_name(cls)));
2004
2005                 /* reset the list of interesting nodes */
2006                 plist_clear(rss->nodes);
2007                 plist_insert_back(rss->nodes, _sink);
2008
2009                 /* collect all nodes having a certain register class */
2010                 foreach_out_edge(block, edge) {
2011                         ir_node *irn = get_edge_src_irn(edge);
2012
2013                         /*
2014                                 We skip:
2015                                 - mode_T nodes (the projs are asked)
2016                                 - mode_X nodes (control flow nodes are always scheduled last)
2017                                 - Keeps (they are always immediately scheduled)
2018                                 - Phi (same as Keep)
2019                         */
2020                         if (get_irn_mode(irn) == mode_T || be_is_Keep(irn) || is_Phi(irn))
2021                                 continue;
2022
2023                         /*
2024                                 In case of a proj, we skip
2025                                 - Barrier (they are a Barrier :)
2026                                 - Start
2027                                 - the Proj itself, as it's scheduled always with it's super node
2028                         */
2029                         if (is_Proj(irn)) {
2030                                 ir_node *pred = get_Proj_pred(irn);
2031                                 if (be_is_Barrier(pred) || is_Start(pred))
2032                                         continue;
2033                         }
2034
2035                         if (! arch_irn_is(rss->arch_env, irn, ignore) && arch_get_irn_reg_class(rss->arch_env, irn, -1) == cls) {
2036                                 plist_insert_back(rss->nodes, skip_Proj(irn));
2037
2038                                 /* calculate the descendants and consumer for each node in the block */
2039                                 collect_node_info(rss, skip_Proj(irn));
2040                         }
2041                 }
2042
2043                 /* compute the potential killing set PK(G) */
2044                 compute_pkill_set(rss);
2045
2046                 /* compute the killing function k* */
2047                 compute_killing_function(rss);
2048
2049                 /*
2050                         Compute the heuristic value serialization and
2051                         add the necessary dependencies to the irg.
2052                 */
2053                 perform_value_serialization_heuristic(rss);
2054         }
2055
2056         phase_free(&rss->ph);
2057 }
2058
2059 #ifdef WITH_LIBCORE
2060 /**
2061  * Register the options.
2062  */
2063 void rss_register_options(lc_opt_entry_t *grp) {
2064         static int     run_once = 0;
2065         lc_opt_entry_t *rss_grp;
2066
2067         if (! run_once) {
2068                 run_once = 1;
2069                 rss_grp  = lc_opt_get_grp(grp, "rss");
2070
2071                 lc_opt_add_table(rss_grp, rss_option_table);
2072         }
2073 }
2074 #endif /* WITH_LIBCORE */
2075
2076 /**
2077  * Preprocess the irg for scheduling.
2078  */
2079 void rss_schedule_preparation(const be_irg_t *birg) {
2080         rss_t rss;
2081
2082         FIRM_DBG_REGISTER(rss.dbg, "firm.be.sched.rss");
2083
2084         //firm_dbg_set_mask(rss.dbg, LEVEL_1 | LEVEL_2 | LEVEL_3);
2085
2086         init_rss_special_nodes(birg->irg);
2087
2088         rss.irg      = birg->irg;
2089         rss.arch_env = birg->main_env->arch_env;
2090         rss.abi      = birg->abi;
2091         rss.h        = heights_new(birg->irg);
2092         rss.nodes    = plist_new();
2093         rss.opts     = &rss_options;
2094         irg_block_walk_graph(birg->irg, NULL, process_block, &rss);
2095         heights_free(rss.h);
2096         plist_free(rss.nodes);
2097
2098         if (birg->main_env->options->dump_flags & DUMP_SCHED)
2099                 be_dump(rss.irg, "-rss", dump_ir_block_graph);
2100 }