d049cde4b2578a3d3b7dcda574fc56eddfdf126c
[libfirm] / ir / be / beschedrss.c
1 /**
2  * Implementation of a register saturating list scheduler
3  * as described in: Sid-Ahmed-Ali Touati
4  * Register Saturation in Superscalar and VLIW Codes
5  *
6  * @license This file protected by GPL -  GNU GENERAL PUBLIC LICENSE.
7  * @author  Christian Wuerdig
8  * @date    29.08.2006
9  * @cvs-id  $Id$
10  */
11 #ifdef HAVE_CONFIG_H
12 #include "config.h"
13 #endif
14
15 #include <limits.h>
16
17 #include "obst.h"
18 #include "debug.h"
19
20 #include "irgraph_t.h"
21 #include "irnode_t.h"
22 #include "iredges_t.h"
23 #include "ircons_t.h"
24 #include "irphase_t.h"
25 #include "irgwalk.h"
26 #include "irdump.h"
27 #include "irtools.h"
28 #include "irbitset.h"
29 #include "irprintf.h"
30 #include "bipartite.h"
31 #include "hungarian.h"
32 #include "plist.h"
33
34 #include "height.h"
35
36 #include "beabi.h"
37 #include "benode_t.h"
38 #include "besched_t.h"
39
40 #ifdef WITH_LIBCORE
41 #include <libcore/lc_opts.h>
42 #include <libcore/lc_opts_enum.h>
43 #endif /* WITH_LIBCORE */
44
45
46 #define ARR_LEN_SAFE(arr) ((arr) != NULL ? ARR_LEN((arr)) : 0)
47
48 #define HASH_RSS_EDGE(edge) ((get_irn_node_nr((edge)->src) << 16) | (get_irn_node_nr((edge)->tgt) & 0xFFFF))
49 #define BSEARCH_IRN_ARR(val, arr) \
50         bsearch(&(val), (arr), ARR_LEN_SAFE((arr)), sizeof((arr)[0]), cmp_irn_idx)
51
52 #define BLOCK_IDX_MAP(rss, irn) bsearch_for_index(get_irn_idx((irn)), (rss)->idx_map, ARR_LEN_SAFE((rss)->idx_map), 1)
53
54 /* the rss options */
55 typedef struct _rss_opts_t {
56         int dump_flags;
57 } rss_opts_t;
58
59 /* Represents a child with associated costs */
60 typedef struct _child {
61         ir_node *irn;
62         float   cost;
63 } child_t;
64
65 /* We need edges for several purposes. */
66 typedef struct _rss_edge {
67         ir_node *src;
68         ir_node *tgt;
69         void    *next;
70 } rss_edge_t;
71
72 /* Represents a connected bipartite component. */
73 typedef struct _cbc {
74         nodeset *parents;       /**< = S  a set of value producers */
75         nodeset *children;      /**< = T  a set of value consumers */
76         pset    *kill_edges;    /**< = E  a set of edges (t in T, s in S) such as each s in S gets killed by at least one t in T */
77         int     nr;             /**< a deterministic index for set insertion (used as hash) */
78 } cbc_t;
79
80 /* Represents a disjoint value DAG. */
81 typedef struct _dvg {
82         nodeset *nodes;
83         pset    *edges;
84 } dvg_t;
85
86 /* Represents a chain of nodes. */
87 typedef struct _chain {
88         plist_t *elements;   /**< List of chain elements */
89         int     nr;          /**< a deterministic index for set insertion (used as hash) */
90 } chain_t;
91
92 typedef struct _rss_irn {
93         plist_t  *consumer_list;    /**< List of consumers */
94         ir_node **consumer;         /**< Sorted consumer array (needed for faster access) */
95
96         plist_t  *parent_list;      /**< List of parents */
97         ir_node **parents;          /**< Sorted parent array (needed for faster access) */
98
99         plist_t  *descendant_list;  /**< List of descendants */
100         ir_node **descendants;      /**< Sorted descendant array (needed for faster access) */
101
102         plist_t  *pkiller_list;     /**< List of potential killers */
103         ir_node **pkillers;         /**< Sorted pkiller array (needed for faster access) */
104
105 #if 0
106         plist_t  *dvg_desc_list;    /**< List of all descendants in the DVG */
107         ir_node **dvg_desc;         /**< Sorted dvg descendant array (needed for faster access) */
108
109         plist_t  *dvg_pkiller_list; /**< List of potential killers in the DVG */
110         ir_node **dvg_pkiller;      /**< Sorted dvg pkiller array (needed for faster access) */
111
112         plist_t  *dvg_user_list;    /**< List of users in the disjoint value DAG DVG */
113 #endif
114         plist_t  *kill_value_list;  /**< List of values getting potentially killed by this node */
115
116         ir_node  *killer;           /**< The selected unique killer */
117         ir_node  *irn;              /**< The corresponding firm node to this rss_irn */
118
119         chain_t  *chain;            /**< The chain, this node is associated to */
120
121         unsigned live_out : 1;      /**< irn has consumers outside of it's block */
122         unsigned visited  : 1;      /**< visited flag for bipartite decomposition */
123         unsigned havedesc : 1;      /**< visited flag collect descendants */
124         unsigned havecons : 1;      /**< visited flag collect consumer */
125         unsigned handled  : 1;      /**< flag indicating whether or not the list structures have been build */
126         unsigned dumped   : 1;      /**< flag indication whether or not this node was dumped */
127 } rss_irn_t;
128
129 /* Represents a serialization edge with associated costs. */
130 typedef struct _serialization {
131         rss_irn_t  *u;       /* the top node */
132         rss_irn_t  *v;       /* the node about to be serialized after u */
133         rss_edge_t *edge;    /* the edge selected for the serialization */
134         int        omega1;   /* estimated: available regs - RS reduction */
135         int        omega2;   /* increase in critical path length */
136         int        new_killer;
137 } serialization_t;
138
139 typedef struct _rss {
140         phase_t          ph;              /**< Phase to hold some data */
141         heights_t        *h;              /**< The current height object */
142         ir_graph         *irg;            /**< The irg to preprocess */
143         plist_t          *nodes;          /**< The list of interesting nodes */
144         const arch_env_t *arch_env;       /**< The architecture environment */
145         be_abi_irg_t     *abi;            /**< The abi for this irg */
146         pset             *cbc_set;        /**< A set of connected bipartite components */
147         ir_node          *block;          /**< The current block in progress. */
148         int              *idx_map;        /**< Mapping irn indices to per block indices */
149         unsigned         max_height;      /**< maximum height in the current block */
150         rss_opts_t       *opts;           /**< The options */
151         const arch_register_class_t *cls; /**< The current register class */
152         DEBUG_ONLY(firm_dbg_module_t *dbg);
153 } rss_t;
154
155 #define get_rss_irn(rss, irn)  (phase_get_or_set_irn_data(&rss->ph, irn))
156
157 /**
158  * We need some special nodes:
159  * a source and a sink for all live-in and live-out values of a block
160  */
161
162 enum {
163         iro_rss_Source,
164         iro_rss_Sink,
165         iro_rss_last
166 };
167
168 static ir_node *_source = NULL;
169 static ir_node *_sink   = NULL;
170
171 #define is_Source(irn) ((irn) == _source)
172 #define is_Sink(irn)   ((irn) == _sink)
173
174 enum {
175         RSS_DUMP_NONE  = 0,
176         RSS_DUMP_CBC   = 1 << 0,
177         RSS_DUMP_PKG   = 1 << 1,
178         RSS_DUMP_KILL  = 1 << 2,
179         RSS_DUMP_DVG   = 1 << 3,
180         RSS_DUMP_MAXAC = 1 << 4,
181         RSS_DUMP_ALL   = (RSS_DUMP_MAXAC << 1) - 1,
182 };
183
184 static rss_opts_t rss_options = {
185         RSS_DUMP_NONE,
186 };
187
188 #ifdef WITH_LIBCORE
189 static const lc_opt_enum_int_items_t dump_items[] = {
190         { "none",  RSS_DUMP_NONE  },
191         { "cbc",   RSS_DUMP_CBC   },
192         { "pkg",   RSS_DUMP_PKG   },
193         { "kill",  RSS_DUMP_KILL  },
194         { "dvg",   RSS_DUMP_DVG   },
195         { "maxac", RSS_DUMP_MAXAC },
196         { "all",   RSS_DUMP_ALL   },
197         { NULL, 0 }
198 };
199
200 static lc_opt_enum_int_var_t dump_var = {
201         &rss_options.dump_flags, dump_items
202 };
203
204 static const lc_opt_table_entry_t rss_option_table[] = {
205         LC_OPT_ENT_ENUM_MASK("dump", "dump phases (none, cbc, pkg, kill, dvg, maxac, all)", &dump_var),
206         { NULL }
207 };
208 #endif /* WITH_LIBCORE */
209
210 /******************************************************************************
211  *  _          _                    __                  _   _
212  * | |        | |                  / _|                | | (_)
213  * | |__   ___| |_ __   ___ _ __  | |_ _   _ _ __   ___| |_ _  ___  _ __  ___
214  * | '_ \ / _ \ | '_ \ / _ \ '__| |  _| | | | '_ \ / __| __| |/ _ \| '_ \/ __|
215  * | | | |  __/ | |_) |  __/ |    | | | |_| | | | | (__| |_| | (_) | | | \__ \
216  * |_| |_|\___|_| .__/ \___|_|    |_|  \__,_|_| |_|\___|\__|_|\___/|_| |_|___/
217  *            | |
218  *            |_|
219  ******************************************************************************/
220
221 /**
222  * Acquire opcodes and create source and sink nodes.
223  */
224 static void init_rss_special_nodes(ir_graph *irg) {
225         ir_node *block         = get_irg_start_block(irg);
226         int     iro_rss_base   = get_next_ir_opcodes(iro_rss_last);
227         ir_op   *op_rss_Source = new_ir_op(iro_rss_base + iro_rss_Source, "rss_Source", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
228         ir_op   *op_rss_Sink   = new_ir_op(iro_rss_base + iro_rss_Sink,   "rss_Sink",   op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
229         _source                = new_ir_node(NULL, irg, block, op_rss_Source, mode_ANY, 0, NULL);
230         _sink                  = new_ir_node(NULL, irg, block, op_rss_Sink, mode_ANY, 0, NULL);
231 }
232
233 static int cmp_int(const void *a, const void *b) {
234         const int *i1 = a;
235         const int *i2 = b;
236
237         return QSORT_CMP(*i1, *i2);
238 }
239
240 static int cmp_child_costs(const void *a, const void *b) {
241         const child_t *c1 = a;
242         const child_t *c2 = b;
243
244         return QSORT_CMP(c1->cost, c2->cost);
245 }
246
247 static int cmp_irn_idx(const void *a, const void *b) {
248         const ir_node *n1 = *(ir_node **)a;
249         const ir_node *n2 = *(ir_node **)b;
250
251         return QSORT_CMP(get_irn_idx(n1), get_irn_idx(n2));
252 }
253
254 static int cmp_rss_edges(const void *a, const void *b) {
255         const rss_edge_t *e1 = a;
256         const rss_edge_t *e2 = b;
257
258         return (e1->src != e2->src) || (e1->tgt != e2->tgt);
259 }
260
261 static int bsearch_for_index(int key, int *arr, size_t len, int force) {
262         int left = 0;
263         int right = len;
264
265         while (right >= left) {
266                 int idx = (left + right) / 2;
267
268                 if (key < arr[idx])
269                         right = idx - 1;
270                 else if (key > arr[idx])
271                         left = idx + 1;
272                 else
273                         return idx;
274         }
275
276         if (force)
277                 assert(0 && "Something is wrong, key not found.");
278         return -1;
279 }
280
281 static ir_node **build_sorted_array_from_list(plist_t *irn_list, struct obstack *obst) {
282         plist_element_t *el;
283         int     i     = 0;
284         int     len   = plist_count(irn_list);
285         ir_node **arr = NEW_ARR_D(ir_node *, obst, len);
286
287         /* copy the list into the array */
288         foreach_plist(irn_list, el) {
289                 arr[i++] = plist_element_get_value(el);
290         }
291
292         /* sort the array by node index */
293         qsort(arr, len, sizeof(arr[0]), cmp_irn_idx);
294
295         return arr;
296 }
297
298 /*****************************************************
299  *      _      _                       _
300  *     | |    | |                     (_)
301  *   __| | ___| |__  _   _  __ _  __ _ _ _ __   __ _
302  *  / _` |/ _ \ '_ \| | | |/ _` |/ _` | | '_ \ / _` |
303  * | (_| |  __/ |_) | |_| | (_| | (_| | | | | | (_| |
304  *  \__,_|\___|_.__/ \__,_|\__, |\__, |_|_| |_|\__, |
305  *                          __/ | __/ |         __/ |
306  *                         |___/ |___/         |___/
307  *
308  *****************************************************/
309
310 static void dump_nodeset(nodeset *ns, const char *prefix) {
311         ir_node *irn;
312         foreach_nodeset(ns, irn) {
313                 ir_fprintf(stderr, "%s%+F\n", prefix, irn);
314         }
315 }
316
317 static void build_file_name(rss_t *rss, const char *suffix, size_t suf_len, char *buf, size_t len) {
318         const char *irg_name;
319
320         memset(buf, 0, len);
321         irg_name = get_entity_name(get_irg_entity(rss->irg));
322         snprintf(buf, len - suf_len, "%s-%s-block-%ld",
323                 irg_name, arch_register_class_name(rss->cls), get_irn_node_nr(rss->block));
324         strcat(buf, suffix);
325 }
326
327 /* Dumps all collected bipartite components of current irg as vcg. */
328 static void debug_vcg_dump_bipartite(rss_t *rss) {
329         cbc_t *cbc;
330         FILE  *f;
331         char  file_name[256];
332         static const char suffix[] = "-RSS-CBC.vcg";
333
334         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
335         f = fopen(file_name, "w");
336
337         ir_fprintf(f, "graph: { title: \"connected bipartite component graph of %+F\"\n", rss->irg);
338         fprintf(f, "display_edge_labels: no\n");
339         fprintf(f, "layoutalgorithm: mindepth\n");
340         fprintf(f, "manhattan_edges: yes\n\n");
341
342         foreach_pset(rss->cbc_set, cbc) {
343                 ir_node    *n;
344                 rss_edge_t *ke;
345
346                 fprintf(f, "graph: { titel: \"cbc %d\" label: \"cbc %d\" status:clustered color:yellow\n", cbc->nr, cbc->nr);
347                 foreach_nodeset(cbc->parents, n) {
348                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
349                 }
350                 foreach_nodeset(cbc->children, n) {
351                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
352                 }
353                 foreach_pset(cbc->kill_edges, ke) {
354                         ir_fprintf(f, "edge: { sourcename: \"n%d_%d\" targetname: \"n%d_%d\" }\n",
355                                 get_irn_node_nr(ke->src), cbc->nr, get_irn_node_nr(ke->tgt), cbc->nr);
356                 }
357                 fprintf(f, "}\n\n");
358         }
359         fprintf(f, "}\n");
360         fclose(f);
361 }
362
363 /* Dump the computed killing function as vcg. */
364 static void debug_vcg_dump_kill(rss_t *rss) {
365         FILE            *f;
366         char            file_name[256];
367         plist_element_t *el;
368         static const char suffix[] = "-RSS-KILL.vcg";
369
370         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
371         f = fopen(file_name, "w");
372
373         ir_fprintf(f, "graph: { title: \"computed kill graph of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
374         fprintf(f, "display_edge_labels: no\n");
375         fprintf(f, "layoutalgorithm: mindepth\n");
376         fprintf(f, "manhattan_edges: yes\n\n");
377
378         /* first: reset dumped flag of all nodes */
379         foreach_plist(rss->nodes, el) {
380                 ir_node   *irn  = plist_element_get_value(el);
381                 rss_irn_t *rirn = get_rss_irn(rss, irn);
382                 rirn->dumped = 0;
383         }
384
385         /* dump all nodes and their killers */
386         foreach_plist(rss->nodes, el) {
387                 ir_node   *irn     = plist_element_get_value(el);
388                 rss_irn_t *rirn    = get_rss_irn(rss, irn);
389                 rss_irn_t *pk_rirn = get_rss_irn(rss, rirn->killer);
390
391                 if (! rirn->dumped) {
392                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
393                         rirn->dumped = 1;
394                 }
395
396                 if (! pk_rirn->dumped) {
397                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(rirn->killer), rirn->killer);
398                         pk_rirn->dumped = 1;
399                 }
400
401                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
402                         get_irn_node_nr(rirn->killer), get_irn_node_nr(irn));
403         }
404
405         fprintf(f, "}\n");
406         fclose(f);
407 }
408
409 /* Dumps the potential killing DAG (PKG) as vcg. */
410 static void debug_vcg_dump_pkg(rss_t *rss, nodeset *max_ac, int iteration) {
411         FILE              *f;
412         char              file_name[256];
413         char              *suffix   = alloca(32);
414         static const char suffix1[] = "-RSS-PKG.vcg";
415         static const char suffix2[] = "-RSS-PKG-MAXAC.vcg";
416         plist_element_t   *el;
417
418         if (! max_ac) {
419                 snprintf(suffix, 32, "%s", suffix1);
420         }
421         else {
422                 snprintf(suffix, 32, "-%02d%s", iteration, suffix2);
423         }
424
425         build_file_name(rss, suffix, strlen(suffix) + 1, file_name, sizeof(file_name));
426         f = fopen(file_name, "w");
427
428         ir_fprintf(f, "graph: { title: \"potential killing DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
429         fprintf(f, "display_edge_labels: no\n");
430         fprintf(f, "layoutalgorithm: mindepth\n");
431         fprintf(f, "manhattan_edges: yes\n\n");
432
433         foreach_plist(rss->nodes, el) {
434                 ir_node   *irn  = plist_element_get_value(el);
435                 rss_irn_t *rirn = get_rss_irn(rss, irn);
436                 char      *c1   = "";
437                 plist_element_t *k_el;
438
439                 /* dump selected saturating values in yellow */
440                 if (max_ac && nodeset_find(max_ac, irn))
441                         c1 = "color:yellow";
442
443                 if (rirn->chain)
444                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(irn), irn, rirn->chain->nr, c1);
445                 else
446                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(irn), irn, c1);
447                 rirn->dumped = 1;
448
449                 foreach_plist(rirn->pkiller_list, k_el) {
450                         ir_node   *pkiller = plist_element_get_value(k_el);
451                         rss_irn_t *pk_rirn = get_rss_irn(rss, pkiller);
452                         char      *c2      = "";
453
454                         if (max_ac && nodeset_find(max_ac, pkiller))
455                                 c2 = "color:yellow";
456
457                         if (! pk_rirn->dumped) {
458                                 if (pk_rirn->chain)
459                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(pkiller), pkiller, pk_rirn->chain->nr, c2);
460                                 else
461                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(pkiller), pkiller, c2);
462                                 pk_rirn->dumped = 1;
463                         }
464                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
465                                 get_irn_node_nr(pkiller), get_irn_node_nr(irn));
466                 }
467         }
468         fprintf(f, "}\n");
469         fclose(f);
470 }
471
472 /* Dumps the disjoint value DAG (DVG) as vcg. */
473 static void debug_vcg_dump_dvg(rss_t *rss, dvg_t *dvg) {
474         static const char suffix[] = "-RSS-DVG.vcg";
475         FILE       *f;
476         char       file_name[256];
477         ir_node    *irn;
478         rss_edge_t *edge;
479
480         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
481         f = fopen(file_name, "w");
482
483         ir_fprintf(f, "graph: { title: \"disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
484         fprintf(f, "display_edge_labels: no\n");
485         fprintf(f, "layoutalgorithm: mindepth\n");
486         fprintf(f, "manhattan_edges: yes\n\n");
487
488         /* dump all nodes */
489         foreach_nodeset(dvg->nodes, irn) {
490                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
491         }
492
493         /* dump all edges */
494         foreach_pset(dvg->edges, edge) {
495                 rss_irn_t *src = get_rss_irn(rss, edge->src);
496                 rss_irn_t *tgt = get_rss_irn(rss, edge->tgt);
497
498                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
499                         get_irn_node_nr(edge->src), get_irn_node_nr(edge->tgt));
500         }
501
502         fprintf(f, "}\n");
503         fclose(f);
504 }
505
506 #if 0
507 /* Dumps the PKG(DVG). */
508 static void debug_vcg_dump_dvg_pkiller(rss_t *rss, dvg_t *dvg) {
509         static const char suffix[] = "-RSS-DVG-PKG.vcg";
510         FILE       *f;
511         char       file_name[256];
512         ir_node    *irn;
513
514         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
515         f = fopen(file_name, "w");
516
517         ir_fprintf(f, "graph: { title: \"PKG of disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
518         fprintf(f, "display_edge_labels: no\n");
519         fprintf(f, "layoutalgorithm: mindepth\n");
520         fprintf(f, "manhattan_edges: yes\n\n");
521
522         /* dump all nodes */
523         foreach_nodeset(dvg->nodes, irn) {
524                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
525         }
526
527         /* dump all edges */
528         foreach_nodeset(dvg->nodes, irn) {
529                 rss_irn_t       *node = get_rss_irn(rss, irn);
530                 plist_element_t *el;
531
532                 foreach_plist(node->dvg_pkiller_list, el) {
533                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
534                                 get_irn_node_nr(plist_element_get_value(el)), get_irn_node_nr(irn));
535                 }
536         }
537
538         fprintf(f, "}\n");
539         fclose(f);
540 }
541 #endif /* if 0 */
542
543 /**
544  * In case there is no rss information for irn, initialize it.
545  */
546 static void *init_rss_irn(phase_t *ph, ir_node *irn, void *old) {
547         rss_irn_t *res = old ? old : phase_alloc(ph, sizeof(res[0]));
548
549         res->descendant_list  = plist_obstack_new(phase_obst(ph));
550         res->descendants      = NULL;
551
552         res->consumer_list    = plist_obstack_new(phase_obst(ph));
553         res->consumer         = NULL;
554
555         res->pkiller_list     = plist_obstack_new(phase_obst(ph));
556         res->pkillers         = NULL;
557
558         res->parent_list      = plist_obstack_new(phase_obst(ph));
559         res->parents          = NULL;
560
561         //res->dvg_desc_list    = plist_obstack_new(phase_obst(ph));
562         //res->dvg_desc         = NULL;
563
564         res->kill_value_list  = plist_obstack_new(phase_obst(ph));
565         //res->dvg_user_list    = plist_obstack_new(phase_obst(ph));
566         //res->dvg_pkiller_list = plist_obstack_new(phase_obst(ph));
567
568         res->killer           = NULL;
569         res->irn              = irn;
570         res->chain            = NULL;
571
572         res->live_out         = 0;
573         res->visited          = 0;
574         res->handled          = 0;
575         res->dumped           = 0;
576         res->havedesc         = 0;
577         res->havecons         = 0;
578
579         return res;
580 }
581
582 /**
583  * Collect all nodes data dependent on current node.
584  */
585 static void collect_descendants(rss_t *rss, rss_irn_t *rirn, ir_node *irn, int *got_sink) {
586         const ir_edge_t *edge;
587         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
588         ir_node         *block    = rss->block;
589         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
590         int             i;
591
592 //      if (cur_node->havedesc)
593 //              return;
594 //      cur_node->havedesc = 1;
595
596         /* Stop at Barriers and Keeps */
597         if (be_is_Barrier(irn) || be_is_Keep(irn))
598                 return;
599
600         /* loop over normal and dependency edges */
601         for (i = 0; i < 2; ++i) {
602                 foreach_out_edge_kind(irn, edge, ekind[i]) {
603                         ir_node *user = get_edge_src_irn(edge);
604
605                         /* skip ignore nodes as they do not really contribute to register pressure */
606                         if (arch_irn_is(rss->arch_env, user, ignore))
607                                 continue;
608
609                         if (is_Proj(user)) {
610                                 if (get_irn_mode(user) != mode_X && arch_get_irn_reg_class(rss->arch_env, user, -1) == rss->cls)
611                                         collect_descendants(rss, rirn, user, got_sink);
612                         }
613                         else {
614                                 /* check if user lives in block and is not a control flow node */
615                                 if (get_nodes_block(user) == block) {
616                                         if (! plist_has_value(rirn->descendant_list, user)) {
617                                                 plist_insert_back(rirn->descendant_list, user);
618                                                 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", user));
619                                         }
620                                         collect_descendants(rss, rirn, user, got_sink);
621                                 }
622                                 else if (! *got_sink) {
623                                         /* user lives out of block: add sink as descendant if not already done */
624                                         plist_insert_back(rirn->descendant_list, _sink);
625                                         *got_sink = 1;
626                                         DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", _sink));
627                                 }
628                         }
629                 }
630         }
631 }
632
633 /**
634  * Handles a single consumer.
635  */
636 static void collect_single_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *consumer, int *got_sink) {
637         ir_node        *block   = rss->block;
638         ir_edge_kind_t ekind[2] = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
639
640         assert(! is_Proj(consumer) && "Cannot handle Projs");
641
642         if (get_nodes_block(consumer) == block) {
643                 if (! arch_irn_is(rss->arch_env, consumer, ignore)) {
644                         plist_insert_back(rss_irn->consumer_list, consumer);
645                         DBG((rss->dbg, LEVEL_2, "\t\tconsumer %+F\n", consumer));
646                 }
647         }
648         else {
649                 rss_irn->live_out = 1;
650                 DBG((rss->dbg, LEVEL_2, "\t\tlive out %+F", consumer));
651                 if (! *got_sink) {
652                         plist_insert_back(rss_irn->consumer_list, _sink);
653                         *got_sink = 1;
654                         DB((rss->dbg, LEVEL_2, ", %+F added instead", _sink));
655                 }
656                 DB((rss->dbg, LEVEL_2, "\n"));
657         }
658 }
659
660 /**
661  * Collect all nodes consuming the value(s) produced by current node.
662  */
663 static void collect_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *irn, int *got_sink) {
664         const ir_edge_t *edge;
665         int             i;
666         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
667         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
668
669         if (cur_node->havecons)
670                 return;
671         cur_node->havecons = 1;
672
673         for (i = 0; i < 2; ++i) {
674                 foreach_out_edge_kind(irn, edge, ekind[i]) {
675                         ir_node *consumer = get_edge_src_irn(edge);
676
677                         if (is_Proj(consumer)) {
678                                 if (arch_get_irn_reg_class(rss->arch_env, consumer, -1) == rss->cls)
679                                         collect_consumer(rss, rss_irn, consumer, got_sink);
680                         }
681                         else
682                                 collect_single_consumer(rss, rss_irn, consumer, got_sink);
683                 }
684         }
685 }
686
687 #if 0
688 /**
689  * We need to build the consumer and descendant list for _source.
690  */
691 static void collect_node_info_source(rss_t *rss) {
692         const ir_edge_t *edge;
693         rss_irn_t       *rirn  = get_rss_irn(rss, _source);
694         ir_node         *block = rss->block;
695
696         if (rirn->handled)
697                 return;
698
699         foreach_out_edge(block, edge) {
700                 ir_node *irn = get_edge_src_irn(edge);
701                 int     i;
702
703                 for (i = get_irn_arity(n) - 1; i >= 0; --i) {
704
705                 }
706         }
707 }
708
709 static void reset_node_info(rss_irn_t *rss_irn) {
710         /* Beware: array data resides on phase obstack, so it gets removed automatically */
711
712         plist_clear(rss_irn->consumer_list);
713         rss_irn->consumer = NULL;
714
715         plist_clear(rss_irn->parent_list);
716         rss_irn->parents = NULL;
717
718         plist_clear(rss_irn->descendant_list);
719         rss_irn->descendants = NULL;
720
721         plist_clear(rss_irn->pkiller_list);
722         rss_irn->pkillers = NULL;
723
724         plist_clear(rss_irn->kill_value_list);
725
726         rss_irn->killer   = NULL;
727         rss_irn->live_out = 0;
728         rss_irn->visited  = 0;
729         rss_irn->handled  = 0;
730 }
731 #endif /* if 0 */
732
733 /**
734  * Collects all consumer and descendant of a irn.
735  */
736 static void collect_node_info(rss_t *rss, ir_node *irn) {
737         rss_irn_t *rss_irn = get_rss_irn(rss, irn);
738         int       got_sink;
739
740         assert(! is_Proj(irn) && "Cannot handle Projs.");
741
742         /* check if node info is already available */
743         if (rss_irn->handled)
744                 phase_reinit_single_irn_data(&rss->ph, irn);
745
746         DBG((rss->dbg, LEVEL_1, "\tcomputing consumers of %+F:\n", irn));
747
748         /* collect all consumer */
749         got_sink = 0;
750         collect_consumer(rss, rss_irn, irn, &got_sink);
751
752         /* build sorted consumer array */
753         rss_irn->consumer = build_sorted_array_from_list(rss_irn->consumer_list, phase_obst(&rss->ph));
754
755         DBG((rss->dbg, LEVEL_1, "\tcompute descendants of %+F:\n", irn));
756
757         /* collect descendants */
758         got_sink = 0;
759         collect_descendants(rss, rss_irn, irn, &got_sink);
760
761         /* build sorted descendant array */
762         rss_irn->descendants = build_sorted_array_from_list(rss_irn->descendant_list, phase_obst(&rss->ph));
763
764         rss_irn->handled = 1;
765 }
766
767 /**
768  * Checks if v is a potential killer of u.
769  * v is in pkill(u) iff descendants(v) cut consumer(u) is v
770  *
771  * @param rss   The rss object
772  * @param v      The node to check for killer
773  * @param u      The potentially killed value
774  * @return 1 if v is in pkill(u), 0 otherwise
775  */
776 static int is_potential_killer(rss_t *rss, rss_irn_t *v, rss_irn_t *u) {
777         plist_t *list;
778         ir_node **arr;
779         plist_element_t *el;
780
781         assert(is_Sink(v->irn) || ((plist_count(v->descendant_list) > 0 && v->descendants) || 1));
782         assert(is_Sink(u->irn) || ((plist_count(u->consumer_list)   > 0 && u->consumer)    || 1));
783
784         /* as we loop over the list: loop over the shorter one */
785         if (plist_count(v->descendant_list) > plist_count(u->consumer_list)) {
786                 list = u->consumer_list;
787                 arr  = v->descendants;
788         }
789         else {
790                 list = v->descendant_list;
791                 arr  = u->consumer;
792         }
793
794         /* for each list element: try to find element in array */
795         foreach_plist(list, el) {
796                 ir_node *irn   = plist_element_get_value(el);
797                 ir_node *match = BSEARCH_IRN_ARR(irn, arr);
798
799                 if (match && match != irn)
800                         return 0;
801         }
802
803         return 1;
804 }
805
806 /**
807  * Update descendants, consumer and pkiller list for the given irn.
808  */
809 static void update_node_info(rss_t *rss, ir_node *irn, ir_node *pk_irn) {
810         rss_irn_t *node    = get_rss_irn(rss, irn);
811         rss_irn_t *pkiller = get_rss_irn(rss, pk_irn);
812
813         /* add consumer and rebuild the consumer array */
814         if (! plist_has_value(node->consumer_list, pk_irn)) {
815                 plist_insert_back(node->consumer_list, pk_irn);
816                 node->consumer = build_sorted_array_from_list(node->consumer_list, phase_obst(&rss->ph));
817         }
818
819         /* add pk_irn as descendant, add it's descendants to irn's and rebuild array */
820         if (! plist_has_value(node->descendant_list, pk_irn)) {
821                 plist_element_t *el;
822
823                 plist_insert_back(node->descendant_list, pk_irn);
824
825                 foreach_plist(pkiller->descendant_list, el) {
826                         ir_node *desc = plist_element_get_value(el);
827
828                         if (! plist_has_value(node->descendant_list, desc))
829                                 plist_insert_back(node->descendant_list, desc);
830                 }
831
832                 node->descendants = build_sorted_array_from_list(node->descendant_list, phase_obst(&rss->ph));
833         }
834
835 #if 0
836         /* check, if pkiller is a potential killer of irn */
837         if (is_potential_killer(rss, pkiller, node)) {
838                 if (! plist_has_value(node->pkiller_list, pk_irn))
839                         plist_insert_back(node->pkiller_list, pk_irn);
840                 if (! plist_has_value(pkiller->kill_value_list, irn))
841                         plist_insert_back(pkiller->kill_value_list, irn);
842                 DBG((rss->dbg, LEVEL_1, "\tpotential killer %+F added to %+F\n", pk_irn, irn));
843         }
844 #endif
845 }
846
847 /**
848  * Compute the potential killing set PK.
849  */
850 static void compute_pkill_set(rss_t *rss) {
851         plist_element_t *u_el, *v_el;
852
853         foreach_plist(rss->nodes, u_el) {
854                 ir_node   *u_irn = plist_element_get_value(u_el);
855                 rss_irn_t *u     = get_rss_irn(rss, u_irn);
856
857                 DBG((rss->dbg, LEVEL_1, "\tcomputing potential killers of %+F:\n", u_irn));
858
859                 /* check each consumer if it is a potential killer  */
860                 foreach_plist(u->consumer_list, v_el) {
861                         ir_node   *v_irn = plist_element_get_value(v_el);
862                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
863
864                         /* check, if v is a potential killer of u */
865                         if (is_potential_killer(rss, v, u)) {
866                                 if (! plist_has_value(u->pkiller_list, v_irn))
867                                         plist_insert_back(u->pkiller_list, v_irn);
868                                 if (! plist_has_value(v->kill_value_list, u_irn))
869                                         plist_insert_back(v->kill_value_list, u_irn);
870                                 DBG((rss->dbg, LEVEL_2, "\t\tpotential killer %+F\n", v_irn));
871                         }
872                 }
873
874                 u->killer = _sink;
875         }
876
877         if (rss->opts->dump_flags & RSS_DUMP_PKG)
878                 debug_vcg_dump_pkg(rss, NULL, 0);
879 }
880
881 /**
882  * Build set of killing edges (from values to their potential killers)
883  */
884 static void build_kill_edges(rss_t *rss, pset *epk) {
885         plist_element_t *el, *k_el;
886
887         foreach_plist(rss->nodes, el) {
888                 ir_node    *irn  = plist_element_get_value(el);
889                 rss_irn_t *rirn = get_rss_irn(rss, irn);
890
891                 DBG((rss->dbg, LEVEL_3, "\t\tbuilding kill edges for %+F:\n", irn));
892
893                 foreach_plist(rirn->pkiller_list, k_el) {
894                         ir_node    *pkiller = plist_element_get_value(k_el);
895                         rss_edge_t *ke      = obstack_alloc(phase_obst(&rss->ph), sizeof(*ke));
896
897                         ke->src  = irn;
898                         ke->tgt  = pkiller;
899                         ke->next = NULL;
900
901                         DBG((rss->dbg, LEVEL_3, "\t\t\ttarget %+F\n", pkiller));
902
903                         pset_insert(epk, ke, HASH_RSS_EDGE(ke));
904                 }
905         }
906 }
907
908 /* print the given cbc for debugging purpose */
909 static void debug_print_cbc(firm_dbg_module_t *mod, cbc_t *cbc) {
910         ir_node    *n;
911         rss_edge_t *ke;
912
913         DBG((mod, LEVEL_3, "\t\tS = set of parents:\n"));
914         foreach_nodeset(cbc->parents, n) {
915                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
916         }
917         DBG((mod, LEVEL_3, "\t\tT = set of children:\n"));
918         foreach_nodeset(cbc->children, n) {
919                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
920         }
921         DBG((mod, LEVEL_3, "\t\tE = Edges from producers to consumers\n"));
922         foreach_pset(cbc->kill_edges, ke) {
923                 DBG((mod, LEVEL_3, "\t\t\t%+F -> %+F\n", ke->src, ke->tgt));
924         }
925 }
926
927 /**
928  * Construct the bipartite decomposition.
929  * Sid-Ahmed-Ali Touati, Phd Thesis
930  * Register Pressure in Instruction Level Parallelism, p. 71
931  */
932 static void compute_bipartite_decomposition(rss_t *rss) {
933         pset *epk    = new_pset(cmp_rss_edges, 10);
934         int  cur_num = 0;
935
936         plist_element_t *el;
937
938         DBG((rss->dbg, LEVEL_1, "\tcomputing bipartite decomposition:\n"));
939
940         build_kill_edges(rss, epk);
941
942         foreach_plist(rss->nodes, el) {
943                 ir_node   *u_irn   = plist_element_get_value(el);
944                 rss_irn_t *u       = get_rss_irn(rss, u_irn);
945                 int       p_change = 1;
946                 int       c_change = 1;
947
948                 cbc_t           *cbc;
949                 plist_element_t *el2;
950                 rss_edge_t      *k_edge;
951                 rss_edge_t      *kedge_root = NULL;
952                 ir_node         *t_irn, *s_irn;
953
954                 if (u->visited || u_irn == _sink)
955                         continue;
956
957                 DBG((rss->dbg, LEVEL_2, "\t\t%+F choosen:\n", u_irn));
958
959                 cbc     = obstack_alloc(phase_obst(&rss->ph), sizeof(*cbc));
960                 cbc->nr = cur_num++;
961
962                 /* initialize S_cb */
963                 cbc->parents = new_nodeset(5);
964                 nodeset_insert(cbc->parents, u_irn);
965                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (init)\n", u_irn));
966
967                 /* E_cb = empty */
968                 cbc->kill_edges = new_pset(cmp_rss_edges, 5);
969
970                 /* each parent gets killed by at least one value from children */
971
972                 /* T_cb = PK_successors(u) */
973                 cbc->children = new_nodeset(5);
974                 foreach_plist(u->pkiller_list, el2) {
975                         nodeset_insert(cbc->children, plist_element_get_value(el2));
976                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (init)\n", plist_element_get_value(el2)));
977                 }
978
979                 /*
980                         Now: insert the parents of all children into the parent set
981                         and insert the children of all parents into the children set
982                         until the sets don't change any more
983                 */
984                 while (p_change || c_change) {
985                         p_change = c_change = 0;
986
987                         /* accumulate parents */
988                         foreach_nodeset(cbc->children, t_irn) {
989                                 rss_irn_t *t = get_rss_irn(rss, t_irn);
990                                 plist_element_t *kvl_el;
991
992                                 foreach_pset(epk, k_edge) {
993                                         ir_node *val = k_edge->src;
994
995                                         if (k_edge->src == t_irn && ! nodeset_find(cbc->parents, k_edge->src)) {
996                                                 nodeset_insert(cbc->parents, val);
997                                                 p_change = 1;
998                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents\n", val));
999                                         }
1000                                 }
1001                         }
1002
1003                         /* accumulate children */
1004                         foreach_nodeset(cbc->parents, s_irn) {
1005                                 rss_irn_t *s = get_rss_irn(rss, s_irn);
1006                                 plist_element_t *pkl_el;
1007
1008                                 foreach_plist(s->pkiller_list, pkl_el) {
1009                                         ir_node *val = plist_element_get_value(pkl_el);
1010
1011                                         if (! nodeset_find(cbc->children, val)) {
1012                                                 nodeset_insert(cbc->children, val);
1013                                                 c_change = 1;
1014                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children\n", val));
1015                                         }
1016                                 }
1017                         }
1018                 }
1019
1020                 /* mark all parent values as visited */
1021                 foreach_nodeset(cbc->parents, s_irn) {
1022                         rss_irn_t *s = get_rss_irn(rss, s_irn);
1023                         s->visited = 1;
1024                         /* assure bipartite property */
1025                         if (nodeset_find(cbc->children, s_irn)) {
1026                                 nodeset_remove(cbc->children, s_irn);
1027                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F removed from to children\n", s_irn));
1028                         }
1029                 }
1030
1031                 /* update edges */
1032                 foreach_pset(epk, k_edge) {
1033                         if (nodeset_find(cbc->parents, k_edge->src) && nodeset_find(cbc->children, k_edge->tgt)) {
1034                                 pset_insert(cbc->kill_edges, k_edge, HASH_RSS_EDGE(k_edge));
1035                                 /*
1036                                         Link all k_edges which are about to be removed together.
1037                                         Beware: pset_remove kills the iterator.
1038                                 */
1039                                 k_edge->next = kedge_root;
1040                                 kedge_root   = k_edge;
1041                         }
1042                 }
1043
1044                 /* remove all linked k_edges */
1045                 for (; kedge_root; kedge_root = kedge_root->next) {
1046                         pset_remove(epk, kedge_root, HASH_RSS_EDGE(kedge_root));
1047                 }
1048
1049                 /* add the connected bipartite component */
1050                 if (nodeset_count(cbc->parents) > 0 && nodeset_count(cbc->children) > 0 && pset_count(cbc->kill_edges) > 0)
1051                         pset_insert(rss->cbc_set, cbc, (unsigned)cbc->nr);
1052                 DBG((rss->dbg, LEVEL_2, "\tbipartite component %d inserted:\n", cbc->nr));
1053                 DEBUG_ONLY(
1054                         if (firm_dbg_get_mask(rss->dbg) & LEVEL_2)
1055                                 debug_print_cbc(rss->dbg, cbc);
1056                 );
1057         }
1058
1059         if (rss->opts->dump_flags & RSS_DUMP_CBC)
1060                 debug_vcg_dump_bipartite(rss);
1061
1062         del_pset(epk);
1063 }
1064
1065 /**
1066  * Select the child with the maximum cost.
1067  */
1068 static child_t *select_child_max_cost(rss_t *rss, nodeset *x, nodeset *y, child_t *t, cbc_t *cbc) {
1069         ir_node *child;
1070         float   max_cost = -1.0f;
1071
1072         DBG((rss->dbg, LEVEL_2, "\t\tcomputing children costs:\n"));
1073
1074         foreach_nodeset(cbc->children, child) {
1075                 rss_irn_t  *r_child             = get_rss_irn(rss, child);
1076                 int         num_unkilled_parents = 0;
1077                 int         num_descendants;
1078                 rss_edge_t *k_edge;
1079                 float       cost;
1080
1081                 /* get the number of unkilled parents */
1082                 foreach_pset(cbc->kill_edges, k_edge) {
1083                         if (k_edge->tgt == child && nodeset_find(x, k_edge->src))
1084                                 ++num_unkilled_parents;
1085                 }
1086
1087                 cost = (float)num_unkilled_parents;
1088
1089                 num_descendants = plist_count(r_child->descendant_list) + nodeset_count(y);
1090
1091                 if (num_descendants > 0)
1092                         cost /= (float)num_descendants;
1093
1094                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F, #desc %d, cost %.3f\n", child, num_descendants, cost));
1095
1096                 if (cost > max_cost) {
1097                         t->irn   = child;
1098                         t->cost  = cost;
1099                         max_cost = cost;
1100                 }
1101         }
1102
1103         return t;
1104 }
1105
1106 /**
1107  * Remove all parents from x which are killed by t_irn.
1108  */
1109 static void remove_covered_parents(rss_t *rss, nodeset *x, ir_node *t_irn, cbc_t *cbc) {
1110         rss_irn_t  *t = get_rss_irn(rss, t_irn);
1111         rss_edge_t *k_edge;
1112
1113         DBG((rss->dbg, LEVEL_2, "\t\tremoving parents covered by %+F:\n", t_irn));
1114
1115         foreach_pset(cbc->kill_edges, k_edge) {
1116                 if (k_edge->tgt == t_irn && nodeset_find(x, k_edge->src)) {
1117                         nodeset_remove(x, k_edge->src);
1118                         plist_insert_back(t->parent_list, k_edge->src);
1119                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", k_edge->src));
1120                 }
1121         }
1122 }
1123
1124 static void update_cumulated_descendent_values(rss_t *rss, nodeset *y, ir_node *t_irn) {
1125         rss_irn_t *t = get_rss_irn(rss, t_irn);
1126         plist_element_t *el;
1127
1128         DBG((rss->dbg, LEVEL_2, "\t\tupdating cumulated descendant value of %+F:\n", t_irn));
1129
1130         foreach_plist(t->descendant_list, el) {
1131                 nodeset_insert(y, plist_element_get_value(el));
1132                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", plist_element_get_value(el)));
1133         }
1134 }
1135
1136 /**
1137  * Greedy-k: a heuristics for the MMA problem
1138  */
1139 static void compute_killing_function(rss_t *rss) {
1140         cbc_t *cbc;
1141         struct obstack obst;
1142
1143         obstack_init(&obst);
1144
1145         rss->cbc_set = pset_new_ptr(5);
1146         compute_bipartite_decomposition(rss);
1147
1148         /* for all bipartite components do: */
1149         foreach_pset(rss->cbc_set, cbc) {
1150                 ir_node *p;
1151                 nodeset *x       = new_nodeset(10);
1152                 nodeset *y       = new_nodeset(10);
1153                 child_t **sks    = NEW_ARR_F(child_t *, 20);
1154                 int     cur_len  = 0;
1155                 int     cur_size = 20;
1156                 int     i;
1157
1158                 DBG((rss->dbg, LEVEL_1, "\tcomputing SKS for cbc %d:\n", cbc->nr));
1159                 DBG((rss->dbg, LEVEL_2, "\t\tinitializing parents X:\n"));
1160
1161                 /* X = S_cb (all parents are initially uncovered) */
1162                 foreach_nodeset(cbc->parents, p) {
1163                         nodeset_insert(x, p);
1164                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", p));
1165                 }
1166
1167                 /* while X not empty */
1168                 while (nodeset_count(x) > 0) {
1169                         child_t *t = obstack_alloc(&obst, sizeof(*t));
1170                         memset(t, 0, sizeof(*t));
1171
1172                         t = select_child_max_cost(rss, x, y, t, cbc);
1173
1174                         if (cur_len >= cur_size) {
1175                                 ARR_EXTO(child_t *, sks, cur_size * 2);
1176                                 cur_size *= 2;
1177                         }
1178
1179                         DBG((rss->dbg, LEVEL_2, "\t\tinsert child %+F (%.3f) into SKS at pos %d\n", t->irn, t->cost, cur_len));
1180
1181                         sks[cur_len++] = t;
1182                         remove_covered_parents(rss, x, t->irn, cbc);
1183                         update_cumulated_descendent_values(rss, y, t->irn);
1184                 }
1185
1186                 ARR_SHRINKLEN(sks, cur_len);
1187
1188                 /* sort SKS in increasing cost order */
1189                 qsort(sks, cur_len, sizeof(sks[0]), cmp_child_costs);
1190
1191                 DBG((rss->dbg, LEVEL_2, "\tprocessing SKS for cbc %d:\n", cbc->nr));
1192
1193                 /* build killing function */
1194                 for (i = cur_len - 1; i >= 0; --i) { /* loop over sks in decreasing cost order */
1195                         child_t         *t = sks[i];
1196                         rss_irn_t       *rt = get_rss_irn(rss, t->irn);
1197                         plist_element_t *p_el;
1198
1199                         DBG((rss->dbg, LEVEL_3, "\t\t\tkiller %+F (%.3f):\n", t->irn, t->cost));
1200
1201                         /* kill all unkilled parents of t */
1202                         foreach_plist(rt->parent_list, p_el) {
1203                                 ir_node    *par = plist_element_get_value(p_el);
1204                                 rss_irn_t *rpar = get_rss_irn(rss, par);
1205
1206                                 if (is_Sink(rpar->killer)) {
1207                                         rpar->killer = t->irn;
1208                                         DBG((rss->dbg, LEVEL_2, "\t\tkill %+F\n", rpar->irn));
1209                                 }
1210                                 else {
1211                                         DBG((rss->dbg, LEVEL_3, "\t\t\tkeeping %+F as killer for %+F\n", rpar->killer, rpar->irn));
1212                                 }
1213                         }
1214                 }
1215
1216                 del_nodeset(x);
1217                 del_nodeset(y);
1218                 DEL_ARR_F(sks);
1219         }
1220
1221         if (rss->opts->dump_flags & RSS_DUMP_KILL)
1222                 debug_vcg_dump_kill(rss);
1223
1224         del_pset(rss->cbc_set);
1225         obstack_free(&obst, NULL);
1226 }
1227
1228 /**
1229  * Adds the edge src -> tgt to the dvg. Checks if reverse edge is already there (asserts).
1230  */
1231 static INLINE void add_dvg_edge(rss_t *rss, dvg_t *dvg, ir_node *src, ir_node *tgt, int have_source) {
1232         rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1233         rss_edge_t key;
1234
1235         if (! have_source)
1236                 nodeset_insert(dvg->nodes, src);
1237
1238         nodeset_insert(dvg->nodes, tgt);
1239
1240         /* add an edge to our killer */
1241         dvg_edge->src  = src;
1242         dvg_edge->tgt  = tgt;
1243         dvg_edge->next = NULL;
1244
1245         key.src = tgt;
1246         key.tgt = src;
1247         assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1248
1249         /* add the edge to the DVG */
1250         DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", src, tgt));
1251         pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1252 }
1253
1254 /**
1255  * Computes the disjoint value DAG (DVG).
1256  * BEWARE: It is not made explicitly clear in the Touati paper,
1257  *         but the DVG is meant to be build from the KILLING DAG
1258  */
1259 static void compute_dvg(rss_t *rss, dvg_t *dvg) {
1260         plist_element_t *el;
1261
1262         DBG((rss->dbg, LEVEL_1, "\tcomputing DVG:\n"));
1263
1264         foreach_plist(rss->nodes, el) {
1265                 ir_node    *u_irn    = plist_element_get_value(el);
1266                 rss_irn_t  *u        = get_rss_irn(rss, u_irn);
1267                 rss_irn_t  *u_killer = get_rss_irn(rss, u->killer);
1268                 int        i;
1269
1270                 /* TODO: omit nodes only having sink as pkiller and killing no one */
1271
1272                 /* add an edge to killer */
1273                 add_dvg_edge(rss, dvg, u_irn, u->killer, 0);
1274
1275                 if (is_Sink(u->killer))
1276                         continue;
1277
1278                 /* We add an edge to every killer, from where we could be reached. */
1279                 for (i = ARR_LEN_SAFE(u_killer->descendants) - 1; i >= 0; --i) {
1280                         add_dvg_edge(rss, dvg, u_irn, u_killer->descendants[i], 1);
1281                 }
1282
1283 #if 0
1284
1285                 foreach_plist(rss->nodes, el2) {
1286                         ir_node *v_irn = plist_element_get_value(el2);
1287
1288                         /*
1289                                 There is an edge (u, v) in the DVG iff v is a descendant of the killer(u).
1290                         */
1291                         if (BSEARCH_IRN_ARR(v_irn, u_kill->descendants)) {
1292                                 rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1293                                 rss_edge_t key;
1294
1295                                 /* insert the user into the DVG and append it to the user list of u */
1296                                 nodeset_insert(dvg->nodes, v_irn);
1297                                 if (! plist_has_value(u->dvg_user_list, v_irn))
1298                                         plist_insert_back(u->dvg_user_list, v_irn);
1299
1300                                 dvg_edge->src  = u_irn;
1301                                 dvg_edge->tgt  = v_irn;
1302                                 dvg_edge->next = NULL;
1303
1304                                 key.src = v_irn;
1305                                 key.tgt = u_irn;
1306
1307                                 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1308
1309                                 /* add the edge to the DVG */
1310                                 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", u_irn, v_irn));
1311                                 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1312                         }
1313                 }
1314 #endif /* if 0 */
1315         }
1316
1317         if (rss->opts->dump_flags & RSS_DUMP_DVG)
1318                 debug_vcg_dump_dvg(rss, dvg);
1319 }
1320
1321 /**
1322  * Updates the dvg structure when a serialization edge from src -> tgt is added.
1323  */
1324 static void update_dvg(rss_t *rss, dvg_t *dvg, rss_irn_t *src, rss_irn_t *tgt) {
1325         int i;
1326
1327         add_dvg_edge(rss, dvg, tgt->irn, src->irn, 1);
1328
1329         for (i = ARR_LEN_SAFE(src->descendants) - 1; i >= 0; --i) {
1330                 add_dvg_edge(rss, dvg, tgt->irn, src->descendants[i], 1);
1331         }
1332 }
1333
1334 #if 0
1335 /**
1336  * Accumulate all descendants for root into list.
1337  */
1338 static void accumulate_dvg_descendant_values(rss_t *rss, rss_irn_t *root, plist_t *list) {
1339         if (plist_count(root->dvg_user_list) > 0) {
1340                 plist_element_t *el;
1341
1342                 foreach_plist(root->dvg_user_list, el) {
1343                         ir_node   *v_irn = plist_element_get_value(el);
1344                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
1345
1346                         /* add v as descendant */
1347                         if (! plist_has_value(list, v_irn)) {
1348                                 plist_insert_back(list, v_irn);
1349                                 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG descendant %+F\n", v_irn));
1350                         }
1351
1352                         /* accumulate v's descendants */
1353                         accumulate_dvg_descendant_values(rss, v, list);
1354                 }
1355         }
1356 }
1357
1358 /**
1359  * Builds the list of potential killers for each node
1360  * in the given DVG.
1361  * Needs the descendant list for all user as sorted array.
1362  */
1363 static void build_dvg_pkiller_list(rss_t *rss, dvg_t *dvg) {
1364         ir_node *irn;
1365
1366         foreach_nodeset(dvg->nodes, irn) {
1367                 rss_irn_t       *node = get_rss_irn(rss, irn);
1368                 plist_element_t *el, *el2;
1369
1370                 DBG((rss->dbg, DEBUG_DVG, "\t\tbuilding pkiller list for %+F\n", irn));
1371
1372                 /* check each user */
1373                 foreach_plist(node->dvg_user_list, el) {
1374                         ir_node *u_irn = plist_element_get_value(el);
1375
1376                         /* is the current user u_irn not a descendant of any other user -> pkiller */
1377                         foreach_plist(node->dvg_user_list, el2) {
1378                                 ir_node   *v_irn = plist_element_get_value(el2);
1379                                 rss_irn_t *v     = get_rss_irn(rss, v_irn);
1380
1381                                 if (el != el2                             &&
1382                                         ! BSEARCH_IRN_ARR(u_irn, v->dvg_desc) &&
1383                                         ! plist_has_value(node->dvg_pkiller_list, u_irn))
1384                                 {
1385                                         plist_insert_back(node->dvg_pkiller_list, u_irn);
1386                                         DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG pkiller %+F\n", u_irn));
1387                                 }
1388                         }
1389                 }
1390
1391                 node->dvg_pkiller = build_sorted_array_from_list(node->dvg_pkiller_list, phase_obst(&rss->ph));
1392         }
1393
1394         DEBUG_ONLY(
1395                 if (firm_dbg_get_mask(rss->dbg) & DEBUG_DVG)
1396                         debug_vcg_dump_dvg_pkiller(rss, dvg);
1397         );
1398 }
1399
1400 #endif /* if 0 */
1401
1402 /**
1403  * Compute the maximal antichain of the current DVG.
1404  * This is a reimplementation of the MAXIMAL_ANTI_CHAIN function
1405  * from the DDG library 1.1 (DAG.cpp).
1406  */
1407 static nodeset *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration) {
1408         int         n               = nodeset_count(dvg->nodes);
1409         int         *assignment     = alloca(n * sizeof(assignment[0]));
1410         int         *assignment_rev = alloca(n * sizeof(assignment_rev[0]));
1411         int         *idx_map        = alloca(n * sizeof(idx_map[0]));
1412         hungarian_problem_t *bp;
1413         nodeset     *values, *temp;
1414         ir_node     *u_irn;
1415         int         i, j, cost, cur_chain, res;
1416         rss_edge_t  *dvg_edge;
1417
1418 #define MAP_IDX(irn) bsearch_for_index(get_irn_idx(irn), idx_map,  n,  1)
1419
1420         if (pset_count(dvg->edges) == 0)
1421                 return NULL;
1422
1423         bp = hungarian_new(n, n, 1, HUNGARIAN_MATCH_NORMAL);
1424
1425         /*
1426                 At first, we build an index map for the nodes in the DVG,
1427                 because we cannot use the irn idx therefore as the resulting
1428                 bipartite data structure would have around 1.2GB.
1429                 So we limit the size to the number of nodes we have in the DVG
1430                 and build a sorted index map for their irn indices.
1431         */
1432         i = 0;
1433         foreach_nodeset(dvg->nodes, u_irn) {
1434                 idx_map[i++] = get_irn_idx(u_irn);
1435         }
1436         qsort(idx_map, n, sizeof(idx_map[0]), cmp_int);
1437
1438         foreach_pset(dvg->edges, dvg_edge) {
1439                 int idx_u = MAP_IDX(dvg_edge->src);
1440                 int idx_v = MAP_IDX(dvg_edge->tgt);
1441
1442                 /* add the entry to the bipartite data structure */
1443                 hungarian_add(bp, idx_u, idx_v, 1);
1444                 DBG((rss->dbg, LEVEL_3, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1445                         idx_u, dvg_edge->src, idx_v, dvg_edge->tgt));
1446         }
1447 #if 0
1448         /*
1449                 Add a bipartite entry for each pair of nodes (u, v), where exists a
1450                 path in the DVG from u to v, ie. connect all descendants(v) to v.
1451                 desc_dvg(v) = accumulated descendants of all z in dvg_user_list(v)
1452         */
1453         foreach_nodeset(dvg->nodes, u_irn) {
1454                 rss_irn_t *u        = get_rss_irn(rss, u_irn);
1455                 int       idx_u_irn = MAP_IDX(u_irn);
1456
1457                 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG descendants of %+F:\n", u_irn));
1458
1459                 //plist_clear(u->dvg_desc_list);
1460                 //accumulate_dvg_descendant_values(rss, u, u->dvg_desc_list);
1461
1462                 /*
1463                         FIXME: The array is build on the phase obstack and we cannot free the data.
1464                         So the array get as many times allocated as this function is called.
1465                 */
1466
1467                 /* build the sorted array for faster searches */
1468                 //u->dvg_desc = build_sorted_array_from_list(u->dvg_desc_list, phase_obst(&rss->ph));
1469
1470                 DBG((rss->dbg, DEBUG_MAX_AC, "\t\tadding bipartite entries of %+F:\n", u_irn));
1471
1472                 /* add the bipartite entries for all descendants */
1473                 for (i = ARR_LEN_SAFE(u->dvg_desc) - 1; i >= 0; --i) {
1474                         rss_irn_t *desc    = get_rss_irn(rss, u->dvg_desc[i]);
1475                         int       idx_desc = MAP_IDX(u->dvg_desc[i]);
1476
1477                         /* add the entry to the bipartite data structure */
1478                         hungarian_add(bp, idx_u_irn, idx_desc, 1);
1479                         DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1480                                 idx_u_irn, u_irn, idx_desc, u->dvg_desc[i]));
1481
1482                         need_matching = 1;
1483                 }
1484         }
1485 #endif
1486
1487         /* We want maximum cardinality matching */
1488         hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);
1489
1490 #if 0
1491         DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG pkiller:\n"));
1492         /* beware: the following function needs the dvg_desc array */
1493         build_dvg_pkiller_list(rss, dvg);
1494 #endif
1495
1496         DBG((rss->dbg, LEVEL_2, "\t\tcomputing bipartite matching\n"));
1497         /*
1498                 The maximum cardinality bipartite matching gives us the minimal
1499                 chain partition, which corresponds to the maximum anti chains.
1500         */
1501         res = hungarian_solve(bp, assignment, &cost, 1);
1502         assert(res == 0 && "Bipartite matching failed!");
1503
1504         hungarian_free(bp);
1505         memset(assignment_rev, -1, n * sizeof(assignment_rev[0]));
1506
1507         /* build the reverse assignment, ie. foreach i -> j, add j -> i */
1508         for (i = 0; i < n; ++i) {
1509                 if (assignment[i] >= 0) {
1510                         int j = assignment[i];
1511                         assignment_rev[j] = i;
1512                 }
1513         }
1514
1515         DBG((rss->dbg, LEVEL_2, "\t\tgot assignment with cost %d\n", cost));
1516         DBG((rss->dbg, LEVEL_3, "\t\t\tassignment   ---   reverse assignment\n", cost));
1517         for (i = 0; i < n; ++i) {
1518                 DBG((rss->dbg, LEVEL_3, "\t\t\t%3d -> %3d         %3d -> %3d\n", i, assignment[i], i, assignment_rev[i]));
1519         }
1520
1521         values    = new_nodeset(10);
1522         cur_chain = 0;
1523         /* Construction of the minimal chain partition */
1524         for (j = 0; j < n; ++j) {
1525                 /* check nodes, which did not occur as target */
1526                 if (assignment_rev[j] == -1) {
1527                         int       xj      = idx_map[j];
1528                         ir_node   *xj_irn = get_idx_irn(rss->irg, xj);
1529                         rss_irn_t *xj_rss = get_rss_irn(rss, xj_irn);
1530                         chain_t   *c      = obstack_alloc(phase_obst(&rss->ph), sizeof(*c));
1531                         int       source;
1532
1533                         /* there was no source for j -> we have a source of a new chain */
1534                         nodeset_insert(values, xj_irn);
1535
1536                         c->elements = plist_obstack_new(phase_obst(&rss->ph));
1537                         c->nr       = cur_chain++;
1538                         plist_insert_back(c->elements, xj_irn);
1539
1540                         xj_rss->chain = c;
1541
1542                         DBG((rss->dbg, LEVEL_2, "\t\tstarting chain %d:\n", c->nr));
1543                         DBG((rss->dbg, LEVEL_2, "\t\t\t%+F (%d)", xj_irn, j));
1544
1545                         /* follow chain, having j as source */
1546                         source = j;
1547                         while (assignment[source] >= 0) {
1548                                 int       target  = assignment[source];
1549                                 int       irn_idx = idx_map[target];
1550                                 ir_node   *irn    = get_idx_irn(rss->irg, irn_idx);
1551                                 rss_irn_t *node   = get_rss_irn(rss, irn);
1552
1553                                 plist_insert_back(c->elements, irn);
1554                                 node->chain = c;
1555
1556                                 DB((rss->dbg, LEVEL_2, " -> %+F (%d)", irn, target));
1557
1558                                 /* new source = last target */
1559                                 source = target;
1560                         }
1561
1562                         DB((rss->dbg, LEVEL_2, "\n"));
1563                 }
1564         }
1565
1566         /*
1567                 Computing the maximal antichain: Select an element from each
1568                 chain such, such it is parallel with the others.
1569         */
1570         DBG((rss->dbg, LEVEL_1, "\t\tcomputing set of saturation values (MAX AC)\n"));
1571         DBG((rss->dbg, LEVEL_3, "\t\tstarting with:\n"));
1572
1573         DEBUG_ONLY(
1574                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_3)
1575                         dump_nodeset(values, "\t\t\t");
1576         )
1577
1578         temp = NULL;
1579         do {
1580                 /*
1581                         We need an explicit array for the values as
1582                         we cannot iterate multiple times over the same
1583                         set at the same time. :-(((((
1584                 */
1585                 int     n         = nodeset_count(values);
1586                 int     i         = 0;
1587                 ir_node **val_arr = NEW_ARR_F(ir_node *, n);
1588
1589                 foreach_nodeset(values, u_irn)
1590                         val_arr[i++] = u_irn;
1591
1592                 if (temp)
1593                         del_nodeset(temp);
1594
1595                 temp = new_nodeset(10);
1596
1597                 /* Select all nodes from current value set, having another node in the set as descendant. */
1598                 for (i = 0; i < n; ++i) {
1599                         rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1600
1601                         for (j = 0; j < n; ++j) {
1602                                 if (i != j) {
1603                                         rss_edge_t key;
1604
1605                                         key.src = val_arr[i];
1606                                         key.tgt = val_arr[j];
1607
1608                                         if (pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1609                                                 /* v[j] is descendant of u -> remove u and break */
1610                                                 nodeset_insert(temp, u->irn);
1611                                                 nodeset_remove(values, u->irn);
1612
1613                                                 DBG((rss->dbg, LEVEL_3, "\t\t\tremoving %+F from values, adding it to temp\n", u->irn));
1614
1615                                                 break;
1616                                         }
1617                                 }
1618                         }
1619                 }
1620
1621                 /* Try to insert the chain predecessor of all selected u's */
1622                 foreach_nodeset(temp, u_irn) {
1623                         rss_irn_t       *u  = get_rss_irn(rss, u_irn);
1624                         chain_t         *c  = u->chain;
1625                         plist_element_t *el = plist_find_value(c->elements, u_irn);
1626
1627                         assert(el && "Missing element in chain!");
1628
1629                         /* If u has predecessor in chain: insert the predecessor */
1630                         if (el = plist_element_get_prev(el)) {
1631                                 nodeset_insert(values, plist_element_get_value(el));
1632                                 DBG((rss->dbg, LEVEL_3, "\t\t\tadding %+F to values\n", plist_element_get_value(el)));
1633                         }
1634                 }
1635
1636
1637                 DEL_ARR_F(val_arr);
1638         } while (nodeset_count(temp) > 0);
1639
1640         DBG((rss->dbg, LEVEL_2, "\t\tfinal set:\n"));
1641         DEBUG_ONLY(
1642                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2) {
1643                         dump_nodeset(values, "\t\t\t");
1644                 }
1645         );
1646
1647         if (rss->opts->dump_flags & RSS_DUMP_MAXAC)
1648                 debug_vcg_dump_pkg(rss, values, iteration);
1649
1650         del_nodeset(temp);
1651
1652         return values;
1653
1654 #undef MAP_IDX
1655 }
1656
1657 /**
1658  * Computes the best serialization between two nodes of sat_vals.
1659  */
1660 static serialization_t *compute_best_admissible_serialization(rss_t *rss, nodeset *sat_vals, serialization_t *ser, int num_regs) {
1661         int        n                    = nodeset_count(sat_vals);
1662         int        n_idx                = ARR_LEN_SAFE(rss->idx_map);
1663         int        i                    = 0;
1664         ir_node    **val_arr            = alloca(n * sizeof(val_arr[0]));
1665         bitset_t   *bs_sv               = bitset_alloca(n_idx);
1666         bitset_t   *bs_vdesc            = bitset_alloca(n_idx);
1667         bitset_t   *bs_tmp              = bitset_alloca(n_idx);
1668         bitset_t   *bs_ukilldesc        = bitset_alloca(n_idx);
1669         int        best_benefit         = INT_MAX;
1670         int        best_omega2          = INT_MAX;
1671         int        best_benefit_omega20 = INT_MAX;
1672         int        has_omega1           = 0;
1673         int        j, k;
1674         ir_node    *irn;
1675         rss_edge_t min_benefit_edge;
1676         rss_edge_t min_omega20_edge;
1677         rss_irn_t  *ser_u_omega1, *ser_v_omega1;
1678         rss_irn_t  *ser_u_omega20, *ser_v_omega20;
1679
1680         DBG((rss->dbg, LEVEL_1, "\tcomputing admissible serializations:\n"));
1681
1682         /*
1683                 We need an explicit array for the values as
1684                 we cannot iterate multiple times over the same
1685                 set at the same time. :-(((((
1686         */
1687
1688         foreach_nodeset(sat_vals, irn) {
1689                 val_arr[i++] = irn;
1690                 bitset_set(bs_sv, BLOCK_IDX_MAP(rss, irn));
1691         }
1692
1693         /*
1694                 We build all admissible serializations and remember the best found so far.
1695                 For u in sat_vals:
1696                  For v in sat_val:
1697                    if v in pkiller(u): add edge from v to all other pkiller(u)
1698                    else: for all vv in pkiller(u): add edge from v to vv if there exists no path from vv to v
1699         */
1700
1701 /*
1702         A node is unserializable if:
1703         - it has only one killer and this one is Sink
1704     - it kills no other values
1705         In this case there is no serialization which could
1706         reduce the registerpressure
1707 */
1708 #define IS_UNSERIALIZABLE_NODE(rss_node)           \
1709         ((plist_count(rss_node->pkiller_list) == 1) && \
1710         is_Sink(rss_node->killer)                   && \
1711         (plist_count(rss_node->kill_value_list) == 0))
1712
1713         /* for all u in sat_vals */
1714         for (i = 0; i < n; ++i) {
1715                 rss_irn_t       *u       = get_rss_irn(rss, val_arr[i]);
1716                 int             u_height = get_irn_height(rss->h, val_arr[i]);
1717                 plist_element_t *el;
1718
1719                 /* ignore nodes where serialization does not help */
1720                 if (IS_UNSERIALIZABLE_NODE(u)) {
1721                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", u->irn));
1722                         continue;
1723                 }
1724
1725                 /* accumulate all descendants of all pkiller(u) */
1726                 bitset_clear_all(bs_ukilldesc);
1727                 foreach_plist(u->pkiller_list, el) {
1728                         ir_node   *irn  = plist_element_get_value(el);
1729                         rss_irn_t *node = get_rss_irn(rss, irn);
1730
1731                         if (! is_Sink(irn))
1732                                 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, irn));
1733                         else
1734                                 continue;
1735
1736                         for (k = ARR_LEN_SAFE(node->descendants) - 1; k >= 0; --k) {
1737                                 if (! is_Sink(node->descendants[k]))
1738                                         bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, node->descendants[k]));
1739                         }
1740                 }
1741
1742                 /* for all v in sat_vals */
1743                 for (j = 0; j < n; ++j) {
1744                         ir_node   *v_irn   = val_arr[j];
1745                         rss_irn_t *v       = get_rss_irn(rss, v_irn);
1746                         unsigned  v_height = get_irn_height(rss->h, v_irn);
1747                         int       omega1, omega2, is_pkiller;
1748
1749                         /* v cannot be serialized with itself
1750                          * ignore nodes where serialization does not help */
1751                         if (i == j || IS_UNSERIALIZABLE_NODE(v)) {
1752                                 if (i != j)
1753                                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", v->irn));
1754                                 continue;
1755                         }
1756
1757                         /* get descendants of v */
1758                         bitset_clear_all(bs_vdesc);
1759                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v_irn));
1760                         for (k = ARR_LEN_SAFE(v->descendants) - 1; k >= 0; --k) {
1761                                 if (! is_Sink(v->descendants[k]))
1762                                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v->descendants[k]));
1763                         }
1764
1765                         /* if v is in pkiller(u) */
1766                         is_pkiller = plist_has_value(u->pkiller_list, v_irn);
1767
1768                         /* for all vv in pkiller(u) */
1769                         foreach_plist(u->pkiller_list, el) {
1770                                 ir_node *vv_irn  = plist_element_get_value(el);
1771                                 int     add_edge;
1772
1773                                 if (is_Sink(vv_irn))
1774                                         continue;
1775
1776                                 if (is_pkiller)
1777                                         add_edge = vv_irn != v_irn && skip_Proj(vv_irn) != skip_Proj(v_irn);
1778                                 else
1779                                         add_edge = ! heights_reachable_in_block(rss->h, skip_Proj(vv_irn), skip_Proj(v_irn));
1780
1781                                 /*
1782                                         As we add an edge from vv -> v, we have to make sure,
1783                                         that there exists no path from v to vv.
1784                                 */
1785
1786                                 if (add_edge) {
1787                                         unsigned vv_height = get_irn_height(rss->h, vv_irn);
1788                                         int mu1, mu2, critical_path_cost;
1789
1790                                         /*
1791                                                 mu1 = | descendants(v) cut sat_vals |
1792                                                 the number of saturating values which cannot
1793                                                 be simultaneously alive with u
1794                                         */
1795                                         bitset_copy(bs_tmp, bs_vdesc);
1796                                         mu1 = bitset_popcnt(bitset_and(bs_tmp, bs_sv));
1797
1798                                         /*
1799                                                 mu2 = | accum_desc_all_pkiller(u) without descendants(v) |
1800                                         */
1801                                         if (is_pkiller) {
1802                                                 bitset_copy(bs_tmp, bs_ukilldesc);
1803                                                 mu2 = bitset_popcnt(bitset_andnot(bs_tmp, bs_vdesc));
1804                                         }
1805                                         else {
1806                                                 mu2 = 0;
1807                                         }
1808
1809                                         /* omega1 = mu1 - mu2 */
1810                                         omega1 = mu1 - mu2;
1811
1812                                         if (omega1 != 0)
1813                                                 has_omega1 = 1;
1814
1815                                         /* omega2 = increase of critical path */
1816                                         critical_path_cost =
1817                                                 v_height                        /* longest path from v to sink */
1818                                                 + rss->max_height - vv_height   /* longest path from source to vv */
1819                                                 + 1;                            /* edge */
1820
1821                                         /*
1822                                                 If critical_path_cost > max_height -> the new edge
1823                                                 would increase the longest critical path by the difference.
1824                                         */
1825                                         omega2 = critical_path_cost > rss->max_height ? critical_path_cost - rss->max_height : 0;
1826
1827                                         /* this keeps track of the edge with the best benefit */
1828                                         if (omega1 >= num_regs - n && omega1 < best_benefit) {
1829                                                 min_benefit_edge.src = v_irn;
1830                                                 min_benefit_edge.tgt = vv_irn;
1831
1832                                                 ser_u_omega1 = u;
1833                                                 ser_v_omega1 = v;
1834
1835                                                 best_benefit    = omega1;
1836                                                 ser->new_killer = is_pkiller;
1837                                         }
1838
1839                                         /* this keeps track of the edge with the best omega1 costs where omega2 == 0 */
1840                                         if (omega2 == 0 && omega1 >= num_regs - n && omega1 < best_benefit_omega20) {
1841                                                 min_omega20_edge.src = v_irn;
1842                                                 min_omega20_edge.tgt = vv_irn;
1843
1844                                                 ser_u_omega20 = u;
1845                                                 ser_v_omega20 = v;
1846
1847                                                 best_benefit_omega20 = omega1;
1848                                                 ser->new_killer      = is_pkiller;
1849                                         }
1850
1851                                         best_omega2 = MIN(best_omega2, omega2);
1852
1853                                         DBG((rss->dbg, LEVEL_2, "\t\tfound %+F -> %+F (w1 %d, w2 %d)\n",
1854                                                 v_irn, vv_irn, omega1, omega2));
1855                                 } /* if add_edge */
1856                         } /* for all vv in pkiller(u) */
1857                 } /* for all v in sat_vals */
1858         } /* for all u in sat_vals */
1859
1860         if (! has_omega1)
1861                 return NULL;
1862
1863         if (best_omega2 == 0) {
1864                 ser->u         = ser_u_omega20;
1865                 ser->v         = ser_v_omega20;
1866                 ser->edge->src = min_omega20_edge.src;
1867                 ser->edge->tgt = min_omega20_edge.tgt;
1868                 ser->omega1    = best_benefit_omega20;
1869                 ser->omega2    = best_omega2;
1870         }
1871         else {
1872                 ser->u         = ser_u_omega1;
1873                 ser->v         = ser_v_omega1;
1874                 ser->edge->src = min_benefit_edge.src;
1875                 ser->edge->tgt = min_benefit_edge.tgt;
1876                 ser->omega1    = best_benefit;
1877                 ser->omega2    = best_omega2;
1878         }
1879
1880         return ser;
1881
1882 #undef IS_UNSERIALIZABLE_NODE
1883 }
1884
1885 /**
1886  * Perform the value serialization heuristic and add all
1887  * computed serialization edges as dependencies to the irg.
1888  */
1889 static void perform_value_serialization_heuristic(rss_t *rss) {
1890         bitset_t *arch_nonign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1891         bitset_t *abi_ign_bs     = bitset_alloca(arch_register_class_n_regs(rss->cls));
1892         int      available_regs, iteration;
1893         dvg_t    dvg;
1894         nodeset  *sat_vals;
1895         pset *ser_set = new_pset(cmp_rss_edges, 20);
1896
1897         /* available_regs = R = |arch_non_ignore_regs cut ~abi_ignore_regs| */
1898         arch_put_non_ignore_regs(rss->arch_env, rss->cls, arch_nonign_bs);
1899         be_abi_put_ignore_regs(rss->abi, rss->cls, abi_ign_bs);
1900         bitset_andnot(arch_nonign_bs, abi_ign_bs);
1901         available_regs = bitset_popcnt(arch_nonign_bs);
1902
1903         DBG((rss->dbg, LEVEL_1, "\n\t#available regs: %d\n\n", available_regs));
1904
1905         /*
1906                 At first we need to compute the disjoint value DAG (DVG = {V, E_dv}).
1907                 V    = set of all nodes we are currently interested in
1908                 E_dv = there is an edge from u to v iff v is a descendant of killer(u), forall u, v in V
1909         */
1910         dvg.nodes = new_nodeset(plist_count(rss->nodes));
1911         dvg.edges = new_pset(cmp_rss_edges, plist_count(rss->nodes) * 5);
1912         compute_dvg(rss, &dvg);
1913
1914         /*
1915                 Then we perform the heuristic serialization algorithm
1916                 on the DVG which gives us all necessary serialization
1917                 edges.
1918         */
1919         DBG((rss->dbg, LEVEL_1, "\tcomputing maximal antichain:\n"));
1920         iteration = 0;
1921         sat_vals  = compute_maximal_antichain(rss, &dvg, iteration++);
1922         while (sat_vals && (nodeset_count(sat_vals) > available_regs)) {
1923                 serialization_t *ser, best_ser;
1924                 rss_edge_t      *edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*edge));
1925                 rss_irn_t       *tgt, *src;
1926                 ir_node         *dep_src, *dep_tgt;
1927
1928                 best_ser.edge = edge;
1929                 ser = compute_best_admissible_serialization(rss, sat_vals, &best_ser, available_regs);
1930
1931                 DBG((rss->dbg, LEVEL_1, "\tcurrent register saturation %d, target %d\n", nodeset_count(sat_vals), available_regs));
1932
1933                 if (! ser) {
1934                         DBG((rss->dbg, LEVEL_1, "\tno RS improving serialization found, breaking at iteration %d\n", iteration));
1935                         break;
1936                 }
1937
1938                 /* Insert the serialization as dependency edge into the irg. */
1939                 DBG((rss->dbg, LEVEL_1, "\tserializing %+F -> %+F with edge %+F -> %+F and cost %d, %d\n",
1940                         ser->u->irn, ser->v->irn, ser->edge->src, ser->edge->tgt, ser->omega1, ser->omega2));
1941
1942                 if (pset_find(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge)))
1943                         ir_printf("WARNING: serialization %+F -> %+F computed twice!\n", ser->edge->src, ser->edge->tgt);
1944
1945
1946                 pset_insert(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge));
1947
1948                 /* update the dvg */
1949                 update_dvg(rss, &dvg, ser->v, ser->u);
1950                 update_dvg(rss, &dvg, get_rss_irn(rss, ser->edge->src), get_rss_irn(rss, ser->edge->tgt));
1951                 del_nodeset(sat_vals);
1952
1953                 dep_src = skip_Proj(ser->edge->src);
1954                 dep_tgt = ser->edge->tgt;
1955                 add_irn_dep(dep_src, dep_tgt);
1956
1957                 /* Update descendants, consumer and pkillers of the target */
1958                 update_node_info(rss, ser->edge->tgt, ser->edge->src);
1959
1960                 /* TODO: try to find a cheaper way for updating height information */
1961                 rss->max_height = heights_recompute_block(rss->h, rss->block);
1962
1963                 /* Recompute the antichain for next serialization */
1964                 DBG((rss->dbg, LEVEL_1, "\tre-computing maximal antichain:\n"));
1965                 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
1966         }
1967
1968         del_nodeset(dvg.nodes);
1969         del_pset(dvg.edges);
1970 }
1971
1972 /**
1973  * Do initial calculations for a block.
1974  */
1975 static void process_block(ir_node *block, void *env) {
1976         rss_t *rss = env;
1977         int   i, n;
1978         const ir_edge_t *edge;
1979
1980         phase_init(&rss->ph, "rss block preprocessor", rss->irg, PHASE_DEFAULT_GROWTH, init_rss_irn);
1981
1982         DBG((rss->dbg, LEVEL_1, "preprocessing block %+F\n", block));
1983         rss->block = block;
1984
1985         /* build an index map for all nodes in the current block */
1986         i            = 0;
1987         n            = get_irn_n_edges(block);
1988         NEW_ARR_A(int *, rss->idx_map, n);
1989         foreach_out_edge(block, edge) {
1990                 ir_node *irn      = get_edge_src_irn(edge);
1991                 rss->idx_map[i++] = get_irn_idx(irn);
1992         }
1993         qsort(rss->idx_map, n, sizeof(rss->idx_map[0]), cmp_int);
1994         rss->max_height = heights_recompute_block(rss->h, block);
1995
1996         /* loop over all register classes */
1997         for (i = arch_isa_get_n_reg_class(rss->arch_env->isa) - 1; i >= 0; --i) {
1998                 const arch_register_class_t *cls = arch_isa_get_reg_class(rss->arch_env->isa, i);
1999
2000                 rss->cls = cls;
2001                 DBG((rss->dbg, LEVEL_1, "register class %s\n", arch_register_class_name(cls)));
2002
2003                 /* reset the list of interesting nodes */
2004                 plist_clear(rss->nodes);
2005                 plist_insert_back(rss->nodes, _sink);
2006
2007                 /* collect all nodes having a certain register class */
2008                 foreach_out_edge(block, edge) {
2009                         ir_node *irn = get_edge_src_irn(edge);
2010
2011                         /*
2012                                 We skip:
2013                                 - mode_T nodes (the projs are asked)
2014                                 - mode_X nodes (control flow nodes are always scheduled last)
2015                                 - Keeps (they are always immediately scheduled)
2016                                 - Phi (same as Keep)
2017                         */
2018                         if (get_irn_mode(irn) == mode_T || be_is_Keep(irn) || is_Phi(irn))
2019                                 continue;
2020
2021                         /*
2022                                 In case of a proj, we skip
2023                                 - Barrier (they are a Barrier :)
2024                                 - Start
2025                                 - the Proj itself, as it's scheduled always with it's super node
2026                         */
2027                         if (is_Proj(irn)) {
2028                                 ir_node *pred = get_Proj_pred(irn);
2029                                 if (be_is_Barrier(pred) || is_Start(pred))
2030                                         continue;
2031                         }
2032
2033                         if (! arch_irn_is(rss->arch_env, irn, ignore) && arch_get_irn_reg_class(rss->arch_env, irn, -1) == cls) {
2034                                 plist_insert_back(rss->nodes, skip_Proj(irn));
2035
2036                                 /* calculate the descendants and consumer for each node in the block */
2037                                 collect_node_info(rss, skip_Proj(irn));
2038                         }
2039                 }
2040
2041                 /* compute the potential killing set PK(G) */
2042                 compute_pkill_set(rss);
2043
2044                 /* compute the killing function k* */
2045                 compute_killing_function(rss);
2046
2047                 /*
2048                         Compute the heuristic value serialization and
2049                         add the necessary dependencies to the irg.
2050                 */
2051                 perform_value_serialization_heuristic(rss);
2052         }
2053
2054         phase_free(&rss->ph);
2055 }
2056
2057 #ifdef WITH_LIBCORE
2058 /**
2059  * Register the options.
2060  */
2061 void rss_register_options(lc_opt_entry_t *grp) {
2062         static int     run_once = 0;
2063         lc_opt_entry_t *rss_grp;
2064
2065         if (! run_once) {
2066                 run_once = 1;
2067                 rss_grp  = lc_opt_get_grp(grp, "rss");
2068
2069                 lc_opt_add_table(rss_grp, rss_option_table);
2070         }
2071 }
2072 #endif /* WITH_LIBCORE */
2073
2074 /**
2075  * Preprocess the irg for scheduling.
2076  */
2077 void rss_schedule_preparation(const be_irg_t *birg) {
2078         rss_t rss;
2079
2080         FIRM_DBG_REGISTER(rss.dbg, "firm.be.sched.rss");
2081
2082         //firm_dbg_set_mask(rss.dbg, LEVEL_1 | LEVEL_2 | LEVEL_3);
2083
2084         init_rss_special_nodes(birg->irg);
2085
2086         rss.irg      = birg->irg;
2087         rss.arch_env = birg->main_env->arch_env;
2088         rss.abi      = birg->abi;
2089         rss.h        = heights_new(birg->irg);
2090         rss.nodes    = plist_new();
2091         rss.opts     = &rss_options;
2092         irg_block_walk_graph(birg->irg, NULL, process_block, &rss);
2093         heights_free(rss.h);
2094         plist_free(rss.nodes);
2095
2096         if (birg->main_env->options->dump_flags & DUMP_SCHED)
2097                 be_dump(rss.irg, "-rss", dump_ir_block_graph);
2098 }