cad9322f9414377c8fc1d7527c4f2abf8d9a9efe
[libfirm] / ir / be / beschedrss.c
1 /**
2  * Implementation of a register saturating list scheduler
3  * as described in: Sid-Ahmed-Ali Touati
4  * Register Saturation in Superscalar and VLIW Codes
5  *
6  * @license This file protected by GPL -  GNU GENERAL PUBLIC LICENSE.
7  * @author  Christian Wuerdig
8  * @date    29.08.2006
9  * @cvs-id  $Id$
10  */
11 #ifdef HAVE_CONFIG_H
12 #include "config.h"
13 #endif
14
15 #include <limits.h>
16
17 #include "obst.h"
18 #include "debug.h"
19
20 #include "irgraph_t.h"
21 #include "irnode_t.h"
22 #include "iredges_t.h"
23 #include "ircons_t.h"
24 #include "irphase_t.h"
25 #include "irgwalk.h"
26 #include "irdump.h"
27 #include "irtools.h"
28 #include "irbitset.h"
29 #include "irprintf.h"
30 #include "bipartite.h"
31 #include "hungarian.h"
32 #include "plist.h"
33
34 #include "height.h"
35
36 #include "beabi.h"
37 #include "benode_t.h"
38 #include "besched_t.h"
39
40 #ifdef WITH_LIBCORE
41 #include <libcore/lc_opts.h>
42 #include <libcore/lc_opts_enum.h>
43 #endif /* WITH_LIBCORE */
44
45
46 #define ARR_LEN_SAFE(arr) ((arr) != NULL ? ARR_LEN((arr)) : 0)
47
48 #define HASH_RSS_EDGE(edge) ((get_irn_node_nr((edge)->src) << 16) | (get_irn_node_nr((edge)->tgt) & 0xFFFF))
49 #define BSEARCH_IRN_ARR(val, arr) \
50         bsearch(&(val), (arr), ARR_LEN_SAFE((arr)), sizeof((arr)[0]), cmp_irn_idx)
51
52 #define BLOCK_IDX_MAP(rss, irn) bsearch_for_index(get_irn_idx((irn)), (rss)->idx_map, ARR_LEN_SAFE((rss)->idx_map), 1)
53
54 /* the rss options */
55 typedef struct _rss_opts_t {
56         int dump_flags;
57 } rss_opts_t;
58
59 /* Represents a child with associated costs */
60 typedef struct _child {
61         ir_node *irn;
62         float   cost;
63 } child_t;
64
65 /* We need edges for several purposes. */
66 typedef struct _rss_edge {
67         ir_node *src;
68         ir_node *tgt;
69         void    *next;
70 } rss_edge_t;
71
72 /* Represents a connected bipartite component. */
73 typedef struct _cbc {
74         nodeset *parents;       /**< = S  a set of value producers */
75         nodeset *children;      /**< = T  a set of value consumers */
76         pset    *kill_edges;    /**< = E  a set of edges (t in T, s in S) such as each s in S gets killed by at least one t in T */
77         int     nr;             /**< a deterministic index for set insertion (used as hash) */
78 } cbc_t;
79
80 /* Represents a disjoint value DAG. */
81 typedef struct _dvg {
82         nodeset *nodes;
83         pset    *edges;
84 } dvg_t;
85
86 /* Represents a chain of nodes. */
87 typedef struct _chain {
88         plist_t *elements;   /**< List of chain elements */
89         int     nr;          /**< a deterministic index for set insertion (used as hash) */
90 } chain_t;
91
92 typedef struct _rss_irn {
93         plist_t  *consumer_list;    /**< List of consumers */
94         ir_node **consumer;         /**< Sorted consumer array (needed for faster access) */
95
96         plist_t  *parent_list;      /**< List of parents */
97         plist_t  *pkiller_list;     /**< List of potential killers */
98
99         plist_t  *descendant_list;  /**< List of descendants */
100         ir_node **descendants;      /**< Sorted descendant array (needed for faster access) */
101
102         ir_node  *killer;           /**< The selected unique killer */
103         ir_node  *irn;              /**< The corresponding firm node to this rss_irn */
104
105         chain_t  *chain;            /**< The chain, this node is associated to */
106
107         unsigned desc_walk;         /**< visited flag for collecting descendants */
108         unsigned kill_count;        /**< number of nodes killed by this one */
109
110         unsigned live_out : 1;      /**< irn has consumers outside of it's block */
111         unsigned visited  : 1;      /**< visited flag for bipartite decomposition */
112         unsigned havecons : 1;      /**< visited flag collect consumer */
113         unsigned handled  : 1;      /**< flag indicating whether or not the list structures have been build */
114         unsigned dumped   : 1;      /**< flag indication whether or not this node was dumped */
115 } rss_irn_t;
116
117 /* Represents a serialization edge with associated costs. */
118 typedef struct _serialization {
119         rss_irn_t  *u;       /* the top node */
120         rss_irn_t  *v;       /* the node about to be serialized after u */
121         rss_edge_t *edge;    /* the edge selected for the serialization */
122         int        omega1;   /* estimated: available regs - RS reduction */
123         int        omega2;   /* increase in critical path length */
124         int        new_killer;
125 } serialization_t;
126
127 typedef struct _rss {
128         phase_t          ph;              /**< Phase to hold some data */
129         heights_t        *h;              /**< The current height object */
130         ir_graph         *irg;            /**< The irg to preprocess */
131         plist_t          *nodes;          /**< The list of interesting nodes */
132         const arch_env_t *arch_env;       /**< The architecture environment */
133         be_abi_irg_t     *abi;            /**< The abi for this irg */
134         pset             *cbc_set;        /**< A set of connected bipartite components */
135         ir_node          *block;          /**< The current block in progress. */
136         int              *idx_map;        /**< Mapping irn indices to per block indices */
137         unsigned         max_height;      /**< maximum height in the current block */
138         rss_opts_t       *opts;           /**< The options */
139         be_lv_t          *liveness;       /**< The liveness information for this irg */
140         pset             *live_block;     /**< Values alive at end of block */
141         const arch_register_class_t *cls; /**< The current register class */
142         DEBUG_ONLY(firm_dbg_module_t *dbg);
143 } rss_t;
144
145 #define get_rss_irn(rss, irn)  (phase_get_or_set_irn_data(&rss->ph, irn))
146
147 /**
148  * We need some special nodes:
149  * a source and a sink for all live-in and live-out values of a block
150  */
151
152 enum {
153         iro_rss_Source,
154         iro_rss_Sink,
155         iro_rss_last
156 };
157
158 static ir_node *_source = NULL;
159 static ir_node *_sink   = NULL;
160
161 #define is_Source(irn) ((irn) == _source)
162 #define is_Sink(irn)   ((irn) == _sink)
163
164 enum {
165         RSS_DUMP_NONE  = 0,
166         RSS_DUMP_CBC   = 1 << 0,
167         RSS_DUMP_PKG   = 1 << 1,
168         RSS_DUMP_KILL  = 1 << 2,
169         RSS_DUMP_DVG   = 1 << 3,
170         RSS_DUMP_MAXAC = 1 << 4,
171         RSS_DUMP_ALL   = (RSS_DUMP_MAXAC << 1) - 1,
172 };
173
174 static rss_opts_t rss_options = {
175         RSS_DUMP_NONE,
176 };
177
178 #ifdef WITH_LIBCORE
179 static const lc_opt_enum_int_items_t dump_items[] = {
180         { "none",  RSS_DUMP_NONE  },
181         { "cbc",   RSS_DUMP_CBC   },
182         { "pkg",   RSS_DUMP_PKG   },
183         { "kill",  RSS_DUMP_KILL  },
184         { "dvg",   RSS_DUMP_DVG   },
185         { "maxac", RSS_DUMP_MAXAC },
186         { "all",   RSS_DUMP_ALL   },
187         { NULL, 0 }
188 };
189
190 static lc_opt_enum_int_var_t dump_var = {
191         &rss_options.dump_flags, dump_items
192 };
193
194 static const lc_opt_table_entry_t rss_option_table[] = {
195         LC_OPT_ENT_ENUM_MASK("dump", "dump phases", &dump_var),
196         { NULL }
197 };
198 #endif /* WITH_LIBCORE */
199
200 /******************************************************************************
201  *  _          _                    __                  _   _
202  * | |        | |                  / _|                | | (_)
203  * | |__   ___| |_ __   ___ _ __  | |_ _   _ _ __   ___| |_ _  ___  _ __  ___
204  * | '_ \ / _ \ | '_ \ / _ \ '__| |  _| | | | '_ \ / __| __| |/ _ \| '_ \/ __|
205  * | | | |  __/ | |_) |  __/ |    | | | |_| | | | | (__| |_| | (_) | | | \__ \
206  * |_| |_|\___|_| .__/ \___|_|    |_|  \__,_|_| |_|\___|\__|_|\___/|_| |_|___/
207  *            | |
208  *            |_|
209  ******************************************************************************/
210
211 /**
212  * Acquire opcodes and create source and sink nodes.
213  */
214 static void init_rss_special_nodes(ir_graph *irg) {
215         ir_node *block         = get_irg_start_block(irg);
216         int     iro_rss_base   = get_next_ir_opcodes(iro_rss_last);
217         ir_op   *op_rss_Source = new_ir_op(iro_rss_base + iro_rss_Source, "rss_Source", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
218         ir_op   *op_rss_Sink   = new_ir_op(iro_rss_base + iro_rss_Sink,   "rss_Sink",   op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
219         _source                = new_ir_node(NULL, irg, block, op_rss_Source, mode_ANY, 0, NULL);
220         _sink                  = new_ir_node(NULL, irg, block, op_rss_Sink, mode_ANY, 0, NULL);
221 }
222
223 static int cmp_int(const void *a, const void *b) {
224         const int *i1 = a;
225         const int *i2 = b;
226
227         return QSORT_CMP(*i1, *i2);
228 }
229
230 static int cmp_child_costs(const void *a, const void *b) {
231         const child_t *c1 = a;
232         const child_t *c2 = b;
233
234         return QSORT_CMP(c1->cost, c2->cost);
235 }
236
237 static int cmp_irn_idx(const void *a, const void *b) {
238         const ir_node *n1 = *(ir_node **)a;
239         const ir_node *n2 = *(ir_node **)b;
240
241         return QSORT_CMP(get_irn_idx(n1), get_irn_idx(n2));
242 }
243
244 static int cmp_rss_edges(const void *a, const void *b) {
245         const rss_edge_t *e1 = a;
246         const rss_edge_t *e2 = b;
247
248         return (e1->src != e2->src) || (e1->tgt != e2->tgt);
249 }
250
251 static int bsearch_for_index(int key, int *arr, size_t len, int force) {
252         int left = 0;
253         int right = len;
254
255         while (right >= left) {
256                 int idx = (left + right) / 2;
257
258                 if (key < arr[idx])
259                         right = idx - 1;
260                 else if (key > arr[idx])
261                         left = idx + 1;
262                 else
263                         return idx;
264         }
265
266         if (force)
267                 assert(0 && "Something is wrong, key not found.");
268         return -1;
269 }
270
271 static ir_node **build_sorted_array_from_list(plist_t *irn_list, struct obstack *obst) {
272         plist_element_t *el;
273         int     i     = 0;
274         int     len   = plist_count(irn_list);
275         ir_node **arr = NEW_ARR_D(ir_node *, obst, len);
276
277         /* copy the list into the array */
278         foreach_plist(irn_list, el) {
279                 arr[i++] = plist_element_get_value(el);
280         }
281
282         /* sort the array by node index */
283         qsort(arr, len, sizeof(arr[0]), cmp_irn_idx);
284
285         return arr;
286 }
287
288 /*****************************************************
289  *      _      _                       _
290  *     | |    | |                     (_)
291  *   __| | ___| |__  _   _  __ _  __ _ _ _ __   __ _
292  *  / _` |/ _ \ '_ \| | | |/ _` |/ _` | | '_ \ / _` |
293  * | (_| |  __/ |_) | |_| | (_| | (_| | | | | | (_| |
294  *  \__,_|\___|_.__/ \__,_|\__, |\__, |_|_| |_|\__, |
295  *                          __/ | __/ |         __/ |
296  *                         |___/ |___/         |___/
297  *
298  *****************************************************/
299
300 static void dump_nodeset(nodeset *ns, const char *prefix) {
301         ir_node *irn;
302         foreach_nodeset(ns, irn) {
303                 ir_fprintf(stderr, "%s%+F\n", prefix, irn);
304         }
305 }
306
307 static void build_file_name(rss_t *rss, const char *suffix, size_t suf_len, char *buf, size_t len) {
308         const char *irg_name;
309
310         memset(buf, 0, len);
311         irg_name = get_entity_name(get_irg_entity(rss->irg));
312         snprintf(buf, len - suf_len, "%s-%s-block-%ld",
313                 irg_name, arch_register_class_name(rss->cls), get_irn_node_nr(rss->block));
314         strcat(buf, suffix);
315 }
316
317 /* Dumps all collected bipartite components of current irg as vcg. */
318 static void debug_vcg_dump_bipartite(rss_t *rss) {
319         cbc_t *cbc;
320         FILE  *f;
321         char  file_name[256];
322         static const char suffix[] = "-RSS-CBC.vcg";
323
324         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
325         f = fopen(file_name, "w");
326
327         ir_fprintf(f, "graph: { title: \"connected bipartite component graph of %+F\"\n", rss->irg);
328         fprintf(f, "display_edge_labels: no\n");
329         fprintf(f, "layoutalgorithm: mindepth\n");
330         fprintf(f, "manhattan_edges: yes\n\n");
331
332         foreach_pset(rss->cbc_set, cbc) {
333                 ir_node    *n;
334                 rss_edge_t *ke;
335
336                 fprintf(f, "graph: { titel: \"cbc %d\" label: \"cbc %d\" status:clustered color:yellow\n", cbc->nr, cbc->nr);
337                 foreach_nodeset(cbc->parents, n) {
338                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
339                 }
340                 foreach_nodeset(cbc->children, n) {
341                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
342                 }
343                 foreach_pset(cbc->kill_edges, ke) {
344                         ir_fprintf(f, "edge: { sourcename: \"n%d_%d\" targetname: \"n%d_%d\" }\n",
345                                 get_irn_node_nr(ke->src), cbc->nr, get_irn_node_nr(ke->tgt), cbc->nr);
346                 }
347                 fprintf(f, "}\n\n");
348         }
349         fprintf(f, "}\n");
350         fclose(f);
351 }
352
353 /* Dump the computed killing function as vcg. */
354 static void debug_vcg_dump_kill(rss_t *rss) {
355         FILE            *f;
356         char            file_name[256];
357         plist_element_t *el;
358         static const char suffix[] = "-RSS-KILL.vcg";
359
360         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
361         f = fopen(file_name, "w");
362
363         ir_fprintf(f, "graph: { title: \"computed kill graph of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
364         fprintf(f, "display_edge_labels: no\n");
365         fprintf(f, "layoutalgorithm: mindepth\n");
366         fprintf(f, "manhattan_edges: yes\n\n");
367
368         {
369                 /* reset dump_flag */
370                 ir_node *irn;
371                 foreach_phase_irn(&rss->ph, irn) {
372                         rss_irn_t *node = get_rss_irn(rss, irn);
373                         node->dumped = 0;
374                 }
375         }
376
377         /* dump all nodes and their killers */
378         foreach_plist(rss->nodes, el) {
379                 ir_node   *irn     = plist_element_get_value(el);
380                 rss_irn_t *rirn    = get_rss_irn(rss, irn);
381                 rss_irn_t *pk_rirn = get_rss_irn(rss, rirn->killer);
382
383                 if (! rirn->dumped) {
384                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
385                         rirn->dumped = 1;
386                 }
387
388                 if (! pk_rirn->dumped) {
389                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(rirn->killer), rirn->killer);
390                         pk_rirn->dumped = 1;
391                 }
392
393                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
394                         get_irn_node_nr(rirn->killer), get_irn_node_nr(irn));
395         }
396
397         fprintf(f, "}\n");
398         fclose(f);
399 }
400
401 /* Dumps the potential killing DAG (PKG) as vcg. */
402 static void debug_vcg_dump_pkg(rss_t *rss, nodeset *max_ac, int iteration) {
403         FILE              *f;
404         char              file_name[256];
405         char              *suffix   = alloca(32);
406         static const char suffix1[] = "-RSS-PKG.vcg";
407         static const char suffix2[] = "-RSS-PKG-MAXAC.vcg";
408         plist_element_t   *el;
409
410         if (! max_ac) {
411                 snprintf(suffix, 32, "%s", suffix1);
412         }
413         else {
414                 snprintf(suffix, 32, "-%02d%s", iteration, suffix2);
415         }
416
417         build_file_name(rss, suffix, strlen(suffix) + 1, file_name, sizeof(file_name));
418         f = fopen(file_name, "w");
419
420         ir_fprintf(f, "graph: { title: \"potential killing DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
421         fprintf(f, "display_edge_labels: no\n");
422         fprintf(f, "layoutalgorithm: mindepth\n");
423         fprintf(f, "manhattan_edges: yes\n\n");
424
425         {
426                 /* reset dump_flag */
427                 ir_node *irn;
428                 foreach_phase_irn(&rss->ph, irn) {
429                         rss_irn_t *node = get_rss_irn(rss, irn);
430                         node->dumped = 0;
431                 }
432         }
433
434         foreach_plist(rss->nodes, el) {
435                 ir_node   *irn  = plist_element_get_value(el);
436                 rss_irn_t *rirn = get_rss_irn(rss, irn);
437                 char      *c1   = "";
438                 plist_element_t *k_el;
439
440                 /* dump selected saturating values in yellow */
441                 if (max_ac && nodeset_find(max_ac, irn))
442                         c1 = "color:yellow";
443
444                 if (! rirn->dumped) {
445                         if (rirn->chain)
446                                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(irn), irn, rirn->chain->nr, c1);
447                         else
448                                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(irn), irn, c1);
449                         rirn->dumped = 1;
450                 }
451
452                 foreach_plist(rirn->pkiller_list, k_el) {
453                         ir_node   *pkiller = plist_element_get_value(k_el);
454                         rss_irn_t *pk_rirn = get_rss_irn(rss, pkiller);
455                         char      *c2      = "";
456
457                         if (max_ac && nodeset_find(max_ac, pkiller))
458                                 c2 = "color:yellow";
459
460                         if (! pk_rirn->dumped) {
461                                 if (pk_rirn->chain)
462                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(pkiller), pkiller, pk_rirn->chain->nr, c2);
463                                 else
464                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(pkiller), pkiller, c2);
465                                 pk_rirn->dumped = 1;
466                         }
467                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
468                                 get_irn_node_nr(pkiller), get_irn_node_nr(irn));
469                 }
470         }
471         fprintf(f, "}\n");
472         fclose(f);
473 }
474
475 /* Dumps the disjoint value DAG (DVG) as vcg. */
476 static void debug_vcg_dump_dvg(rss_t *rss, dvg_t *dvg) {
477         static const char suffix[] = "-RSS-DVG.vcg";
478         FILE       *f;
479         char       file_name[256];
480         ir_node    *irn;
481         rss_edge_t *edge;
482
483         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
484         f = fopen(file_name, "w");
485
486         ir_fprintf(f, "graph: { title: \"disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
487         fprintf(f, "display_edge_labels: no\n");
488         fprintf(f, "layoutalgorithm: mindepth\n");
489         fprintf(f, "manhattan_edges: yes\n\n");
490
491         /* dump all nodes */
492         foreach_nodeset(dvg->nodes, irn) {
493                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
494         }
495
496         /* dump all edges */
497         foreach_pset(dvg->edges, edge) {
498                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
499                         get_irn_node_nr(edge->src), get_irn_node_nr(edge->tgt));
500         }
501
502         fprintf(f, "}\n");
503         fclose(f);
504 }
505
506 #if 0
507 /* Dumps the PKG(DVG). */
508 static void debug_vcg_dump_dvg_pkiller(rss_t *rss, dvg_t *dvg) {
509         static const char suffix[] = "-RSS-DVG-PKG.vcg";
510         FILE       *f;
511         char       file_name[256];
512         ir_node    *irn;
513
514         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
515         f = fopen(file_name, "w");
516
517         ir_fprintf(f, "graph: { title: \"PKG of disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
518         fprintf(f, "display_edge_labels: no\n");
519         fprintf(f, "layoutalgorithm: mindepth\n");
520         fprintf(f, "manhattan_edges: yes\n\n");
521
522         /* dump all nodes */
523         foreach_nodeset(dvg->nodes, irn) {
524                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
525         }
526
527         /* dump all edges */
528         foreach_nodeset(dvg->nodes, irn) {
529                 rss_irn_t       *node = get_rss_irn(rss, irn);
530                 plist_element_t *el;
531
532                 foreach_plist(node->dvg_pkiller_list, el) {
533                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
534                                 get_irn_node_nr(plist_element_get_value(el)), get_irn_node_nr(irn));
535                 }
536         }
537
538         fprintf(f, "}\n");
539         fclose(f);
540 }
541 #endif /* if 0 */
542
543 /**
544  * In case there is no rss information for irn, initialize it.
545  */
546 static void *init_rss_irn(phase_t *ph, ir_node *irn, void *old) {
547         rss_irn_t *res = old ? old : phase_alloc(ph, sizeof(res[0]));
548
549         res->descendant_list = plist_obstack_new(phase_obst(ph));
550         res->descendants     = NULL;
551
552         res->consumer_list   = plist_obstack_new(phase_obst(ph));
553         res->consumer        = NULL;
554
555         res->pkiller_list    = plist_obstack_new(phase_obst(ph));
556
557         res->parent_list     = plist_obstack_new(phase_obst(ph));
558
559         res->killer          = NULL;
560         res->irn             = irn;
561         res->chain           = NULL;
562
563         res->kill_count      = 0;
564         res->desc_walk       = 0;
565         res->live_out        = 0;
566         res->visited         = 0;
567         res->handled         = 0;
568         res->dumped          = 0;
569         res->havecons        = 0;
570
571         return res;
572 }
573
574 /**
575  * Collect all nodes data dependent on current node.
576  */
577 static void collect_descendants(rss_t *rss, rss_irn_t *rirn, ir_node *irn, int *got_sink, unsigned cur_desc_walk) {
578         const ir_edge_t *edge;
579         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
580         ir_node         *block    = rss->block;
581         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
582         int             i;
583
584         if (cur_node->desc_walk >= cur_desc_walk)
585                 return;
586         cur_node->desc_walk = cur_desc_walk;
587
588         /* Stop at Barriers */
589         if (be_is_Barrier(irn))
590                 return;
591
592         /* loop over normal and dependency edges */
593         for (i = 0; i < 2; ++i) {
594                 foreach_out_edge_kind(irn, edge, ekind[i]) {
595                         ir_node *user = get_edge_src_irn(edge);
596
597                         /* skip ignore nodes as they do not really contribute to register pressure */
598                         if (arch_irn_is(rss->arch_env, user, ignore))
599                                 continue;
600
601                         if (get_irn_mode(user) == mode_X) {
602                                 if (! *got_sink)
603                                         goto force_sink;
604                                 else
605                                         continue;
606                         }
607
608                         if (is_Proj(user)) {
609
610                                 //if (arch_get_irn_reg_class(rss->arch_env, user, -1) == rss->cls)
611                                         collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
612                         }
613                         else {
614                                 /* check if user lives in block and is not a control flow node */
615                                 if (get_nodes_block(user) == block) {
616                                         if (! plist_has_value(rirn->descendant_list, user)) {
617                                                 plist_insert_back(rirn->descendant_list, user);
618                                                 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", user));
619                                         }
620                                         collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
621                                 }
622                                 else if (! *got_sink) {
623 force_sink:
624                                         /* user lives out of block: add sink as descendant if not already done */
625                                         plist_insert_back(rirn->descendant_list, _sink);
626                                         *got_sink = 1;
627                                         DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", _sink));
628                                 }
629                         }
630                 }
631         }
632 }
633
634 /**
635  * Handles a single consumer.
636  */
637 static void collect_single_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *consumer, int *got_sink) {
638         ir_node *block = rss->block;
639
640         assert(! is_Proj(consumer) && "Cannot handle Projs");
641
642         if (! is_Block(consumer) && get_nodes_block(consumer) == block) {
643                 if (! arch_irn_is(rss->arch_env, consumer, ignore) && ! plist_has_value(rss_irn->consumer_list, consumer)) {
644                         plist_insert_back(rss_irn->consumer_list, consumer);
645                         DBG((rss->dbg, LEVEL_2, "\t\tconsumer %+F\n", consumer));
646                 }
647         }
648         else {
649                 rss_irn->live_out = 1;
650                 DBG((rss->dbg, LEVEL_2, "\t\tlive out %+F", consumer));
651                 if (! *got_sink) {
652                         plist_insert_back(rss_irn->consumer_list, _sink);
653                         *got_sink = 1;
654                         DB((rss->dbg, LEVEL_2, ", %+F added instead", _sink));
655                 }
656                 DB((rss->dbg, LEVEL_2, "\n"));
657         }
658 }
659
660 /**
661  * Collect all nodes consuming the value(s) produced by current node.
662  */
663 static void collect_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *irn, int *got_sink) {
664         const ir_edge_t *edge;
665         int             i;
666         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
667         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
668
669         if (cur_node->havecons)
670                 return;
671         cur_node->havecons = 1;
672
673         for (i = 0; i < 2; ++i) {
674                 foreach_out_edge_kind(irn, edge, ekind[i]) {
675                         ir_node *consumer = get_edge_src_irn(edge);
676
677                         if (is_Proj(consumer)) {
678                                 //if (arch_get_irn_reg_class(rss->arch_env, consumer, -1) == rss->cls)
679                                         collect_consumer(rss, rss_irn, consumer, got_sink);
680                         }
681                         else
682                                 collect_single_consumer(rss, rss_irn, consumer, got_sink);
683                 }
684         }
685 }
686
687 /**
688  * Collects all consumer and descendant of a irn.
689  */
690 static void collect_node_info(rss_t *rss, ir_node *irn) {
691         static unsigned cur_desc_walk = 0;
692         rss_irn_t       *rss_irn      = get_rss_irn(rss, irn);
693         int             got_sink;
694
695         assert(! is_Proj(irn) && "Cannot handle Projs.");
696
697         /* check if node info is already available */
698         if (rss_irn->handled)
699                 return;
700                 //phase_reinit_single_irn_data(&rss->ph, irn);
701
702         DBG((rss->dbg, LEVEL_1, "\tcomputing consumers of %+F:\n", irn));
703
704         /* collect all consumer */
705         got_sink = 0;
706         collect_consumer(rss, rss_irn, irn, &got_sink);
707
708         /* build sorted consumer array */
709         rss_irn->consumer = build_sorted_array_from_list(rss_irn->consumer_list, phase_obst(&rss->ph));
710
711         DBG((rss->dbg, LEVEL_1, "\tcompute descendants of %+F:\n", irn));
712
713         /* collect descendants */
714         got_sink = 0;
715         collect_descendants(rss, rss_irn, irn, &got_sink, ++cur_desc_walk);
716
717         /* build sorted descendant array */
718         rss_irn->descendants = build_sorted_array_from_list(rss_irn->descendant_list, phase_obst(&rss->ph));
719
720         rss_irn->handled = 1;
721 }
722
723 /**
724  * Checks if v is a potential killer of u.
725  * v is in pkill(u) iff descendants(v) cut consumer(u) is v
726  *
727  * @param rss   The rss object
728  * @param v      The node to check for killer
729  * @param u      The potentially killed value
730  * @return 1 if v is in pkill(u), 0 otherwise
731  */
732 static int is_potential_killer(rss_t *rss, rss_irn_t *v, rss_irn_t *u) {
733         plist_t *list;
734         ir_node **arr;
735         plist_element_t *el;
736
737         assert(is_Sink(v->irn) || ((plist_count(v->descendant_list) > 0 && v->descendants) || 1));
738         assert(is_Sink(u->irn) || ((plist_count(u->consumer_list)   > 0 && u->consumer)    || 1));
739
740         /* as we loop over the list: loop over the shorter one */
741         if (plist_count(v->descendant_list) > plist_count(u->consumer_list)) {
742                 list = u->consumer_list;
743                 arr  = v->descendants;
744         }
745         else {
746                 list = v->descendant_list;
747                 arr  = u->consumer;
748         }
749
750         /* for each list element: try to find element in array */
751         foreach_plist(list, el) {
752                 ir_node *irn   = plist_element_get_value(el);
753                 ir_node *match = BSEARCH_IRN_ARR(irn, arr);
754
755                 if (match && match != irn)
756                         return 0;
757         }
758
759         return 1;
760 }
761
762 /**
763  * Update descendants, consumer and pkiller list for the given irn.
764  */
765 static void update_node_info(rss_t *rss, ir_node *irn, ir_node *pk_irn) {
766         rss_irn_t *node    = get_rss_irn(rss, irn);
767         rss_irn_t *pkiller = get_rss_irn(rss, pk_irn);
768
769         /* add consumer and rebuild the consumer array */
770         if (! plist_has_value(node->consumer_list, pk_irn)) {
771                 plist_insert_back(node->consumer_list, pk_irn);
772                 node->consumer = build_sorted_array_from_list(node->consumer_list, phase_obst(&rss->ph));
773         }
774
775         /* add pk_irn as descendant, add it's descendants to irn's and rebuild array */
776         if (! plist_has_value(node->descendant_list, pk_irn)) {
777                 plist_element_t *el;
778
779                 plist_insert_back(node->descendant_list, pk_irn);
780
781                 foreach_plist(pkiller->descendant_list, el) {
782                         ir_node *desc = plist_element_get_value(el);
783
784                         if (! plist_has_value(node->descendant_list, desc))
785                                 plist_insert_back(node->descendant_list, desc);
786                 }
787
788                 node->descendants = build_sorted_array_from_list(node->descendant_list, phase_obst(&rss->ph));
789         }
790
791 }
792
793 /**
794  * Compute the potential killing set PK.
795  */
796 static void compute_pkill_set(rss_t *rss) {
797         plist_element_t *u_el, *v_el;
798
799         foreach_plist(rss->nodes, u_el) {
800                 ir_node   *u_irn = plist_element_get_value(u_el);
801                 rss_irn_t *u     = get_rss_irn(rss, u_irn);
802
803                 DBG((rss->dbg, LEVEL_1, "\tcomputing potential killers of %+F:\n", u_irn));
804
805                 /* check each consumer if it is a potential killer  */
806                 foreach_plist(u->consumer_list, v_el) {
807                         ir_node   *v_irn = plist_element_get_value(v_el);
808                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
809
810                         /* check, if v is a potential killer of u */
811                         if (is_potential_killer(rss, v, u)) {
812                                 if (! plist_has_value(u->pkiller_list, v_irn))
813                                         plist_insert_back(u->pkiller_list, v_irn);
814                                 v->kill_count++;
815                                 DBG((rss->dbg, LEVEL_2, "\t\tpotential killer %+F\n", v_irn));
816                         }
817                 }
818
819                 u->killer = _sink;
820         }
821
822         if (rss->opts->dump_flags & RSS_DUMP_PKG)
823                 debug_vcg_dump_pkg(rss, NULL, 0);
824 }
825
826 /**
827  * Build set of killing edges (from values to their potential killers)
828  */
829 static void build_kill_edges(rss_t *rss, pset *epk) {
830         plist_element_t *el, *k_el;
831
832         foreach_plist(rss->nodes, el) {
833                 ir_node    *irn  = plist_element_get_value(el);
834                 rss_irn_t *rirn = get_rss_irn(rss, irn);
835
836                 DBG((rss->dbg, LEVEL_3, "\t\tbuilding kill edges for %+F:\n", irn));
837
838                 foreach_plist(rirn->pkiller_list, k_el) {
839                         ir_node    *pkiller = plist_element_get_value(k_el);
840                         rss_edge_t *ke      = obstack_alloc(phase_obst(&rss->ph), sizeof(*ke));
841
842                         ke->src  = irn;
843                         ke->tgt  = pkiller;
844                         ke->next = NULL;
845
846                         DBG((rss->dbg, LEVEL_3, "\t\t\ttarget %+F\n", pkiller));
847
848                         pset_insert(epk, ke, HASH_RSS_EDGE(ke));
849                 }
850         }
851 }
852
853 /* print the given cbc for debugging purpose */
854 static void debug_print_cbc(firm_dbg_module_t *mod, cbc_t *cbc) {
855         ir_node    *n;
856         rss_edge_t *ke;
857
858         DBG((mod, LEVEL_3, "\t\tS = set of parents:\n"));
859         foreach_nodeset(cbc->parents, n) {
860                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
861         }
862         DBG((mod, LEVEL_3, "\t\tT = set of children:\n"));
863         foreach_nodeset(cbc->children, n) {
864                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
865         }
866         DBG((mod, LEVEL_3, "\t\tE = Edges from producers to consumers\n"));
867         foreach_pset(cbc->kill_edges, ke) {
868                 DBG((mod, LEVEL_3, "\t\t\t%+F -> %+F\n", ke->src, ke->tgt));
869         }
870 }
871
872 /**
873  * Construct the bipartite decomposition.
874  * Sid-Ahmed-Ali Touati, Phd Thesis
875  * Register Pressure in Instruction Level Parallelism, p. 71
876  */
877 static void compute_bipartite_decomposition(rss_t *rss) {
878         pset *epk    = new_pset(cmp_rss_edges, 10);
879         int  cur_num = 0;
880
881         plist_element_t *el;
882
883         DBG((rss->dbg, LEVEL_1, "\tcomputing bipartite decomposition:\n"));
884
885         build_kill_edges(rss, epk);
886
887         foreach_plist(rss->nodes, el) {
888                 ir_node   *u_irn   = plist_element_get_value(el);
889                 rss_irn_t *u       = get_rss_irn(rss, u_irn);
890                 int       p_change = 1;
891                 int       c_change = 1;
892                 int       vrfy_ok  = 1;
893
894                 cbc_t           *cbc;
895                 plist_element_t *el2;
896                 rss_edge_t      *k_edge;
897                 rss_edge_t      *kedge_root = NULL;
898                 ir_node         *t_irn, *s_irn;
899
900                 if (u->visited || u_irn == _sink)
901                         continue;
902
903                 DBG((rss->dbg, LEVEL_2, "\t\t%+F choosen:\n", u_irn));
904
905                 cbc     = obstack_alloc(phase_obst(&rss->ph), sizeof(*cbc));
906                 cbc->nr = cur_num++;
907
908                 /* initialize S_cb */
909                 cbc->parents = new_nodeset(5);
910                 nodeset_insert(cbc->parents, u_irn);
911                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (init)\n", u_irn));
912
913                 /* E_cb = empty */
914                 cbc->kill_edges = new_pset(cmp_rss_edges, 5);
915
916                 /* each parent gets killed by at least one value from children */
917
918                 /* T_cb = PK_successors(u) */
919                 cbc->children = new_nodeset(5);
920                 foreach_plist(u->pkiller_list, el2) {
921                         nodeset_insert(cbc->children, plist_element_get_value(el2));
922                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (init)\n", plist_element_get_value(el2)));
923                 }
924
925                 /*
926                         Now: insert the parents of all children into the parent set
927                         and insert the children of all parents into the children set
928                         until the sets don't change any more
929                 */
930                 while (p_change || c_change) {
931                         p_change = c_change = 0;
932
933                         /* accumulate parents */
934                         foreach_nodeset(cbc->children, t_irn) {
935                                 foreach_pset(epk, k_edge) {
936                                         ir_node *val = k_edge->src;
937
938                                         if (k_edge->tgt == t_irn && ! nodeset_find(cbc->parents, val)) {
939                                                 nodeset_insert(cbc->parents, val);
940                                                 p_change = 1;
941                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (killed by %+F)\n", val, t_irn));
942                                         }
943                                 }
944                         }
945
946                         /* accumulate children */
947                         foreach_nodeset(cbc->parents, s_irn) {
948                                 foreach_pset(epk, k_edge) {
949                                         ir_node *val = k_edge->tgt;
950
951                                         if (k_edge->src == s_irn  && ! nodeset_find(cbc->children, val)) {
952                                                 nodeset_insert(cbc->children, val);
953                                                 c_change = 1;
954                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (kills %+F)\n", val, s_irn));
955                                         }
956                                 }
957                         }
958                 }
959
960                 /* mark all parent values as visited */
961                 foreach_nodeset(cbc->parents, s_irn) {
962                         rss_irn_t *s = get_rss_irn(rss, s_irn);
963                         s->visited = 1;
964                         /* assure bipartite property */
965 #if 0
966                         if (nodeset_find(cbc->children, s_irn)) {
967                                 nodeset_remove(cbc->children, s_irn);
968                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F removed from to children\n", s_irn));
969                         }
970 #endif
971                 }
972
973                 /* update edges */
974                 foreach_pset(epk, k_edge) {
975                         if (nodeset_find(cbc->parents, k_edge->src) && nodeset_find(cbc->children, k_edge->tgt)) {
976                                 pset_insert(cbc->kill_edges, k_edge, HASH_RSS_EDGE(k_edge));
977                                 /*
978                                         Link all k_edges which are about to be removed together.
979                                         Beware: pset_remove kills the iterator.
980                                 */
981                                 k_edge->next = kedge_root;
982                                 kedge_root   = k_edge;
983                         }
984                 }
985
986                 /* remove all linked k_edges */
987                 for (; kedge_root; kedge_root = kedge_root->next) {
988                         pset_remove(epk, kedge_root, HASH_RSS_EDGE(kedge_root));
989                 }
990
991                 /* verify the cbc */
992                 foreach_nodeset(cbc->parents, s_irn) {
993                         int is_killed = 0;
994
995                         foreach_pset(cbc->kill_edges, k_edge) {
996                                 if (k_edge->src == s_irn) {
997                                         is_killed = 1;
998                                         pset_break(cbc->kill_edges);
999                                         break;
1000                                 }
1001                         }
1002
1003                         if (! is_killed) {
1004                                 vrfy_ok = 0;
1005                                 ir_fprintf(stderr, "Warning: parent %+F is not killed in current cbc\n", s_irn);
1006                         }
1007                 }
1008                 assert(vrfy_ok && "Verification of CBC failed");
1009
1010                 /* add the connected bipartite component */
1011                 if (nodeset_count(cbc->parents) > 0 && nodeset_count(cbc->children) > 0 && pset_count(cbc->kill_edges) > 0)
1012                         pset_insert(rss->cbc_set, cbc, (unsigned)cbc->nr);
1013                 DBG((rss->dbg, LEVEL_2, "\tbipartite component %d inserted:\n", cbc->nr));
1014                 DEBUG_ONLY(
1015                         if (firm_dbg_get_mask(rss->dbg) & LEVEL_2)
1016                                 debug_print_cbc(rss->dbg, cbc);
1017                 );
1018         }
1019
1020         if (rss->opts->dump_flags & RSS_DUMP_CBC)
1021                 debug_vcg_dump_bipartite(rss);
1022
1023         del_pset(epk);
1024 }
1025
1026 /**
1027  * Select the child with the maximum cost.
1028  */
1029 static child_t *select_child_max_cost(rss_t *rss, nodeset *x, nodeset *y, child_t *t, cbc_t *cbc) {
1030         ir_node *child;
1031         float   max_cost = -1.0f;
1032
1033         DBG((rss->dbg, LEVEL_2, "\t\tcomputing children costs:\n"));
1034
1035         foreach_nodeset(cbc->children, child) {
1036                 rss_irn_t  *r_child             = get_rss_irn(rss, child);
1037                 int         num_unkilled_parents = 0;
1038                 int         num_descendants;
1039                 rss_edge_t *k_edge;
1040                 float       cost;
1041
1042                 /* get the number of unkilled parents */
1043                 foreach_pset(cbc->kill_edges, k_edge) {
1044                         if (k_edge->tgt == child && nodeset_find(x, k_edge->src))
1045                                 ++num_unkilled_parents;
1046                 }
1047
1048                 cost = (float)num_unkilled_parents;
1049
1050                 num_descendants = plist_count(r_child->descendant_list) + nodeset_count(y);
1051
1052                 if (num_descendants > 0)
1053                         cost /= (float)num_descendants;
1054
1055                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F, #desc %d, cost %.3f\n", child, num_descendants, cost));
1056
1057                 if (cost > max_cost) {
1058                         t->irn   = child;
1059                         t->cost  = cost;
1060                         max_cost = cost;
1061                 }
1062         }
1063
1064         return t;
1065 }
1066
1067 /**
1068  * Remove all parents from x which are killed by t_irn.
1069  */
1070 static void remove_covered_parents(rss_t *rss, nodeset *x, ir_node *t_irn, cbc_t *cbc) {
1071         rss_irn_t  *t = get_rss_irn(rss, t_irn);
1072         rss_edge_t *k_edge;
1073
1074         DBG((rss->dbg, LEVEL_2, "\t\tremoving parents covered by %+F:\n", t_irn));
1075
1076         foreach_pset(cbc->kill_edges, k_edge) {
1077                 if (k_edge->tgt == t_irn && nodeset_find(x, k_edge->src)) {
1078                         nodeset_remove(x, k_edge->src);
1079                         plist_insert_back(t->parent_list, k_edge->src);
1080                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", k_edge->src));
1081                 }
1082         }
1083 }
1084
1085 static void update_cumulated_descendent_values(rss_t *rss, nodeset *y, ir_node *t_irn) {
1086         rss_irn_t *t = get_rss_irn(rss, t_irn);
1087         plist_element_t *el;
1088
1089         DBG((rss->dbg, LEVEL_2, "\t\tupdating cumulated descendant value of %+F:\n", t_irn));
1090
1091         foreach_plist(t->descendant_list, el) {
1092                 nodeset_insert(y, plist_element_get_value(el));
1093                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", plist_element_get_value(el)));
1094         }
1095 }
1096
1097 /**
1098  * Greedy-k: a heuristics for the MMA problem
1099  */
1100 static void compute_killing_function(rss_t *rss) {
1101         cbc_t *cbc;
1102         struct obstack obst;
1103
1104         obstack_init(&obst);
1105
1106         rss->cbc_set = pset_new_ptr(5);
1107         compute_bipartite_decomposition(rss);
1108
1109         /* for all bipartite components do: */
1110         foreach_pset(rss->cbc_set, cbc) {
1111                 ir_node *p;
1112                 nodeset *x       = new_nodeset(10);
1113                 nodeset *y       = new_nodeset(10);
1114                 child_t **sks    = NEW_ARR_F(child_t *, 20);
1115                 int     cur_len  = 0;
1116                 int     cur_size = 20;
1117                 int     i;
1118
1119                 DBG((rss->dbg, LEVEL_1, "\tcomputing SKS for cbc %d:\n", cbc->nr));
1120                 DBG((rss->dbg, LEVEL_2, "\t\tinitializing parents X:\n"));
1121
1122                 /* X = S_cb (all parents are initially uncovered) */
1123                 foreach_nodeset(cbc->parents, p) {
1124                         nodeset_insert(x, p);
1125                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", p));
1126                 }
1127
1128                 /* while X not empty */
1129                 while (nodeset_count(x) > 0) {
1130                         child_t *t = obstack_alloc(&obst, sizeof(*t));
1131                         memset(t, 0, sizeof(*t));
1132
1133                         t = select_child_max_cost(rss, x, y, t, cbc);
1134
1135                         if (cur_len >= cur_size) {
1136                                 ARR_EXTO(child_t *, sks, cur_size * 2);
1137                                 cur_size *= 2;
1138                         }
1139
1140                         DBG((rss->dbg, LEVEL_2, "\t\tinsert child %+F (%.3f) into SKS at pos %d\n", t->irn, t->cost, cur_len));
1141
1142                         sks[cur_len++] = t;
1143                         remove_covered_parents(rss, x, t->irn, cbc);
1144                         update_cumulated_descendent_values(rss, y, t->irn);
1145                 }
1146
1147                 ARR_SHRINKLEN(sks, cur_len);
1148
1149                 /* sort SKS in increasing cost order */
1150                 qsort(sks, cur_len, sizeof(sks[0]), cmp_child_costs);
1151
1152                 DBG((rss->dbg, LEVEL_2, "\tprocessing SKS for cbc %d:\n", cbc->nr));
1153
1154                 /* build killing function */
1155                 for (i = cur_len - 1; i >= 0; --i) { /* loop over sks in decreasing cost order */
1156                         child_t         *t = sks[i];
1157                         rss_irn_t       *rt = get_rss_irn(rss, t->irn);
1158                         plist_element_t *p_el;
1159
1160                         DBG((rss->dbg, LEVEL_3, "\t\t\tkiller %+F (%.3f):\n", t->irn, t->cost));
1161
1162                         /* kill all unkilled parents of t */
1163                         foreach_plist(rt->parent_list, p_el) {
1164                                 ir_node    *par = plist_element_get_value(p_el);
1165                                 rss_irn_t *rpar = get_rss_irn(rss, par);
1166
1167                                 if (is_Sink(rpar->killer)) {
1168                                         rpar->killer = t->irn;
1169                                         DBG((rss->dbg, LEVEL_2, "\t\tkill %+F\n", rpar->irn));
1170                                 }
1171                                 else {
1172                                         DBG((rss->dbg, LEVEL_3, "\t\t\tkeeping %+F as killer for %+F\n", rpar->killer, rpar->irn));
1173                                 }
1174                         }
1175                 }
1176
1177                 del_nodeset(x);
1178                 del_nodeset(y);
1179                 DEL_ARR_F(sks);
1180         }
1181
1182         if (rss->opts->dump_flags & RSS_DUMP_KILL)
1183                 debug_vcg_dump_kill(rss);
1184
1185         del_pset(rss->cbc_set);
1186         obstack_free(&obst, NULL);
1187 }
1188
1189 /**
1190  * Adds the edge src -> tgt to the dvg. Checks if reverse edge is already there (asserts).
1191  */
1192 static INLINE void add_dvg_edge(rss_t *rss, dvg_t *dvg, ir_node *src, ir_node *tgt, int have_source) {
1193         rss_edge_t *dvg_edge;
1194         rss_edge_t key;
1195
1196         if (! have_source)
1197                 nodeset_insert(dvg->nodes, src);
1198         else
1199                 assert(nodeset_find(dvg->nodes, src) != NULL && "Wrong assumption");
1200
1201         nodeset_insert(dvg->nodes, tgt);
1202
1203         key.src = tgt;
1204         key.tgt = src;
1205         assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1206
1207         key.src = src;
1208         key.tgt = tgt;
1209         if (NULL != pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1210                 /* add the edge to the DVG */
1211                 dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1212
1213                 dvg_edge->src  = src;
1214                 dvg_edge->tgt  = tgt;
1215                 dvg_edge->next = NULL;
1216
1217                 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", src, tgt));
1218                 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1219         }
1220 }
1221
1222 /**
1223  * Computes the disjoint value DAG (DVG).
1224  * BEWARE: It is not made explicitly clear in the Touati paper,
1225  *         but the DVG is meant to be build from the KILLING DAG
1226  */
1227 static void compute_dvg(rss_t *rss, dvg_t *dvg) {
1228         plist_element_t *el;
1229
1230         DBG((rss->dbg, LEVEL_1, "\tcomputing DVG:\n"));
1231
1232         foreach_plist(rss->nodes, el) {
1233                 ir_node    *u_irn    = plist_element_get_value(el);
1234                 rss_irn_t  *u        = get_rss_irn(rss, u_irn);
1235                 rss_irn_t  *u_killer = get_rss_irn(rss, u->killer);
1236                 int        i;
1237
1238                 /* TODO: omit nodes only having sink as pkiller and killing no one */
1239
1240                 /* add an edge to killer */
1241                 add_dvg_edge(rss, dvg, u_irn, u->killer, 0);
1242
1243                 if (is_Sink(u->killer))
1244                         continue;
1245
1246                 /* We add an edge to every killer, from where we could be reached. */
1247                 for (i = ARR_LEN_SAFE(u_killer->descendants) - 1; i >= 0; --i) {
1248                         add_dvg_edge(rss, dvg, u_irn, u_killer->descendants[i], 1);
1249                 }
1250
1251 #if 0
1252
1253                 foreach_plist(rss->nodes, el2) {
1254                         ir_node *v_irn = plist_element_get_value(el2);
1255
1256                         /*
1257                                 There is an edge (u, v) in the DVG iff v is a descendant of the killer(u).
1258                         */
1259                         if (BSEARCH_IRN_ARR(v_irn, u_kill->descendants)) {
1260                                 rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1261                                 rss_edge_t key;
1262
1263                                 /* insert the user into the DVG and append it to the user list of u */
1264                                 nodeset_insert(dvg->nodes, v_irn);
1265                                 if (! plist_has_value(u->dvg_user_list, v_irn))
1266                                         plist_insert_back(u->dvg_user_list, v_irn);
1267
1268                                 dvg_edge->src  = u_irn;
1269                                 dvg_edge->tgt  = v_irn;
1270                                 dvg_edge->next = NULL;
1271
1272                                 key.src = v_irn;
1273                                 key.tgt = u_irn;
1274
1275                                 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1276
1277                                 /* add the edge to the DVG */
1278                                 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", u_irn, v_irn));
1279                                 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1280                         }
1281                 }
1282 #endif /* if 0 */
1283         }
1284
1285         if (rss->opts->dump_flags & RSS_DUMP_DVG)
1286                 debug_vcg_dump_dvg(rss, dvg);
1287 }
1288
1289 /**
1290  * Updates the dvg structure when a serialization edge from src -> tgt is added.
1291  */
1292 static void update_dvg(rss_t *rss, dvg_t *dvg, rss_irn_t *src, rss_irn_t *tgt) {
1293         int i, j, idx;
1294         rss_edge_t *edge;
1295         rss_edge_t **arr = alloca(pset_count(dvg->edges) * sizeof(arr[0]));
1296
1297         /*
1298                 Add an edge from serialization target to serialization src:
1299                 src cannot be alive with target
1300         */
1301         add_dvg_edge(rss, dvg, tgt->irn, src->irn, 0);
1302
1303         /* Add edges to src's descendants as well, they also getting serialized. */
1304         for (i = ARR_LEN_SAFE(src->descendants) - 1; i >= 0; --i) {
1305                 add_dvg_edge(rss, dvg, tgt->irn, src->descendants[i], 1);
1306         }
1307
1308         /* We also have to add edges from targets predecessors in dvg */
1309         idx = 0;
1310         /* We cannot insert elements into set over which we iterate ... */
1311         foreach_pset(dvg->edges, edge) {
1312                 if (edge->tgt == tgt->irn) {
1313                         arr[idx++] = edge;
1314                 }
1315         }
1316
1317         for (i = 0; i < idx; ++i) {
1318                 edge = arr[i];
1319                 add_dvg_edge(rss, dvg, edge->src, src->irn, 1);
1320                 for (j = ARR_LEN_SAFE(src->descendants) - 1; j >= 0; --j) {
1321                         add_dvg_edge(rss, dvg, edge->src, src->descendants[j], 1);
1322                 }
1323         }
1324 }
1325
1326 #if 0
1327 /**
1328  * Accumulate all descendants for root into list.
1329  */
1330 static void accumulate_dvg_descendant_values(rss_t *rss, rss_irn_t *root, plist_t *list) {
1331         if (plist_count(root->dvg_user_list) > 0) {
1332                 plist_element_t *el;
1333
1334                 foreach_plist(root->dvg_user_list, el) {
1335                         ir_node   *v_irn = plist_element_get_value(el);
1336                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
1337
1338                         /* add v as descendant */
1339                         if (! plist_has_value(list, v_irn)) {
1340                                 plist_insert_back(list, v_irn);
1341                                 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG descendant %+F\n", v_irn));
1342                         }
1343
1344                         /* accumulate v's descendants */
1345                         accumulate_dvg_descendant_values(rss, v, list);
1346                 }
1347         }
1348 }
1349
1350 /**
1351  * Builds the list of potential killers for each node
1352  * in the given DVG.
1353  * Needs the descendant list for all user as sorted array.
1354  */
1355 static void build_dvg_pkiller_list(rss_t *rss, dvg_t *dvg) {
1356         ir_node *irn;
1357
1358         foreach_nodeset(dvg->nodes, irn) {
1359                 rss_irn_t       *node = get_rss_irn(rss, irn);
1360                 plist_element_t *el, *el2;
1361
1362                 DBG((rss->dbg, DEBUG_DVG, "\t\tbuilding pkiller list for %+F\n", irn));
1363
1364                 /* check each user */
1365                 foreach_plist(node->dvg_user_list, el) {
1366                         ir_node *u_irn = plist_element_get_value(el);
1367
1368                         /* is the current user u_irn not a descendant of any other user -> pkiller */
1369                         foreach_plist(node->dvg_user_list, el2) {
1370                                 ir_node   *v_irn = plist_element_get_value(el2);
1371                                 rss_irn_t *v     = get_rss_irn(rss, v_irn);
1372
1373                                 if (el != el2                             &&
1374                                         ! BSEARCH_IRN_ARR(u_irn, v->dvg_desc) &&
1375                                         ! plist_has_value(node->dvg_pkiller_list, u_irn))
1376                                 {
1377                                         plist_insert_back(node->dvg_pkiller_list, u_irn);
1378                                         DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG pkiller %+F\n", u_irn));
1379                                 }
1380                         }
1381                 }
1382
1383                 node->dvg_pkiller = build_sorted_array_from_list(node->dvg_pkiller_list, phase_obst(&rss->ph));
1384         }
1385
1386         DEBUG_ONLY(
1387                 if (firm_dbg_get_mask(rss->dbg) & DEBUG_DVG)
1388                         debug_vcg_dump_dvg_pkiller(rss, dvg);
1389         );
1390 }
1391
1392 #endif /* if 0 */
1393
1394 /**
1395  * Compute the maximal antichain of the current DVG.
1396  * This is a reimplementation of the MAXIMAL_ANTI_CHAIN function
1397  * from the DDG library 1.1 (DAG.cpp).
1398  */
1399 static nodeset *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration) {
1400         int         n               = nodeset_count(dvg->nodes);
1401         int         *assignment     = alloca(n * sizeof(assignment[0]));
1402         int         *assignment_rev = alloca(n * sizeof(assignment_rev[0]));
1403         int         *idx_map        = alloca(n * sizeof(idx_map[0]));
1404         hungarian_problem_t *bp;
1405         nodeset     *values, *temp;
1406         ir_node     *u_irn;
1407         int         i, j, cost, cur_chain, res;
1408         rss_edge_t  *dvg_edge;
1409
1410 #define MAP_IDX(irn) bsearch_for_index(get_irn_idx(irn), idx_map,  n,  1)
1411
1412         if (pset_count(dvg->edges) == 0)
1413                 return NULL;
1414
1415         bp = hungarian_new(n, n, 1, HUNGARIAN_MATCH_NORMAL);
1416
1417         /*
1418                 At first, we build an index map for the nodes in the DVG,
1419                 because we cannot use the irn idx therefore as the resulting
1420                 bipartite data structure would have around 1.2GB.
1421                 So we limit the size to the number of nodes we have in the DVG
1422                 and build a sorted index map for their irn indices.
1423         */
1424         i = 0;
1425         foreach_nodeset(dvg->nodes, u_irn) {
1426                 idx_map[i++] = get_irn_idx(u_irn);
1427         }
1428         qsort(idx_map, n, sizeof(idx_map[0]), cmp_int);
1429
1430         foreach_pset(dvg->edges, dvg_edge) {
1431                 int idx_u = MAP_IDX(dvg_edge->src);
1432                 int idx_v = MAP_IDX(dvg_edge->tgt);
1433
1434                 /* add the entry to the bipartite data structure */
1435                 hungarian_add(bp, idx_u, idx_v, 1);
1436                 DBG((rss->dbg, LEVEL_3, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1437                         idx_u, dvg_edge->src, idx_v, dvg_edge->tgt));
1438         }
1439 #if 0
1440         /*
1441                 Add a bipartite entry for each pair of nodes (u, v), where exists a
1442                 path in the DVG from u to v, ie. connect all descendants(v) to v.
1443                 desc_dvg(v) = accumulated descendants of all z in dvg_user_list(v)
1444         */
1445         foreach_nodeset(dvg->nodes, u_irn) {
1446                 rss_irn_t *u        = get_rss_irn(rss, u_irn);
1447                 int       idx_u_irn = MAP_IDX(u_irn);
1448
1449                 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG descendants of %+F:\n", u_irn));
1450
1451                 //plist_clear(u->dvg_desc_list);
1452                 //accumulate_dvg_descendant_values(rss, u, u->dvg_desc_list);
1453
1454                 /*
1455                         FIXME: The array is build on the phase obstack and we cannot free the data.
1456                         So the array get as many times allocated as this function is called.
1457                 */
1458
1459                 /* build the sorted array for faster searches */
1460                 //u->dvg_desc = build_sorted_array_from_list(u->dvg_desc_list, phase_obst(&rss->ph));
1461
1462                 DBG((rss->dbg, DEBUG_MAX_AC, "\t\tadding bipartite entries of %+F:\n", u_irn));
1463
1464                 /* add the bipartite entries for all descendants */
1465                 for (i = ARR_LEN_SAFE(u->dvg_desc) - 1; i >= 0; --i) {
1466                         rss_irn_t *desc    = get_rss_irn(rss, u->dvg_desc[i]);
1467                         int       idx_desc = MAP_IDX(u->dvg_desc[i]);
1468
1469                         /* add the entry to the bipartite data structure */
1470                         hungarian_add(bp, idx_u_irn, idx_desc, 1);
1471                         DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1472                                 idx_u_irn, u_irn, idx_desc, u->dvg_desc[i]));
1473
1474                         need_matching = 1;
1475                 }
1476         }
1477 #endif
1478
1479         /* We want maximum cardinality matching */
1480         hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);
1481
1482 #if 0
1483         DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG pkiller:\n"));
1484         /* beware: the following function needs the dvg_desc array */
1485         build_dvg_pkiller_list(rss, dvg);
1486 #endif
1487
1488         DBG((rss->dbg, LEVEL_2, "\t\tcomputing bipartite matching\n"));
1489         /*
1490                 The maximum cardinality bipartite matching gives us the minimal
1491                 chain partition, which corresponds to the maximum anti chains.
1492         */
1493         res = hungarian_solve(bp, assignment, &cost, 1);
1494         assert(res == 0 && "Bipartite matching failed!");
1495
1496         hungarian_free(bp);
1497         memset(assignment_rev, -1, n * sizeof(assignment_rev[0]));
1498
1499         /* build the reverse assignment, ie. foreach i -> j, add j -> i */
1500         for (i = 0; i < n; ++i) {
1501                 if (assignment[i] >= 0) {
1502                         int j = assignment[i];
1503                         assignment_rev[j] = i;
1504                 }
1505         }
1506
1507         DBG((rss->dbg, LEVEL_2, "\t\tgot assignment with cost %d\n", cost));
1508         DBG((rss->dbg, LEVEL_3, "\t\t\tassignment   ---   reverse assignment\n", cost));
1509         for (i = 0; i < n; ++i) {
1510                 DBG((rss->dbg, LEVEL_3, "\t\t\t%3d -> %3d         %3d -> %3d\n", i, assignment[i], i, assignment_rev[i]));
1511         }
1512
1513         values    = new_nodeset(10);
1514         cur_chain = 0;
1515         /* Construction of the minimal chain partition */
1516         for (j = 0; j < n; ++j) {
1517                 /* check nodes, which did not occur as target */
1518                 if (assignment_rev[j] == -1) {
1519                         int       xj      = idx_map[j];
1520                         ir_node   *xj_irn = get_idx_irn(rss->irg, xj);
1521                         rss_irn_t *xj_rss = get_rss_irn(rss, xj_irn);
1522                         chain_t   *c      = obstack_alloc(phase_obst(&rss->ph), sizeof(*c));
1523                         int       source;
1524
1525                         /* there was no source for j -> we have a source of a new chain */
1526                         nodeset_insert(values, xj_irn);
1527
1528                         c->elements = plist_obstack_new(phase_obst(&rss->ph));
1529                         c->nr       = cur_chain++;
1530                         plist_insert_back(c->elements, xj_irn);
1531
1532                         xj_rss->chain = c;
1533
1534                         DBG((rss->dbg, LEVEL_2, "\t\tstarting chain %d:\n", c->nr));
1535                         DBG((rss->dbg, LEVEL_2, "\t\t\t%+F (%d)", xj_irn, j));
1536
1537                         /* follow chain, having j as source */
1538                         source = j;
1539                         while (assignment[source] >= 0) {
1540                                 int       target  = assignment[source];
1541                                 int       irn_idx = idx_map[target];
1542                                 ir_node   *irn    = get_idx_irn(rss->irg, irn_idx);
1543                                 rss_irn_t *node   = get_rss_irn(rss, irn);
1544
1545                                 plist_insert_back(c->elements, irn);
1546                                 node->chain = c;
1547
1548                                 DB((rss->dbg, LEVEL_2, " -> %+F (%d)", irn, target));
1549
1550                                 /* new source = last target */
1551                                 source = target;
1552                         }
1553
1554                         DB((rss->dbg, LEVEL_2, "\n"));
1555                 }
1556         }
1557
1558         /*
1559                 Computing the maximal antichain: Select an element from each
1560                 chain such, such it is parallel with the others.
1561         */
1562         DBG((rss->dbg, LEVEL_1, "\t\tcomputing set of saturation values (MAX AC)\n"));
1563         DBG((rss->dbg, LEVEL_3, "\t\tstarting with:\n"));
1564
1565         DEBUG_ONLY(
1566                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_3)
1567                         dump_nodeset(values, "\t\t\t");
1568         )
1569
1570         temp = NULL;
1571         do {
1572                 /*
1573                         We need an explicit array for the values as
1574                         we cannot iterate multiple times over the same
1575                         set at the same time. :-(((((
1576                 */
1577                 int     n         = nodeset_count(values);
1578                 int     i         = 0;
1579                 ir_node **val_arr = NEW_ARR_F(ir_node *, n);
1580
1581                 foreach_nodeset(values, u_irn)
1582                         val_arr[i++] = u_irn;
1583
1584                 if (temp)
1585                         del_nodeset(temp);
1586
1587                 temp = new_nodeset(10);
1588
1589                 /* Select all nodes from current value set, having another node in the set as descendant. */
1590                 for (i = 0; i < n; ++i) {
1591                         rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1592
1593                         for (j = 0; j < n; ++j) {
1594                                 if (i != j) {
1595                                         rss_edge_t key;
1596
1597                                         key.src = val_arr[i];
1598                                         key.tgt = val_arr[j];
1599
1600                                         if (pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1601                                                 /* v[j] is descendant of u -> remove u and break */
1602                                                 nodeset_insert(temp, u->irn);
1603                                                 nodeset_remove(values, u->irn);
1604
1605                                                 DBG((rss->dbg, LEVEL_3, "\t\t\tremoving %+F from values, adding it to temp\n", u->irn));
1606
1607                                                 break;
1608                                         }
1609                                 }
1610                         }
1611                 }
1612
1613                 /* Try to insert the chain predecessor of all selected u's */
1614                 foreach_nodeset(temp, u_irn) {
1615                         rss_irn_t       *u  = get_rss_irn(rss, u_irn);
1616                         chain_t         *c  = u->chain;
1617                         plist_element_t *el = plist_find_value(c->elements, u_irn);
1618
1619                         assert(el && "Missing element in chain!");
1620
1621                         /* If u has predecessor in chain: insert the predecessor */
1622                         if (el == plist_element_get_prev(el)) {
1623                                 nodeset_insert(values, plist_element_get_value(el));
1624                                 DBG((rss->dbg, LEVEL_3, "\t\t\tadding %+F to values\n", plist_element_get_value(el)));
1625                         }
1626                 }
1627
1628
1629                 DEL_ARR_F(val_arr);
1630         } while (nodeset_count(temp) > 0);
1631
1632         DBG((rss->dbg, LEVEL_2, "\t\tfinal set:\n"));
1633         DEBUG_ONLY(
1634                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2) {
1635                         dump_nodeset(values, "\t\t\t");
1636                 }
1637         );
1638
1639         if (rss->opts->dump_flags & RSS_DUMP_MAXAC)
1640                 debug_vcg_dump_pkg(rss, values, iteration);
1641
1642         del_nodeset(temp);
1643
1644         return values;
1645
1646 #undef MAP_IDX
1647 }
1648
1649 /**
1650  * Computes the best serialization between two nodes of sat_vals.
1651  */
1652 static serialization_t *compute_best_admissible_serialization(rss_t *rss, nodeset *sat_vals, serialization_t *ser, int num_regs) {
1653         int        n                    = nodeset_count(sat_vals);
1654         int        n_idx                = ARR_LEN_SAFE(rss->idx_map);
1655         int        i                    = 0;
1656         ir_node    **val_arr            = alloca(n * sizeof(val_arr[0]));
1657         bitset_t   *bs_sv               = bitset_alloca(n_idx);
1658         bitset_t   *bs_vdesc            = bitset_alloca(n_idx);
1659         bitset_t   *bs_tmp              = bitset_alloca(n_idx);
1660         bitset_t   *bs_ukilldesc        = bitset_alloca(n_idx);
1661         int        best_benefit         = INT_MAX;
1662         int        best_omega2          = INT_MAX;
1663         int        best_benefit_omega20 = INT_MAX;
1664         int        has_omega1           = 0;
1665         int        j, k;
1666         ir_node    *irn;
1667         rss_edge_t min_benefit_edge;
1668         rss_edge_t min_omega20_edge;
1669         rss_irn_t  *ser_u_omega1, *ser_v_omega1;
1670         rss_irn_t  *ser_u_omega20, *ser_v_omega20;
1671
1672         DBG((rss->dbg, LEVEL_1, "\tcomputing admissible serializations:\n"));
1673
1674         /*
1675                 We need an explicit array for the values as
1676                 we cannot iterate multiple times over the same
1677                 set at the same time. :-(((((
1678         */
1679
1680         foreach_nodeset(sat_vals, irn) {
1681                 val_arr[i++] = irn;
1682                 bitset_set(bs_sv, BLOCK_IDX_MAP(rss, irn));
1683         }
1684
1685         /*
1686                 We build all admissible serializations and remember the best found so far.
1687                 For u in sat_vals:
1688                  For v in sat_val:
1689                    if v in pkiller(u): add edge from v to all other pkiller(u)
1690                    else: for all vv in pkiller(u): add edge from v to vv if there exists no path from vv to v
1691         */
1692
1693 /*
1694         A node is unserializable if:
1695         - it has only one killer and this one is Sink
1696     - it kills no other values
1697         In this case there is no serialization which could
1698         reduce the registerpressure
1699 */
1700 #define IS_UNSERIALIZABLE_NODE(rss_node)                  \
1701         (                                                     \
1702                 (                                                 \
1703                         (plist_count(rss_node->pkiller_list) == 1) && \
1704                         is_Sink(rss_node->killer)                  && \
1705                         (rss_node->kill_count                == 0)    \
1706                 )                            ||                   \
1707                 be_is_Barrier(rss_node->irn) ||                   \
1708                 be_is_Keep(rss_node->irn)                         \
1709         )
1710
1711         /* for all u in sat_vals */
1712         for (i = 0; i < n; ++i) {
1713                 rss_irn_t       *u = get_rss_irn(rss, val_arr[i]);
1714                 plist_element_t *el;
1715
1716                 /* ignore nodes where serialization does not help */
1717                 if (IS_UNSERIALIZABLE_NODE(u)) {
1718                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", u->irn));
1719                         continue;
1720                 }
1721
1722                 /* accumulate all descendants of all pkiller(u) */
1723                 bitset_clear_all(bs_ukilldesc);
1724                 foreach_plist(u->pkiller_list, el) {
1725                         ir_node   *irn  = plist_element_get_value(el);
1726                         rss_irn_t *node = get_rss_irn(rss, irn);
1727
1728                         if (! is_Sink(irn))
1729                                 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, irn));
1730                         else
1731                                 continue;
1732
1733                         for (k = ARR_LEN_SAFE(node->descendants) - 1; k >= 0; --k) {
1734                                 if (! is_Sink(node->descendants[k]))
1735                                         bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, node->descendants[k]));
1736                         }
1737                 }
1738
1739                 /* for all v in sat_vals */
1740                 for (j = 0; j < n; ++j) {
1741                         ir_node   *v_irn   = val_arr[j];
1742                         rss_irn_t *v       = get_rss_irn(rss, v_irn);
1743                         unsigned  v_height = get_irn_height(rss->h, v_irn);
1744                         int       omega1, omega2, is_pkiller;
1745
1746                         /* v cannot be serialized with itself
1747                          * ignore nodes where serialization does not help */
1748                         if (i == j || IS_UNSERIALIZABLE_NODE(v)) {
1749                                 if (i != j)
1750                                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", v->irn));
1751                                 continue;
1752                         }
1753
1754                         /* get descendants of v */
1755                         bitset_clear_all(bs_vdesc);
1756                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v_irn));
1757                         for (k = ARR_LEN_SAFE(v->descendants) - 1; k >= 0; --k) {
1758                                 if (! is_Sink(v->descendants[k]))
1759                                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v->descendants[k]));
1760                         }
1761
1762                         /* if v is in pkiller(u) */
1763                         is_pkiller = plist_has_value(u->pkiller_list, v_irn);
1764
1765                         /* for all vv in pkiller(u) */
1766                         foreach_plist(u->pkiller_list, el) {
1767                                 ir_node *vv_irn  = plist_element_get_value(el);
1768                                 int     add_edge;
1769
1770                                 if (is_Sink(vv_irn) || is_cfop(vv_irn))
1771                                         continue;
1772
1773                                 if (is_pkiller)
1774                                         add_edge = vv_irn != v_irn && skip_Proj(vv_irn) != skip_Proj(v_irn);
1775                                 else
1776                                         add_edge = ! heights_reachable_in_block(rss->h, skip_Proj(vv_irn), skip_Proj(v_irn));
1777
1778                                 /*
1779                                         As we add an edge from vv -> v, we have to make sure,
1780                                         that there exists no path from v to vv.
1781                                 */
1782
1783                                 if (add_edge) {
1784                                         int vv_height = get_irn_height(rss->h, vv_irn);
1785                                         int mu1, mu2, critical_path_cost;
1786
1787                                         /*
1788                                                 mu1 = | descendants(v) cut sat_vals |
1789                                                 the number of saturating values which cannot
1790                                                 be simultaneously alive with u
1791                                         */
1792                                         bitset_copy(bs_tmp, bs_vdesc);
1793                                         mu1 = bitset_popcnt(bitset_and(bs_tmp, bs_sv));
1794
1795                                         /*
1796                                                 mu2 = | accum_desc_all_pkiller(u) without descendants(v) |
1797                                         */
1798                                         if (is_pkiller) {
1799                                                 bitset_copy(bs_tmp, bs_ukilldesc);
1800                                                 mu2 = bitset_popcnt(bitset_andnot(bs_tmp, bs_vdesc));
1801                                         }
1802                                         else {
1803                                                 mu2 = 0;
1804                                         }
1805
1806                                         /* omega1 = mu1 - mu2 */
1807                                         omega1 = mu1 - mu2;
1808
1809                                         if (omega1 != 0)
1810                                                 has_omega1 = 1;
1811
1812                                         /* omega2 = increase of critical path */
1813                                         critical_path_cost =
1814                                                 v_height                        /* longest path from v to sink */
1815                                                 + rss->max_height - vv_height   /* longest path from source to vv */
1816                                                 + 1;                            /* edge */
1817
1818                                         /*
1819                                                 If critical_path_cost > max_height -> the new edge
1820                                                 would increase the longest critical path by the difference.
1821                                         */
1822                                         omega2 = critical_path_cost > rss->max_height ? critical_path_cost - rss->max_height : 0;
1823
1824                                         /* this keeps track of the edge with the best benefit */
1825                                         if (omega1 >= num_regs - n && omega1 < best_benefit) {
1826                                                 min_benefit_edge.src = v_irn;
1827                                                 min_benefit_edge.tgt = vv_irn;
1828
1829                                                 ser_u_omega1 = u;
1830                                                 ser_v_omega1 = v;
1831
1832                                                 best_benefit    = omega1;
1833                                                 ser->new_killer = is_pkiller;
1834                                         }
1835
1836                                         /* this keeps track of the edge with the best omega1 costs where omega2 == 0 */
1837                                         if (omega2 == 0 && omega1 >= num_regs - n && omega1 < best_benefit_omega20) {
1838                                                 min_omega20_edge.src = v_irn;
1839                                                 min_omega20_edge.tgt = vv_irn;
1840
1841                                                 ser_u_omega20 = u;
1842                                                 ser_v_omega20 = v;
1843
1844                                                 best_benefit_omega20 = omega1;
1845                                                 ser->new_killer      = is_pkiller;
1846                                         }
1847
1848                                         best_omega2 = MIN(best_omega2, omega2);
1849
1850                                         DBG((rss->dbg, LEVEL_2, "\t\tfound %+F -> %+F (w1 %d, w2 %d)\n",
1851                                                 v_irn, vv_irn, omega1, omega2));
1852                                 } /* if add_edge */
1853                         } /* for all vv in pkiller(u) */
1854                 } /* for all v in sat_vals */
1855         } /* for all u in sat_vals */
1856
1857         if (! has_omega1)
1858                 return NULL;
1859
1860         if (best_omega2 == 0) {
1861                 ser->u         = ser_u_omega20;
1862                 ser->v         = ser_v_omega20;
1863                 ser->edge->src = min_omega20_edge.src;
1864                 ser->edge->tgt = min_omega20_edge.tgt;
1865                 ser->omega1    = best_benefit_omega20;
1866                 ser->omega2    = best_omega2;
1867         }
1868         else {
1869                 ser->u         = ser_u_omega1;
1870                 ser->v         = ser_v_omega1;
1871                 ser->edge->src = min_benefit_edge.src;
1872                 ser->edge->tgt = min_benefit_edge.tgt;
1873                 ser->omega1    = best_benefit;
1874                 ser->omega2    = best_omega2;
1875         }
1876
1877         return ser;
1878
1879 #undef IS_UNSERIALIZABLE_NODE
1880 }
1881
1882 /**
1883  * Perform the value serialization heuristic and add all
1884  * computed serialization edges as dependencies to the irg.
1885  */
1886 static void perform_value_serialization_heuristic(rss_t *rss) {
1887         bitset_t *arch_nonign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1888         bitset_t *abi_ign_bs     = bitset_alloca(arch_register_class_n_regs(rss->cls));
1889         int      available_regs, iteration, num_live;
1890         dvg_t    dvg;
1891         nodeset  *sat_vals;
1892         pset *ser_set = new_pset(cmp_rss_edges, 20);
1893
1894         /* available_regs = R = |arch_non_ignore_regs cut ~abi_ignore_regs| */
1895         arch_put_non_ignore_regs(rss->arch_env, rss->cls, arch_nonign_bs);
1896         be_abi_put_ignore_regs(rss->abi, rss->cls, abi_ign_bs);
1897         bitset_andnot(arch_nonign_bs, abi_ign_bs);
1898         available_regs  = bitset_popcnt(arch_nonign_bs);
1899         //um_live = pset_count(rss->live_block);
1900         //available_regs -= num_live < available_regs ? num_live : 0;
1901
1902         DBG((rss->dbg, LEVEL_1, "\n\t#available regs: %d\n\n", available_regs));
1903
1904         /*
1905                 At first we need to compute the disjoint value DAG (DVG = {V, E_dv}).
1906                 V    = set of all nodes we are currently interested in
1907                 E_dv = there is an edge from u to v iff v is a descendant of killer(u), forall u, v in V
1908         */
1909         dvg.nodes = new_nodeset(plist_count(rss->nodes));
1910         dvg.edges = new_pset(cmp_rss_edges, plist_count(rss->nodes) * 5);
1911         compute_dvg(rss, &dvg);
1912
1913         /*
1914                 Then we perform the heuristic serialization algorithm
1915                 on the DVG which gives us all necessary serialization
1916                 edges.
1917         */
1918         DBG((rss->dbg, LEVEL_1, "\tcomputing maximal antichain:\n"));
1919         iteration = 0;
1920         sat_vals  = compute_maximal_antichain(rss, &dvg, iteration++);
1921         while (sat_vals && (nodeset_count(sat_vals) > available_regs)) {
1922                 serialization_t *ser, best_ser;
1923                 rss_edge_t      *edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*edge));
1924                 ir_node         *dep_src, *dep_tgt;
1925
1926                 best_ser.edge = edge;
1927                 ser = compute_best_admissible_serialization(rss, sat_vals, &best_ser, available_regs);
1928
1929                 DBG((rss->dbg, LEVEL_1, "\tcurrent register saturation %d, target %d\n", nodeset_count(sat_vals), available_regs));
1930
1931                 if (! ser) {
1932                         DBG((rss->dbg, LEVEL_1, "\tno RS improving serialization found, breaking at iteration %d\n", iteration));
1933                         break;
1934                 }
1935
1936                 /* Insert the serialization as dependency edge into the irg. */
1937                 DBG((rss->dbg, LEVEL_1, "\tserializing %+F -> %+F with edge %+F -> %+F and cost %d, %d\n",
1938                         ser->u->irn, ser->v->irn, ser->edge->src, ser->edge->tgt, ser->omega1, ser->omega2));
1939
1940                 if (pset_find(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge)))
1941                         ir_printf("WARNING: serialization %+F -> %+F computed twice!\n", ser->edge->src, ser->edge->tgt);
1942
1943
1944                 pset_insert(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge));
1945
1946                 /* update the dvg */
1947                 update_dvg(rss, &dvg, ser->v, ser->u);
1948                 update_dvg(rss, &dvg, get_rss_irn(rss, ser->edge->src), get_rss_irn(rss, ser->edge->tgt));
1949                 del_nodeset(sat_vals);
1950
1951                 dep_src = skip_Proj(ser->edge->src);
1952                 dep_tgt = ser->edge->tgt;
1953                 add_irn_dep(dep_src, dep_tgt);
1954
1955                 /* Update descendants, consumer and pkillers of the target */
1956                 update_node_info(rss, ser->edge->tgt, ser->edge->src);
1957
1958                 /* TODO: try to find a cheaper way for updating height information */
1959                 rss->max_height = heights_recompute_block(rss->h, rss->block);
1960
1961                 /* Recompute the antichain for next serialization */
1962                 DBG((rss->dbg, LEVEL_1, "\tre-computing maximal antichain:\n"));
1963                 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
1964         }
1965
1966         del_nodeset(dvg.nodes);
1967         del_pset(dvg.edges);
1968 }
1969
1970 /**
1971  * Do initial calculations for a block.
1972  */
1973 static void process_block(ir_node *block, void *env) {
1974         rss_t *rss = env;
1975         int   i, n;
1976         const ir_edge_t *edge;
1977
1978         phase_init(&rss->ph, "rss block preprocessor", rss->irg, PHASE_DEFAULT_GROWTH, init_rss_irn);
1979
1980         DBG((rss->dbg, LEVEL_1, "preprocessing block %+F\n", block));
1981         rss->block = block;
1982
1983         /* build an index map for all nodes in the current block */
1984         i            = 0;
1985         n            = get_irn_n_edges(block);
1986         NEW_ARR_A(int *, rss->idx_map, n);
1987         foreach_out_edge(block, edge) {
1988                 ir_node *irn      = get_edge_src_irn(edge);
1989                 rss->idx_map[i++] = get_irn_idx(irn);
1990         }
1991         qsort(rss->idx_map, n, sizeof(rss->idx_map[0]), cmp_int);
1992         rss->max_height = heights_recompute_block(rss->h, block);
1993
1994         /* loop over all register classes */
1995         for (i = arch_isa_get_n_reg_class(rss->arch_env->isa) - 1; i >= 0; --i) {
1996                 const arch_register_class_t *cls = arch_isa_get_reg_class(rss->arch_env->isa, i);
1997
1998                 rss->cls = cls;
1999                 DBG((rss->dbg, LEVEL_1, "register class %s\n", arch_register_class_name(cls)));
2000
2001                 /* Get all live value at end of Block having current register class */
2002                 rss->live_block = pset_new_ptr(10);
2003                 be_liveness_end_of_block(rss->liveness, rss->arch_env, rss->cls, rss->block, rss->live_block);
2004
2005                 /* reset the list of interesting nodes */
2006                 plist_clear(rss->nodes);
2007                 plist_insert_back(rss->nodes, _sink);
2008
2009                 /* collect all nodes having a certain register class */
2010                 foreach_out_edge(block, edge) {
2011                         ir_node *irn  = get_edge_src_irn(edge);
2012                         ir_mode *mode = get_irn_mode(irn);
2013
2014                         /*
2015                                 We skip:
2016                                 - mode_T nodes (the projs are asked)
2017                                 - mode_X nodes (control flow nodes are always scheduled last)
2018                                 - Keeps (they are always immediately scheduled)
2019                                 - Phi (same as Keep)
2020                         */
2021                         if (mode == mode_T || mode == mode_X || is_Phi(irn))
2022                                 continue;
2023
2024                         /*
2025                                 In case of a proj, we skip
2026                                 - Barrier (they are a Barrier :)
2027                                 - Start
2028                                 - the Proj itself, as it's scheduled always with it's super node
2029                         */
2030                         if (is_Proj(irn)) {
2031                                 ir_node *pred = get_Proj_pred(irn);
2032                                 if (be_is_Barrier(pred) || is_Start(pred))
2033                                         continue;
2034                         }
2035
2036                         /* calculate the descendants and consumer for each node in the block */
2037                         collect_node_info(rss, skip_Proj(irn));
2038
2039                         if (be_is_Keep(irn))
2040                                 continue;
2041
2042                         if (! arch_irn_is(rss->arch_env, irn, ignore) && arch_get_irn_reg_class(rss->arch_env, irn, -1) == cls) {
2043                                 plist_insert_back(rss->nodes, skip_Proj(irn));
2044                         }
2045                         //}
2046                 }
2047
2048                 /* compute the potential killing set PK(G) */
2049                 compute_pkill_set(rss);
2050
2051                 /* compute the killing function k* */
2052                 compute_killing_function(rss);
2053
2054                 /*
2055                         Compute the heuristic value serialization and
2056                         add the necessary dependencies to the irg.
2057                 */
2058                 perform_value_serialization_heuristic(rss);
2059
2060                 del_pset(rss->live_block);
2061         }
2062
2063         phase_free(&rss->ph);
2064 }
2065
2066 #ifdef WITH_LIBCORE
2067 /**
2068  * Register the options.
2069  */
2070 void rss_register_options(lc_opt_entry_t *grp) {
2071         static int     run_once = 0;
2072         lc_opt_entry_t *rss_grp;
2073
2074         if (! run_once) {
2075                 run_once = 1;
2076                 rss_grp  = lc_opt_get_grp(grp, "rss");
2077
2078                 lc_opt_add_table(rss_grp, rss_option_table);
2079         }
2080 }
2081 #endif /* WITH_LIBCORE */
2082
2083 /**
2084  * Preprocess the irg for scheduling.
2085  */
2086 void rss_schedule_preparation(const be_irg_t *birg) {
2087         rss_t rss;
2088
2089         FIRM_DBG_REGISTER(rss.dbg, "firm.be.sched.rss");
2090
2091         //firm_dbg_set_mask(rss.dbg, LEVEL_1 | LEVEL_2 | LEVEL_3);
2092
2093         init_rss_special_nodes(birg->irg);
2094
2095         rss.irg      = birg->irg;
2096         rss.arch_env = birg->main_env->arch_env;
2097         rss.abi      = birg->abi;
2098         rss.h        = heights_new(birg->irg);
2099         rss.nodes    = plist_new();
2100         rss.opts     = &rss_options;
2101         rss.liveness = be_liveness(birg->irg);
2102         irg_block_walk_graph(birg->irg, NULL, process_block, &rss);
2103         heights_free(rss.h);
2104         plist_free(rss.nodes);
2105         be_liveness_free(rss.liveness);
2106
2107         if (birg->main_env->options->dump_flags & DUMP_SCHED)
2108                 be_dump(rss.irg, "-rss", dump_ir_block_graph);
2109 }