More missing config.h
[libfirm] / ir / be / beschedrss.c
1 /**
2  * Implementation of a register saturating list scheduler
3  * as described in: Sid-Ahmed-Ali Touati
4  * Register Saturation in Superscalar and VLIW Codes
5  *
6  * @license This file protected by GPL -  GNU GENERAL PUBLIC LICENSE.
7  * @author  Christian Wuerdig
8  * @date    29.08.2006
9  * @cvs-id  $Id$
10  */
11 #ifdef HAVE_CONFIG_H
12 #include "config.h"
13 #endif
14
15 #include <limits.h>
16
17 #include "obst.h"
18 #include "debug.h"
19
20 #include "irgraph_t.h"
21 #include "irnode_t.h"
22 #include "iredges_t.h"
23 #include "ircons_t.h"
24 #include "irphase_t.h"
25 #include "irgwalk.h"
26 #include "irdump.h"
27 #include "irtools.h"
28 #include "irbitset.h"
29 #include "irprintf.h"
30 #include "bipartite.h"
31 #include "hungarian.h"
32 #include "plist.h"
33
34 #include "height.h"
35
36 #include "beabi.h"
37 #include "benode_t.h"
38 #include "besched_t.h"
39
40 #ifdef WITH_LIBCORE
41 #include <libcore/lc_opts.h>
42 #include <libcore/lc_opts_enum.h>
43 #endif /* WITH_LIBCORE */
44
45
46 #define ARR_LEN_SAFE(arr) ((arr) != NULL ? ARR_LEN((arr)) : 0)
47
48 #define HASH_RSS_EDGE(edge) ((get_irn_node_nr((edge)->src) << 16) | (get_irn_node_nr((edge)->tgt) & 0xFFFF))
49 #define BSEARCH_IRN_ARR(val, arr) \
50         bsearch(&(val), (arr), ARR_LEN_SAFE((arr)), sizeof((arr)[0]), cmp_irn_idx)
51
52 #define BLOCK_IDX_MAP(rss, irn) bsearch_for_index(get_irn_idx((irn)), (rss)->idx_map, ARR_LEN_SAFE((rss)->idx_map), 1)
53
54 /* the rss options */
55 typedef struct _rss_opts_t {
56         int dump_flags;
57 } rss_opts_t;
58
59 /* Represents a child with associated costs */
60 typedef struct _child {
61         ir_node *irn;
62         float   cost;
63 } child_t;
64
65 /* We need edges for several purposes. */
66 typedef struct _rss_edge {
67         ir_node *src;
68         ir_node *tgt;
69         void    *next;
70 } rss_edge_t;
71
72 /* Represents a connected bipartite component. */
73 typedef struct _cbc {
74         nodeset *parents;       /**< = S  a set of value producers */
75         nodeset *children;      /**< = T  a set of value consumers */
76         pset    *kill_edges;    /**< = E  a set of edges (t in T, s in S) such as each s in S gets killed by at least one t in T */
77         int     nr;             /**< a deterministic index for set insertion (used as hash) */
78 } cbc_t;
79
80 /* Represents a disjoint value DAG. */
81 typedef struct _dvg {
82         nodeset *nodes;
83         pset    *edges;
84 } dvg_t;
85
86 /* Represents a chain of nodes. */
87 typedef struct _chain {
88         plist_t *elements;   /**< List of chain elements */
89         int     nr;          /**< a deterministic index for set insertion (used as hash) */
90 } chain_t;
91
92 typedef struct _rss_irn {
93         plist_t  *consumer_list;    /**< List of consumers */
94         ir_node **consumer;         /**< Sorted consumer array (needed for faster access) */
95
96         plist_t  *parent_list;      /**< List of parents */
97         plist_t  *pkiller_list;     /**< List of potential killers */
98
99         plist_t  *descendant_list;  /**< List of descendants */
100         ir_node **descendants;      /**< Sorted descendant array (needed for faster access) */
101
102         ir_node  *killer;           /**< The selected unique killer */
103         ir_node  *irn;              /**< The corresponding firm node to this rss_irn */
104
105         chain_t  *chain;            /**< The chain, this node is associated to */
106
107         unsigned desc_walk;         /**< visited flag for collecting descendants */
108         unsigned kill_count;        /**< number of nodes killed by this one */
109
110         unsigned live_out : 1;      /**< irn has consumers outside of it's block */
111         unsigned visited  : 1;      /**< visited flag for bipartite decomposition */
112         unsigned havecons : 1;      /**< visited flag collect consumer */
113         unsigned handled  : 1;      /**< flag indicating whether or not the list structures have been build */
114         unsigned dumped   : 1;      /**< flag indication whether or not this node was dumped */
115 } rss_irn_t;
116
117 /* Represents a serialization edge with associated costs. */
118 typedef struct _serialization {
119         rss_irn_t  *u;       /* the top node */
120         rss_irn_t  *v;       /* the node about to be serialized after u */
121         rss_edge_t *edge;    /* the edge selected for the serialization */
122         int        omega1;   /* estimated: available regs - RS reduction */
123         int        omega2;   /* increase in critical path length */
124         int        new_killer;
125 } serialization_t;
126
127 typedef struct _rss {
128         phase_t          ph;              /**< Phase to hold some data */
129         heights_t        *h;              /**< The current height object */
130         ir_graph         *irg;            /**< The irg to preprocess */
131         plist_t          *nodes;          /**< The list of interesting nodes */
132         const arch_env_t *arch_env;       /**< The architecture environment */
133         be_abi_irg_t     *abi;            /**< The abi for this irg */
134         pset             *cbc_set;        /**< A set of connected bipartite components */
135         ir_node          *block;          /**< The current block in progress. */
136         int              *idx_map;        /**< Mapping irn indices to per block indices */
137         unsigned         max_height;      /**< maximum height in the current block */
138         rss_opts_t       *opts;           /**< The options */
139         be_lv_t          *liveness;       /**< The liveness information for this irg */
140         pset             *live_block;     /**< Values alive at end of block */
141         const arch_register_class_t *cls; /**< The current register class */
142         DEBUG_ONLY(firm_dbg_module_t *dbg);
143 } rss_t;
144
145 #define get_rss_irn(rss, irn)  (phase_get_or_set_irn_data(&rss->ph, irn))
146
147 /**
148  * We need some special nodes:
149  * a source and a sink for all live-in and live-out values of a block
150  */
151
152 enum {
153         iro_rss_Source,
154         iro_rss_Sink,
155         iro_rss_last
156 };
157
158 static ir_node *_source = NULL;
159 static ir_node *_sink   = NULL;
160
161 #define is_Source(irn) ((irn) == _source)
162 #define is_Sink(irn)   ((irn) == _sink)
163
164 enum {
165         RSS_DUMP_NONE  = 0,
166         RSS_DUMP_CBC   = 1 << 0,
167         RSS_DUMP_PKG   = 1 << 1,
168         RSS_DUMP_KILL  = 1 << 2,
169         RSS_DUMP_DVG   = 1 << 3,
170         RSS_DUMP_MAXAC = 1 << 4,
171         RSS_DUMP_ALL   = (RSS_DUMP_MAXAC << 1) - 1,
172 };
173
174 static rss_opts_t rss_options = {
175         RSS_DUMP_NONE,
176 };
177
178 #ifdef WITH_LIBCORE
179 static const lc_opt_enum_int_items_t dump_items[] = {
180         { "none",  RSS_DUMP_NONE  },
181         { "cbc",   RSS_DUMP_CBC   },
182         { "pkg",   RSS_DUMP_PKG   },
183         { "kill",  RSS_DUMP_KILL  },
184         { "dvg",   RSS_DUMP_DVG   },
185         { "maxac", RSS_DUMP_MAXAC },
186         { "all",   RSS_DUMP_ALL   },
187         { NULL, 0 }
188 };
189
190 static lc_opt_enum_int_var_t dump_var = {
191         &rss_options.dump_flags, dump_items
192 };
193
194 static const lc_opt_table_entry_t rss_option_table[] = {
195         LC_OPT_ENT_ENUM_MASK("dump", "dump phases", &dump_var),
196         { NULL }
197 };
198 #endif /* WITH_LIBCORE */
199
200 /******************************************************************************
201  *  _          _                    __                  _   _
202  * | |        | |                  / _|                | | (_)
203  * | |__   ___| |_ __   ___ _ __  | |_ _   _ _ __   ___| |_ _  ___  _ __  ___
204  * | '_ \ / _ \ | '_ \ / _ \ '__| |  _| | | | '_ \ / __| __| |/ _ \| '_ \/ __|
205  * | | | |  __/ | |_) |  __/ |    | | | |_| | | | | (__| |_| | (_) | | | \__ \
206  * |_| |_|\___|_| .__/ \___|_|    |_|  \__,_|_| |_|\___|\__|_|\___/|_| |_|___/
207  *            | |
208  *            |_|
209  ******************************************************************************/
210
211 /**
212  * Acquire opcodes and create source and sink nodes.
213  */
214 static void init_rss_special_nodes(ir_graph *irg) {
215         ir_node *block         = get_irg_start_block(irg);
216         int     iro_rss_base   = get_next_ir_opcodes(iro_rss_last);
217         ir_op   *op_rss_Source = new_ir_op(iro_rss_base + iro_rss_Source, "rss_Source", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
218         ir_op   *op_rss_Sink   = new_ir_op(iro_rss_base + iro_rss_Sink,   "rss_Sink",   op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
219         _source                = new_ir_node(NULL, irg, block, op_rss_Source, mode_ANY, 0, NULL);
220         _sink                  = new_ir_node(NULL, irg, block, op_rss_Sink, mode_ANY, 0, NULL);
221 }
222
223 static int cmp_int(const void *a, const void *b) {
224         const int *i1 = a;
225         const int *i2 = b;
226
227         return QSORT_CMP(*i1, *i2);
228 }
229
230 static int cmp_child_costs(const void *a, const void *b) {
231         const child_t *c1 = a;
232         const child_t *c2 = b;
233
234         return QSORT_CMP(c1->cost, c2->cost);
235 }
236
237 static int cmp_irn_idx(const void *a, const void *b) {
238         const ir_node *n1 = *(ir_node **)a;
239         const ir_node *n2 = *(ir_node **)b;
240
241         return QSORT_CMP(get_irn_idx(n1), get_irn_idx(n2));
242 }
243
244 static int cmp_rss_edges(const void *a, const void *b) {
245         const rss_edge_t *e1 = a;
246         const rss_edge_t *e2 = b;
247
248         return (e1->src != e2->src) || (e1->tgt != e2->tgt);
249 }
250
251 static int bsearch_for_index(int key, int *arr, size_t len, int force) {
252         int left = 0;
253         int right = len;
254
255         while (right >= left) {
256                 int idx = (left + right) / 2;
257
258                 if (key < arr[idx])
259                         right = idx - 1;
260                 else if (key > arr[idx])
261                         left = idx + 1;
262                 else
263                         return idx;
264         }
265
266         if (force)
267                 assert(0 && "Something is wrong, key not found.");
268         return -1;
269 }
270
271 static ir_node **build_sorted_array_from_list(plist_t *irn_list, struct obstack *obst) {
272         plist_element_t *el;
273         int     i     = 0;
274         int     len   = plist_count(irn_list);
275         ir_node **arr = NEW_ARR_D(ir_node *, obst, len);
276
277         /* copy the list into the array */
278         foreach_plist(irn_list, el) {
279                 arr[i++] = plist_element_get_value(el);
280         }
281
282         /* sort the array by node index */
283         qsort(arr, len, sizeof(arr[0]), cmp_irn_idx);
284
285         return arr;
286 }
287
288 /*****************************************************
289  *      _      _                       _
290  *     | |    | |                     (_)
291  *   __| | ___| |__  _   _  __ _  __ _ _ _ __   __ _
292  *  / _` |/ _ \ '_ \| | | |/ _` |/ _` | | '_ \ / _` |
293  * | (_| |  __/ |_) | |_| | (_| | (_| | | | | | (_| |
294  *  \__,_|\___|_.__/ \__,_|\__, |\__, |_|_| |_|\__, |
295  *                          __/ | __/ |         __/ |
296  *                         |___/ |___/         |___/
297  *
298  *****************************************************/
299
300 static void dump_nodeset(nodeset *ns, const char *prefix) {
301         ir_node *irn;
302         foreach_nodeset(ns, irn) {
303                 ir_fprintf(stderr, "%s%+F\n", prefix, irn);
304         }
305 }
306
307 static void build_file_name(rss_t *rss, const char *suffix, size_t suf_len, char *buf, size_t len) {
308         const char *irg_name;
309
310         memset(buf, 0, len);
311         irg_name = get_entity_name(get_irg_entity(rss->irg));
312         snprintf(buf, len - suf_len, "%s-%s-block-%ld",
313                 irg_name, arch_register_class_name(rss->cls), get_irn_node_nr(rss->block));
314         strcat(buf, suffix);
315 }
316
317 /* Dumps all collected bipartite components of current irg as vcg. */
318 static void debug_vcg_dump_bipartite(rss_t *rss) {
319         cbc_t *cbc;
320         FILE  *f;
321         char  file_name[256];
322         static const char suffix[] = "-RSS-CBC.vcg";
323
324         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
325         f = fopen(file_name, "w");
326
327         ir_fprintf(f, "graph: { title: \"connected bipartite component graph of %+F\"\n", rss->irg);
328         fprintf(f, "display_edge_labels: no\n");
329         fprintf(f, "layoutalgorithm: mindepth\n");
330         fprintf(f, "manhattan_edges: yes\n\n");
331
332         foreach_pset(rss->cbc_set, cbc) {
333                 ir_node    *n;
334                 rss_edge_t *ke;
335
336                 fprintf(f, "graph: { titel: \"cbc %d\" label: \"cbc %d\" status:clustered color:yellow\n", cbc->nr, cbc->nr);
337                 foreach_nodeset(cbc->parents, n) {
338                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
339                 }
340                 foreach_nodeset(cbc->children, n) {
341                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
342                 }
343                 foreach_pset(cbc->kill_edges, ke) {
344                         ir_fprintf(f, "edge: { sourcename: \"n%d_%d\" targetname: \"n%d_%d\" }\n",
345                                 get_irn_node_nr(ke->src), cbc->nr, get_irn_node_nr(ke->tgt), cbc->nr);
346                 }
347                 fprintf(f, "}\n\n");
348         }
349         fprintf(f, "}\n");
350         fclose(f);
351 }
352
353 /* Dump the computed killing function as vcg. */
354 static void debug_vcg_dump_kill(rss_t *rss) {
355         FILE            *f;
356         char            file_name[256];
357         plist_element_t *el;
358         static const char suffix[] = "-RSS-KILL.vcg";
359
360         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
361         f = fopen(file_name, "w");
362
363         ir_fprintf(f, "graph: { title: \"computed kill graph of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
364         fprintf(f, "display_edge_labels: no\n");
365         fprintf(f, "layoutalgorithm: mindepth\n");
366         fprintf(f, "manhattan_edges: yes\n\n");
367
368         {
369                 /* reset dump_flag */
370                 ir_node *irn;
371                 foreach_phase_irn(&rss->ph, irn) {
372                         rss_irn_t *node = get_rss_irn(rss, irn);
373                         node->dumped = 0;
374                 }
375         }
376
377         /* dump all nodes and their killers */
378         foreach_plist(rss->nodes, el) {
379                 ir_node   *irn     = plist_element_get_value(el);
380                 rss_irn_t *rirn    = get_rss_irn(rss, irn);
381                 rss_irn_t *pk_rirn = get_rss_irn(rss, rirn->killer);
382
383                 if (! rirn->dumped) {
384                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
385                         rirn->dumped = 1;
386                 }
387
388                 if (! pk_rirn->dumped) {
389                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(rirn->killer), rirn->killer);
390                         pk_rirn->dumped = 1;
391                 }
392
393                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
394                         get_irn_node_nr(rirn->killer), get_irn_node_nr(irn));
395         }
396
397         fprintf(f, "}\n");
398         fclose(f);
399 }
400
401 /* Dumps the potential killing DAG (PKG) as vcg. */
402 static void debug_vcg_dump_pkg(rss_t *rss, nodeset *max_ac, int iteration) {
403         FILE              *f;
404         char              file_name[256];
405         char              *suffix   = alloca(32);
406         static const char suffix1[] = "-RSS-PKG.vcg";
407         static const char suffix2[] = "-RSS-PKG-MAXAC.vcg";
408         plist_element_t   *el;
409
410         if (! max_ac) {
411                 snprintf(suffix, 32, "%s", suffix1);
412         }
413         else {
414                 snprintf(suffix, 32, "-%02d%s", iteration, suffix2);
415         }
416
417         build_file_name(rss, suffix, strlen(suffix) + 1, file_name, sizeof(file_name));
418         f = fopen(file_name, "w");
419
420         ir_fprintf(f, "graph: { title: \"potential killing DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
421         fprintf(f, "display_edge_labels: no\n");
422         fprintf(f, "layoutalgorithm: mindepth\n");
423         fprintf(f, "manhattan_edges: yes\n\n");
424
425         {
426                 /* reset dump_flag */
427                 ir_node *irn;
428                 foreach_phase_irn(&rss->ph, irn) {
429                         rss_irn_t *node = get_rss_irn(rss, irn);
430                         node->dumped = 0;
431                 }
432         }
433
434         foreach_plist(rss->nodes, el) {
435                 ir_node   *irn  = plist_element_get_value(el);
436                 rss_irn_t *rirn = get_rss_irn(rss, irn);
437                 char      *c1   = "";
438                 plist_element_t *k_el;
439
440                 /* dump selected saturating values in yellow */
441                 if (max_ac && nodeset_find(max_ac, irn))
442                         c1 = "color:yellow";
443
444                 if (! rirn->dumped) {
445                         if (rirn->chain)
446                                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(irn), irn, rirn->chain->nr, c1);
447                         else
448                                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(irn), irn, c1);
449                         rirn->dumped = 1;
450                 }
451
452                 foreach_plist(rirn->pkiller_list, k_el) {
453                         ir_node   *pkiller = plist_element_get_value(k_el);
454                         rss_irn_t *pk_rirn = get_rss_irn(rss, pkiller);
455                         char      *c2      = "";
456
457                         if (max_ac && nodeset_find(max_ac, pkiller))
458                                 c2 = "color:yellow";
459
460                         if (! pk_rirn->dumped) {
461                                 if (pk_rirn->chain)
462                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(pkiller), pkiller, pk_rirn->chain->nr, c2);
463                                 else
464                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(pkiller), pkiller, c2);
465                                 pk_rirn->dumped = 1;
466                         }
467                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
468                                 get_irn_node_nr(pkiller), get_irn_node_nr(irn));
469                 }
470         }
471         fprintf(f, "}\n");
472         fclose(f);
473 }
474
475 /* Dumps the disjoint value DAG (DVG) as vcg. */
476 static void debug_vcg_dump_dvg(rss_t *rss, dvg_t *dvg) {
477         static const char suffix[] = "-RSS-DVG.vcg";
478         FILE       *f;
479         char       file_name[256];
480         ir_node    *irn;
481         rss_edge_t *edge;
482
483         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
484         f = fopen(file_name, "w");
485
486         ir_fprintf(f, "graph: { title: \"disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
487         fprintf(f, "display_edge_labels: no\n");
488         fprintf(f, "layoutalgorithm: mindepth\n");
489         fprintf(f, "manhattan_edges: yes\n\n");
490
491         /* dump all nodes */
492         foreach_nodeset(dvg->nodes, irn) {
493                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
494         }
495
496         /* dump all edges */
497         foreach_pset(dvg->edges, edge) {
498                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
499                         get_irn_node_nr(edge->src), get_irn_node_nr(edge->tgt));
500         }
501
502         fprintf(f, "}\n");
503         fclose(f);
504 }
505
506 #if 0
507 /* Dumps the PKG(DVG). */
508 static void debug_vcg_dump_dvg_pkiller(rss_t *rss, dvg_t *dvg) {
509         static const char suffix[] = "-RSS-DVG-PKG.vcg";
510         FILE       *f;
511         char       file_name[256];
512         ir_node    *irn;
513
514         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
515         f = fopen(file_name, "w");
516
517         ir_fprintf(f, "graph: { title: \"PKG of disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
518         fprintf(f, "display_edge_labels: no\n");
519         fprintf(f, "layoutalgorithm: mindepth\n");
520         fprintf(f, "manhattan_edges: yes\n\n");
521
522         /* dump all nodes */
523         foreach_nodeset(dvg->nodes, irn) {
524                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
525         }
526
527         /* dump all edges */
528         foreach_nodeset(dvg->nodes, irn) {
529                 rss_irn_t       *node = get_rss_irn(rss, irn);
530                 plist_element_t *el;
531
532                 foreach_plist(node->dvg_pkiller_list, el) {
533                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
534                                 get_irn_node_nr(plist_element_get_value(el)), get_irn_node_nr(irn));
535                 }
536         }
537
538         fprintf(f, "}\n");
539         fclose(f);
540 }
541 #endif /* if 0 */
542
543 /**
544  * In case there is no rss information for irn, initialize it.
545  */
546 static void *init_rss_irn(phase_t *ph, ir_node *irn, void *old) {
547         rss_irn_t *res = old ? old : phase_alloc(ph, sizeof(res[0]));
548
549         res->descendant_list = plist_obstack_new(phase_obst(ph));
550         res->descendants     = NULL;
551
552         res->consumer_list   = plist_obstack_new(phase_obst(ph));
553         res->consumer        = NULL;
554
555         res->pkiller_list    = plist_obstack_new(phase_obst(ph));
556
557         res->parent_list     = plist_obstack_new(phase_obst(ph));
558
559         res->killer          = NULL;
560         res->irn             = irn;
561         res->chain           = NULL;
562
563         res->kill_count      = 0;
564         res->desc_walk       = 0;
565         res->live_out        = 0;
566         res->visited         = 0;
567         res->handled         = 0;
568         res->dumped          = 0;
569         res->havecons        = 0;
570
571         return res;
572 }
573
574 /**
575  * Collect all nodes data dependent on current node.
576  */
577 static void collect_descendants(rss_t *rss, rss_irn_t *rirn, ir_node *irn, int *got_sink, unsigned cur_desc_walk) {
578         const ir_edge_t *edge;
579         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
580         ir_node         *block    = rss->block;
581         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
582         int             i;
583
584         if (cur_node->desc_walk >= cur_desc_walk)
585                 return;
586         cur_node->desc_walk = cur_desc_walk;
587
588         /* Stop at Barriers */
589         if (be_is_Barrier(irn))
590                 return;
591
592         /* loop over normal and dependency edges */
593         for (i = 0; i < 2; ++i) {
594                 foreach_out_edge_kind(irn, edge, ekind[i]) {
595                         ir_node *user = get_edge_src_irn(edge);
596
597                         /* skip ignore nodes as they do not really contribute to register pressure */
598                         if (arch_irn_is(rss->arch_env, user, ignore))
599                                 continue;
600
601                         /*
602                                 (a) mode_X means Jump            -> out_edge
603                                 (b) Phi as user of a normal node -> out-edge
604                         */
605                         if (get_irn_mode(user) == mode_X || is_Phi(user)) {
606                                 if (! *got_sink)
607                                         goto force_sink;
608                                 else
609                                         continue;
610                         }
611
612                         if (is_Proj(user)) {
613
614                                 //if (arch_get_irn_reg_class(rss->arch_env, user, -1) == rss->cls)
615                                         collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
616                         }
617                         else {
618                                 /* check if user lives in block and is not a control flow node */
619                                 if (get_nodes_block(user) == block) {
620                                         if (! plist_has_value(rirn->descendant_list, user)) {
621                                                 plist_insert_back(rirn->descendant_list, user);
622                                                 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", user));
623                                         }
624                                         collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
625                                 }
626                                 else if (! *got_sink) {
627 force_sink:
628                                         /* user lives out of block: add sink as descendant if not already done */
629                                         plist_insert_back(rirn->descendant_list, _sink);
630                                         *got_sink = 1;
631                                         DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", _sink));
632                                 }
633                         }
634                 }
635         }
636 }
637
638 /**
639  * Handles a single consumer.
640  */
641 static void collect_single_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *consumer, int *got_sink) {
642         ir_node *block = rss->block;
643
644         assert(! is_Proj(consumer) && "Cannot handle Projs");
645
646         if (! is_Phi(consumer) && ! is_Block(consumer) && get_nodes_block(consumer) == block) {
647                 if (! arch_irn_is(rss->arch_env, consumer, ignore) && ! plist_has_value(rss_irn->consumer_list, consumer)) {
648                         plist_insert_back(rss_irn->consumer_list, consumer);
649                         DBG((rss->dbg, LEVEL_2, "\t\tconsumer %+F\n", consumer));
650                 }
651         }
652         else {
653                 rss_irn->live_out = 1;
654                 DBG((rss->dbg, LEVEL_2, "\t\tlive out %+F", consumer));
655                 if (! *got_sink) {
656                         plist_insert_back(rss_irn->consumer_list, _sink);
657                         *got_sink = 1;
658                         DB((rss->dbg, LEVEL_2, ", %+F added instead", _sink));
659                 }
660                 DB((rss->dbg, LEVEL_2, "\n"));
661         }
662 }
663
664 /**
665  * Collect all nodes consuming the value(s) produced by current node.
666  */
667 static void collect_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *irn, int *got_sink) {
668         const ir_edge_t *edge;
669         int             i;
670         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
671         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
672
673         if (cur_node->havecons)
674                 return;
675         cur_node->havecons = 1;
676
677         for (i = 0; i < 2; ++i) {
678                 foreach_out_edge_kind(irn, edge, ekind[i]) {
679                         ir_node *consumer = get_edge_src_irn(edge);
680
681                         if (is_Proj(consumer)) {
682                                 //if (arch_get_irn_reg_class(rss->arch_env, consumer, -1) == rss->cls)
683                                         collect_consumer(rss, rss_irn, consumer, got_sink);
684                         }
685                         else
686                                 collect_single_consumer(rss, rss_irn, consumer, got_sink);
687                 }
688         }
689 }
690
691 /**
692  * Collects all consumer and descendant of a irn.
693  */
694 static void collect_node_info(rss_t *rss, ir_node *irn) {
695         static unsigned cur_desc_walk = 0;
696         rss_irn_t       *rss_irn      = get_rss_irn(rss, irn);
697         int             got_sink;
698
699         assert(! is_Proj(irn) && "Cannot handle Projs.");
700
701         /* check if node info is already available */
702         if (rss_irn->handled)
703                 return;
704                 //phase_reinit_single_irn_data(&rss->ph, irn);
705
706         DBG((rss->dbg, LEVEL_1, "\tcomputing consumers of %+F:\n", irn));
707
708         /* collect all consumer */
709         got_sink = 0;
710         collect_consumer(rss, rss_irn, irn, &got_sink);
711
712         /* build sorted consumer array */
713         rss_irn->consumer = build_sorted_array_from_list(rss_irn->consumer_list, phase_obst(&rss->ph));
714
715         DBG((rss->dbg, LEVEL_1, "\tcompute descendants of %+F:\n", irn));
716
717         /* collect descendants */
718         got_sink = 0;
719         collect_descendants(rss, rss_irn, irn, &got_sink, ++cur_desc_walk);
720
721         /* build sorted descendant array */
722         rss_irn->descendants = build_sorted_array_from_list(rss_irn->descendant_list, phase_obst(&rss->ph));
723
724         rss_irn->handled = 1;
725 }
726
727 /**
728  * Checks if v is a potential killer of u.
729  * v is in pkill(u) iff descendants(v) cut consumer(u) is v
730  *
731  * @param rss   The rss object
732  * @param v      The node to check for killer
733  * @param u      The potentially killed value
734  * @return 1 if v is in pkill(u), 0 otherwise
735  */
736 static int is_potential_killer(rss_t *rss, rss_irn_t *v, rss_irn_t *u) {
737         plist_t *list;
738         ir_node **arr;
739         plist_element_t *el;
740
741         assert(is_Sink(v->irn) || ((plist_count(v->descendant_list) > 0 && v->descendants) || 1));
742         assert(is_Sink(u->irn) || ((plist_count(u->consumer_list)   > 0 && u->consumer)    || 1));
743
744         /* as we loop over the list: loop over the shorter one */
745         if (plist_count(v->descendant_list) > plist_count(u->consumer_list)) {
746                 list = u->consumer_list;
747                 arr  = v->descendants;
748         }
749         else {
750                 list = v->descendant_list;
751                 arr  = u->consumer;
752         }
753
754         /* for each list element: try to find element in array */
755         foreach_plist(list, el) {
756                 ir_node *irn   = plist_element_get_value(el);
757                 ir_node *match = BSEARCH_IRN_ARR(irn, arr);
758
759                 if (match && match != irn)
760                         return 0;
761         }
762
763         return 1;
764 }
765
766 /**
767  * Update descendants, consumer and pkiller list for the given irn.
768  */
769 static void update_node_info(rss_t *rss, ir_node *irn, ir_node *pk_irn) {
770         rss_irn_t *node    = get_rss_irn(rss, irn);
771         rss_irn_t *pkiller = get_rss_irn(rss, pk_irn);
772
773         /* add consumer and rebuild the consumer array */
774         if (! plist_has_value(node->consumer_list, pk_irn)) {
775                 plist_insert_back(node->consumer_list, pk_irn);
776                 node->consumer = build_sorted_array_from_list(node->consumer_list, phase_obst(&rss->ph));
777         }
778
779         /* add pk_irn as descendant, add it's descendants to irn's and rebuild array */
780         if (! plist_has_value(node->descendant_list, pk_irn)) {
781                 plist_element_t *el;
782
783                 plist_insert_back(node->descendant_list, pk_irn);
784
785                 foreach_plist(pkiller->descendant_list, el) {
786                         ir_node *desc = plist_element_get_value(el);
787
788                         if (! plist_has_value(node->descendant_list, desc))
789                                 plist_insert_back(node->descendant_list, desc);
790                 }
791
792                 node->descendants = build_sorted_array_from_list(node->descendant_list, phase_obst(&rss->ph));
793         }
794
795 }
796
797 /**
798  * Compute the potential killing set PK.
799  */
800 static void compute_pkill_set(rss_t *rss) {
801         plist_element_t *u_el, *v_el;
802
803         foreach_plist(rss->nodes, u_el) {
804                 ir_node   *u_irn = plist_element_get_value(u_el);
805                 rss_irn_t *u     = get_rss_irn(rss, u_irn);
806
807                 DBG((rss->dbg, LEVEL_1, "\tcomputing potential killers of %+F:\n", u_irn));
808
809                 /* check each consumer if it is a potential killer  */
810                 foreach_plist(u->consumer_list, v_el) {
811                         ir_node   *v_irn = plist_element_get_value(v_el);
812                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
813
814                         /* check, if v is a potential killer of u */
815                         if (is_potential_killer(rss, v, u)) {
816                                 if (! plist_has_value(u->pkiller_list, v_irn))
817                                         plist_insert_back(u->pkiller_list, v_irn);
818                                 v->kill_count++;
819                                 DBG((rss->dbg, LEVEL_2, "\t\tpotential killer %+F\n", v_irn));
820                         }
821                 }
822
823                 u->killer = _sink;
824         }
825
826         if (rss->opts->dump_flags & RSS_DUMP_PKG)
827                 debug_vcg_dump_pkg(rss, NULL, 0);
828 }
829
830 /**
831  * Build set of killing edges (from values to their potential killers)
832  */
833 static void build_kill_edges(rss_t *rss, pset *epk) {
834         plist_element_t *el, *k_el;
835
836         foreach_plist(rss->nodes, el) {
837                 ir_node    *irn  = plist_element_get_value(el);
838                 rss_irn_t *rirn = get_rss_irn(rss, irn);
839
840                 DBG((rss->dbg, LEVEL_3, "\t\tbuilding kill edges for %+F:\n", irn));
841
842                 foreach_plist(rirn->pkiller_list, k_el) {
843                         ir_node    *pkiller = plist_element_get_value(k_el);
844                         rss_edge_t *ke      = obstack_alloc(phase_obst(&rss->ph), sizeof(*ke));
845
846                         ke->src  = irn;
847                         ke->tgt  = pkiller;
848                         ke->next = NULL;
849
850                         DBG((rss->dbg, LEVEL_3, "\t\t\ttarget %+F\n", pkiller));
851
852                         pset_insert(epk, ke, HASH_RSS_EDGE(ke));
853                 }
854         }
855 }
856
857 /* print the given cbc for debugging purpose */
858 static void debug_print_cbc(firm_dbg_module_t *mod, cbc_t *cbc) {
859         ir_node    *n;
860         rss_edge_t *ke;
861
862         DBG((mod, LEVEL_3, "\t\tS = set of parents:\n"));
863         foreach_nodeset(cbc->parents, n) {
864                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
865         }
866         DBG((mod, LEVEL_3, "\t\tT = set of children:\n"));
867         foreach_nodeset(cbc->children, n) {
868                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
869         }
870         DBG((mod, LEVEL_3, "\t\tE = Edges from producers to consumers\n"));
871         foreach_pset(cbc->kill_edges, ke) {
872                 DBG((mod, LEVEL_3, "\t\t\t%+F -> %+F\n", ke->src, ke->tgt));
873         }
874 }
875
876 /**
877  * Construct the bipartite decomposition.
878  * Sid-Ahmed-Ali Touati, Phd Thesis
879  * Register Pressure in Instruction Level Parallelism, p. 71
880  */
881 static void compute_bipartite_decomposition(rss_t *rss) {
882         pset *epk    = new_pset(cmp_rss_edges, 10);
883         int  cur_num = 0;
884
885         plist_element_t *el;
886
887         DBG((rss->dbg, LEVEL_1, "\tcomputing bipartite decomposition:\n"));
888
889         build_kill_edges(rss, epk);
890
891         foreach_plist(rss->nodes, el) {
892                 ir_node   *u_irn   = plist_element_get_value(el);
893                 rss_irn_t *u       = get_rss_irn(rss, u_irn);
894                 int       p_change = 1;
895                 int       c_change = 1;
896                 int       vrfy_ok  = 1;
897
898                 cbc_t           *cbc;
899                 plist_element_t *el2;
900                 rss_edge_t      *k_edge;
901                 rss_edge_t      *kedge_root = NULL;
902                 ir_node         *t_irn, *s_irn;
903
904                 if (u->visited || u_irn == _sink)
905                         continue;
906
907                 DBG((rss->dbg, LEVEL_2, "\t\t%+F choosen:\n", u_irn));
908
909                 cbc     = obstack_alloc(phase_obst(&rss->ph), sizeof(*cbc));
910                 cbc->nr = cur_num++;
911
912                 /* initialize S_cb */
913                 cbc->parents = new_nodeset(5);
914                 nodeset_insert(cbc->parents, u_irn);
915                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (init)\n", u_irn));
916
917                 /* E_cb = empty */
918                 cbc->kill_edges = new_pset(cmp_rss_edges, 5);
919
920                 /* each parent gets killed by at least one value from children */
921
922                 /* T_cb = PK_successors(u) */
923                 cbc->children = new_nodeset(5);
924                 foreach_plist(u->pkiller_list, el2) {
925                         nodeset_insert(cbc->children, plist_element_get_value(el2));
926                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (init)\n", plist_element_get_value(el2)));
927                 }
928
929                 /*
930                         Now: insert the parents of all children into the parent set
931                         and insert the children of all parents into the children set
932                         until the sets don't change any more
933                 */
934                 while (p_change || c_change) {
935                         p_change = c_change = 0;
936
937                         /* accumulate parents */
938                         foreach_nodeset(cbc->children, t_irn) {
939                                 foreach_pset(epk, k_edge) {
940                                         ir_node *val = k_edge->src;
941
942                                         if (k_edge->tgt == t_irn && ! nodeset_find(cbc->parents, val)) {
943                                                 nodeset_insert(cbc->parents, val);
944                                                 p_change = 1;
945                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (killed by %+F)\n", val, t_irn));
946                                         }
947                                 }
948                         }
949
950                         /* accumulate children */
951                         foreach_nodeset(cbc->parents, s_irn) {
952                                 foreach_pset(epk, k_edge) {
953                                         ir_node *val = k_edge->tgt;
954
955                                         if (k_edge->src == s_irn  && ! nodeset_find(cbc->children, val)) {
956                                                 nodeset_insert(cbc->children, val);
957                                                 c_change = 1;
958                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (kills %+F)\n", val, s_irn));
959                                         }
960                                 }
961                         }
962                 }
963
964                 /* mark all parent values as visited */
965                 foreach_nodeset(cbc->parents, s_irn) {
966                         rss_irn_t *s = get_rss_irn(rss, s_irn);
967                         s->visited = 1;
968                         /* assure bipartite property */
969 #if 0
970                         if (nodeset_find(cbc->children, s_irn)) {
971                                 nodeset_remove(cbc->children, s_irn);
972                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F removed from to children\n", s_irn));
973                         }
974 #endif
975                 }
976
977                 /* update edges */
978                 foreach_pset(epk, k_edge) {
979                         if (nodeset_find(cbc->parents, k_edge->src) && nodeset_find(cbc->children, k_edge->tgt)) {
980                                 pset_insert(cbc->kill_edges, k_edge, HASH_RSS_EDGE(k_edge));
981                                 /*
982                                         Link all k_edges which are about to be removed together.
983                                         Beware: pset_remove kills the iterator.
984                                 */
985                                 k_edge->next = kedge_root;
986                                 kedge_root   = k_edge;
987                         }
988                 }
989
990                 /* remove all linked k_edges */
991                 for (; kedge_root; kedge_root = kedge_root->next) {
992                         pset_remove(epk, kedge_root, HASH_RSS_EDGE(kedge_root));
993                 }
994
995                 /* verify the cbc */
996                 foreach_nodeset(cbc->parents, s_irn) {
997                         int is_killed = 0;
998
999                         foreach_pset(cbc->kill_edges, k_edge) {
1000                                 if (k_edge->src == s_irn) {
1001                                         is_killed = 1;
1002                                         pset_break(cbc->kill_edges);
1003                                         break;
1004                                 }
1005                         }
1006
1007                         if (! is_killed) {
1008                                 vrfy_ok = 0;
1009                                 ir_fprintf(stderr, "Warning: parent %+F is not killed in current cbc\n", s_irn);
1010                         }
1011                 }
1012                 assert(vrfy_ok && "Verification of CBC failed");
1013
1014                 /* add the connected bipartite component */
1015                 if (nodeset_count(cbc->parents) > 0 && nodeset_count(cbc->children) > 0 && pset_count(cbc->kill_edges) > 0)
1016                         pset_insert(rss->cbc_set, cbc, (unsigned)cbc->nr);
1017                 DBG((rss->dbg, LEVEL_2, "\tbipartite component %d inserted:\n", cbc->nr));
1018                 DEBUG_ONLY(
1019                         if (firm_dbg_get_mask(rss->dbg) & LEVEL_2)
1020                                 debug_print_cbc(rss->dbg, cbc);
1021                 );
1022         }
1023
1024         if (rss->opts->dump_flags & RSS_DUMP_CBC)
1025                 debug_vcg_dump_bipartite(rss);
1026
1027         del_pset(epk);
1028 }
1029
1030 /**
1031  * Select the child with the maximum cost.
1032  */
1033 static child_t *select_child_max_cost(rss_t *rss, nodeset *x, nodeset *y, child_t *t, cbc_t *cbc) {
1034         ir_node *child;
1035         float   max_cost = -1.0f;
1036
1037         DBG((rss->dbg, LEVEL_2, "\t\tcomputing children costs:\n"));
1038
1039         foreach_nodeset(cbc->children, child) {
1040                 rss_irn_t  *r_child             = get_rss_irn(rss, child);
1041                 int         num_unkilled_parents = 0;
1042                 int         num_descendants;
1043                 rss_edge_t *k_edge;
1044                 float       cost;
1045
1046                 /* get the number of unkilled parents */
1047                 foreach_pset(cbc->kill_edges, k_edge) {
1048                         if (k_edge->tgt == child && nodeset_find(x, k_edge->src))
1049                                 ++num_unkilled_parents;
1050                 }
1051
1052                 cost = (float)num_unkilled_parents;
1053
1054                 num_descendants = plist_count(r_child->descendant_list) + nodeset_count(y);
1055
1056                 if (num_descendants > 0)
1057                         cost /= (float)num_descendants;
1058
1059                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F, #desc %d, cost %.3f\n", child, num_descendants, cost));
1060
1061                 if (cost > max_cost) {
1062                         t->irn   = child;
1063                         t->cost  = cost;
1064                         max_cost = cost;
1065                 }
1066         }
1067
1068         return t;
1069 }
1070
1071 /**
1072  * Remove all parents from x which are killed by t_irn.
1073  */
1074 static void remove_covered_parents(rss_t *rss, nodeset *x, ir_node *t_irn, cbc_t *cbc) {
1075         rss_irn_t  *t = get_rss_irn(rss, t_irn);
1076         rss_edge_t *k_edge;
1077
1078         DBG((rss->dbg, LEVEL_2, "\t\tremoving parents covered by %+F:\n", t_irn));
1079
1080         foreach_pset(cbc->kill_edges, k_edge) {
1081                 if (k_edge->tgt == t_irn && nodeset_find(x, k_edge->src)) {
1082                         nodeset_remove(x, k_edge->src);
1083                         plist_insert_back(t->parent_list, k_edge->src);
1084                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", k_edge->src));
1085                 }
1086         }
1087 }
1088
1089 static void update_cumulated_descendent_values(rss_t *rss, nodeset *y, ir_node *t_irn) {
1090         rss_irn_t *t = get_rss_irn(rss, t_irn);
1091         plist_element_t *el;
1092
1093         DBG((rss->dbg, LEVEL_2, "\t\tupdating cumulated descendant value of %+F:\n", t_irn));
1094
1095         foreach_plist(t->descendant_list, el) {
1096                 nodeset_insert(y, plist_element_get_value(el));
1097                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", plist_element_get_value(el)));
1098         }
1099 }
1100
1101 /**
1102  * Greedy-k: a heuristics for the MMA problem
1103  */
1104 static void compute_killing_function(rss_t *rss) {
1105         cbc_t *cbc;
1106         struct obstack obst;
1107
1108         obstack_init(&obst);
1109
1110         rss->cbc_set = pset_new_ptr(5);
1111         compute_bipartite_decomposition(rss);
1112
1113         /* for all bipartite components do: */
1114         foreach_pset(rss->cbc_set, cbc) {
1115                 ir_node *p;
1116                 nodeset *x       = new_nodeset(10);
1117                 nodeset *y       = new_nodeset(10);
1118                 child_t **sks    = NEW_ARR_F(child_t *, 20);
1119                 int     cur_len  = 0;
1120                 int     cur_size = 20;
1121                 int     i;
1122
1123                 DBG((rss->dbg, LEVEL_1, "\tcomputing SKS for cbc %d:\n", cbc->nr));
1124                 DBG((rss->dbg, LEVEL_2, "\t\tinitializing parents X:\n"));
1125
1126                 /* X = S_cb (all parents are initially uncovered) */
1127                 foreach_nodeset(cbc->parents, p) {
1128                         nodeset_insert(x, p);
1129                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", p));
1130                 }
1131
1132                 /* while X not empty */
1133                 while (nodeset_count(x) > 0) {
1134                         child_t *t = obstack_alloc(&obst, sizeof(*t));
1135                         memset(t, 0, sizeof(*t));
1136
1137                         t = select_child_max_cost(rss, x, y, t, cbc);
1138
1139                         if (cur_len >= cur_size) {
1140                                 ARR_EXTO(child_t *, sks, cur_size * 2);
1141                                 cur_size *= 2;
1142                         }
1143
1144                         DBG((rss->dbg, LEVEL_2, "\t\tinsert child %+F (%.3f) into SKS at pos %d\n", t->irn, t->cost, cur_len));
1145
1146                         sks[cur_len++] = t;
1147                         remove_covered_parents(rss, x, t->irn, cbc);
1148                         update_cumulated_descendent_values(rss, y, t->irn);
1149                 }
1150
1151                 ARR_SHRINKLEN(sks, cur_len);
1152
1153                 /* sort SKS in increasing cost order */
1154                 qsort(sks, cur_len, sizeof(sks[0]), cmp_child_costs);
1155
1156                 DBG((rss->dbg, LEVEL_2, "\tprocessing SKS for cbc %d:\n", cbc->nr));
1157
1158                 /* build killing function */
1159                 for (i = cur_len - 1; i >= 0; --i) { /* loop over sks in decreasing cost order */
1160                         child_t         *t = sks[i];
1161                         rss_irn_t       *rt = get_rss_irn(rss, t->irn);
1162                         plist_element_t *p_el;
1163
1164                         DBG((rss->dbg, LEVEL_3, "\t\t\tkiller %+F (%.3f):\n", t->irn, t->cost));
1165
1166                         /* kill all unkilled parents of t */
1167                         foreach_plist(rt->parent_list, p_el) {
1168                                 ir_node    *par = plist_element_get_value(p_el);
1169                                 rss_irn_t *rpar = get_rss_irn(rss, par);
1170
1171                                 if (is_Sink(rpar->killer)) {
1172                                         rpar->killer = t->irn;
1173                                         DBG((rss->dbg, LEVEL_2, "\t\tkill %+F\n", rpar->irn));
1174                                 }
1175                                 else {
1176                                         DBG((rss->dbg, LEVEL_3, "\t\t\tkeeping %+F as killer for %+F\n", rpar->killer, rpar->irn));
1177                                 }
1178                         }
1179                 }
1180
1181                 del_nodeset(x);
1182                 del_nodeset(y);
1183                 DEL_ARR_F(sks);
1184         }
1185
1186         if (rss->opts->dump_flags & RSS_DUMP_KILL)
1187                 debug_vcg_dump_kill(rss);
1188
1189         del_pset(rss->cbc_set);
1190         obstack_free(&obst, NULL);
1191 }
1192
1193 /**
1194  * Adds the edge src -> tgt to the dvg. Checks if reverse edge is already there (asserts).
1195  */
1196 static INLINE void add_dvg_edge(rss_t *rss, dvg_t *dvg, ir_node *src, ir_node *tgt, int have_source) {
1197         rss_edge_t *dvg_edge;
1198         rss_edge_t key;
1199
1200         if (! have_source)
1201                 nodeset_insert(dvg->nodes, src);
1202         else
1203                 assert(nodeset_find(dvg->nodes, src) != NULL && "Wrong assumption");
1204
1205         nodeset_insert(dvg->nodes, tgt);
1206
1207         key.src = tgt;
1208         key.tgt = src;
1209         assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1210
1211         key.src = src;
1212         key.tgt = tgt;
1213         if (NULL != pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1214                 /* add the edge to the DVG */
1215                 dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1216
1217                 dvg_edge->src  = src;
1218                 dvg_edge->tgt  = tgt;
1219                 dvg_edge->next = NULL;
1220
1221                 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", src, tgt));
1222                 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1223         }
1224 }
1225
1226 /**
1227  * Computes the disjoint value DAG (DVG).
1228  * BEWARE: It is not made explicitly clear in the Touati paper,
1229  *         but the DVG is meant to be build from the KILLING DAG
1230  */
1231 static void compute_dvg(rss_t *rss, dvg_t *dvg) {
1232         plist_element_t *el;
1233
1234         DBG((rss->dbg, LEVEL_1, "\tcomputing DVG:\n"));
1235
1236         foreach_plist(rss->nodes, el) {
1237                 ir_node    *u_irn    = plist_element_get_value(el);
1238                 rss_irn_t  *u        = get_rss_irn(rss, u_irn);
1239                 rss_irn_t  *u_killer = get_rss_irn(rss, u->killer);
1240                 int        i;
1241
1242                 /* TODO: omit nodes only having sink as pkiller and killing no one */
1243
1244                 /* add an edge to killer */
1245                 add_dvg_edge(rss, dvg, u_irn, u->killer, 0);
1246
1247                 if (is_Sink(u->killer))
1248                         continue;
1249
1250                 /* We add an edge to every killer, from where we could be reached. */
1251                 for (i = ARR_LEN_SAFE(u_killer->descendants) - 1; i >= 0; --i) {
1252                         add_dvg_edge(rss, dvg, u_irn, u_killer->descendants[i], 1);
1253                 }
1254
1255 #if 0
1256
1257                 foreach_plist(rss->nodes, el2) {
1258                         ir_node *v_irn = plist_element_get_value(el2);
1259
1260                         /*
1261                                 There is an edge (u, v) in the DVG iff v is a descendant of the killer(u).
1262                         */
1263                         if (BSEARCH_IRN_ARR(v_irn, u_kill->descendants)) {
1264                                 rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1265                                 rss_edge_t key;
1266
1267                                 /* insert the user into the DVG and append it to the user list of u */
1268                                 nodeset_insert(dvg->nodes, v_irn);
1269                                 if (! plist_has_value(u->dvg_user_list, v_irn))
1270                                         plist_insert_back(u->dvg_user_list, v_irn);
1271
1272                                 dvg_edge->src  = u_irn;
1273                                 dvg_edge->tgt  = v_irn;
1274                                 dvg_edge->next = NULL;
1275
1276                                 key.src = v_irn;
1277                                 key.tgt = u_irn;
1278
1279                                 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1280
1281                                 /* add the edge to the DVG */
1282                                 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", u_irn, v_irn));
1283                                 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1284                         }
1285                 }
1286 #endif /* if 0 */
1287         }
1288
1289         if (rss->opts->dump_flags & RSS_DUMP_DVG)
1290                 debug_vcg_dump_dvg(rss, dvg);
1291 }
1292
1293 /**
1294  * Updates the dvg structure when a serialization edge from src -> tgt is added.
1295  */
1296 static void update_dvg(rss_t *rss, dvg_t *dvg, rss_irn_t *src, rss_irn_t *tgt) {
1297         int i, j, idx;
1298         rss_edge_t *edge;
1299         rss_edge_t **arr = alloca(pset_count(dvg->edges) * sizeof(arr[0]));
1300
1301         /*
1302                 Add an edge from serialization target to serialization src:
1303                 src cannot be alive with target
1304         */
1305         add_dvg_edge(rss, dvg, tgt->irn, src->irn, 0);
1306
1307         /* Add edges to src's descendants as well, they also getting serialized. */
1308         for (i = ARR_LEN_SAFE(src->descendants) - 1; i >= 0; --i) {
1309                 add_dvg_edge(rss, dvg, tgt->irn, src->descendants[i], 1);
1310         }
1311
1312         /* We also have to add edges from targets predecessors in dvg */
1313         idx = 0;
1314         /* We cannot insert elements into set over which we iterate ... */
1315         foreach_pset(dvg->edges, edge) {
1316                 if (edge->tgt == tgt->irn) {
1317                         arr[idx++] = edge;
1318                 }
1319         }
1320
1321         for (i = 0; i < idx; ++i) {
1322                 edge = arr[i];
1323                 add_dvg_edge(rss, dvg, edge->src, src->irn, 1);
1324                 for (j = ARR_LEN_SAFE(src->descendants) - 1; j >= 0; --j) {
1325                         add_dvg_edge(rss, dvg, edge->src, src->descendants[j], 1);
1326                 }
1327         }
1328 }
1329
1330 #if 0
1331 /**
1332  * Accumulate all descendants for root into list.
1333  */
1334 static void accumulate_dvg_descendant_values(rss_t *rss, rss_irn_t *root, plist_t *list) {
1335         if (plist_count(root->dvg_user_list) > 0) {
1336                 plist_element_t *el;
1337
1338                 foreach_plist(root->dvg_user_list, el) {
1339                         ir_node   *v_irn = plist_element_get_value(el);
1340                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
1341
1342                         /* add v as descendant */
1343                         if (! plist_has_value(list, v_irn)) {
1344                                 plist_insert_back(list, v_irn);
1345                                 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG descendant %+F\n", v_irn));
1346                         }
1347
1348                         /* accumulate v's descendants */
1349                         accumulate_dvg_descendant_values(rss, v, list);
1350                 }
1351         }
1352 }
1353
1354 /**
1355  * Builds the list of potential killers for each node
1356  * in the given DVG.
1357  * Needs the descendant list for all user as sorted array.
1358  */
1359 static void build_dvg_pkiller_list(rss_t *rss, dvg_t *dvg) {
1360         ir_node *irn;
1361
1362         foreach_nodeset(dvg->nodes, irn) {
1363                 rss_irn_t       *node = get_rss_irn(rss, irn);
1364                 plist_element_t *el, *el2;
1365
1366                 DBG((rss->dbg, DEBUG_DVG, "\t\tbuilding pkiller list for %+F\n", irn));
1367
1368                 /* check each user */
1369                 foreach_plist(node->dvg_user_list, el) {
1370                         ir_node *u_irn = plist_element_get_value(el);
1371
1372                         /* is the current user u_irn not a descendant of any other user -> pkiller */
1373                         foreach_plist(node->dvg_user_list, el2) {
1374                                 ir_node   *v_irn = plist_element_get_value(el2);
1375                                 rss_irn_t *v     = get_rss_irn(rss, v_irn);
1376
1377                                 if (el != el2                             &&
1378                                         ! BSEARCH_IRN_ARR(u_irn, v->dvg_desc) &&
1379                                         ! plist_has_value(node->dvg_pkiller_list, u_irn))
1380                                 {
1381                                         plist_insert_back(node->dvg_pkiller_list, u_irn);
1382                                         DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG pkiller %+F\n", u_irn));
1383                                 }
1384                         }
1385                 }
1386
1387                 node->dvg_pkiller = build_sorted_array_from_list(node->dvg_pkiller_list, phase_obst(&rss->ph));
1388         }
1389
1390         DEBUG_ONLY(
1391                 if (firm_dbg_get_mask(rss->dbg) & DEBUG_DVG)
1392                         debug_vcg_dump_dvg_pkiller(rss, dvg);
1393         );
1394 }
1395
1396 #endif /* if 0 */
1397
1398 /**
1399  * Compute the maximal antichain of the current DVG.
1400  * This is a reimplementation of the MAXIMAL_ANTI_CHAIN function
1401  * from the DDG library 1.1 (DAG.cpp).
1402  */
1403 static nodeset *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration) {
1404         int         n               = nodeset_count(dvg->nodes);
1405         int         *assignment     = alloca(n * sizeof(assignment[0]));
1406         int         *assignment_rev = alloca(n * sizeof(assignment_rev[0]));
1407         int         *idx_map        = alloca(n * sizeof(idx_map[0]));
1408         hungarian_problem_t *bp;
1409         nodeset     *values, *temp;
1410         ir_node     *u_irn;
1411         int         i, j, cost, cur_chain, res;
1412         rss_edge_t  *dvg_edge;
1413
1414 #define MAP_IDX(irn) bsearch_for_index(get_irn_idx(irn), idx_map,  n,  1)
1415
1416         if (pset_count(dvg->edges) == 0)
1417                 return NULL;
1418
1419         bp = hungarian_new(n, n, 1, HUNGARIAN_MATCH_NORMAL);
1420
1421         /*
1422                 At first, we build an index map for the nodes in the DVG,
1423                 because we cannot use the irn idx therefore as the resulting
1424                 bipartite data structure would have around 1.2GB.
1425                 So we limit the size to the number of nodes we have in the DVG
1426                 and build a sorted index map for their irn indices.
1427         */
1428         i = 0;
1429         foreach_nodeset(dvg->nodes, u_irn) {
1430                 idx_map[i++] = get_irn_idx(u_irn);
1431         }
1432         qsort(idx_map, n, sizeof(idx_map[0]), cmp_int);
1433
1434         foreach_pset(dvg->edges, dvg_edge) {
1435                 int idx_u = MAP_IDX(dvg_edge->src);
1436                 int idx_v = MAP_IDX(dvg_edge->tgt);
1437
1438                 /* add the entry to the bipartite data structure */
1439                 hungarian_add(bp, idx_u, idx_v, 1);
1440                 DBG((rss->dbg, LEVEL_3, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1441                         idx_u, dvg_edge->src, idx_v, dvg_edge->tgt));
1442         }
1443 #if 0
1444         /*
1445                 Add a bipartite entry for each pair of nodes (u, v), where exists a
1446                 path in the DVG from u to v, ie. connect all descendants(v) to v.
1447                 desc_dvg(v) = accumulated descendants of all z in dvg_user_list(v)
1448         */
1449         foreach_nodeset(dvg->nodes, u_irn) {
1450                 rss_irn_t *u        = get_rss_irn(rss, u_irn);
1451                 int       idx_u_irn = MAP_IDX(u_irn);
1452
1453                 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG descendants of %+F:\n", u_irn));
1454
1455                 //plist_clear(u->dvg_desc_list);
1456                 //accumulate_dvg_descendant_values(rss, u, u->dvg_desc_list);
1457
1458                 /*
1459                         FIXME: The array is build on the phase obstack and we cannot free the data.
1460                         So the array get as many times allocated as this function is called.
1461                 */
1462
1463                 /* build the sorted array for faster searches */
1464                 //u->dvg_desc = build_sorted_array_from_list(u->dvg_desc_list, phase_obst(&rss->ph));
1465
1466                 DBG((rss->dbg, DEBUG_MAX_AC, "\t\tadding bipartite entries of %+F:\n", u_irn));
1467
1468                 /* add the bipartite entries for all descendants */
1469                 for (i = ARR_LEN_SAFE(u->dvg_desc) - 1; i >= 0; --i) {
1470                         rss_irn_t *desc    = get_rss_irn(rss, u->dvg_desc[i]);
1471                         int       idx_desc = MAP_IDX(u->dvg_desc[i]);
1472
1473                         /* add the entry to the bipartite data structure */
1474                         hungarian_add(bp, idx_u_irn, idx_desc, 1);
1475                         DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1476                                 idx_u_irn, u_irn, idx_desc, u->dvg_desc[i]));
1477
1478                         need_matching = 1;
1479                 }
1480         }
1481 #endif
1482
1483         /* We want maximum cardinality matching */
1484         hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);
1485
1486 #if 0
1487         DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG pkiller:\n"));
1488         /* beware: the following function needs the dvg_desc array */
1489         build_dvg_pkiller_list(rss, dvg);
1490 #endif
1491
1492         DBG((rss->dbg, LEVEL_2, "\t\tcomputing bipartite matching\n"));
1493         /*
1494                 The maximum cardinality bipartite matching gives us the minimal
1495                 chain partition, which corresponds to the maximum anti chains.
1496         */
1497         res = hungarian_solve(bp, assignment, &cost, 1);
1498         assert(res == 0 && "Bipartite matching failed!");
1499
1500         hungarian_free(bp);
1501         memset(assignment_rev, -1, n * sizeof(assignment_rev[0]));
1502
1503         /* build the reverse assignment, ie. foreach i -> j, add j -> i */
1504         for (i = 0; i < n; ++i) {
1505                 if (assignment[i] >= 0) {
1506                         int j = assignment[i];
1507                         assignment_rev[j] = i;
1508                 }
1509         }
1510
1511         DBG((rss->dbg, LEVEL_2, "\t\tgot assignment with cost %d\n", cost));
1512         DBG((rss->dbg, LEVEL_3, "\t\t\tassignment   ---   reverse assignment\n", cost));
1513         for (i = 0; i < n; ++i) {
1514                 DBG((rss->dbg, LEVEL_3, "\t\t\t%3d -> %3d         %3d -> %3d\n", i, assignment[i], i, assignment_rev[i]));
1515         }
1516
1517         values    = new_nodeset(10);
1518         cur_chain = 0;
1519         /* Construction of the minimal chain partition */
1520         for (j = 0; j < n; ++j) {
1521                 /* check nodes, which did not occur as target */
1522                 if (assignment_rev[j] == -1) {
1523                         int       xj      = idx_map[j];
1524                         ir_node   *xj_irn = get_idx_irn(rss->irg, xj);
1525                         rss_irn_t *xj_rss = get_rss_irn(rss, xj_irn);
1526                         chain_t   *c      = obstack_alloc(phase_obst(&rss->ph), sizeof(*c));
1527                         int       source;
1528
1529                         /* there was no source for j -> we have a source of a new chain */
1530                         nodeset_insert(values, xj_irn);
1531
1532                         c->elements = plist_obstack_new(phase_obst(&rss->ph));
1533                         c->nr       = cur_chain++;
1534                         plist_insert_back(c->elements, xj_irn);
1535
1536                         xj_rss->chain = c;
1537
1538                         DBG((rss->dbg, LEVEL_2, "\t\tstarting chain %d:\n", c->nr));
1539                         DBG((rss->dbg, LEVEL_2, "\t\t\t%+F (%d)", xj_irn, j));
1540
1541                         /* follow chain, having j as source */
1542                         source = j;
1543                         while (assignment[source] >= 0) {
1544                                 int       target  = assignment[source];
1545                                 int       irn_idx = idx_map[target];
1546                                 ir_node   *irn    = get_idx_irn(rss->irg, irn_idx);
1547                                 rss_irn_t *node   = get_rss_irn(rss, irn);
1548
1549                                 plist_insert_back(c->elements, irn);
1550                                 node->chain = c;
1551
1552                                 DB((rss->dbg, LEVEL_2, " -> %+F (%d)", irn, target));
1553
1554                                 /* new source = last target */
1555                                 source = target;
1556                         }
1557
1558                         DB((rss->dbg, LEVEL_2, "\n"));
1559                 }
1560         }
1561
1562         /*
1563                 Computing the maximal antichain: Select an element from each
1564                 chain such, such it is parallel with the others.
1565         */
1566         DBG((rss->dbg, LEVEL_1, "\t\tcomputing set of saturation values (MAX AC)\n"));
1567         DBG((rss->dbg, LEVEL_3, "\t\tstarting with:\n"));
1568
1569         DEBUG_ONLY(
1570                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_3)
1571                         dump_nodeset(values, "\t\t\t");
1572         )
1573
1574         temp = NULL;
1575         do {
1576                 /*
1577                         We need an explicit array for the values as
1578                         we cannot iterate multiple times over the same
1579                         set at the same time. :-(((((
1580                 */
1581                 int     n         = nodeset_count(values);
1582                 int     i         = 0;
1583                 ir_node **val_arr = NEW_ARR_F(ir_node *, n);
1584
1585                 foreach_nodeset(values, u_irn)
1586                         val_arr[i++] = u_irn;
1587
1588                 if (temp)
1589                         del_nodeset(temp);
1590
1591                 temp = new_nodeset(10);
1592
1593                 /* Select all nodes from current value set, having another node in the set as descendant. */
1594                 for (i = 0; i < n; ++i) {
1595                         rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1596
1597                         for (j = 0; j < n; ++j) {
1598                                 if (i != j) {
1599                                         rss_edge_t key;
1600
1601                                         key.src = val_arr[i];
1602                                         key.tgt = val_arr[j];
1603
1604                                         if (pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1605                                                 /* v[j] is descendant of u -> remove u and break */
1606                                                 nodeset_insert(temp, u->irn);
1607                                                 nodeset_remove(values, u->irn);
1608
1609                                                 DBG((rss->dbg, LEVEL_3, "\t\t\tremoving %+F from values, adding it to temp\n", u->irn));
1610
1611                                                 break;
1612                                         }
1613                                 }
1614                         }
1615                 }
1616
1617                 /* Try to insert the chain predecessor of all selected u's */
1618                 foreach_nodeset(temp, u_irn) {
1619                         rss_irn_t       *u  = get_rss_irn(rss, u_irn);
1620                         chain_t         *c  = u->chain;
1621                         plist_element_t *el = plist_find_value(c->elements, u_irn);
1622
1623                         assert(el && "Missing element in chain!");
1624
1625                         /* If u has predecessor in chain: insert the predecessor */
1626                         if (el == plist_element_get_prev(el)) {
1627                                 nodeset_insert(values, plist_element_get_value(el));
1628                                 DBG((rss->dbg, LEVEL_3, "\t\t\tadding %+F to values\n", plist_element_get_value(el)));
1629                         }
1630                 }
1631
1632
1633                 DEL_ARR_F(val_arr);
1634         } while (nodeset_count(temp) > 0);
1635
1636         DBG((rss->dbg, LEVEL_2, "\t\tfinal set:\n"));
1637         DEBUG_ONLY(
1638                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2) {
1639                         dump_nodeset(values, "\t\t\t");
1640                 }
1641         );
1642
1643         if (rss->opts->dump_flags & RSS_DUMP_MAXAC)
1644                 debug_vcg_dump_pkg(rss, values, iteration);
1645
1646         del_nodeset(temp);
1647
1648         return values;
1649
1650 #undef MAP_IDX
1651 }
1652
1653 /**
1654  * Computes the best serialization between two nodes of sat_vals.
1655  */
1656 static serialization_t *compute_best_admissible_serialization(rss_t *rss, nodeset *sat_vals, serialization_t *ser, int num_regs) {
1657         int        n                    = nodeset_count(sat_vals);
1658         int        n_idx                = ARR_LEN_SAFE(rss->idx_map);
1659         int        i                    = 0;
1660         ir_node    **val_arr            = alloca(n * sizeof(val_arr[0]));
1661         bitset_t   *bs_sv               = bitset_alloca(n_idx);
1662         bitset_t   *bs_vdesc            = bitset_alloca(n_idx);
1663         bitset_t   *bs_tmp              = bitset_alloca(n_idx);
1664         bitset_t   *bs_ukilldesc        = bitset_alloca(n_idx);
1665         int        best_benefit         = INT_MAX;
1666         int        best_omega2          = INT_MAX;
1667         int        best_benefit_omega20 = INT_MAX;
1668         int        has_omega1           = 0;
1669         int        j, k;
1670         ir_node    *irn;
1671         rss_edge_t min_benefit_edge;
1672         rss_edge_t min_omega20_edge;
1673         rss_irn_t  *ser_u_omega1, *ser_v_omega1;
1674         rss_irn_t  *ser_u_omega20, *ser_v_omega20;
1675
1676         DBG((rss->dbg, LEVEL_1, "\tcomputing admissible serializations:\n"));
1677
1678         /*
1679                 We need an explicit array for the values as
1680                 we cannot iterate multiple times over the same
1681                 set at the same time. :-(((((
1682         */
1683
1684         foreach_nodeset(sat_vals, irn) {
1685                 val_arr[i++] = irn;
1686                 bitset_set(bs_sv, BLOCK_IDX_MAP(rss, irn));
1687         }
1688
1689         /*
1690                 We build all admissible serializations and remember the best found so far.
1691                 For u in sat_vals:
1692                  For v in sat_val:
1693                    if v in pkiller(u): add edge from v to all other pkiller(u)
1694                    else: for all vv in pkiller(u): add edge from v to vv if there exists no path from vv to v
1695         */
1696
1697 /*
1698         A node is unserializable if:
1699         - it has only one killer and this one is Sink
1700     - it kills no other values
1701         In this case there is no serialization which could
1702         reduce the registerpressure
1703 */
1704 #define IS_UNSERIALIZABLE_NODE(rss_node)                  \
1705         (                                                     \
1706                 (                                                 \
1707                         (plist_count(rss_node->pkiller_list) == 1) && \
1708                         is_Sink(rss_node->killer)                  && \
1709                         (rss_node->kill_count                == 0)    \
1710                 )                            ||                   \
1711                 be_is_Barrier(rss_node->irn) ||                   \
1712                 be_is_Keep(rss_node->irn)                         \
1713         )
1714
1715         /* for all u in sat_vals */
1716         for (i = 0; i < n; ++i) {
1717                 rss_irn_t       *u = get_rss_irn(rss, val_arr[i]);
1718                 plist_element_t *el;
1719
1720                 /* ignore nodes where serialization does not help */
1721                 if (IS_UNSERIALIZABLE_NODE(u)) {
1722                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", u->irn));
1723                         continue;
1724                 }
1725
1726                 /* accumulate all descendants of all pkiller(u) */
1727                 bitset_clear_all(bs_ukilldesc);
1728                 foreach_plist(u->pkiller_list, el) {
1729                         ir_node   *irn  = plist_element_get_value(el);
1730                         rss_irn_t *node = get_rss_irn(rss, irn);
1731
1732                         if (! is_Sink(irn))
1733                                 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, irn));
1734                         else
1735                                 continue;
1736
1737                         for (k = ARR_LEN_SAFE(node->descendants) - 1; k >= 0; --k) {
1738                                 if (! is_Sink(node->descendants[k]))
1739                                         bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, node->descendants[k]));
1740                         }
1741                 }
1742
1743                 /* for all v in sat_vals */
1744                 for (j = 0; j < n; ++j) {
1745                         ir_node   *v_irn   = val_arr[j];
1746                         rss_irn_t *v       = get_rss_irn(rss, v_irn);
1747                         unsigned  v_height = get_irn_height(rss->h, v_irn);
1748                         int       omega1, omega2, is_pkiller;
1749
1750                         /* v cannot be serialized with itself
1751                          * ignore nodes where serialization does not help */
1752                         if (i == j || IS_UNSERIALIZABLE_NODE(v)) {
1753                                 if (i != j)
1754                                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", v->irn));
1755                                 continue;
1756                         }
1757
1758                         /* get descendants of v */
1759                         bitset_clear_all(bs_vdesc);
1760                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v_irn));
1761                         for (k = ARR_LEN_SAFE(v->descendants) - 1; k >= 0; --k) {
1762                                 if (! is_Sink(v->descendants[k]))
1763                                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v->descendants[k]));
1764                         }
1765
1766                         /* if v is in pkiller(u) */
1767                         is_pkiller = plist_has_value(u->pkiller_list, v_irn);
1768
1769                         /* for all vv in pkiller(u) */
1770                         foreach_plist(u->pkiller_list, el) {
1771                                 ir_node *vv_irn  = plist_element_get_value(el);
1772                                 int     add_edge;
1773
1774                                 if (is_Sink(vv_irn) || is_cfop(vv_irn))
1775                                         continue;
1776
1777                                 if (is_pkiller)
1778                                         add_edge = vv_irn != v_irn && skip_Proj(vv_irn) != skip_Proj(v_irn);
1779                                 else
1780                                         add_edge = ! heights_reachable_in_block(rss->h, skip_Proj(vv_irn), skip_Proj(v_irn));
1781
1782                                 /*
1783                                         As we add an edge from vv -> v, we have to make sure,
1784                                         that there exists no path from v to vv.
1785                                 */
1786
1787                                 if (add_edge) {
1788                                         int vv_height = get_irn_height(rss->h, vv_irn);
1789                                         int mu1, mu2, critical_path_cost;
1790
1791                                         /*
1792                                                 mu1 = | descendants(v) cut sat_vals |
1793                                                 the number of saturating values which cannot
1794                                                 be simultaneously alive with u
1795                                         */
1796                                         bitset_copy(bs_tmp, bs_vdesc);
1797                                         mu1 = bitset_popcnt(bitset_and(bs_tmp, bs_sv));
1798
1799                                         /*
1800                                                 mu2 = | accum_desc_all_pkiller(u) without descendants(v) |
1801                                         */
1802                                         if (is_pkiller) {
1803                                                 bitset_copy(bs_tmp, bs_ukilldesc);
1804                                                 mu2 = bitset_popcnt(bitset_andnot(bs_tmp, bs_vdesc));
1805                                         }
1806                                         else {
1807                                                 mu2 = 0;
1808                                         }
1809
1810                                         /* omega1 = mu1 - mu2 */
1811                                         omega1 = mu1 - mu2;
1812
1813                                         if (omega1 != 0)
1814                                                 has_omega1 = 1;
1815
1816                                         /* omega2 = increase of critical path */
1817                                         critical_path_cost =
1818                                                 v_height                        /* longest path from v to sink */
1819                                                 + rss->max_height - vv_height   /* longest path from source to vv */
1820                                                 + 1;                            /* edge */
1821
1822                                         /*
1823                                                 If critical_path_cost > max_height -> the new edge
1824                                                 would increase the longest critical path by the difference.
1825                                         */
1826                                         omega2 = critical_path_cost > rss->max_height ? critical_path_cost - rss->max_height : 0;
1827
1828                                         /* this keeps track of the edge with the best benefit */
1829                                         if (omega1 >= num_regs - n && omega1 < best_benefit) {
1830                                                 min_benefit_edge.src = v_irn;
1831                                                 min_benefit_edge.tgt = vv_irn;
1832
1833                                                 ser_u_omega1 = u;
1834                                                 ser_v_omega1 = v;
1835
1836                                                 best_benefit    = omega1;
1837                                                 ser->new_killer = is_pkiller;
1838                                         }
1839
1840                                         /* this keeps track of the edge with the best omega1 costs where omega2 == 0 */
1841                                         if (omega2 == 0 && omega1 >= num_regs - n && omega1 < best_benefit_omega20) {
1842                                                 min_omega20_edge.src = v_irn;
1843                                                 min_omega20_edge.tgt = vv_irn;
1844
1845                                                 ser_u_omega20 = u;
1846                                                 ser_v_omega20 = v;
1847
1848                                                 best_benefit_omega20 = omega1;
1849                                                 ser->new_killer      = is_pkiller;
1850                                         }
1851
1852                                         best_omega2 = MIN(best_omega2, omega2);
1853
1854                                         DBG((rss->dbg, LEVEL_2, "\t\tfound %+F -> %+F (w1 %d, w2 %d)\n",
1855                                                 v_irn, vv_irn, omega1, omega2));
1856                                 } /* if add_edge */
1857                         } /* for all vv in pkiller(u) */
1858                 } /* for all v in sat_vals */
1859         } /* for all u in sat_vals */
1860
1861         if (! has_omega1)
1862                 return NULL;
1863
1864         if (best_omega2 == 0) {
1865                 ser->u         = ser_u_omega20;
1866                 ser->v         = ser_v_omega20;
1867                 ser->edge->src = min_omega20_edge.src;
1868                 ser->edge->tgt = min_omega20_edge.tgt;
1869                 ser->omega1    = best_benefit_omega20;
1870                 ser->omega2    = best_omega2;
1871         }
1872         else {
1873                 ser->u         = ser_u_omega1;
1874                 ser->v         = ser_v_omega1;
1875                 ser->edge->src = min_benefit_edge.src;
1876                 ser->edge->tgt = min_benefit_edge.tgt;
1877                 ser->omega1    = best_benefit;
1878                 ser->omega2    = best_omega2;
1879         }
1880
1881         return ser;
1882
1883 #undef IS_UNSERIALIZABLE_NODE
1884 }
1885
1886 /**
1887  * Perform the value serialization heuristic and add all
1888  * computed serialization edges as dependencies to the irg.
1889  */
1890 static void perform_value_serialization_heuristic(rss_t *rss) {
1891         bitset_t *arch_nonign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1892         bitset_t *abi_ign_bs     = bitset_alloca(arch_register_class_n_regs(rss->cls));
1893         int      available_regs, iteration;
1894         dvg_t    dvg;
1895         nodeset  *sat_vals;
1896         pset *ser_set = new_pset(cmp_rss_edges, 20);
1897
1898         /* available_regs = R = |arch_non_ignore_regs cut ~abi_ignore_regs| */
1899         arch_put_non_ignore_regs(rss->arch_env, rss->cls, arch_nonign_bs);
1900         be_abi_put_ignore_regs(rss->abi, rss->cls, abi_ign_bs);
1901         bitset_andnot(arch_nonign_bs, abi_ign_bs);
1902         available_regs  = bitset_popcnt(arch_nonign_bs);
1903         //num_live = pset_count(rss->live_block);
1904         //available_regs -= num_live < available_regs ? num_live : 0;
1905
1906         DBG((rss->dbg, LEVEL_1, "\n\t#available regs: %d\n\n", available_regs));
1907
1908         /*
1909                 At first we need to compute the disjoint value DAG (DVG = {V, E_dv}).
1910                 V    = set of all nodes we are currently interested in
1911                 E_dv = there is an edge from u to v iff v is a descendant of killer(u), forall u, v in V
1912         */
1913         dvg.nodes = new_nodeset(plist_count(rss->nodes));
1914         dvg.edges = new_pset(cmp_rss_edges, plist_count(rss->nodes) * 5);
1915         compute_dvg(rss, &dvg);
1916
1917         /*
1918                 Then we perform the heuristic serialization algorithm
1919                 on the DVG which gives us all necessary serialization
1920                 edges.
1921         */
1922         DBG((rss->dbg, LEVEL_1, "\tcomputing maximal antichain:\n"));
1923         iteration = 0;
1924         sat_vals  = compute_maximal_antichain(rss, &dvg, iteration++);
1925         while (sat_vals && (nodeset_count(sat_vals) > available_regs)) {
1926                 serialization_t *ser, best_ser;
1927                 rss_edge_t      *edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*edge));
1928                 ir_node         *dep_src, *dep_tgt;
1929
1930                 best_ser.edge = edge;
1931                 ser = compute_best_admissible_serialization(rss, sat_vals, &best_ser, available_regs);
1932
1933                 DBG((rss->dbg, LEVEL_1, "\tcurrent register saturation %d, target %d\n", nodeset_count(sat_vals), available_regs));
1934
1935                 if (! ser) {
1936                         DBG((rss->dbg, LEVEL_1, "\tno RS improving serialization found, breaking at iteration %d\n", iteration));
1937                         break;
1938                 }
1939
1940                 /* Insert the serialization as dependency edge into the irg. */
1941                 DBG((rss->dbg, LEVEL_1, "\tserializing %+F -> %+F with edge %+F -> %+F and cost %d, %d\n",
1942                         ser->u->irn, ser->v->irn, ser->edge->src, ser->edge->tgt, ser->omega1, ser->omega2));
1943
1944                 if (pset_find(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge)))
1945                         ir_printf("WARNING: serialization %+F -> %+F computed twice!\n", ser->edge->src, ser->edge->tgt);
1946
1947
1948                 pset_insert(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge));
1949
1950                 /* update the dvg */
1951                 update_dvg(rss, &dvg, ser->v, ser->u);
1952                 update_dvg(rss, &dvg, get_rss_irn(rss, ser->edge->src), get_rss_irn(rss, ser->edge->tgt));
1953                 del_nodeset(sat_vals);
1954
1955                 dep_src = skip_Proj(ser->edge->src);
1956                 dep_tgt = ser->edge->tgt;
1957                 add_irn_dep(dep_src, dep_tgt);
1958
1959                 /* Update descendants, consumer and pkillers of the target */
1960                 update_node_info(rss, ser->edge->tgt, ser->edge->src);
1961
1962                 /* TODO: try to find a cheaper way for updating height information */
1963                 rss->max_height = heights_recompute_block(rss->h, rss->block);
1964
1965                 /* Recompute the antichain for next serialization */
1966                 DBG((rss->dbg, LEVEL_1, "\tre-computing maximal antichain:\n"));
1967                 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
1968         }
1969
1970         del_nodeset(dvg.nodes);
1971         del_pset(dvg.edges);
1972 }
1973
1974 /**
1975  * Do initial calculations for a block.
1976  */
1977 static void process_block(ir_node *block, void *env) {
1978         rss_t *rss = env;
1979         int   i, n;
1980         const ir_edge_t *edge;
1981
1982         phase_init(&rss->ph, "rss block preprocessor", rss->irg, PHASE_DEFAULT_GROWTH, init_rss_irn);
1983
1984         DBG((rss->dbg, LEVEL_1, "preprocessing block %+F\n", block));
1985         rss->block = block;
1986
1987         /* build an index map for all nodes in the current block */
1988         i            = 0;
1989         n            = get_irn_n_edges(block);
1990         NEW_ARR_A(int *, rss->idx_map, n);
1991         foreach_out_edge(block, edge) {
1992                 ir_node *irn      = get_edge_src_irn(edge);
1993                 rss->idx_map[i++] = get_irn_idx(irn);
1994         }
1995         qsort(rss->idx_map, n, sizeof(rss->idx_map[0]), cmp_int);
1996         rss->max_height = heights_recompute_block(rss->h, block);
1997
1998         /* loop over all register classes */
1999         for (i = arch_isa_get_n_reg_class(rss->arch_env->isa) - 1; i >= 0; --i) {
2000                 const arch_register_class_t *cls = arch_isa_get_reg_class(rss->arch_env->isa, i);
2001
2002                 rss->cls = cls;
2003                 DBG((rss->dbg, LEVEL_1, "register class %s\n", arch_register_class_name(cls)));
2004
2005                 /* Get all live value at end of Block having current register class */
2006                 rss->live_block = pset_new_ptr(10);
2007                 be_liveness_end_of_block(rss->liveness, rss->arch_env, rss->cls, rss->block, rss->live_block);
2008
2009                 /* reset the list of interesting nodes */
2010                 plist_clear(rss->nodes);
2011                 plist_insert_back(rss->nodes, _sink);
2012
2013                 /* collect all nodes having a certain register class */
2014                 foreach_out_edge(block, edge) {
2015                         ir_node *irn  = get_edge_src_irn(edge);
2016                         ir_mode *mode = get_irn_mode(irn);
2017
2018                         /*
2019                                 We skip:
2020                                 - mode_T nodes (the projs are asked)
2021                                 - mode_X nodes (control flow nodes are always scheduled last)
2022                                 - Keeps (they are always immediately scheduled)
2023                                 - Phi (same as Keep)
2024                         */
2025                         if (mode == mode_T || mode == mode_X || is_Phi(irn))
2026                                 continue;
2027
2028                         /*
2029                                 In case of a proj, we skip
2030                                 - Barrier (they are a Barrier :)
2031                                 - Start
2032                                 - the Proj itself, as it's scheduled always with it's super node
2033                         */
2034                         if (is_Proj(irn)) {
2035                                 ir_node *pred = get_Proj_pred(irn);
2036                                 if (be_is_Barrier(pred) || is_Start(pred))
2037                                         continue;
2038                         }
2039
2040                         /* calculate the descendants and consumer for each node in the block */
2041                         collect_node_info(rss, skip_Proj(irn));
2042
2043                         if (be_is_Keep(irn))
2044                                 continue;
2045
2046                         if (! arch_irn_is(rss->arch_env, irn, ignore) && arch_get_irn_reg_class(rss->arch_env, irn, -1) == cls) {
2047                                 plist_insert_back(rss->nodes, skip_Proj(irn));
2048                         }
2049                         //}
2050                 }
2051
2052                 /* compute the potential killing set PK(G) */
2053                 compute_pkill_set(rss);
2054
2055                 /* compute the killing function k* */
2056                 compute_killing_function(rss);
2057
2058                 /*
2059                         Compute the heuristic value serialization and
2060                         add the necessary dependencies to the irg.
2061                 */
2062                 perform_value_serialization_heuristic(rss);
2063
2064                 del_pset(rss->live_block);
2065         }
2066
2067         phase_free(&rss->ph);
2068 }
2069
2070 #ifdef WITH_LIBCORE
2071 /**
2072  * Register the options.
2073  */
2074 void rss_register_options(lc_opt_entry_t *grp) {
2075         static int     run_once = 0;
2076         lc_opt_entry_t *rss_grp;
2077
2078         if (! run_once) {
2079                 run_once = 1;
2080                 rss_grp  = lc_opt_get_grp(grp, "rss");
2081
2082                 lc_opt_add_table(rss_grp, rss_option_table);
2083         }
2084 }
2085 #endif /* WITH_LIBCORE */
2086
2087 /**
2088  * Preprocess the irg for scheduling.
2089  */
2090 void rss_schedule_preparation(const be_irg_t *birg) {
2091         rss_t rss;
2092
2093         FIRM_DBG_REGISTER(rss.dbg, "firm.be.sched.rss");
2094
2095         //firm_dbg_set_mask(rss.dbg, LEVEL_1 | LEVEL_2 | LEVEL_3);
2096
2097         init_rss_special_nodes(birg->irg);
2098
2099         rss.irg      = birg->irg;
2100         rss.arch_env = birg->main_env->arch_env;
2101         rss.abi      = birg->abi;
2102         rss.h        = heights_new(birg->irg);
2103         rss.nodes    = plist_new();
2104         rss.opts     = &rss_options;
2105         rss.liveness = be_liveness(birg->irg);
2106         irg_block_walk_graph(birg->irg, NULL, process_block, &rss);
2107         heights_free(rss.h);
2108         plist_free(rss.nodes);
2109         be_liveness_free(rss.liveness);
2110
2111         if (birg->main_env->options->dump_flags & DUMP_SCHED)
2112                 be_dump(rss.irg, "-rss", dump_ir_block_graph);
2113 }