new register type
[libfirm] / ir / be / beschedrss.c
1 /**
2  * Implementation of a register saturating list scheduler
3  * as described in: Sid-Ahmed-Ali Touati
4  * Register Saturation in Superscalar and VLIW Codes
5  *
6  * @license This file protected by GPL -  GNU GENERAL PUBLIC LICENSE.
7  * @author  Christian Wuerdig
8  * @date    29.08.2006
9  * @cvs-id  $Id$
10  */
11 #ifdef HAVE_CONFIG_H
12 #include <config.h>
13 #endif
14
15 #include <limits.h>
16
17 #include "obst.h"
18 #include "debug.h"
19
20 #include "irgraph_t.h"
21 #include "irnode_t.h"
22 #include "iredges_t.h"
23 #include "ircons_t.h"
24 #include "irphase_t.h"
25 #include "irgwalk.h"
26 #include "irdump.h"
27 #include "irtools.h"
28 #include "irbitset.h"
29 #include "irprintf.h"
30 #include "bipartite.h"
31 #include "hungarian.h"
32 #include "plist.h"
33
34 #include "height.h"
35
36 #include "beabi.h"
37 #include "bemodule.h"
38 #include "benode_t.h"
39 #include "besched_t.h"
40
41 #include <libcore/lc_opts.h>
42 #include <libcore/lc_opts_enum.h>
43
44
45 #define ARR_LEN_SAFE(arr) ((arr) != NULL ? ARR_LEN((arr)) : 0)
46
47 #define HASH_RSS_EDGE(edge) ((get_irn_node_nr((edge)->src) << 16) | (get_irn_node_nr((edge)->tgt) & 0xFFFF))
48 #define BSEARCH_IRN_ARR(val, arr) \
49         bsearch(&(val), (arr), ARR_LEN_SAFE((arr)), sizeof((arr)[0]), cmp_irn_idx)
50
51 #define BLOCK_IDX_MAP(rss, irn) bsearch_for_index(get_irn_idx((irn)), (rss)->idx_map, ARR_LEN_SAFE((rss)->idx_map), 1)
52
53 /* the rss options */
54 typedef struct _rss_opts_t {
55         int dump_flags;
56 } rss_opts_t;
57
58 /* Represents a child with associated costs */
59 typedef struct _child {
60         ir_node *irn;
61         float   cost;
62 } child_t;
63
64 /* We need edges for several purposes. */
65 typedef struct _rss_edge {
66         ir_node *src;
67         ir_node *tgt;
68         void    *next;
69 } rss_edge_t;
70
71 /* Represents a connected bipartite component. */
72 typedef struct _cbc {
73         nodeset *parents;       /**< = S  a set of value producers */
74         nodeset *children;      /**< = T  a set of value consumers */
75         pset    *kill_edges;    /**< = E  a set of edges (t in T, s in S) such as each s in S gets killed by at least one t in T */
76         int     nr;             /**< a deterministic index for set insertion (used as hash) */
77 } cbc_t;
78
79 /* Represents a disjoint value DAG. */
80 typedef struct _dvg {
81         nodeset *nodes;
82         pset    *edges;
83 } dvg_t;
84
85 /* Represents a chain of nodes. */
86 typedef struct _chain {
87         plist_t *elements;   /**< List of chain elements */
88         int     nr;          /**< a deterministic index for set insertion (used as hash) */
89 } chain_t;
90
91 typedef struct _rss_irn {
92         plist_t  *consumer_list;    /**< List of consumers */
93         ir_node **consumer;         /**< Sorted consumer array (needed for faster access) */
94
95         plist_t  *parent_list;      /**< List of parents */
96         plist_t  *pkiller_list;     /**< List of potential killers */
97
98         plist_t  *descendant_list;  /**< List of descendants */
99         ir_node **descendants;      /**< Sorted descendant array (needed for faster access) */
100
101         ir_node  *killer;           /**< The selected unique killer */
102         ir_node  *irn;              /**< The corresponding firm node to this rss_irn */
103
104         chain_t  *chain;            /**< The chain, this node is associated to */
105
106         unsigned desc_walk;         /**< visited flag for collecting descendants */
107         unsigned kill_count;        /**< number of nodes killed by this one */
108
109         unsigned live_out : 1;      /**< irn has consumers outside of it's block */
110         unsigned visited  : 1;      /**< visited flag for bipartite decomposition */
111         unsigned havecons : 1;      /**< visited flag collect consumer */
112         unsigned handled  : 1;      /**< flag indicating whether or not the list structures have been build */
113         unsigned dumped   : 1;      /**< flag indication whether or not this node was dumped */
114 } rss_irn_t;
115
116 /* Represents a serialization edge with associated costs. */
117 typedef struct _serialization {
118         rss_irn_t  *u;       /* the top node */
119         rss_irn_t  *v;       /* the node about to be serialized after u */
120         rss_edge_t *edge;    /* the edge selected for the serialization */
121         int        omega1;   /* estimated: available regs - RS reduction */
122         int        omega2;   /* increase in critical path length */
123         int        new_killer;
124 } serialization_t;
125
126 typedef struct _rss {
127         phase_t          ph;              /**< Phase to hold some data */
128         heights_t        *h;              /**< The current height object */
129         ir_graph         *irg;            /**< The irg to preprocess */
130         plist_t          *nodes;          /**< The list of interesting nodes */
131         const arch_env_t *arch_env;       /**< The architecture environment */
132         be_abi_irg_t     *abi;            /**< The abi for this irg */
133         pset             *cbc_set;        /**< A set of connected bipartite components */
134         ir_node          *block;          /**< The current block in progress. */
135         int              *idx_map;        /**< Mapping irn indices to per block indices */
136         unsigned         max_height;      /**< maximum height in the current block */
137         rss_opts_t       *opts;           /**< The options */
138         be_lv_t          *liveness;       /**< The liveness information for this irg */
139         pset             *live_block;     /**< Values alive at end of block */
140         const arch_register_class_t *cls; /**< The current register class */
141         DEBUG_ONLY(firm_dbg_module_t *dbg);
142 } rss_t;
143
144 #define get_rss_irn(rss, irn)  (phase_get_or_set_irn_data(&rss->ph, irn))
145
146 /**
147  * We need some special nodes:
148  * a source and a sink for all live-in and live-out values of a block
149  */
150 enum {
151         iro_rss_Source,
152         iro_rss_Sink,
153         iro_rss_last
154 };
155
156 /** The opcode of the rss_Source node once allocated. */
157 static ir_op *op_rss_Source;
158 /** The opcode of the rss_Sink node once allocated. */
159 static ir_op *op_rss_Sink;
160
161 /** The rss_Source node of the current graph. */
162 static ir_node *_source = NULL;
163 /** The rss_Sink node of the current graph. */
164 static ir_node *_sink   = NULL;
165
166 #define is_Source(irn) ((irn) == _source)
167 #define is_Sink(irn)   ((irn) == _sink)
168
169 enum {
170         RSS_DUMP_NONE  = 0,
171         RSS_DUMP_CBC   = 1 << 0,
172         RSS_DUMP_PKG   = 1 << 1,
173         RSS_DUMP_KILL  = 1 << 2,
174         RSS_DUMP_DVG   = 1 << 3,
175         RSS_DUMP_MAXAC = 1 << 4,
176         RSS_DUMP_ALL   = (RSS_DUMP_MAXAC << 1) - 1,
177 };
178
179 static rss_opts_t rss_options = {
180         RSS_DUMP_NONE,
181 };
182
183 static const lc_opt_enum_int_items_t dump_items[] = {
184         { "none",  RSS_DUMP_NONE  },
185         { "cbc",   RSS_DUMP_CBC   },
186         { "pkg",   RSS_DUMP_PKG   },
187         { "kill",  RSS_DUMP_KILL  },
188         { "dvg",   RSS_DUMP_DVG   },
189         { "maxac", RSS_DUMP_MAXAC },
190         { "all",   RSS_DUMP_ALL   },
191         { NULL, 0 }
192 };
193
194 static lc_opt_enum_int_var_t dump_var = {
195         &rss_options.dump_flags, dump_items
196 };
197
198 static const lc_opt_table_entry_t rss_option_table[] = {
199         LC_OPT_ENT_ENUM_MASK("dump", "dump phases", &dump_var),
200         { NULL }
201 };
202
203 /******************************************************************************
204  *  _          _                    __                  _   _
205  * | |        | |                  / _|                | | (_)
206  * | |__   ___| |_ __   ___ _ __  | |_ _   _ _ __   ___| |_ _  ___  _ __  ___
207  * | '_ \ / _ \ | '_ \ / _ \ '__| |  _| | | | '_ \ / __| __| |/ _ \| '_ \/ __|
208  * | | | |  __/ | |_) |  __/ |    | | | |_| | | | | (__| |_| | (_) | | | \__ \
209  * |_| |_|\___|_| .__/ \___|_|    |_|  \__,_|_| |_|\___|\__|_|\___/|_| |_|___/
210  *            | |
211  *            |_|
212  ******************************************************************************/
213
214 /**
215  * Acquire opcodes if needed and create source and sink nodes.
216  */
217 static void init_rss_special_nodes(ir_graph *irg) {
218         ir_node *block;
219
220         if (op_rss_Source == NULL) {
221                 int iro_rss_base = get_next_ir_opcodes(iro_rss_last);
222                 op_rss_Source = new_ir_op(iro_rss_base + iro_rss_Source, "rss_Source", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
223                 op_rss_Sink   = new_ir_op(iro_rss_base + iro_rss_Sink,   "rss_Sink",   op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
224         }
225         block   = get_irg_start_block(irg);
226         _source = new_ir_node(NULL, irg, block, op_rss_Source, mode_ANY, 0, NULL);
227         _sink   = new_ir_node(NULL, irg, block, op_rss_Sink, mode_ANY, 0, NULL);
228 }
229
230 static int cmp_int(const void *a, const void *b) {
231         const int *i1 = a;
232         const int *i2 = b;
233
234         return QSORT_CMP(*i1, *i2);
235 }
236
237 static int cmp_child_costs(const void *a, const void *b) {
238         const child_t *c1 = a;
239         const child_t *c2 = b;
240
241         return QSORT_CMP(c1->cost, c2->cost);
242 }
243
244 static int cmp_irn_idx(const void *a, const void *b) {
245         const ir_node *n1 = *(ir_node **)a;
246         const ir_node *n2 = *(ir_node **)b;
247
248         return QSORT_CMP(get_irn_idx(n1), get_irn_idx(n2));
249 }
250
251 static int cmp_rss_edges(const void *a, const void *b) {
252         const rss_edge_t *e1 = a;
253         const rss_edge_t *e2 = b;
254
255         return (e1->src != e2->src) || (e1->tgt != e2->tgt);
256 }
257
258 static int bsearch_for_index(int key, int *arr, size_t len, int force) {
259         int left = 0;
260         int right = len;
261
262         while (right >= left) {
263                 int idx = (left + right) / 2;
264
265                 if (key < arr[idx])
266                         right = idx - 1;
267                 else if (key > arr[idx])
268                         left = idx + 1;
269                 else
270                         return idx;
271         }
272
273         if (force)
274                 assert(0 && "Something is wrong, key not found.");
275         return -1;
276 }
277
278 static ir_node **build_sorted_array_from_list(plist_t *irn_list, struct obstack *obst) {
279         plist_element_t *el;
280         int     i     = 0;
281         int     len   = plist_count(irn_list);
282         ir_node **arr = NEW_ARR_D(ir_node *, obst, len);
283
284         /* copy the list into the array */
285         foreach_plist(irn_list, el) {
286                 arr[i++] = plist_element_get_value(el);
287         }
288
289         /* sort the array by node index */
290         qsort(arr, len, sizeof(arr[0]), cmp_irn_idx);
291
292         return arr;
293 }
294
295 /*****************************************************
296  *      _      _                       _
297  *     | |    | |                     (_)
298  *   __| | ___| |__  _   _  __ _  __ _ _ _ __   __ _
299  *  / _` |/ _ \ '_ \| | | |/ _` |/ _` | | '_ \ / _` |
300  * | (_| |  __/ |_) | |_| | (_| | (_| | | | | | (_| |
301  *  \__,_|\___|_.__/ \__,_|\__, |\__, |_|_| |_|\__, |
302  *                          __/ | __/ |         __/ |
303  *                         |___/ |___/         |___/
304  *
305  *****************************************************/
306
307 #ifdef DEBUG_libfirm
308 static void dump_nodeset(nodeset *ns, const char *prefix) {
309         ir_node *irn;
310         foreach_nodeset(ns, irn) {
311                 ir_fprintf(stderr, "%s%+F\n", prefix, irn);
312         }
313 }
314 #endif
315
316 static void build_file_name(rss_t *rss, const char *suffix, size_t suf_len, char *buf, size_t len) {
317         const char *irg_name;
318
319         memset(buf, 0, len);
320         irg_name = get_entity_name(get_irg_entity(rss->irg));
321         snprintf(buf, len - suf_len, "%s-%s-block-%ld",
322                 irg_name, arch_register_class_name(rss->cls), get_irn_node_nr(rss->block));
323         strcat(buf, suffix);
324 }
325
326 /* Dumps all collected bipartite components of current irg as vcg. */
327 static void debug_vcg_dump_bipartite(rss_t *rss) {
328         cbc_t *cbc;
329         FILE  *f;
330         char  file_name[256];
331         static const char suffix[] = "-RSS-CBC.vcg";
332
333         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
334         f = fopen(file_name, "w");
335
336         ir_fprintf(f, "graph: { title: \"connected bipartite component graph of %+F\"\n", rss->irg);
337         fprintf(f, "display_edge_labels: no\n");
338         fprintf(f, "layoutalgorithm: mindepth\n");
339         fprintf(f, "manhattan_edges: yes\n\n");
340
341         foreach_pset(rss->cbc_set, cbc) {
342                 ir_node    *n;
343                 rss_edge_t *ke;
344
345                 fprintf(f, "graph: { titel: \"cbc %d\" label: \"cbc %d\" status:clustered color:yellow\n", cbc->nr, cbc->nr);
346                 foreach_nodeset(cbc->parents, n) {
347                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
348                 }
349                 foreach_nodeset(cbc->children, n) {
350                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
351                 }
352                 foreach_pset(cbc->kill_edges, ke) {
353                         ir_fprintf(f, "edge: { sourcename: \"n%d_%d\" targetname: \"n%d_%d\" }\n",
354                                 get_irn_node_nr(ke->src), cbc->nr, get_irn_node_nr(ke->tgt), cbc->nr);
355                 }
356                 fprintf(f, "}\n\n");
357         }
358         fprintf(f, "}\n");
359         fclose(f);
360 }
361
362 /* Dump the computed killing function as vcg. */
363 static void debug_vcg_dump_kill(rss_t *rss) {
364         FILE            *f;
365         char            file_name[256];
366         plist_element_t *el;
367         static const char suffix[] = "-RSS-KILL.vcg";
368
369         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
370         f = fopen(file_name, "w");
371
372         ir_fprintf(f, "graph: { title: \"computed kill graph of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
373         fprintf(f, "display_edge_labels: no\n");
374         fprintf(f, "layoutalgorithm: mindepth\n");
375         fprintf(f, "manhattan_edges: yes\n\n");
376
377         {
378                 /* reset dump_flag */
379                 ir_node *irn;
380                 foreach_phase_irn(&rss->ph, irn) {
381                         rss_irn_t *node = get_rss_irn(rss, irn);
382                         node->dumped = 0;
383                 }
384         }
385
386         /* dump all nodes and their killers */
387         foreach_plist(rss->nodes, el) {
388                 ir_node   *irn     = plist_element_get_value(el);
389                 rss_irn_t *rirn    = get_rss_irn(rss, irn);
390                 rss_irn_t *pk_rirn = get_rss_irn(rss, rirn->killer);
391
392                 if (! rirn->dumped) {
393                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
394                         rirn->dumped = 1;
395                 }
396
397                 if (! pk_rirn->dumped) {
398                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(rirn->killer), rirn->killer);
399                         pk_rirn->dumped = 1;
400                 }
401
402                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
403                         get_irn_node_nr(rirn->killer), get_irn_node_nr(irn));
404         }
405
406         fprintf(f, "}\n");
407         fclose(f);
408 }
409
410 /* Dumps the potential killing DAG (PKG) as vcg. */
411 static void debug_vcg_dump_pkg(rss_t *rss, nodeset *max_ac, int iteration) {
412         FILE              *f;
413         char              file_name[256];
414         char              *suffix   = alloca(32);
415         static const char suffix1[] = "-RSS-PKG.vcg";
416         static const char suffix2[] = "-RSS-PKG-MAXAC.vcg";
417         plist_element_t   *el;
418
419         if (! max_ac) {
420                 snprintf(suffix, 32, "%s", suffix1);
421         }
422         else {
423                 snprintf(suffix, 32, "-%02d%s", iteration, suffix2);
424         }
425
426         build_file_name(rss, suffix, strlen(suffix) + 1, file_name, sizeof(file_name));
427         f = fopen(file_name, "w");
428
429         ir_fprintf(f, "graph: { title: \"potential killing DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
430         fprintf(f, "display_edge_labels: no\n");
431         fprintf(f, "layoutalgorithm: mindepth\n");
432         fprintf(f, "manhattan_edges: yes\n\n");
433
434         {
435                 /* reset dump_flag */
436                 ir_node *irn;
437                 foreach_phase_irn(&rss->ph, irn) {
438                         rss_irn_t *node = get_rss_irn(rss, irn);
439                         node->dumped = 0;
440                 }
441         }
442
443         foreach_plist(rss->nodes, el) {
444                 ir_node   *irn  = plist_element_get_value(el);
445                 rss_irn_t *rirn = get_rss_irn(rss, irn);
446                 char      *c1   = "";
447                 plist_element_t *k_el;
448
449                 /* dump selected saturating values in yellow */
450                 if (max_ac && nodeset_find(max_ac, irn))
451                         c1 = "color:yellow";
452
453                 if (! rirn->dumped) {
454                         if (rirn->chain)
455                                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(irn), irn, rirn->chain->nr, c1);
456                         else
457                                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(irn), irn, c1);
458                         rirn->dumped = 1;
459                 }
460
461                 foreach_plist(rirn->pkiller_list, k_el) {
462                         ir_node   *pkiller = plist_element_get_value(k_el);
463                         rss_irn_t *pk_rirn = get_rss_irn(rss, pkiller);
464                         char      *c2      = "";
465
466                         if (max_ac && nodeset_find(max_ac, pkiller))
467                                 c2 = "color:yellow";
468
469                         if (! pk_rirn->dumped) {
470                                 if (pk_rirn->chain)
471                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(pkiller), pkiller, pk_rirn->chain->nr, c2);
472                                 else
473                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(pkiller), pkiller, c2);
474                                 pk_rirn->dumped = 1;
475                         }
476                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
477                                 get_irn_node_nr(pkiller), get_irn_node_nr(irn));
478                 }
479         }
480         fprintf(f, "}\n");
481         fclose(f);
482 }
483
484 /* Dumps the disjoint value DAG (DVG) as vcg. */
485 static void debug_vcg_dump_dvg(rss_t *rss, dvg_t *dvg) {
486         static const char suffix[] = "-RSS-DVG.vcg";
487         FILE       *f;
488         char       file_name[256];
489         ir_node    *irn;
490         rss_edge_t *edge;
491
492         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
493         f = fopen(file_name, "w");
494
495         ir_fprintf(f, "graph: { title: \"disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
496         fprintf(f, "display_edge_labels: no\n");
497         fprintf(f, "layoutalgorithm: mindepth\n");
498         fprintf(f, "manhattan_edges: yes\n\n");
499
500         /* dump all nodes */
501         foreach_nodeset(dvg->nodes, irn) {
502                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
503         }
504
505         /* dump all edges */
506         foreach_pset(dvg->edges, edge) {
507                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
508                         get_irn_node_nr(edge->src), get_irn_node_nr(edge->tgt));
509         }
510
511         fprintf(f, "}\n");
512         fclose(f);
513 }
514
515 #if 0
516 /* Dumps the PKG(DVG). */
517 static void debug_vcg_dump_dvg_pkiller(rss_t *rss, dvg_t *dvg) {
518         static const char suffix[] = "-RSS-DVG-PKG.vcg";
519         FILE       *f;
520         char       file_name[256];
521         ir_node    *irn;
522
523         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
524         f = fopen(file_name, "w");
525
526         ir_fprintf(f, "graph: { title: \"PKG of disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
527         fprintf(f, "display_edge_labels: no\n");
528         fprintf(f, "layoutalgorithm: mindepth\n");
529         fprintf(f, "manhattan_edges: yes\n\n");
530
531         /* dump all nodes */
532         foreach_nodeset(dvg->nodes, irn) {
533                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
534         }
535
536         /* dump all edges */
537         foreach_nodeset(dvg->nodes, irn) {
538                 rss_irn_t       *node = get_rss_irn(rss, irn);
539                 plist_element_t *el;
540
541                 foreach_plist(node->dvg_pkiller_list, el) {
542                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
543                                 get_irn_node_nr(plist_element_get_value(el)), get_irn_node_nr(irn));
544                 }
545         }
546
547         fprintf(f, "}\n");
548         fclose(f);
549 }
550 #endif /* if 0 */
551
552 /**
553  * In case there is no rss information for irn, initialize it.
554  */
555 static void *init_rss_irn(phase_t *ph, ir_node *irn, void *old) {
556         rss_irn_t *res = old ? old : phase_alloc(ph, sizeof(res[0]));
557
558         res->descendant_list = plist_obstack_new(phase_obst(ph));
559         res->descendants     = NULL;
560
561         res->consumer_list   = plist_obstack_new(phase_obst(ph));
562         res->consumer        = NULL;
563
564         res->pkiller_list    = plist_obstack_new(phase_obst(ph));
565
566         res->parent_list     = plist_obstack_new(phase_obst(ph));
567
568         res->killer          = NULL;
569         res->irn             = irn;
570         res->chain           = NULL;
571
572         res->kill_count      = 0;
573         res->desc_walk       = 0;
574         res->live_out        = 0;
575         res->visited         = 0;
576         res->handled         = 0;
577         res->dumped          = 0;
578         res->havecons        = 0;
579
580         return res;
581 }
582
583 /**
584  * Collect all nodes data dependent on current node.
585  */
586 static void collect_descendants(rss_t *rss, rss_irn_t *rirn, ir_node *irn, int *got_sink, unsigned cur_desc_walk) {
587         const ir_edge_t *edge;
588         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
589         ir_node         *block    = rss->block;
590         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
591         int             i;
592
593         if (cur_node->desc_walk >= cur_desc_walk)
594                 return;
595         cur_node->desc_walk = cur_desc_walk;
596
597         /* Stop at Barriers */
598         if (be_is_Barrier(irn))
599                 return;
600
601         /* loop over normal and dependency edges */
602         for (i = 0; i < 2; ++i) {
603                 foreach_out_edge_kind(irn, edge, ekind[i]) {
604                         ir_node *user = get_edge_src_irn(edge);
605
606                         /* skip ignore nodes as they do not really contribute to register pressure */
607                         if (arch_irn_is(rss->arch_env, user, ignore))
608                                 continue;
609
610                         /*
611                                 (a) mode_X means Jump            -> out_edge
612                                 (b) Phi as user of a normal node -> out-edge
613                         */
614                         if (get_irn_mode(user) == mode_X || is_Phi(user)) {
615                                 if (! *got_sink)
616                                         goto force_sink;
617                                 else
618                                         continue;
619                         }
620
621                         if (is_Proj(user)) {
622
623                                 //if (arch_get_irn_reg_class(rss->arch_env, user, -1) == rss->cls)
624                                         collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
625                         }
626                         else {
627                                 /* check if user lives in block and is not a control flow node */
628                                 if (get_nodes_block(user) == block) {
629                                         if (! plist_has_value(rirn->descendant_list, user)) {
630                                                 plist_insert_back(rirn->descendant_list, user);
631                                                 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", user));
632                                         }
633                                         collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
634                                 }
635                                 else if (! *got_sink) {
636 force_sink:
637                                         /* user lives out of block: add sink as descendant if not already done */
638                                         plist_insert_back(rirn->descendant_list, _sink);
639                                         *got_sink = 1;
640                                         DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", _sink));
641                                 }
642                         }
643                 }
644         }
645 }
646
647 /**
648  * Handles a single consumer.
649  */
650 static void collect_single_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *consumer, int *got_sink) {
651         ir_node *block = rss->block;
652
653         assert(! is_Proj(consumer) && "Cannot handle Projs");
654
655         if (! is_Phi(consumer) && ! is_Block(consumer) && get_nodes_block(consumer) == block) {
656                 if (! arch_irn_is(rss->arch_env, consumer, ignore) && ! plist_has_value(rss_irn->consumer_list, consumer)) {
657                         plist_insert_back(rss_irn->consumer_list, consumer);
658                         DBG((rss->dbg, LEVEL_2, "\t\tconsumer %+F\n", consumer));
659                 }
660         }
661         else {
662                 rss_irn->live_out = 1;
663                 DBG((rss->dbg, LEVEL_2, "\t\tlive out %+F", consumer));
664                 if (! *got_sink) {
665                         plist_insert_back(rss_irn->consumer_list, _sink);
666                         *got_sink = 1;
667                         DB((rss->dbg, LEVEL_2, ", %+F added instead", _sink));
668                 }
669                 DB((rss->dbg, LEVEL_2, "\n"));
670         }
671 }
672
673 /**
674  * Collect all nodes consuming the value(s) produced by current node.
675  */
676 static void collect_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *irn, int *got_sink) {
677         const ir_edge_t *edge;
678         int             i;
679         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
680         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
681
682         if (cur_node->havecons)
683                 return;
684         cur_node->havecons = 1;
685
686         for (i = 0; i < 2; ++i) {
687                 foreach_out_edge_kind(irn, edge, ekind[i]) {
688                         ir_node *consumer = get_edge_src_irn(edge);
689
690                         if (is_Proj(consumer)) {
691                                 //if (arch_get_irn_reg_class(rss->arch_env, consumer, -1) == rss->cls)
692                                         collect_consumer(rss, rss_irn, consumer, got_sink);
693                         }
694                         else
695                                 collect_single_consumer(rss, rss_irn, consumer, got_sink);
696                 }
697         }
698 }
699
700 /**
701  * Collects all consumer and descendant of a irn.
702  */
703 static void collect_node_info(rss_t *rss, ir_node *irn) {
704         static unsigned cur_desc_walk = 0;
705         rss_irn_t       *rss_irn      = get_rss_irn(rss, irn);
706         int             got_sink;
707
708         assert(! is_Proj(irn) && "Cannot handle Projs.");
709
710         /* check if node info is already available */
711         if (rss_irn->handled)
712                 return;
713                 //phase_reinit_single_irn_data(&rss->ph, irn);
714
715         DBG((rss->dbg, LEVEL_1, "\tcomputing consumers of %+F:\n", irn));
716
717         /* collect all consumer */
718         got_sink = 0;
719         collect_consumer(rss, rss_irn, irn, &got_sink);
720
721         /* build sorted consumer array */
722         rss_irn->consumer = build_sorted_array_from_list(rss_irn->consumer_list, phase_obst(&rss->ph));
723
724         DBG((rss->dbg, LEVEL_1, "\tcompute descendants of %+F:\n", irn));
725
726         /* collect descendants */
727         got_sink = 0;
728         collect_descendants(rss, rss_irn, irn, &got_sink, ++cur_desc_walk);
729
730         /* build sorted descendant array */
731         rss_irn->descendants = build_sorted_array_from_list(rss_irn->descendant_list, phase_obst(&rss->ph));
732
733         rss_irn->handled = 1;
734 }
735
736 /**
737  * Checks if v is a potential killer of u.
738  * v is in pkill(u) iff descendants(v) cut consumer(u) is v
739  *
740  * @param rss   The rss object
741  * @param v      The node to check for killer
742  * @param u      The potentially killed value
743  * @return 1 if v is in pkill(u), 0 otherwise
744  */
745 static int is_potential_killer(rss_t *rss, rss_irn_t *v, rss_irn_t *u) {
746         plist_t *list;
747         ir_node **arr;
748         plist_element_t *el;
749
750         assert(is_Sink(v->irn) || ((plist_count(v->descendant_list) > 0 && v->descendants) || 1));
751         assert(is_Sink(u->irn) || ((plist_count(u->consumer_list)   > 0 && u->consumer)    || 1));
752
753         /* as we loop over the list: loop over the shorter one */
754         if (plist_count(v->descendant_list) > plist_count(u->consumer_list)) {
755                 list = u->consumer_list;
756                 arr  = v->descendants;
757         }
758         else {
759                 list = v->descendant_list;
760                 arr  = u->consumer;
761         }
762
763         /* for each list element: try to find element in array */
764         foreach_plist(list, el) {
765                 ir_node *irn   = plist_element_get_value(el);
766                 ir_node *match = BSEARCH_IRN_ARR(irn, arr);
767
768                 if (match && match != irn)
769                         return 0;
770         }
771
772         return 1;
773 }
774
775 /**
776  * Update descendants, consumer and pkiller list for the given irn.
777  */
778 static void update_node_info(rss_t *rss, ir_node *irn, ir_node *pk_irn) {
779         rss_irn_t *node    = get_rss_irn(rss, irn);
780         rss_irn_t *pkiller = get_rss_irn(rss, pk_irn);
781
782         /* add consumer and rebuild the consumer array */
783         if (! plist_has_value(node->consumer_list, pk_irn)) {
784                 plist_insert_back(node->consumer_list, pk_irn);
785                 node->consumer = build_sorted_array_from_list(node->consumer_list, phase_obst(&rss->ph));
786         }
787
788         /* add pk_irn as descendant, add it's descendants to irn's and rebuild array */
789         if (! plist_has_value(node->descendant_list, pk_irn)) {
790                 plist_element_t *el;
791
792                 plist_insert_back(node->descendant_list, pk_irn);
793
794                 foreach_plist(pkiller->descendant_list, el) {
795                         ir_node *desc = plist_element_get_value(el);
796
797                         if (! plist_has_value(node->descendant_list, desc))
798                                 plist_insert_back(node->descendant_list, desc);
799                 }
800
801                 node->descendants = build_sorted_array_from_list(node->descendant_list, phase_obst(&rss->ph));
802         }
803
804 }
805
806 /**
807  * Compute the potential killing set PK.
808  */
809 static void compute_pkill_set(rss_t *rss) {
810         plist_element_t *u_el, *v_el;
811
812         foreach_plist(rss->nodes, u_el) {
813                 ir_node   *u_irn = plist_element_get_value(u_el);
814                 rss_irn_t *u     = get_rss_irn(rss, u_irn);
815
816                 DBG((rss->dbg, LEVEL_1, "\tcomputing potential killers of %+F:\n", u_irn));
817
818                 /* check each consumer if it is a potential killer  */
819                 foreach_plist(u->consumer_list, v_el) {
820                         ir_node   *v_irn = plist_element_get_value(v_el);
821                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
822
823                         /* check, if v is a potential killer of u */
824                         if (is_potential_killer(rss, v, u)) {
825                                 if (! plist_has_value(u->pkiller_list, v_irn))
826                                         plist_insert_back(u->pkiller_list, v_irn);
827                                 v->kill_count++;
828                                 DBG((rss->dbg, LEVEL_2, "\t\tpotential killer %+F\n", v_irn));
829                         }
830                 }
831
832                 u->killer = _sink;
833         }
834
835         if (rss->opts->dump_flags & RSS_DUMP_PKG)
836                 debug_vcg_dump_pkg(rss, NULL, 0);
837 }
838
839 /**
840  * Build set of killing edges (from values to their potential killers)
841  */
842 static void build_kill_edges(rss_t *rss, pset *epk) {
843         plist_element_t *el, *k_el;
844
845         foreach_plist(rss->nodes, el) {
846                 ir_node    *irn  = plist_element_get_value(el);
847                 rss_irn_t *rirn = get_rss_irn(rss, irn);
848
849                 DBG((rss->dbg, LEVEL_3, "\t\tbuilding kill edges for %+F:\n", irn));
850
851                 foreach_plist(rirn->pkiller_list, k_el) {
852                         ir_node    *pkiller = plist_element_get_value(k_el);
853                         rss_edge_t *ke      = obstack_alloc(phase_obst(&rss->ph), sizeof(*ke));
854
855                         ke->src  = irn;
856                         ke->tgt  = pkiller;
857                         ke->next = NULL;
858
859                         DBG((rss->dbg, LEVEL_3, "\t\t\ttarget %+F\n", pkiller));
860
861                         pset_insert(epk, ke, HASH_RSS_EDGE(ke));
862                 }
863         }
864 }
865
866 #ifdef DEBUG_libfirm
867 /* print the given cbc for debugging purpose */
868 static void debug_print_cbc(firm_dbg_module_t *mod, cbc_t *cbc) {
869         ir_node    *n;
870         rss_edge_t *ke;
871
872         DBG((mod, LEVEL_3, "\t\tS = set of parents:\n"));
873         foreach_nodeset(cbc->parents, n) {
874                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
875         }
876         DBG((mod, LEVEL_3, "\t\tT = set of children:\n"));
877         foreach_nodeset(cbc->children, n) {
878                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
879         }
880         DBG((mod, LEVEL_3, "\t\tE = Edges from producers to consumers\n"));
881         foreach_pset(cbc->kill_edges, ke) {
882                 DBG((mod, LEVEL_3, "\t\t\t%+F -> %+F\n", ke->src, ke->tgt));
883         }
884 }
885 #endif
886
887 /**
888  * Construct the bipartite decomposition.
889  * Sid-Ahmed-Ali Touati, Phd Thesis
890  * Register Pressure in Instruction Level Parallelism, p. 71
891  */
892 static void compute_bipartite_decomposition(rss_t *rss) {
893         pset *epk    = new_pset(cmp_rss_edges, 10);
894         int  cur_num = 0;
895
896         plist_element_t *el;
897
898         DBG((rss->dbg, LEVEL_1, "\tcomputing bipartite decomposition:\n"));
899
900         build_kill_edges(rss, epk);
901
902         foreach_plist(rss->nodes, el) {
903                 ir_node   *u_irn   = plist_element_get_value(el);
904                 rss_irn_t *u       = get_rss_irn(rss, u_irn);
905                 int       p_change = 1;
906                 int       c_change = 1;
907                 int       vrfy_ok  = 1;
908
909                 cbc_t           *cbc;
910                 plist_element_t *el2;
911                 rss_edge_t      *k_edge;
912                 rss_edge_t      *kedge_root = NULL;
913                 ir_node         *t_irn, *s_irn;
914
915                 if (u->visited || u_irn == _sink)
916                         continue;
917
918                 DBG((rss->dbg, LEVEL_2, "\t\t%+F choosen:\n", u_irn));
919
920                 cbc     = obstack_alloc(phase_obst(&rss->ph), sizeof(*cbc));
921                 cbc->nr = cur_num++;
922
923                 /* initialize S_cb */
924                 cbc->parents = new_nodeset(5);
925                 nodeset_insert(cbc->parents, u_irn);
926                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (init)\n", u_irn));
927
928                 /* E_cb = empty */
929                 cbc->kill_edges = new_pset(cmp_rss_edges, 5);
930
931                 /* each parent gets killed by at least one value from children */
932
933                 /* T_cb = PK_successors(u) */
934                 cbc->children = new_nodeset(5);
935                 foreach_plist(u->pkiller_list, el2) {
936                         nodeset_insert(cbc->children, plist_element_get_value(el2));
937                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (init)\n", plist_element_get_value(el2)));
938                 }
939
940                 /*
941                         Now: insert the parents of all children into the parent set
942                         and insert the children of all parents into the children set
943                         until the sets don't change any more
944                 */
945                 while (p_change || c_change) {
946                         p_change = c_change = 0;
947
948                         /* accumulate parents */
949                         foreach_nodeset(cbc->children, t_irn) {
950                                 foreach_pset(epk, k_edge) {
951                                         ir_node *val = k_edge->src;
952
953                                         if (k_edge->tgt == t_irn && ! nodeset_find(cbc->parents, val)) {
954                                                 nodeset_insert(cbc->parents, val);
955                                                 p_change = 1;
956                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (killed by %+F)\n", val, t_irn));
957                                         }
958                                 }
959                         }
960
961                         /* accumulate children */
962                         foreach_nodeset(cbc->parents, s_irn) {
963                                 foreach_pset(epk, k_edge) {
964                                         ir_node *val = k_edge->tgt;
965
966                                         if (k_edge->src == s_irn  && ! nodeset_find(cbc->children, val)) {
967                                                 nodeset_insert(cbc->children, val);
968                                                 c_change = 1;
969                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (kills %+F)\n", val, s_irn));
970                                         }
971                                 }
972                         }
973                 }
974
975                 /* mark all parent values as visited */
976                 foreach_nodeset(cbc->parents, s_irn) {
977                         rss_irn_t *s = get_rss_irn(rss, s_irn);
978                         s->visited = 1;
979                         /* assure bipartite property */
980 #if 0
981                         if (nodeset_find(cbc->children, s_irn)) {
982                                 nodeset_remove(cbc->children, s_irn);
983                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F removed from to children\n", s_irn));
984                         }
985 #endif
986                 }
987
988                 /* update edges */
989                 foreach_pset(epk, k_edge) {
990                         if (nodeset_find(cbc->parents, k_edge->src) && nodeset_find(cbc->children, k_edge->tgt)) {
991                                 pset_insert(cbc->kill_edges, k_edge, HASH_RSS_EDGE(k_edge));
992                                 /*
993                                         Link all k_edges which are about to be removed together.
994                                         Beware: pset_remove kills the iterator.
995                                 */
996                                 k_edge->next = kedge_root;
997                                 kedge_root   = k_edge;
998                         }
999                 }
1000
1001                 /* remove all linked k_edges */
1002                 for (; kedge_root; kedge_root = kedge_root->next) {
1003                         pset_remove(epk, kedge_root, HASH_RSS_EDGE(kedge_root));
1004                 }
1005
1006                 /* verify the cbc */
1007                 foreach_nodeset(cbc->parents, s_irn) {
1008                         int is_killed = 0;
1009
1010                         foreach_pset(cbc->kill_edges, k_edge) {
1011                                 if (k_edge->src == s_irn) {
1012                                         is_killed = 1;
1013                                         pset_break(cbc->kill_edges);
1014                                         break;
1015                                 }
1016                         }
1017
1018                         if (! is_killed) {
1019                                 vrfy_ok = 0;
1020                                 ir_fprintf(stderr, "Warning: parent %+F is not killed in current cbc\n", s_irn);
1021                         }
1022                 }
1023                 assert(vrfy_ok && "Verification of CBC failed");
1024
1025                 /* add the connected bipartite component */
1026                 if (nodeset_count(cbc->parents) > 0 && nodeset_count(cbc->children) > 0 && pset_count(cbc->kill_edges) > 0)
1027                         pset_insert(rss->cbc_set, cbc, (unsigned)cbc->nr);
1028                 DBG((rss->dbg, LEVEL_2, "\tbipartite component %d inserted:\n", cbc->nr));
1029                 DEBUG_ONLY(
1030                         if (firm_dbg_get_mask(rss->dbg) & LEVEL_2)
1031                                 debug_print_cbc(rss->dbg, cbc);
1032                 );
1033         }
1034
1035         if (rss->opts->dump_flags & RSS_DUMP_CBC)
1036                 debug_vcg_dump_bipartite(rss);
1037
1038         del_pset(epk);
1039 }
1040
1041 /**
1042  * Select the child with the maximum cost.
1043  */
1044 static child_t *select_child_max_cost(rss_t *rss, nodeset *x, nodeset *y, child_t *t, cbc_t *cbc) {
1045         ir_node *child;
1046         float   max_cost = -1.0f;
1047
1048         DBG((rss->dbg, LEVEL_2, "\t\tcomputing children costs:\n"));
1049
1050         foreach_nodeset(cbc->children, child) {
1051                 rss_irn_t  *r_child             = get_rss_irn(rss, child);
1052                 int         num_unkilled_parents = 0;
1053                 int         num_descendants;
1054                 rss_edge_t *k_edge;
1055                 float       cost;
1056
1057                 /* get the number of unkilled parents */
1058                 foreach_pset(cbc->kill_edges, k_edge) {
1059                         if (k_edge->tgt == child && nodeset_find(x, k_edge->src))
1060                                 ++num_unkilled_parents;
1061                 }
1062
1063                 cost = (float)num_unkilled_parents;
1064
1065                 num_descendants = plist_count(r_child->descendant_list) + nodeset_count(y);
1066
1067                 if (num_descendants > 0)
1068                         cost /= (float)num_descendants;
1069
1070                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F, #desc %d, cost %.3f\n", child, num_descendants, cost));
1071
1072                 if (cost > max_cost) {
1073                         t->irn   = child;
1074                         t->cost  = cost;
1075                         max_cost = cost;
1076                 }
1077         }
1078
1079         return t;
1080 }
1081
1082 /**
1083  * Remove all parents from x which are killed by t_irn.
1084  */
1085 static void remove_covered_parents(rss_t *rss, nodeset *x, ir_node *t_irn, cbc_t *cbc) {
1086         rss_irn_t  *t = get_rss_irn(rss, t_irn);
1087         rss_edge_t *k_edge;
1088
1089         DBG((rss->dbg, LEVEL_2, "\t\tremoving parents covered by %+F:\n", t_irn));
1090
1091         foreach_pset(cbc->kill_edges, k_edge) {
1092                 if (k_edge->tgt == t_irn && nodeset_find(x, k_edge->src)) {
1093                         nodeset_remove(x, k_edge->src);
1094                         plist_insert_back(t->parent_list, k_edge->src);
1095                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", k_edge->src));
1096                 }
1097         }
1098 }
1099
1100 static void update_cumulated_descendent_values(rss_t *rss, nodeset *y, ir_node *t_irn) {
1101         rss_irn_t *t = get_rss_irn(rss, t_irn);
1102         plist_element_t *el;
1103
1104         DBG((rss->dbg, LEVEL_2, "\t\tupdating cumulated descendant value of %+F:\n", t_irn));
1105
1106         foreach_plist(t->descendant_list, el) {
1107                 nodeset_insert(y, plist_element_get_value(el));
1108                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", plist_element_get_value(el)));
1109         }
1110 }
1111
1112 /**
1113  * Greedy-k: a heuristics for the MMA problem
1114  */
1115 static void compute_killing_function(rss_t *rss) {
1116         cbc_t *cbc;
1117         struct obstack obst;
1118
1119         obstack_init(&obst);
1120
1121         rss->cbc_set = pset_new_ptr(5);
1122         compute_bipartite_decomposition(rss);
1123
1124         /* for all bipartite components do: */
1125         foreach_pset(rss->cbc_set, cbc) {
1126                 ir_node *p;
1127                 nodeset *x       = new_nodeset(10);
1128                 nodeset *y       = new_nodeset(10);
1129                 child_t **sks    = NEW_ARR_F(child_t *, 20);
1130                 int     cur_len  = 0;
1131                 int     cur_size = 20;
1132                 int     i;
1133
1134                 DBG((rss->dbg, LEVEL_1, "\tcomputing SKS for cbc %d:\n", cbc->nr));
1135                 DBG((rss->dbg, LEVEL_2, "\t\tinitializing parents X:\n"));
1136
1137                 /* X = S_cb (all parents are initially uncovered) */
1138                 foreach_nodeset(cbc->parents, p) {
1139                         nodeset_insert(x, p);
1140                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", p));
1141                 }
1142
1143                 /* while X not empty */
1144                 while (nodeset_count(x) > 0) {
1145                         child_t *t = obstack_alloc(&obst, sizeof(*t));
1146                         memset(t, 0, sizeof(*t));
1147
1148                         t = select_child_max_cost(rss, x, y, t, cbc);
1149
1150                         if (cur_len >= cur_size) {
1151                                 ARR_EXTO(child_t *, sks, cur_size * 2);
1152                                 cur_size *= 2;
1153                         }
1154
1155                         DBG((rss->dbg, LEVEL_2, "\t\tinsert child %+F (%.3f) into SKS at pos %d\n", t->irn, t->cost, cur_len));
1156
1157                         sks[cur_len++] = t;
1158                         remove_covered_parents(rss, x, t->irn, cbc);
1159                         update_cumulated_descendent_values(rss, y, t->irn);
1160                 }
1161
1162                 ARR_SHRINKLEN(sks, cur_len);
1163
1164                 /* sort SKS in increasing cost order */
1165                 qsort(sks, cur_len, sizeof(sks[0]), cmp_child_costs);
1166
1167                 DBG((rss->dbg, LEVEL_2, "\tprocessing SKS for cbc %d:\n", cbc->nr));
1168
1169                 /* build killing function */
1170                 for (i = cur_len - 1; i >= 0; --i) { /* loop over sks in decreasing cost order */
1171                         child_t         *t = sks[i];
1172                         rss_irn_t       *rt = get_rss_irn(rss, t->irn);
1173                         plist_element_t *p_el;
1174
1175                         DBG((rss->dbg, LEVEL_3, "\t\t\tkiller %+F (%.3f):\n", t->irn, t->cost));
1176
1177                         /* kill all unkilled parents of t */
1178                         foreach_plist(rt->parent_list, p_el) {
1179                                 ir_node    *par = plist_element_get_value(p_el);
1180                                 rss_irn_t *rpar = get_rss_irn(rss, par);
1181
1182                                 if (is_Sink(rpar->killer)) {
1183                                         rpar->killer = t->irn;
1184                                         DBG((rss->dbg, LEVEL_2, "\t\tkill %+F\n", rpar->irn));
1185                                 }
1186                                 else {
1187                                         DBG((rss->dbg, LEVEL_3, "\t\t\tkeeping %+F as killer for %+F\n", rpar->killer, rpar->irn));
1188                                 }
1189                         }
1190                 }
1191
1192                 del_nodeset(x);
1193                 del_nodeset(y);
1194                 DEL_ARR_F(sks);
1195         }
1196
1197         if (rss->opts->dump_flags & RSS_DUMP_KILL)
1198                 debug_vcg_dump_kill(rss);
1199
1200         del_pset(rss->cbc_set);
1201         obstack_free(&obst, NULL);
1202 }
1203
1204 /**
1205  * Adds the edge src -> tgt to the dvg. Checks if reverse edge is already there (asserts).
1206  */
1207 static INLINE void add_dvg_edge(rss_t *rss, dvg_t *dvg, ir_node *src, ir_node *tgt, int have_source) {
1208         rss_edge_t *dvg_edge;
1209         rss_edge_t key;
1210
1211         if (! have_source)
1212                 nodeset_insert(dvg->nodes, src);
1213         else
1214                 assert(nodeset_find(dvg->nodes, src) != NULL && "Wrong assumption");
1215
1216         nodeset_insert(dvg->nodes, tgt);
1217
1218         key.src = tgt;
1219         key.tgt = src;
1220         assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1221
1222         key.src = src;
1223         key.tgt = tgt;
1224         if (NULL != pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1225                 /* add the edge to the DVG */
1226                 dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1227
1228                 dvg_edge->src  = src;
1229                 dvg_edge->tgt  = tgt;
1230                 dvg_edge->next = NULL;
1231
1232                 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", src, tgt));
1233                 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1234         }
1235 }
1236
1237 /**
1238  * Computes the disjoint value DAG (DVG).
1239  * BEWARE: It is not made explicitly clear in the Touati paper,
1240  *         but the DVG is meant to be build from the KILLING DAG
1241  */
1242 static void compute_dvg(rss_t *rss, dvg_t *dvg) {
1243         plist_element_t *el;
1244
1245         DBG((rss->dbg, LEVEL_1, "\tcomputing DVG:\n"));
1246
1247         foreach_plist(rss->nodes, el) {
1248                 ir_node    *u_irn    = plist_element_get_value(el);
1249                 rss_irn_t  *u        = get_rss_irn(rss, u_irn);
1250                 rss_irn_t  *u_killer = get_rss_irn(rss, u->killer);
1251                 int        i;
1252
1253                 /* TODO: omit nodes only having sink as pkiller and killing no one */
1254
1255                 /* add an edge to killer */
1256                 add_dvg_edge(rss, dvg, u_irn, u->killer, 0);
1257
1258                 if (is_Sink(u->killer))
1259                         continue;
1260
1261                 /* We add an edge to every killer, from where we could be reached. */
1262                 for (i = ARR_LEN_SAFE(u_killer->descendants) - 1; i >= 0; --i) {
1263                         add_dvg_edge(rss, dvg, u_irn, u_killer->descendants[i], 1);
1264                 }
1265
1266 #if 0
1267
1268                 foreach_plist(rss->nodes, el2) {
1269                         ir_node *v_irn = plist_element_get_value(el2);
1270
1271                         /*
1272                                 There is an edge (u, v) in the DVG iff v is a descendant of the killer(u).
1273                         */
1274                         if (BSEARCH_IRN_ARR(v_irn, u_kill->descendants)) {
1275                                 rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1276                                 rss_edge_t key;
1277
1278                                 /* insert the user into the DVG and append it to the user list of u */
1279                                 nodeset_insert(dvg->nodes, v_irn);
1280                                 if (! plist_has_value(u->dvg_user_list, v_irn))
1281                                         plist_insert_back(u->dvg_user_list, v_irn);
1282
1283                                 dvg_edge->src  = u_irn;
1284                                 dvg_edge->tgt  = v_irn;
1285                                 dvg_edge->next = NULL;
1286
1287                                 key.src = v_irn;
1288                                 key.tgt = u_irn;
1289
1290                                 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1291
1292                                 /* add the edge to the DVG */
1293                                 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", u_irn, v_irn));
1294                                 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1295                         }
1296                 }
1297 #endif /* if 0 */
1298         }
1299
1300         if (rss->opts->dump_flags & RSS_DUMP_DVG)
1301                 debug_vcg_dump_dvg(rss, dvg);
1302 }
1303
1304 /**
1305  * Updates the dvg structure when a serialization edge from src -> tgt is added.
1306  */
1307 static void update_dvg(rss_t *rss, dvg_t *dvg, rss_irn_t *src, rss_irn_t *tgt) {
1308         int i, j, idx;
1309         rss_edge_t *edge;
1310         rss_edge_t **arr = alloca(pset_count(dvg->edges) * sizeof(arr[0]));
1311
1312         /*
1313                 Add an edge from serialization target to serialization src:
1314                 src cannot be alive with target
1315         */
1316         add_dvg_edge(rss, dvg, tgt->irn, src->irn, 0);
1317
1318         /* Add edges to src's descendants as well, they also getting serialized. */
1319         for (i = ARR_LEN_SAFE(src->descendants) - 1; i >= 0; --i) {
1320                 add_dvg_edge(rss, dvg, tgt->irn, src->descendants[i], 1);
1321         }
1322
1323         /* We also have to add edges from targets predecessors in dvg */
1324         idx = 0;
1325         /* We cannot insert elements into set over which we iterate ... */
1326         foreach_pset(dvg->edges, edge) {
1327                 if (edge->tgt == tgt->irn) {
1328                         arr[idx++] = edge;
1329                 }
1330         }
1331
1332         for (i = 0; i < idx; ++i) {
1333                 edge = arr[i];
1334                 add_dvg_edge(rss, dvg, edge->src, src->irn, 1);
1335                 for (j = ARR_LEN_SAFE(src->descendants) - 1; j >= 0; --j) {
1336                         add_dvg_edge(rss, dvg, edge->src, src->descendants[j], 1);
1337                 }
1338         }
1339 }
1340
1341 #if 0
1342 /**
1343  * Accumulate all descendants for root into list.
1344  */
1345 static void accumulate_dvg_descendant_values(rss_t *rss, rss_irn_t *root, plist_t *list) {
1346         if (plist_count(root->dvg_user_list) > 0) {
1347                 plist_element_t *el;
1348
1349                 foreach_plist(root->dvg_user_list, el) {
1350                         ir_node   *v_irn = plist_element_get_value(el);
1351                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
1352
1353                         /* add v as descendant */
1354                         if (! plist_has_value(list, v_irn)) {
1355                                 plist_insert_back(list, v_irn);
1356                                 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG descendant %+F\n", v_irn));
1357                         }
1358
1359                         /* accumulate v's descendants */
1360                         accumulate_dvg_descendant_values(rss, v, list);
1361                 }
1362         }
1363 }
1364
1365 /**
1366  * Builds the list of potential killers for each node
1367  * in the given DVG.
1368  * Needs the descendant list for all user as sorted array.
1369  */
1370 static void build_dvg_pkiller_list(rss_t *rss, dvg_t *dvg) {
1371         ir_node *irn;
1372
1373         foreach_nodeset(dvg->nodes, irn) {
1374                 rss_irn_t       *node = get_rss_irn(rss, irn);
1375                 plist_element_t *el, *el2;
1376
1377                 DBG((rss->dbg, DEBUG_DVG, "\t\tbuilding pkiller list for %+F\n", irn));
1378
1379                 /* check each user */
1380                 foreach_plist(node->dvg_user_list, el) {
1381                         ir_node *u_irn = plist_element_get_value(el);
1382
1383                         /* is the current user u_irn not a descendant of any other user -> pkiller */
1384                         foreach_plist(node->dvg_user_list, el2) {
1385                                 ir_node   *v_irn = plist_element_get_value(el2);
1386                                 rss_irn_t *v     = get_rss_irn(rss, v_irn);
1387
1388                                 if (el != el2                             &&
1389                                         ! BSEARCH_IRN_ARR(u_irn, v->dvg_desc) &&
1390                                         ! plist_has_value(node->dvg_pkiller_list, u_irn))
1391                                 {
1392                                         plist_insert_back(node->dvg_pkiller_list, u_irn);
1393                                         DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG pkiller %+F\n", u_irn));
1394                                 }
1395                         }
1396                 }
1397
1398                 node->dvg_pkiller = build_sorted_array_from_list(node->dvg_pkiller_list, phase_obst(&rss->ph));
1399         }
1400
1401         DEBUG_ONLY(
1402                 if (firm_dbg_get_mask(rss->dbg) & DEBUG_DVG)
1403                         debug_vcg_dump_dvg_pkiller(rss, dvg);
1404         );
1405 }
1406
1407 #endif /* if 0 */
1408
1409 /**
1410  * Compute the maximal antichain of the current DVG.
1411  * This is a reimplementation of the MAXIMAL_ANTI_CHAIN function
1412  * from the DDG library 1.1 (DAG.cpp).
1413  */
1414 static nodeset *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration) {
1415         int         n               = nodeset_count(dvg->nodes);
1416         int         *assignment     = alloca(n * sizeof(assignment[0]));
1417         int         *assignment_rev = alloca(n * sizeof(assignment_rev[0]));
1418         int         *idx_map        = alloca(n * sizeof(idx_map[0]));
1419         hungarian_problem_t *bp;
1420         nodeset     *values, *temp;
1421         ir_node     *u_irn;
1422         int         i, j, cost, cur_chain, res;
1423         rss_edge_t  *dvg_edge;
1424
1425 #define MAP_IDX(irn) bsearch_for_index(get_irn_idx(irn), idx_map,  n,  1)
1426
1427         if (pset_count(dvg->edges) == 0)
1428                 return NULL;
1429
1430         bp = hungarian_new(n, n, 1, HUNGARIAN_MATCH_NORMAL);
1431
1432         /*
1433                 At first, we build an index map for the nodes in the DVG,
1434                 because we cannot use the irn idx therefore as the resulting
1435                 bipartite data structure would have around 1.2GB.
1436                 So we limit the size to the number of nodes we have in the DVG
1437                 and build a sorted index map for their irn indices.
1438         */
1439         i = 0;
1440         foreach_nodeset(dvg->nodes, u_irn) {
1441                 idx_map[i++] = get_irn_idx(u_irn);
1442         }
1443         qsort(idx_map, n, sizeof(idx_map[0]), cmp_int);
1444
1445         foreach_pset(dvg->edges, dvg_edge) {
1446                 int idx_u = MAP_IDX(dvg_edge->src);
1447                 int idx_v = MAP_IDX(dvg_edge->tgt);
1448
1449                 /* add the entry to the bipartite data structure */
1450                 hungarian_add(bp, idx_u, idx_v, 1);
1451                 DBG((rss->dbg, LEVEL_3, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1452                         idx_u, dvg_edge->src, idx_v, dvg_edge->tgt));
1453         }
1454 #if 0
1455         /*
1456                 Add a bipartite entry for each pair of nodes (u, v), where exists a
1457                 path in the DVG from u to v, ie. connect all descendants(v) to v.
1458                 desc_dvg(v) = accumulated descendants of all z in dvg_user_list(v)
1459         */
1460         foreach_nodeset(dvg->nodes, u_irn) {
1461                 rss_irn_t *u        = get_rss_irn(rss, u_irn);
1462                 int       idx_u_irn = MAP_IDX(u_irn);
1463
1464                 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG descendants of %+F:\n", u_irn));
1465
1466                 //plist_clear(u->dvg_desc_list);
1467                 //accumulate_dvg_descendant_values(rss, u, u->dvg_desc_list);
1468
1469                 /*
1470                         FIXME: The array is build on the phase obstack and we cannot free the data.
1471                         So the array get as many times allocated as this function is called.
1472                 */
1473
1474                 /* build the sorted array for faster searches */
1475                 //u->dvg_desc = build_sorted_array_from_list(u->dvg_desc_list, phase_obst(&rss->ph));
1476
1477                 DBG((rss->dbg, DEBUG_MAX_AC, "\t\tadding bipartite entries of %+F:\n", u_irn));
1478
1479                 /* add the bipartite entries for all descendants */
1480                 for (i = ARR_LEN_SAFE(u->dvg_desc) - 1; i >= 0; --i) {
1481                         rss_irn_t *desc    = get_rss_irn(rss, u->dvg_desc[i]);
1482                         int       idx_desc = MAP_IDX(u->dvg_desc[i]);
1483
1484                         /* add the entry to the bipartite data structure */
1485                         hungarian_add(bp, idx_u_irn, idx_desc, 1);
1486                         DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1487                                 idx_u_irn, u_irn, idx_desc, u->dvg_desc[i]));
1488
1489                         need_matching = 1;
1490                 }
1491         }
1492 #endif
1493
1494         /* We want maximum cardinality matching */
1495         hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);
1496
1497 #if 0
1498         DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG pkiller:\n"));
1499         /* beware: the following function needs the dvg_desc array */
1500         build_dvg_pkiller_list(rss, dvg);
1501 #endif
1502
1503         DBG((rss->dbg, LEVEL_2, "\t\tcomputing bipartite matching\n"));
1504         /*
1505                 The maximum cardinality bipartite matching gives us the minimal
1506                 chain partition, which corresponds to the maximum anti chains.
1507         */
1508         res = hungarian_solve(bp, assignment, &cost, 1);
1509         assert(res == 0 && "Bipartite matching failed!");
1510
1511         hungarian_free(bp);
1512         memset(assignment_rev, -1, n * sizeof(assignment_rev[0]));
1513
1514         /* build the reverse assignment, ie. foreach i -> j, add j -> i */
1515         for (i = 0; i < n; ++i) {
1516                 if (assignment[i] >= 0) {
1517                         int j = assignment[i];
1518                         assignment_rev[j] = i;
1519                 }
1520         }
1521
1522         DBG((rss->dbg, LEVEL_2, "\t\tgot assignment with cost %d\n", cost));
1523         DBG((rss->dbg, LEVEL_3, "\t\t\tassignment   ---   reverse assignment\n", cost));
1524         for (i = 0; i < n; ++i) {
1525                 DBG((rss->dbg, LEVEL_3, "\t\t\t%3d -> %3d         %3d -> %3d\n", i, assignment[i], i, assignment_rev[i]));
1526         }
1527
1528         values    = new_nodeset(10);
1529         cur_chain = 0;
1530         /* Construction of the minimal chain partition */
1531         for (j = 0; j < n; ++j) {
1532                 /* check nodes, which did not occur as target */
1533                 if (assignment_rev[j] == -1) {
1534                         int       xj      = idx_map[j];
1535                         ir_node   *xj_irn = get_idx_irn(rss->irg, xj);
1536                         rss_irn_t *xj_rss = get_rss_irn(rss, xj_irn);
1537                         chain_t   *c      = obstack_alloc(phase_obst(&rss->ph), sizeof(*c));
1538                         int       source;
1539
1540                         /* there was no source for j -> we have a source of a new chain */
1541                         nodeset_insert(values, xj_irn);
1542
1543                         c->elements = plist_obstack_new(phase_obst(&rss->ph));
1544                         c->nr       = cur_chain++;
1545                         plist_insert_back(c->elements, xj_irn);
1546
1547                         xj_rss->chain = c;
1548
1549                         DBG((rss->dbg, LEVEL_2, "\t\tstarting chain %d:\n", c->nr));
1550                         DBG((rss->dbg, LEVEL_2, "\t\t\t%+F (%d)", xj_irn, j));
1551
1552                         /* follow chain, having j as source */
1553                         source = j;
1554                         while (assignment[source] >= 0) {
1555                                 int       target  = assignment[source];
1556                                 int       irn_idx = idx_map[target];
1557                                 ir_node   *irn    = get_idx_irn(rss->irg, irn_idx);
1558                                 rss_irn_t *node   = get_rss_irn(rss, irn);
1559
1560                                 plist_insert_back(c->elements, irn);
1561                                 node->chain = c;
1562
1563                                 DB((rss->dbg, LEVEL_2, " -> %+F (%d)", irn, target));
1564
1565                                 /* new source = last target */
1566                                 source = target;
1567                         }
1568
1569                         DB((rss->dbg, LEVEL_2, "\n"));
1570                 }
1571         }
1572
1573         /*
1574                 Computing the maximal antichain: Select an element from each
1575                 chain such, such it is parallel with the others.
1576         */
1577         DBG((rss->dbg, LEVEL_1, "\t\tcomputing set of saturation values (MAX AC)\n"));
1578         DBG((rss->dbg, LEVEL_3, "\t\tstarting with:\n"));
1579
1580         DEBUG_ONLY(
1581                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_3)
1582                         dump_nodeset(values, "\t\t\t");
1583         )
1584
1585         temp = NULL;
1586         do {
1587                 /*
1588                         We need an explicit array for the values as
1589                         we cannot iterate multiple times over the same
1590                         set at the same time. :-(((((
1591                 */
1592                 int     n         = nodeset_count(values);
1593                 int     i         = 0;
1594                 ir_node **val_arr = NEW_ARR_F(ir_node *, n);
1595
1596                 foreach_nodeset(values, u_irn)
1597                         val_arr[i++] = u_irn;
1598
1599                 if (temp)
1600                         del_nodeset(temp);
1601
1602                 temp = new_nodeset(10);
1603
1604                 /* Select all nodes from current value set, having another node in the set as descendant. */
1605                 for (i = 0; i < n; ++i) {
1606                         rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1607
1608                         for (j = 0; j < n; ++j) {
1609                                 if (i != j) {
1610                                         rss_edge_t key;
1611
1612                                         key.src = val_arr[i];
1613                                         key.tgt = val_arr[j];
1614
1615                                         if (pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1616                                                 /* v[j] is descendant of u -> remove u and break */
1617                                                 nodeset_insert(temp, u->irn);
1618                                                 nodeset_remove(values, u->irn);
1619
1620                                                 DBG((rss->dbg, LEVEL_3, "\t\t\tremoving %+F from values, adding it to temp\n", u->irn));
1621
1622                                                 break;
1623                                         }
1624                                 }
1625                         }
1626                 }
1627
1628                 /* Try to insert the chain predecessor of all selected u's */
1629                 foreach_nodeset(temp, u_irn) {
1630                         rss_irn_t       *u  = get_rss_irn(rss, u_irn);
1631                         chain_t         *c  = u->chain;
1632                         plist_element_t *el = plist_find_value(c->elements, u_irn);
1633
1634                         assert(el && "Missing element in chain!");
1635
1636                         /* If u has predecessor in chain: insert the predecessor */
1637                         if (el == plist_element_get_prev(el)) {
1638                                 nodeset_insert(values, plist_element_get_value(el));
1639                                 DBG((rss->dbg, LEVEL_3, "\t\t\tadding %+F to values\n", plist_element_get_value(el)));
1640                         }
1641                 }
1642
1643
1644                 DEL_ARR_F(val_arr);
1645         } while (nodeset_count(temp) > 0);
1646
1647         DBG((rss->dbg, LEVEL_2, "\t\tfinal set:\n"));
1648         DEBUG_ONLY(
1649                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2) {
1650                         dump_nodeset(values, "\t\t\t");
1651                 }
1652         );
1653
1654         if (rss->opts->dump_flags & RSS_DUMP_MAXAC)
1655                 debug_vcg_dump_pkg(rss, values, iteration);
1656
1657         del_nodeset(temp);
1658
1659         return values;
1660
1661 #undef MAP_IDX
1662 }
1663
1664 /**
1665  * Computes the best serialization between two nodes of sat_vals.
1666  */
1667 static serialization_t *compute_best_admissible_serialization(rss_t *rss, nodeset *sat_vals, serialization_t *ser, int num_regs) {
1668         int        n                    = nodeset_count(sat_vals);
1669         int        n_idx                = ARR_LEN_SAFE(rss->idx_map);
1670         int        i                    = 0;
1671         ir_node    **val_arr            = alloca(n * sizeof(val_arr[0]));
1672         bitset_t   *bs_sv               = bitset_alloca(n_idx);
1673         bitset_t   *bs_vdesc            = bitset_alloca(n_idx);
1674         bitset_t   *bs_tmp              = bitset_alloca(n_idx);
1675         bitset_t   *bs_ukilldesc        = bitset_alloca(n_idx);
1676         int        best_benefit         = INT_MAX;
1677         int        best_omega2          = INT_MAX;
1678         int        best_benefit_omega20 = INT_MAX;
1679         int        has_omega1           = 0;
1680         int        j, k;
1681         ir_node    *irn;
1682         rss_edge_t min_benefit_edge;
1683         rss_edge_t min_omega20_edge;
1684         rss_irn_t  *ser_u_omega1 = NULL, *ser_v_omega1 = NULL;
1685         rss_irn_t  *ser_u_omega20 = NULL, *ser_v_omega20 = NULL;
1686
1687         DBG((rss->dbg, LEVEL_1, "\tcomputing admissible serializations:\n"));
1688
1689         /*
1690                 We need an explicit array for the values as
1691                 we cannot iterate multiple times over the same
1692                 set at the same time. :-(((((
1693         */
1694
1695         foreach_nodeset(sat_vals, irn) {
1696                 val_arr[i++] = irn;
1697                 bitset_set(bs_sv, BLOCK_IDX_MAP(rss, irn));
1698         }
1699
1700         /*
1701                 We build all admissible serializations and remember the best found so far.
1702                 For u in sat_vals:
1703                  For v in sat_val:
1704                    if v in pkiller(u): add edge from v to all other pkiller(u)
1705                    else: for all vv in pkiller(u): add edge from v to vv if there exists no path from vv to v
1706         */
1707
1708 /*
1709         A node is unserializable if:
1710         - it has only one killer and this one is Sink
1711     - it kills no other values
1712         In this case there is no serialization which could
1713         reduce the registerpressure
1714 */
1715 #define IS_UNSERIALIZABLE_NODE(rss_node)                  \
1716         (                                                     \
1717                 (                                                 \
1718                         (plist_count(rss_node->pkiller_list) == 1) && \
1719                         is_Sink(rss_node->killer)                  && \
1720                         (rss_node->kill_count                == 0)    \
1721                 )                            ||                   \
1722                 be_is_Barrier(rss_node->irn) ||                   \
1723                 be_is_Keep(rss_node->irn)                         \
1724         )
1725
1726         /* for all u in sat_vals */
1727         for (i = 0; i < n; ++i) {
1728                 rss_irn_t       *u = get_rss_irn(rss, val_arr[i]);
1729                 plist_element_t *el;
1730
1731                 /* ignore nodes where serialization does not help */
1732                 if (IS_UNSERIALIZABLE_NODE(u)) {
1733                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", u->irn));
1734                         continue;
1735                 }
1736
1737                 /* accumulate all descendants of all pkiller(u) */
1738                 bitset_clear_all(bs_ukilldesc);
1739                 foreach_plist(u->pkiller_list, el) {
1740                         ir_node   *irn  = plist_element_get_value(el);
1741                         rss_irn_t *node = get_rss_irn(rss, irn);
1742
1743                         if (! is_Sink(irn))
1744                                 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, irn));
1745                         else
1746                                 continue;
1747
1748                         for (k = ARR_LEN_SAFE(node->descendants) - 1; k >= 0; --k) {
1749                                 if (! is_Sink(node->descendants[k]))
1750                                         bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, node->descendants[k]));
1751                         }
1752                 }
1753
1754                 /* for all v in sat_vals */
1755                 for (j = 0; j < n; ++j) {
1756                         ir_node   *v_irn   = val_arr[j];
1757                         rss_irn_t *v       = get_rss_irn(rss, v_irn);
1758                         unsigned  v_height = get_irn_height(rss->h, v_irn);
1759                         int       omega1, omega2, is_pkiller;
1760
1761                         /* v cannot be serialized with itself
1762                          * ignore nodes where serialization does not help */
1763                         if (i == j || IS_UNSERIALIZABLE_NODE(v)) {
1764                                 if (i != j)
1765                                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", v->irn));
1766                                 continue;
1767                         }
1768
1769                         /* get descendants of v */
1770                         bitset_clear_all(bs_vdesc);
1771                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v_irn));
1772                         for (k = ARR_LEN_SAFE(v->descendants) - 1; k >= 0; --k) {
1773                                 if (! is_Sink(v->descendants[k]))
1774                                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v->descendants[k]));
1775                         }
1776
1777                         /* if v is in pkiller(u) */
1778                         is_pkiller = plist_has_value(u->pkiller_list, v_irn);
1779
1780                         /* for all vv in pkiller(u) */
1781                         foreach_plist(u->pkiller_list, el) {
1782                                 ir_node *vv_irn  = plist_element_get_value(el);
1783                                 int     add_edge;
1784
1785                                 if (is_Sink(vv_irn) || is_cfop(vv_irn))
1786                                         continue;
1787
1788                                 if (is_pkiller)
1789                                         add_edge = vv_irn != v_irn && skip_Proj(vv_irn) != skip_Proj(v_irn);
1790                                 else
1791                                         add_edge = ! heights_reachable_in_block(rss->h, skip_Proj(vv_irn), skip_Proj(v_irn));
1792
1793                                 /*
1794                                         As we add an edge from vv -> v, we have to make sure,
1795                                         that there exists no path from v to vv.
1796                                 */
1797
1798                                 if (add_edge) {
1799                                         int vv_height = get_irn_height(rss->h, vv_irn);
1800                                         int mu1, mu2, critical_path_cost;
1801
1802                                         /*
1803                                                 mu1 = | descendants(v) cut sat_vals |
1804                                                 the number of saturating values which cannot
1805                                                 be simultaneously alive with u
1806                                         */
1807                                         bitset_copy(bs_tmp, bs_vdesc);
1808                                         mu1 = bitset_popcnt(bitset_and(bs_tmp, bs_sv));
1809
1810                                         /*
1811                                                 mu2 = | accum_desc_all_pkiller(u) without descendants(v) |
1812                                         */
1813                                         if (is_pkiller) {
1814                                                 bitset_copy(bs_tmp, bs_ukilldesc);
1815                                                 mu2 = bitset_popcnt(bitset_andnot(bs_tmp, bs_vdesc));
1816                                         }
1817                                         else {
1818                                                 mu2 = 0;
1819                                         }
1820
1821                                         /* omega1 = mu1 - mu2 */
1822                                         omega1 = mu1 - mu2;
1823
1824                                         if (omega1 != 0)
1825                                                 has_omega1 = 1;
1826
1827                                         /* omega2 = increase of critical path */
1828                                         critical_path_cost =
1829                                                 v_height                        /* longest path from v to sink */
1830                                                 + rss->max_height - vv_height   /* longest path from source to vv */
1831                                                 + 1;                            /* edge */
1832
1833                                         /*
1834                                                 If critical_path_cost > max_height -> the new edge
1835                                                 would increase the longest critical path by the difference.
1836                                         */
1837                                         omega2 = critical_path_cost > rss->max_height ? critical_path_cost - rss->max_height : 0;
1838
1839                                         /* this keeps track of the edge with the best benefit */
1840                                         if (omega1 >= num_regs - n && omega1 < best_benefit) {
1841                                                 min_benefit_edge.src = v_irn;
1842                                                 min_benefit_edge.tgt = vv_irn;
1843
1844                                                 ser_u_omega1 = u;
1845                                                 ser_v_omega1 = v;
1846
1847                                                 best_benefit    = omega1;
1848                                                 ser->new_killer = is_pkiller;
1849                                         }
1850
1851                                         /* this keeps track of the edge with the best omega1 costs where omega2 == 0 */
1852                                         if (omega2 == 0 && omega1 >= num_regs - n && omega1 < best_benefit_omega20) {
1853                                                 min_omega20_edge.src = v_irn;
1854                                                 min_omega20_edge.tgt = vv_irn;
1855
1856                                                 ser_u_omega20 = u;
1857                                                 ser_v_omega20 = v;
1858
1859                                                 best_benefit_omega20 = omega1;
1860                                                 ser->new_killer      = is_pkiller;
1861                                         }
1862
1863                                         best_omega2 = MIN(best_omega2, omega2);
1864
1865                                         DBG((rss->dbg, LEVEL_2, "\t\tfound %+F -> %+F (w1 %d, w2 %d)\n",
1866                                                 v_irn, vv_irn, omega1, omega2));
1867                                 } /* if add_edge */
1868                         } /* for all vv in pkiller(u) */
1869                 } /* for all v in sat_vals */
1870         } /* for all u in sat_vals */
1871
1872         if (! has_omega1)
1873                 return NULL;
1874
1875         if (best_omega2 == 0) {
1876                 ser->u         = ser_u_omega20;
1877                 ser->v         = ser_v_omega20;
1878                 ser->edge->src = min_omega20_edge.src;
1879                 ser->edge->tgt = min_omega20_edge.tgt;
1880                 ser->omega1    = best_benefit_omega20;
1881                 ser->omega2    = best_omega2;
1882         }
1883         else {
1884                 ser->u         = ser_u_omega1;
1885                 ser->v         = ser_v_omega1;
1886                 ser->edge->src = min_benefit_edge.src;
1887                 ser->edge->tgt = min_benefit_edge.tgt;
1888                 ser->omega1    = best_benefit;
1889                 ser->omega2    = best_omega2;
1890         }
1891
1892         return ser;
1893
1894 #undef IS_UNSERIALIZABLE_NODE
1895 }
1896
1897 /**
1898  * Perform the value serialization heuristic and add all
1899  * computed serialization edges as dependencies to the irg.
1900  */
1901 static void perform_value_serialization_heuristic(rss_t *rss) {
1902         bitset_t *arch_nonign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1903         bitset_t *abi_ign_bs     = bitset_alloca(arch_register_class_n_regs(rss->cls));
1904         int      available_regs, iteration;
1905         dvg_t    dvg;
1906         nodeset  *sat_vals;
1907         pset *ser_set = new_pset(cmp_rss_edges, 20);
1908
1909         /* available_regs = R = |arch_non_ignore_regs cut ~abi_ignore_regs| */
1910         arch_put_non_ignore_regs(rss->arch_env, rss->cls, arch_nonign_bs);
1911         be_abi_put_ignore_regs(rss->abi, rss->cls, abi_ign_bs);
1912         bitset_andnot(arch_nonign_bs, abi_ign_bs);
1913         available_regs  = bitset_popcnt(arch_nonign_bs);
1914         //num_live = pset_count(rss->live_block);
1915         //available_regs -= num_live < available_regs ? num_live : 0;
1916
1917         DBG((rss->dbg, LEVEL_1, "\n\t#available regs: %d\n\n", available_regs));
1918
1919         /*
1920                 At first we need to compute the disjoint value DAG (DVG = {V, E_dv}).
1921                 V    = set of all nodes we are currently interested in
1922                 E_dv = there is an edge from u to v iff v is a descendant of killer(u), forall u, v in V
1923         */
1924         dvg.nodes = new_nodeset(plist_count(rss->nodes));
1925         dvg.edges = new_pset(cmp_rss_edges, plist_count(rss->nodes) * 5);
1926         compute_dvg(rss, &dvg);
1927
1928         /*
1929                 Then we perform the heuristic serialization algorithm
1930                 on the DVG which gives us all necessary serialization
1931                 edges.
1932         */
1933         DBG((rss->dbg, LEVEL_1, "\tcomputing maximal antichain:\n"));
1934         iteration = 0;
1935         sat_vals  = compute_maximal_antichain(rss, &dvg, iteration++);
1936         while (sat_vals && (nodeset_count(sat_vals) > available_regs)) {
1937                 serialization_t *ser, best_ser;
1938                 rss_edge_t      *edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*edge));
1939                 ir_node         *dep_src, *dep_tgt;
1940
1941                 best_ser.edge = edge;
1942                 ser = compute_best_admissible_serialization(rss, sat_vals, &best_ser, available_regs);
1943
1944                 DBG((rss->dbg, LEVEL_1, "\tcurrent register saturation %d, target %d\n", nodeset_count(sat_vals), available_regs));
1945
1946                 if (! ser) {
1947                         DBG((rss->dbg, LEVEL_1, "\tno RS improving serialization found, breaking at iteration %d\n", iteration));
1948                         break;
1949                 }
1950
1951                 /* Insert the serialization as dependency edge into the irg. */
1952                 DBG((rss->dbg, LEVEL_1, "\tserializing %+F -> %+F with edge %+F -> %+F and cost %d, %d\n",
1953                         ser->u->irn, ser->v->irn, ser->edge->src, ser->edge->tgt, ser->omega1, ser->omega2));
1954
1955                 if (pset_find(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge)))
1956                         ir_printf("WARNING: serialization %+F -> %+F computed twice!\n", ser->edge->src, ser->edge->tgt);
1957
1958
1959                 pset_insert(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge));
1960
1961                 /* update the dvg */
1962                 update_dvg(rss, &dvg, ser->v, ser->u);
1963                 update_dvg(rss, &dvg, get_rss_irn(rss, ser->edge->src), get_rss_irn(rss, ser->edge->tgt));
1964                 del_nodeset(sat_vals);
1965
1966                 dep_src = skip_Proj(ser->edge->src);
1967                 dep_tgt = ser->edge->tgt;
1968                 add_irn_dep(dep_src, dep_tgt);
1969
1970                 /* Update descendants, consumer and pkillers of the target */
1971                 update_node_info(rss, ser->edge->tgt, ser->edge->src);
1972
1973                 /* TODO: try to find a cheaper way for updating height information */
1974                 rss->max_height = heights_recompute_block(rss->h, rss->block);
1975
1976                 /* Recompute the antichain for next serialization */
1977                 DBG((rss->dbg, LEVEL_1, "\tre-computing maximal antichain:\n"));
1978                 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
1979         }
1980
1981         del_nodeset(dvg.nodes);
1982         del_pset(dvg.edges);
1983 }
1984
1985 /**
1986  * Do initial calculations for a block.
1987  */
1988 static void process_block(ir_node *block, void *env) {
1989         rss_t *rss = env;
1990         int   i, n;
1991         const ir_edge_t *edge;
1992
1993         phase_init(&rss->ph, "rss block preprocessor", rss->irg, PHASE_DEFAULT_GROWTH, init_rss_irn);
1994
1995         DBG((rss->dbg, LEVEL_1, "preprocessing block %+F\n", block));
1996         rss->block = block;
1997
1998         /* build an index map for all nodes in the current block */
1999         i            = 0;
2000         n            = get_irn_n_edges(block);
2001         NEW_ARR_A(int *, rss->idx_map, n);
2002         foreach_out_edge(block, edge) {
2003                 ir_node *irn      = get_edge_src_irn(edge);
2004                 rss->idx_map[i++] = get_irn_idx(irn);
2005         }
2006         qsort(rss->idx_map, n, sizeof(rss->idx_map[0]), cmp_int);
2007         rss->max_height = heights_recompute_block(rss->h, block);
2008
2009         /* loop over all register classes */
2010         for (i = arch_isa_get_n_reg_class(rss->arch_env->isa) - 1; i >= 0; --i) {
2011                 const arch_register_class_t *cls = arch_isa_get_reg_class(rss->arch_env->isa, i);
2012
2013                 rss->cls = cls;
2014                 DBG((rss->dbg, LEVEL_1, "register class %s\n", arch_register_class_name(cls)));
2015
2016                 /* Get all live value at end of Block having current register class */
2017                 rss->live_block = pset_new_ptr(10);
2018                 be_liveness_end_of_block(rss->liveness, rss->arch_env, rss->cls, rss->block, rss->live_block);
2019
2020                 /* reset the list of interesting nodes */
2021                 plist_clear(rss->nodes);
2022                 plist_insert_back(rss->nodes, _sink);
2023
2024                 /* collect all nodes having a certain register class */
2025                 foreach_out_edge(block, edge) {
2026                         ir_node *irn  = get_edge_src_irn(edge);
2027                         ir_mode *mode = get_irn_mode(irn);
2028
2029                         /*
2030                                 We skip:
2031                                 - mode_T nodes (the projs are asked)
2032                                 - mode_X nodes (control flow nodes are always scheduled last)
2033                                 - Keeps (they are always immediately scheduled)
2034                                 - Phi (same as Keep)
2035                         */
2036                         if (mode == mode_T || mode == mode_X || is_Phi(irn))
2037                                 continue;
2038
2039                         /*
2040                                 In case of a proj, we skip
2041                                 - Barrier (they are a Barrier :)
2042                                 - Start
2043                                 - the Proj itself, as it's scheduled always with it's super node
2044                         */
2045                         if (is_Proj(irn)) {
2046                                 ir_node *pred = get_Proj_pred(irn);
2047                                 if (be_is_Barrier(pred) || is_Start(pred))
2048                                         continue;
2049                         }
2050
2051                         /* calculate the descendants and consumer for each node in the block */
2052                         collect_node_info(rss, skip_Proj(irn));
2053
2054                         if (be_is_Keep(irn))
2055                                 continue;
2056
2057                         if (! arch_irn_is(rss->arch_env, irn, ignore) && arch_get_irn_reg_class(rss->arch_env, irn, -1) == cls) {
2058                                 plist_insert_back(rss->nodes, skip_Proj(irn));
2059                         }
2060                         //}
2061                 }
2062
2063                 /* compute the potential killing set PK(G) */
2064                 compute_pkill_set(rss);
2065
2066                 /* compute the killing function k* */
2067                 compute_killing_function(rss);
2068
2069                 /*
2070                         Compute the heuristic value serialization and
2071                         add the necessary dependencies to the irg.
2072                 */
2073                 perform_value_serialization_heuristic(rss);
2074
2075                 del_pset(rss->live_block);
2076         }
2077
2078         phase_free(&rss->ph);
2079 }
2080
2081 /**
2082  * Register the options.
2083  */
2084 void be_init_schedrss(void) {
2085         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
2086         lc_opt_entry_t *sched_grp = lc_opt_get_grp(be_grp, "sched");
2087         lc_opt_entry_t *rss_grp = lc_opt_get_grp(sched_grp, "rss");
2088
2089         lc_opt_add_table(rss_grp, rss_option_table);
2090 }
2091
2092 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_schedrss);
2093
2094 /**
2095  * Preprocess the irg for scheduling.
2096  */
2097 void rss_schedule_preparation(const be_irg_t *birg) {
2098         rss_t rss;
2099
2100         FIRM_DBG_REGISTER(rss.dbg, "firm.be.sched.rss");
2101
2102         //firm_dbg_set_mask(rss.dbg, LEVEL_1 | LEVEL_2 | LEVEL_3);
2103
2104         init_rss_special_nodes(birg->irg);
2105
2106         rss.irg      = birg->irg;
2107         rss.arch_env = birg->main_env->arch_env;
2108         rss.abi      = birg->abi;
2109         rss.h        = heights_new(birg->irg);
2110         rss.nodes    = plist_new();
2111         rss.opts     = &rss_options;
2112         rss.liveness = be_liveness(birg->irg);
2113         irg_block_walk_graph(birg->irg, NULL, process_block, &rss);
2114         heights_free(rss.h);
2115         plist_free(rss.nodes);
2116         be_liveness_free(rss.liveness);
2117
2118         if (birg->main_env->options->dump_flags & DUMP_SCHED)
2119                 be_dump(rss.irg, "-rss", dump_ir_block_graph);
2120 }