f8b107e7f08ee708def5e5a04781bbca878228d0
[libfirm] / ir / be / beschedrss.c
1 /**
2  * Implementation of a register saturating list scheduler
3  * as described in: Sid-Ahmed-Ali Touati
4  * Register Saturation in Superscalar and VLIW Codes
5  *
6  * @license This file protected by GPL -  GNU GENERAL PUBLIC LICENSE.
7  * @author  Christian Wuerdig
8  * @date    29.08.2006
9  * @cvs-id  $Id$
10  */
11 #ifdef HAVE_CONFIG_H
12 #include "config.h"
13 #endif
14
15 #include <limits.h>
16
17 #include "obst.h"
18 #include "debug.h"
19
20 #include "irgraph_t.h"
21 #include "irnode_t.h"
22 #include "iredges_t.h"
23 #include "ircons_t.h"
24 #include "irphase_t.h"
25 #include "irgwalk.h"
26 #include "irdump.h"
27 #include "irtools.h"
28 #include "irbitset.h"
29 #include "irprintf.h"
30 #include "bipartite.h"
31 #include "hungarian.h"
32 #include "plist.h"
33
34 #include "height.h"
35
36 #include "beabi.h"
37 #include "benode_t.h"
38 #include "besched_t.h"
39
40 #ifdef WITH_LIBCORE
41 #include <libcore/lc_opts.h>
42 #include <libcore/lc_opts_enum.h>
43 #endif /* WITH_LIBCORE */
44
45
46 #define ARR_LEN_SAFE(arr) ((arr) != NULL ? ARR_LEN((arr)) : 0)
47
48 #define HASH_RSS_EDGE(edge) ((get_irn_node_nr((edge)->src) << 16) | (get_irn_node_nr((edge)->tgt) & 0xFFFF))
49 #define BSEARCH_IRN_ARR(val, arr) \
50         bsearch(&(val), (arr), ARR_LEN_SAFE((arr)), sizeof((arr)[0]), cmp_irn_idx)
51
52 #define BLOCK_IDX_MAP(rss, irn) bsearch_for_index(get_irn_idx((irn)), (rss)->idx_map, ARR_LEN_SAFE((rss)->idx_map), 1)
53
54 /* the rss options */
55 typedef struct _rss_opts_t {
56         int dump_flags;
57 } rss_opts_t;
58
59 /* Represents a child with associated costs */
60 typedef struct _child {
61         ir_node *irn;
62         float   cost;
63 } child_t;
64
65 /* We need edges for several purposes. */
66 typedef struct _rss_edge {
67         ir_node *src;
68         ir_node *tgt;
69         void    *next;
70 } rss_edge_t;
71
72 /* Represents a connected bipartite component. */
73 typedef struct _cbc {
74         nodeset *parents;       /**< = S  a set of value producers */
75         nodeset *children;      /**< = T  a set of value consumers */
76         pset    *kill_edges;    /**< = E  a set of edges (t in T, s in S) such as each s in S gets killed by at least one t in T */
77         int     nr;             /**< a deterministic index for set insertion (used as hash) */
78 } cbc_t;
79
80 /* Represents a disjoint value DAG. */
81 typedef struct _dvg {
82         nodeset *nodes;
83         pset    *edges;
84 } dvg_t;
85
86 /* Represents a chain of nodes. */
87 typedef struct _chain {
88         plist_t *elements;   /**< List of chain elements */
89         int     nr;          /**< a deterministic index for set insertion (used as hash) */
90 } chain_t;
91
92 typedef struct _rss_irn {
93         plist_t  *consumer_list;    /**< List of consumers */
94         ir_node **consumer;         /**< Sorted consumer array (needed for faster access) */
95
96         plist_t  *parent_list;      /**< List of parents */
97         plist_t  *pkiller_list;     /**< List of potential killers */
98
99         plist_t  *descendant_list;  /**< List of descendants */
100         ir_node **descendants;      /**< Sorted descendant array (needed for faster access) */
101
102         ir_node  *killer;           /**< The selected unique killer */
103         ir_node  *irn;              /**< The corresponding firm node to this rss_irn */
104
105         chain_t  *chain;            /**< The chain, this node is associated to */
106
107         unsigned desc_walk;         /**< visited flag for collecting descendants */
108         unsigned kill_count;        /**< number of nodes killed by this one */
109
110         unsigned live_out : 1;      /**< irn has consumers outside of it's block */
111         unsigned visited  : 1;      /**< visited flag for bipartite decomposition */
112         unsigned havecons : 1;      /**< visited flag collect consumer */
113         unsigned handled  : 1;      /**< flag indicating whether or not the list structures have been build */
114         unsigned dumped   : 1;      /**< flag indication whether or not this node was dumped */
115 } rss_irn_t;
116
117 /* Represents a serialization edge with associated costs. */
118 typedef struct _serialization {
119         rss_irn_t  *u;       /* the top node */
120         rss_irn_t  *v;       /* the node about to be serialized after u */
121         rss_edge_t *edge;    /* the edge selected for the serialization */
122         int        omega1;   /* estimated: available regs - RS reduction */
123         int        omega2;   /* increase in critical path length */
124         int        new_killer;
125 } serialization_t;
126
127 typedef struct _rss {
128         phase_t          ph;              /**< Phase to hold some data */
129         heights_t        *h;              /**< The current height object */
130         ir_graph         *irg;            /**< The irg to preprocess */
131         plist_t          *nodes;          /**< The list of interesting nodes */
132         const arch_env_t *arch_env;       /**< The architecture environment */
133         be_abi_irg_t     *abi;            /**< The abi for this irg */
134         pset             *cbc_set;        /**< A set of connected bipartite components */
135         ir_node          *block;          /**< The current block in progress. */
136         int              *idx_map;        /**< Mapping irn indices to per block indices */
137         unsigned         max_height;      /**< maximum height in the current block */
138         rss_opts_t       *opts;           /**< The options */
139         const arch_register_class_t *cls; /**< The current register class */
140         DEBUG_ONLY(firm_dbg_module_t *dbg);
141 } rss_t;
142
143 #define get_rss_irn(rss, irn)  (phase_get_or_set_irn_data(&rss->ph, irn))
144
145 /**
146  * We need some special nodes:
147  * a source and a sink for all live-in and live-out values of a block
148  */
149
150 enum {
151         iro_rss_Source,
152         iro_rss_Sink,
153         iro_rss_last
154 };
155
156 static ir_node *_source = NULL;
157 static ir_node *_sink   = NULL;
158
159 #define is_Source(irn) ((irn) == _source)
160 #define is_Sink(irn)   ((irn) == _sink)
161
162 enum {
163         RSS_DUMP_NONE  = 0,
164         RSS_DUMP_CBC   = 1 << 0,
165         RSS_DUMP_PKG   = 1 << 1,
166         RSS_DUMP_KILL  = 1 << 2,
167         RSS_DUMP_DVG   = 1 << 3,
168         RSS_DUMP_MAXAC = 1 << 4,
169         RSS_DUMP_ALL   = (RSS_DUMP_MAXAC << 1) - 1,
170 };
171
172 static rss_opts_t rss_options = {
173         RSS_DUMP_NONE,
174 };
175
176 #ifdef WITH_LIBCORE
177 static const lc_opt_enum_int_items_t dump_items[] = {
178         { "none",  RSS_DUMP_NONE  },
179         { "cbc",   RSS_DUMP_CBC   },
180         { "pkg",   RSS_DUMP_PKG   },
181         { "kill",  RSS_DUMP_KILL  },
182         { "dvg",   RSS_DUMP_DVG   },
183         { "maxac", RSS_DUMP_MAXAC },
184         { "all",   RSS_DUMP_ALL   },
185         { NULL, 0 }
186 };
187
188 static lc_opt_enum_int_var_t dump_var = {
189         &rss_options.dump_flags, dump_items
190 };
191
192 static const lc_opt_table_entry_t rss_option_table[] = {
193         LC_OPT_ENT_ENUM_MASK("dump", "dump phases (none, cbc, pkg, kill, dvg, maxac, all)", &dump_var),
194         { NULL }
195 };
196 #endif /* WITH_LIBCORE */
197
198 /******************************************************************************
199  *  _          _                    __                  _   _
200  * | |        | |                  / _|                | | (_)
201  * | |__   ___| |_ __   ___ _ __  | |_ _   _ _ __   ___| |_ _  ___  _ __  ___
202  * | '_ \ / _ \ | '_ \ / _ \ '__| |  _| | | | '_ \ / __| __| |/ _ \| '_ \/ __|
203  * | | | |  __/ | |_) |  __/ |    | | | |_| | | | | (__| |_| | (_) | | | \__ \
204  * |_| |_|\___|_| .__/ \___|_|    |_|  \__,_|_| |_|\___|\__|_|\___/|_| |_|___/
205  *            | |
206  *            |_|
207  ******************************************************************************/
208
209 /**
210  * Acquire opcodes and create source and sink nodes.
211  */
212 static void init_rss_special_nodes(ir_graph *irg) {
213         ir_node *block         = get_irg_start_block(irg);
214         int     iro_rss_base   = get_next_ir_opcodes(iro_rss_last);
215         ir_op   *op_rss_Source = new_ir_op(iro_rss_base + iro_rss_Source, "rss_Source", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
216         ir_op   *op_rss_Sink   = new_ir_op(iro_rss_base + iro_rss_Sink,   "rss_Sink",   op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
217         _source                = new_ir_node(NULL, irg, block, op_rss_Source, mode_ANY, 0, NULL);
218         _sink                  = new_ir_node(NULL, irg, block, op_rss_Sink, mode_ANY, 0, NULL);
219 }
220
221 static int cmp_int(const void *a, const void *b) {
222         const int *i1 = a;
223         const int *i2 = b;
224
225         return QSORT_CMP(*i1, *i2);
226 }
227
228 static int cmp_child_costs(const void *a, const void *b) {
229         const child_t *c1 = a;
230         const child_t *c2 = b;
231
232         return QSORT_CMP(c1->cost, c2->cost);
233 }
234
235 static int cmp_irn_idx(const void *a, const void *b) {
236         const ir_node *n1 = *(ir_node **)a;
237         const ir_node *n2 = *(ir_node **)b;
238
239         return QSORT_CMP(get_irn_idx(n1), get_irn_idx(n2));
240 }
241
242 static int cmp_rss_edges(const void *a, const void *b) {
243         const rss_edge_t *e1 = a;
244         const rss_edge_t *e2 = b;
245
246         return (e1->src != e2->src) || (e1->tgt != e2->tgt);
247 }
248
249 static int bsearch_for_index(int key, int *arr, size_t len, int force) {
250         int left = 0;
251         int right = len;
252
253         while (right >= left) {
254                 int idx = (left + right) / 2;
255
256                 if (key < arr[idx])
257                         right = idx - 1;
258                 else if (key > arr[idx])
259                         left = idx + 1;
260                 else
261                         return idx;
262         }
263
264         if (force)
265                 assert(0 && "Something is wrong, key not found.");
266         return -1;
267 }
268
269 static ir_node **build_sorted_array_from_list(plist_t *irn_list, struct obstack *obst) {
270         plist_element_t *el;
271         int     i     = 0;
272         int     len   = plist_count(irn_list);
273         ir_node **arr = NEW_ARR_D(ir_node *, obst, len);
274
275         /* copy the list into the array */
276         foreach_plist(irn_list, el) {
277                 arr[i++] = plist_element_get_value(el);
278         }
279
280         /* sort the array by node index */
281         qsort(arr, len, sizeof(arr[0]), cmp_irn_idx);
282
283         return arr;
284 }
285
286 /*****************************************************
287  *      _      _                       _
288  *     | |    | |                     (_)
289  *   __| | ___| |__  _   _  __ _  __ _ _ _ __   __ _
290  *  / _` |/ _ \ '_ \| | | |/ _` |/ _` | | '_ \ / _` |
291  * | (_| |  __/ |_) | |_| | (_| | (_| | | | | | (_| |
292  *  \__,_|\___|_.__/ \__,_|\__, |\__, |_|_| |_|\__, |
293  *                          __/ | __/ |         __/ |
294  *                         |___/ |___/         |___/
295  *
296  *****************************************************/
297
298 static void dump_nodeset(nodeset *ns, const char *prefix) {
299         ir_node *irn;
300         foreach_nodeset(ns, irn) {
301                 ir_fprintf(stderr, "%s%+F\n", prefix, irn);
302         }
303 }
304
305 static void build_file_name(rss_t *rss, const char *suffix, size_t suf_len, char *buf, size_t len) {
306         const char *irg_name;
307
308         memset(buf, 0, len);
309         irg_name = get_entity_name(get_irg_entity(rss->irg));
310         snprintf(buf, len - suf_len, "%s-%s-block-%ld",
311                 irg_name, arch_register_class_name(rss->cls), get_irn_node_nr(rss->block));
312         strcat(buf, suffix);
313 }
314
315 /* Dumps all collected bipartite components of current irg as vcg. */
316 static void debug_vcg_dump_bipartite(rss_t *rss) {
317         cbc_t *cbc;
318         FILE  *f;
319         char  file_name[256];
320         static const char suffix[] = "-RSS-CBC.vcg";
321
322         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
323         f = fopen(file_name, "w");
324
325         ir_fprintf(f, "graph: { title: \"connected bipartite component graph of %+F\"\n", rss->irg);
326         fprintf(f, "display_edge_labels: no\n");
327         fprintf(f, "layoutalgorithm: mindepth\n");
328         fprintf(f, "manhattan_edges: yes\n\n");
329
330         foreach_pset(rss->cbc_set, cbc) {
331                 ir_node    *n;
332                 rss_edge_t *ke;
333
334                 fprintf(f, "graph: { titel: \"cbc %d\" label: \"cbc %d\" status:clustered color:yellow\n", cbc->nr, cbc->nr);
335                 foreach_nodeset(cbc->parents, n) {
336                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
337                 }
338                 foreach_nodeset(cbc->children, n) {
339                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
340                 }
341                 foreach_pset(cbc->kill_edges, ke) {
342                         ir_fprintf(f, "edge: { sourcename: \"n%d_%d\" targetname: \"n%d_%d\" }\n",
343                                 get_irn_node_nr(ke->src), cbc->nr, get_irn_node_nr(ke->tgt), cbc->nr);
344                 }
345                 fprintf(f, "}\n\n");
346         }
347         fprintf(f, "}\n");
348         fclose(f);
349 }
350
351 /* Dump the computed killing function as vcg. */
352 static void debug_vcg_dump_kill(rss_t *rss) {
353         FILE            *f;
354         char            file_name[256];
355         plist_element_t *el;
356         static const char suffix[] = "-RSS-KILL.vcg";
357
358         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
359         f = fopen(file_name, "w");
360
361         ir_fprintf(f, "graph: { title: \"computed kill graph of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
362         fprintf(f, "display_edge_labels: no\n");
363         fprintf(f, "layoutalgorithm: mindepth\n");
364         fprintf(f, "manhattan_edges: yes\n\n");
365
366         /* first: reset dumped flag of all nodes */
367         foreach_plist(rss->nodes, el) {
368                 ir_node   *irn  = plist_element_get_value(el);
369                 rss_irn_t *rirn = get_rss_irn(rss, irn);
370                 rirn->dumped = 0;
371         }
372
373         /* dump all nodes and their killers */
374         foreach_plist(rss->nodes, el) {
375                 ir_node   *irn     = plist_element_get_value(el);
376                 rss_irn_t *rirn    = get_rss_irn(rss, irn);
377                 rss_irn_t *pk_rirn = get_rss_irn(rss, rirn->killer);
378
379                 if (! rirn->dumped) {
380                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
381                         rirn->dumped = 1;
382                 }
383
384                 if (! pk_rirn->dumped) {
385                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(rirn->killer), rirn->killer);
386                         pk_rirn->dumped = 1;
387                 }
388
389                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
390                         get_irn_node_nr(rirn->killer), get_irn_node_nr(irn));
391         }
392
393         fprintf(f, "}\n");
394         fclose(f);
395 }
396
397 /* Dumps the potential killing DAG (PKG) as vcg. */
398 static void debug_vcg_dump_pkg(rss_t *rss, nodeset *max_ac, int iteration) {
399         FILE              *f;
400         char              file_name[256];
401         char              *suffix   = alloca(32);
402         static const char suffix1[] = "-RSS-PKG.vcg";
403         static const char suffix2[] = "-RSS-PKG-MAXAC.vcg";
404         plist_element_t   *el;
405
406         if (! max_ac) {
407                 snprintf(suffix, 32, "%s", suffix1);
408         }
409         else {
410                 snprintf(suffix, 32, "-%02d%s", iteration, suffix2);
411         }
412
413         build_file_name(rss, suffix, strlen(suffix) + 1, file_name, sizeof(file_name));
414         f = fopen(file_name, "w");
415
416         ir_fprintf(f, "graph: { title: \"potential killing DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
417         fprintf(f, "display_edge_labels: no\n");
418         fprintf(f, "layoutalgorithm: mindepth\n");
419         fprintf(f, "manhattan_edges: yes\n\n");
420
421         foreach_plist(rss->nodes, el) {
422                 ir_node   *irn  = plist_element_get_value(el);
423                 rss_irn_t *rirn = get_rss_irn(rss, irn);
424                 char      *c1   = "";
425                 plist_element_t *k_el;
426
427                 /* dump selected saturating values in yellow */
428                 if (max_ac && nodeset_find(max_ac, irn))
429                         c1 = "color:yellow";
430
431                 if (rirn->chain)
432                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(irn), irn, rirn->chain->nr, c1);
433                 else
434                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(irn), irn, c1);
435                 rirn->dumped = 1;
436
437                 foreach_plist(rirn->pkiller_list, k_el) {
438                         ir_node   *pkiller = plist_element_get_value(k_el);
439                         rss_irn_t *pk_rirn = get_rss_irn(rss, pkiller);
440                         char      *c2      = "";
441
442                         if (max_ac && nodeset_find(max_ac, pkiller))
443                                 c2 = "color:yellow";
444
445                         if (! pk_rirn->dumped) {
446                                 if (pk_rirn->chain)
447                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(pkiller), pkiller, pk_rirn->chain->nr, c2);
448                                 else
449                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(pkiller), pkiller, c2);
450                                 pk_rirn->dumped = 1;
451                         }
452                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
453                                 get_irn_node_nr(pkiller), get_irn_node_nr(irn));
454                 }
455         }
456         fprintf(f, "}\n");
457         fclose(f);
458 }
459
460 /* Dumps the disjoint value DAG (DVG) as vcg. */
461 static void debug_vcg_dump_dvg(rss_t *rss, dvg_t *dvg) {
462         static const char suffix[] = "-RSS-DVG.vcg";
463         FILE       *f;
464         char       file_name[256];
465         ir_node    *irn;
466         rss_edge_t *edge;
467
468         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
469         f = fopen(file_name, "w");
470
471         ir_fprintf(f, "graph: { title: \"disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
472         fprintf(f, "display_edge_labels: no\n");
473         fprintf(f, "layoutalgorithm: mindepth\n");
474         fprintf(f, "manhattan_edges: yes\n\n");
475
476         /* dump all nodes */
477         foreach_nodeset(dvg->nodes, irn) {
478                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
479         }
480
481         /* dump all edges */
482         foreach_pset(dvg->edges, edge) {
483                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
484                         get_irn_node_nr(edge->src), get_irn_node_nr(edge->tgt));
485         }
486
487         fprintf(f, "}\n");
488         fclose(f);
489 }
490
491 #if 0
492 /* Dumps the PKG(DVG). */
493 static void debug_vcg_dump_dvg_pkiller(rss_t *rss, dvg_t *dvg) {
494         static const char suffix[] = "-RSS-DVG-PKG.vcg";
495         FILE       *f;
496         char       file_name[256];
497         ir_node    *irn;
498
499         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
500         f = fopen(file_name, "w");
501
502         ir_fprintf(f, "graph: { title: \"PKG of disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
503         fprintf(f, "display_edge_labels: no\n");
504         fprintf(f, "layoutalgorithm: mindepth\n");
505         fprintf(f, "manhattan_edges: yes\n\n");
506
507         /* dump all nodes */
508         foreach_nodeset(dvg->nodes, irn) {
509                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
510         }
511
512         /* dump all edges */
513         foreach_nodeset(dvg->nodes, irn) {
514                 rss_irn_t       *node = get_rss_irn(rss, irn);
515                 plist_element_t *el;
516
517                 foreach_plist(node->dvg_pkiller_list, el) {
518                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
519                                 get_irn_node_nr(plist_element_get_value(el)), get_irn_node_nr(irn));
520                 }
521         }
522
523         fprintf(f, "}\n");
524         fclose(f);
525 }
526 #endif /* if 0 */
527
528 /**
529  * In case there is no rss information for irn, initialize it.
530  */
531 static void *init_rss_irn(phase_t *ph, ir_node *irn, void *old) {
532         rss_irn_t *res = old ? old : phase_alloc(ph, sizeof(res[0]));
533
534         res->descendant_list = plist_obstack_new(phase_obst(ph));
535         res->descendants     = NULL;
536
537         res->consumer_list   = plist_obstack_new(phase_obst(ph));
538         res->consumer        = NULL;
539
540         res->pkiller_list    = plist_obstack_new(phase_obst(ph));
541
542         res->parent_list     = plist_obstack_new(phase_obst(ph));
543
544         res->killer          = NULL;
545         res->irn             = irn;
546         res->chain           = NULL;
547
548         res->kill_count      = 0;
549         res->desc_walk       = 0;
550         res->live_out        = 0;
551         res->visited         = 0;
552         res->handled         = 0;
553         res->dumped          = 0;
554         res->havecons        = 0;
555
556         return res;
557 }
558
559 /**
560  * Collect all nodes data dependent on current node.
561  */
562 static void collect_descendants(rss_t *rss, rss_irn_t *rirn, ir_node *irn, int *got_sink, unsigned cur_desc_walk) {
563         const ir_edge_t *edge;
564         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
565         ir_node         *block    = rss->block;
566         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
567         int             i;
568
569         if (cur_node->desc_walk >= cur_desc_walk)
570                 return;
571         cur_node->desc_walk = cur_desc_walk;
572
573         /* Stop at Barriers and Keeps */
574         if (be_is_Barrier(irn) || be_is_Keep(irn))
575                 return;
576
577         /* loop over normal and dependency edges */
578         for (i = 0; i < 2; ++i) {
579                 foreach_out_edge_kind(irn, edge, ekind[i]) {
580                         ir_node *user = get_edge_src_irn(edge);
581
582                         /* skip ignore nodes as they do not really contribute to register pressure */
583                         if (arch_irn_is(rss->arch_env, user, ignore))
584                                 continue;
585
586                         if (is_Proj(user)) {
587                                 if (get_irn_mode(user) == mode_X && ! *got_sink)
588                                         goto force_sink;
589
590                                 if (arch_get_irn_reg_class(rss->arch_env, user, -1) == rss->cls)
591                                         collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
592                         }
593                         else {
594                                 /* check if user lives in block and is not a control flow node */
595                                 if (get_nodes_block(user) == block) {
596                                         if (! plist_has_value(rirn->descendant_list, user)) {
597                                                 plist_insert_back(rirn->descendant_list, user);
598                                                 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", user));
599                                         }
600                                         collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
601                                 }
602                                 else if (! *got_sink) {
603 force_sink:
604                                         /* user lives out of block: add sink as descendant if not already done */
605                                         plist_insert_back(rirn->descendant_list, _sink);
606                                         *got_sink = 1;
607                                         DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", _sink));
608                                 }
609                         }
610                 }
611         }
612 }
613
614 /**
615  * Handles a single consumer.
616  */
617 static void collect_single_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *consumer, int *got_sink) {
618         ir_node *block = rss->block;
619
620         assert(! is_Proj(consumer) && "Cannot handle Projs");
621
622         if (get_nodes_block(consumer) == block) {
623                 if (! arch_irn_is(rss->arch_env, consumer, ignore)) {
624                         plist_insert_back(rss_irn->consumer_list, consumer);
625                         DBG((rss->dbg, LEVEL_2, "\t\tconsumer %+F\n", consumer));
626                 }
627         }
628         else {
629                 rss_irn->live_out = 1;
630                 DBG((rss->dbg, LEVEL_2, "\t\tlive out %+F", consumer));
631                 if (! *got_sink) {
632                         plist_insert_back(rss_irn->consumer_list, _sink);
633                         *got_sink = 1;
634                         DB((rss->dbg, LEVEL_2, ", %+F added instead", _sink));
635                 }
636                 DB((rss->dbg, LEVEL_2, "\n"));
637         }
638 }
639
640 /**
641  * Collect all nodes consuming the value(s) produced by current node.
642  */
643 static void collect_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *irn, int *got_sink) {
644         const ir_edge_t *edge;
645         int             i;
646         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
647         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
648
649         if (cur_node->havecons)
650                 return;
651         cur_node->havecons = 1;
652
653         for (i = 0; i < 2; ++i) {
654                 foreach_out_edge_kind(irn, edge, ekind[i]) {
655                         ir_node *consumer = get_edge_src_irn(edge);
656
657                         if (is_Proj(consumer)) {
658                                 if (arch_get_irn_reg_class(rss->arch_env, consumer, -1) == rss->cls)
659                                         collect_consumer(rss, rss_irn, consumer, got_sink);
660                         }
661                         else
662                                 collect_single_consumer(rss, rss_irn, consumer, got_sink);
663                 }
664         }
665 }
666
667 /**
668  * Collects all consumer and descendant of a irn.
669  */
670 static void collect_node_info(rss_t *rss, ir_node *irn) {
671         static unsigned cur_desc_walk = 0;
672         rss_irn_t       *rss_irn      = get_rss_irn(rss, irn);
673         int             got_sink;
674
675         assert(! is_Proj(irn) && "Cannot handle Projs.");
676
677         /* check if node info is already available */
678         if (rss_irn->handled)
679                 phase_reinit_single_irn_data(&rss->ph, irn);
680
681         DBG((rss->dbg, LEVEL_1, "\tcomputing consumers of %+F:\n", irn));
682
683         /* collect all consumer */
684         got_sink = 0;
685         collect_consumer(rss, rss_irn, irn, &got_sink);
686
687         /* build sorted consumer array */
688         rss_irn->consumer = build_sorted_array_from_list(rss_irn->consumer_list, phase_obst(&rss->ph));
689
690         DBG((rss->dbg, LEVEL_1, "\tcompute descendants of %+F:\n", irn));
691
692         /* collect descendants */
693         got_sink = 0;
694         collect_descendants(rss, rss_irn, irn, &got_sink, ++cur_desc_walk);
695
696         /* build sorted descendant array */
697         rss_irn->descendants = build_sorted_array_from_list(rss_irn->descendant_list, phase_obst(&rss->ph));
698
699         rss_irn->handled = 1;
700 }
701
702 /**
703  * Checks if v is a potential killer of u.
704  * v is in pkill(u) iff descendants(v) cut consumer(u) is v
705  *
706  * @param rss   The rss object
707  * @param v      The node to check for killer
708  * @param u      The potentially killed value
709  * @return 1 if v is in pkill(u), 0 otherwise
710  */
711 static int is_potential_killer(rss_t *rss, rss_irn_t *v, rss_irn_t *u) {
712         plist_t *list;
713         ir_node **arr;
714         plist_element_t *el;
715
716         assert(is_Sink(v->irn) || ((plist_count(v->descendant_list) > 0 && v->descendants) || 1));
717         assert(is_Sink(u->irn) || ((plist_count(u->consumer_list)   > 0 && u->consumer)    || 1));
718
719         /* as we loop over the list: loop over the shorter one */
720         if (plist_count(v->descendant_list) > plist_count(u->consumer_list)) {
721                 list = u->consumer_list;
722                 arr  = v->descendants;
723         }
724         else {
725                 list = v->descendant_list;
726                 arr  = u->consumer;
727         }
728
729         /* for each list element: try to find element in array */
730         foreach_plist(list, el) {
731                 ir_node *irn   = plist_element_get_value(el);
732                 ir_node *match = BSEARCH_IRN_ARR(irn, arr);
733
734                 if (match && match != irn)
735                         return 0;
736         }
737
738         return 1;
739 }
740
741 /**
742  * Update descendants, consumer and pkiller list for the given irn.
743  */
744 static void update_node_info(rss_t *rss, ir_node *irn, ir_node *pk_irn) {
745         rss_irn_t *node    = get_rss_irn(rss, irn);
746         rss_irn_t *pkiller = get_rss_irn(rss, pk_irn);
747
748         /* add consumer and rebuild the consumer array */
749         if (! plist_has_value(node->consumer_list, pk_irn)) {
750                 plist_insert_back(node->consumer_list, pk_irn);
751                 node->consumer = build_sorted_array_from_list(node->consumer_list, phase_obst(&rss->ph));
752         }
753
754         /* add pk_irn as descendant, add it's descendants to irn's and rebuild array */
755         if (! plist_has_value(node->descendant_list, pk_irn)) {
756                 plist_element_t *el;
757
758                 plist_insert_back(node->descendant_list, pk_irn);
759
760                 foreach_plist(pkiller->descendant_list, el) {
761                         ir_node *desc = plist_element_get_value(el);
762
763                         if (! plist_has_value(node->descendant_list, desc))
764                                 plist_insert_back(node->descendant_list, desc);
765                 }
766
767                 node->descendants = build_sorted_array_from_list(node->descendant_list, phase_obst(&rss->ph));
768         }
769
770 }
771
772 /**
773  * Compute the potential killing set PK.
774  */
775 static void compute_pkill_set(rss_t *rss) {
776         plist_element_t *u_el, *v_el;
777
778         foreach_plist(rss->nodes, u_el) {
779                 ir_node   *u_irn = plist_element_get_value(u_el);
780                 rss_irn_t *u     = get_rss_irn(rss, u_irn);
781
782                 DBG((rss->dbg, LEVEL_1, "\tcomputing potential killers of %+F:\n", u_irn));
783
784                 /* check each consumer if it is a potential killer  */
785                 foreach_plist(u->consumer_list, v_el) {
786                         ir_node   *v_irn = plist_element_get_value(v_el);
787                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
788
789                         /* check, if v is a potential killer of u */
790                         if (is_potential_killer(rss, v, u)) {
791                                 if (! plist_has_value(u->pkiller_list, v_irn))
792                                         plist_insert_back(u->pkiller_list, v_irn);
793                                 v->kill_count++;
794                                 DBG((rss->dbg, LEVEL_2, "\t\tpotential killer %+F\n", v_irn));
795                         }
796                 }
797
798                 u->killer = _sink;
799         }
800
801         if (rss->opts->dump_flags & RSS_DUMP_PKG)
802                 debug_vcg_dump_pkg(rss, NULL, 0);
803 }
804
805 /**
806  * Build set of killing edges (from values to their potential killers)
807  */
808 static void build_kill_edges(rss_t *rss, pset *epk) {
809         plist_element_t *el, *k_el;
810
811         foreach_plist(rss->nodes, el) {
812                 ir_node    *irn  = plist_element_get_value(el);
813                 rss_irn_t *rirn = get_rss_irn(rss, irn);
814
815                 DBG((rss->dbg, LEVEL_3, "\t\tbuilding kill edges for %+F:\n", irn));
816
817                 foreach_plist(rirn->pkiller_list, k_el) {
818                         ir_node    *pkiller = plist_element_get_value(k_el);
819                         rss_edge_t *ke      = obstack_alloc(phase_obst(&rss->ph), sizeof(*ke));
820
821                         ke->src  = irn;
822                         ke->tgt  = pkiller;
823                         ke->next = NULL;
824
825                         DBG((rss->dbg, LEVEL_3, "\t\t\ttarget %+F\n", pkiller));
826
827                         pset_insert(epk, ke, HASH_RSS_EDGE(ke));
828                 }
829         }
830 }
831
832 /* print the given cbc for debugging purpose */
833 static void debug_print_cbc(firm_dbg_module_t *mod, cbc_t *cbc) {
834         ir_node    *n;
835         rss_edge_t *ke;
836
837         DBG((mod, LEVEL_3, "\t\tS = set of parents:\n"));
838         foreach_nodeset(cbc->parents, n) {
839                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
840         }
841         DBG((mod, LEVEL_3, "\t\tT = set of children:\n"));
842         foreach_nodeset(cbc->children, n) {
843                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
844         }
845         DBG((mod, LEVEL_3, "\t\tE = Edges from producers to consumers\n"));
846         foreach_pset(cbc->kill_edges, ke) {
847                 DBG((mod, LEVEL_3, "\t\t\t%+F -> %+F\n", ke->src, ke->tgt));
848         }
849 }
850
851 /**
852  * Construct the bipartite decomposition.
853  * Sid-Ahmed-Ali Touati, Phd Thesis
854  * Register Pressure in Instruction Level Parallelism, p. 71
855  */
856 static void compute_bipartite_decomposition(rss_t *rss) {
857         pset *epk    = new_pset(cmp_rss_edges, 10);
858         int  cur_num = 0;
859
860         plist_element_t *el;
861
862         DBG((rss->dbg, LEVEL_1, "\tcomputing bipartite decomposition:\n"));
863
864         build_kill_edges(rss, epk);
865
866         foreach_plist(rss->nodes, el) {
867                 ir_node   *u_irn   = plist_element_get_value(el);
868                 rss_irn_t *u       = get_rss_irn(rss, u_irn);
869                 int       p_change = 1;
870                 int       c_change = 1;
871
872                 cbc_t           *cbc;
873                 plist_element_t *el2;
874                 rss_edge_t      *k_edge;
875                 rss_edge_t      *kedge_root = NULL;
876                 ir_node         *t_irn, *s_irn;
877
878                 if (u->visited || u_irn == _sink)
879                         continue;
880
881                 DBG((rss->dbg, LEVEL_2, "\t\t%+F choosen:\n", u_irn));
882
883                 cbc     = obstack_alloc(phase_obst(&rss->ph), sizeof(*cbc));
884                 cbc->nr = cur_num++;
885
886                 /* initialize S_cb */
887                 cbc->parents = new_nodeset(5);
888                 nodeset_insert(cbc->parents, u_irn);
889                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (init)\n", u_irn));
890
891                 /* E_cb = empty */
892                 cbc->kill_edges = new_pset(cmp_rss_edges, 5);
893
894                 /* each parent gets killed by at least one value from children */
895
896                 /* T_cb = PK_successors(u) */
897                 cbc->children = new_nodeset(5);
898                 foreach_plist(u->pkiller_list, el2) {
899                         nodeset_insert(cbc->children, plist_element_get_value(el2));
900                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (init)\n", plist_element_get_value(el2)));
901                 }
902
903                 /*
904                         Now: insert the parents of all children into the parent set
905                         and insert the children of all parents into the children set
906                         until the sets don't change any more
907                 */
908                 while (p_change || c_change) {
909                         p_change = c_change = 0;
910
911                         /* accumulate parents */
912                         foreach_nodeset(cbc->children, t_irn) {
913                                 foreach_pset(epk, k_edge) {
914                                         ir_node *val = k_edge->src;
915
916                                         if (k_edge->tgt == t_irn && ! nodeset_find(cbc->parents, val)) {
917                                                 nodeset_insert(cbc->parents, val);
918                                                 p_change = 1;
919                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents\n", val));
920                                         }
921                                 }
922                         }
923
924                         /* accumulate children */
925                         foreach_nodeset(cbc->parents, s_irn) {
926                                 foreach_pset(epk, k_edge) {
927                                         ir_node *val = k_edge->tgt;
928
929                                         if (k_edge->src == s_irn  && ! nodeset_find(cbc->children, val)) {
930                                                 nodeset_insert(cbc->children, val);
931                                                 c_change = 1;
932                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children\n", val));
933                                         }
934                                 }
935                         }
936                 }
937
938                 /* mark all parent values as visited */
939                 foreach_nodeset(cbc->parents, s_irn) {
940                         rss_irn_t *s = get_rss_irn(rss, s_irn);
941                         s->visited = 1;
942                         /* assure bipartite property */
943                         if (nodeset_find(cbc->children, s_irn)) {
944                                 nodeset_remove(cbc->children, s_irn);
945                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F removed from to children\n", s_irn));
946                         }
947                 }
948
949                 /* update edges */
950                 foreach_pset(epk, k_edge) {
951                         if (nodeset_find(cbc->parents, k_edge->src) && nodeset_find(cbc->children, k_edge->tgt)) {
952                                 pset_insert(cbc->kill_edges, k_edge, HASH_RSS_EDGE(k_edge));
953                                 /*
954                                         Link all k_edges which are about to be removed together.
955                                         Beware: pset_remove kills the iterator.
956                                 */
957                                 k_edge->next = kedge_root;
958                                 kedge_root   = k_edge;
959                         }
960                 }
961
962                 /* remove all linked k_edges */
963                 for (; kedge_root; kedge_root = kedge_root->next) {
964                         pset_remove(epk, kedge_root, HASH_RSS_EDGE(kedge_root));
965                 }
966
967                 /* add the connected bipartite component */
968                 if (nodeset_count(cbc->parents) > 0 && nodeset_count(cbc->children) > 0 && pset_count(cbc->kill_edges) > 0)
969                         pset_insert(rss->cbc_set, cbc, (unsigned)cbc->nr);
970                 DBG((rss->dbg, LEVEL_2, "\tbipartite component %d inserted:\n", cbc->nr));
971                 DEBUG_ONLY(
972                         if (firm_dbg_get_mask(rss->dbg) & LEVEL_2)
973                                 debug_print_cbc(rss->dbg, cbc);
974                 );
975         }
976
977         if (rss->opts->dump_flags & RSS_DUMP_CBC)
978                 debug_vcg_dump_bipartite(rss);
979
980         del_pset(epk);
981 }
982
983 /**
984  * Select the child with the maximum cost.
985  */
986 static child_t *select_child_max_cost(rss_t *rss, nodeset *x, nodeset *y, child_t *t, cbc_t *cbc) {
987         ir_node *child;
988         float   max_cost = -1.0f;
989
990         DBG((rss->dbg, LEVEL_2, "\t\tcomputing children costs:\n"));
991
992         foreach_nodeset(cbc->children, child) {
993                 rss_irn_t  *r_child             = get_rss_irn(rss, child);
994                 int         num_unkilled_parents = 0;
995                 int         num_descendants;
996                 rss_edge_t *k_edge;
997                 float       cost;
998
999                 /* get the number of unkilled parents */
1000                 foreach_pset(cbc->kill_edges, k_edge) {
1001                         if (k_edge->tgt == child && nodeset_find(x, k_edge->src))
1002                                 ++num_unkilled_parents;
1003                 }
1004
1005                 cost = (float)num_unkilled_parents;
1006
1007                 num_descendants = plist_count(r_child->descendant_list) + nodeset_count(y);
1008
1009                 if (num_descendants > 0)
1010                         cost /= (float)num_descendants;
1011
1012                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F, #desc %d, cost %.3f\n", child, num_descendants, cost));
1013
1014                 if (cost > max_cost) {
1015                         t->irn   = child;
1016                         t->cost  = cost;
1017                         max_cost = cost;
1018                 }
1019         }
1020
1021         return t;
1022 }
1023
1024 /**
1025  * Remove all parents from x which are killed by t_irn.
1026  */
1027 static void remove_covered_parents(rss_t *rss, nodeset *x, ir_node *t_irn, cbc_t *cbc) {
1028         rss_irn_t  *t = get_rss_irn(rss, t_irn);
1029         rss_edge_t *k_edge;
1030
1031         DBG((rss->dbg, LEVEL_2, "\t\tremoving parents covered by %+F:\n", t_irn));
1032
1033         foreach_pset(cbc->kill_edges, k_edge) {
1034                 if (k_edge->tgt == t_irn && nodeset_find(x, k_edge->src)) {
1035                         nodeset_remove(x, k_edge->src);
1036                         plist_insert_back(t->parent_list, k_edge->src);
1037                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", k_edge->src));
1038                 }
1039         }
1040 }
1041
1042 static void update_cumulated_descendent_values(rss_t *rss, nodeset *y, ir_node *t_irn) {
1043         rss_irn_t *t = get_rss_irn(rss, t_irn);
1044         plist_element_t *el;
1045
1046         DBG((rss->dbg, LEVEL_2, "\t\tupdating cumulated descendant value of %+F:\n", t_irn));
1047
1048         foreach_plist(t->descendant_list, el) {
1049                 nodeset_insert(y, plist_element_get_value(el));
1050                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", plist_element_get_value(el)));
1051         }
1052 }
1053
1054 /**
1055  * Greedy-k: a heuristics for the MMA problem
1056  */
1057 static void compute_killing_function(rss_t *rss) {
1058         cbc_t *cbc;
1059         struct obstack obst;
1060
1061         obstack_init(&obst);
1062
1063         rss->cbc_set = pset_new_ptr(5);
1064         compute_bipartite_decomposition(rss);
1065
1066         /* for all bipartite components do: */
1067         foreach_pset(rss->cbc_set, cbc) {
1068                 ir_node *p;
1069                 nodeset *x       = new_nodeset(10);
1070                 nodeset *y       = new_nodeset(10);
1071                 child_t **sks    = NEW_ARR_F(child_t *, 20);
1072                 int     cur_len  = 0;
1073                 int     cur_size = 20;
1074                 int     i;
1075
1076                 DBG((rss->dbg, LEVEL_1, "\tcomputing SKS for cbc %d:\n", cbc->nr));
1077                 DBG((rss->dbg, LEVEL_2, "\t\tinitializing parents X:\n"));
1078
1079                 /* X = S_cb (all parents are initially uncovered) */
1080                 foreach_nodeset(cbc->parents, p) {
1081                         nodeset_insert(x, p);
1082                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", p));
1083                 }
1084
1085                 /* while X not empty */
1086                 while (nodeset_count(x) > 0) {
1087                         child_t *t = obstack_alloc(&obst, sizeof(*t));
1088                         memset(t, 0, sizeof(*t));
1089
1090                         t = select_child_max_cost(rss, x, y, t, cbc);
1091
1092                         if (cur_len >= cur_size) {
1093                                 ARR_EXTO(child_t *, sks, cur_size * 2);
1094                                 cur_size *= 2;
1095                         }
1096
1097                         DBG((rss->dbg, LEVEL_2, "\t\tinsert child %+F (%.3f) into SKS at pos %d\n", t->irn, t->cost, cur_len));
1098
1099                         sks[cur_len++] = t;
1100                         remove_covered_parents(rss, x, t->irn, cbc);
1101                         update_cumulated_descendent_values(rss, y, t->irn);
1102                 }
1103
1104                 ARR_SHRINKLEN(sks, cur_len);
1105
1106                 /* sort SKS in increasing cost order */
1107                 qsort(sks, cur_len, sizeof(sks[0]), cmp_child_costs);
1108
1109                 DBG((rss->dbg, LEVEL_2, "\tprocessing SKS for cbc %d:\n", cbc->nr));
1110
1111                 /* build killing function */
1112                 for (i = cur_len - 1; i >= 0; --i) { /* loop over sks in decreasing cost order */
1113                         child_t         *t = sks[i];
1114                         rss_irn_t       *rt = get_rss_irn(rss, t->irn);
1115                         plist_element_t *p_el;
1116
1117                         DBG((rss->dbg, LEVEL_3, "\t\t\tkiller %+F (%.3f):\n", t->irn, t->cost));
1118
1119                         /* kill all unkilled parents of t */
1120                         foreach_plist(rt->parent_list, p_el) {
1121                                 ir_node    *par = plist_element_get_value(p_el);
1122                                 rss_irn_t *rpar = get_rss_irn(rss, par);
1123
1124                                 if (is_Sink(rpar->killer)) {
1125                                         rpar->killer = t->irn;
1126                                         DBG((rss->dbg, LEVEL_2, "\t\tkill %+F\n", rpar->irn));
1127                                 }
1128                                 else {
1129                                         DBG((rss->dbg, LEVEL_3, "\t\t\tkeeping %+F as killer for %+F\n", rpar->killer, rpar->irn));
1130                                 }
1131                         }
1132                 }
1133
1134                 del_nodeset(x);
1135                 del_nodeset(y);
1136                 DEL_ARR_F(sks);
1137         }
1138
1139         if (rss->opts->dump_flags & RSS_DUMP_KILL)
1140                 debug_vcg_dump_kill(rss);
1141
1142         del_pset(rss->cbc_set);
1143         obstack_free(&obst, NULL);
1144 }
1145
1146 /**
1147  * Adds the edge src -> tgt to the dvg. Checks if reverse edge is already there (asserts).
1148  */
1149 static INLINE void add_dvg_edge(rss_t *rss, dvg_t *dvg, ir_node *src, ir_node *tgt, int have_source) {
1150         rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1151         rss_edge_t key;
1152
1153         if (! have_source)
1154                 nodeset_insert(dvg->nodes, src);
1155         else
1156                 assert(nodeset_find(dvg->nodes, src) != NULL && "Wrong assumption");
1157
1158         nodeset_insert(dvg->nodes, tgt);
1159
1160         /* add an edge to our killer */
1161         dvg_edge->src  = src;
1162         dvg_edge->tgt  = tgt;
1163         dvg_edge->next = NULL;
1164
1165         key.src = tgt;
1166         key.tgt = src;
1167         assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1168
1169         /* add the edge to the DVG */
1170         DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", src, tgt));
1171         pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1172 }
1173
1174 /**
1175  * Computes the disjoint value DAG (DVG).
1176  * BEWARE: It is not made explicitly clear in the Touati paper,
1177  *         but the DVG is meant to be build from the KILLING DAG
1178  */
1179 static void compute_dvg(rss_t *rss, dvg_t *dvg) {
1180         plist_element_t *el;
1181
1182         DBG((rss->dbg, LEVEL_1, "\tcomputing DVG:\n"));
1183
1184         foreach_plist(rss->nodes, el) {
1185                 ir_node    *u_irn    = plist_element_get_value(el);
1186                 rss_irn_t  *u        = get_rss_irn(rss, u_irn);
1187                 rss_irn_t  *u_killer = get_rss_irn(rss, u->killer);
1188                 int        i;
1189
1190                 /* TODO: omit nodes only having sink as pkiller and killing no one */
1191
1192                 /* add an edge to killer */
1193                 add_dvg_edge(rss, dvg, u_irn, u->killer, 0);
1194
1195                 if (is_Sink(u->killer))
1196                         continue;
1197
1198                 /* We add an edge to every killer, from where we could be reached. */
1199                 for (i = ARR_LEN_SAFE(u_killer->descendants) - 1; i >= 0; --i) {
1200                         add_dvg_edge(rss, dvg, u_irn, u_killer->descendants[i], 1);
1201                 }
1202
1203 #if 0
1204
1205                 foreach_plist(rss->nodes, el2) {
1206                         ir_node *v_irn = plist_element_get_value(el2);
1207
1208                         /*
1209                                 There is an edge (u, v) in the DVG iff v is a descendant of the killer(u).
1210                         */
1211                         if (BSEARCH_IRN_ARR(v_irn, u_kill->descendants)) {
1212                                 rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1213                                 rss_edge_t key;
1214
1215                                 /* insert the user into the DVG and append it to the user list of u */
1216                                 nodeset_insert(dvg->nodes, v_irn);
1217                                 if (! plist_has_value(u->dvg_user_list, v_irn))
1218                                         plist_insert_back(u->dvg_user_list, v_irn);
1219
1220                                 dvg_edge->src  = u_irn;
1221                                 dvg_edge->tgt  = v_irn;
1222                                 dvg_edge->next = NULL;
1223
1224                                 key.src = v_irn;
1225                                 key.tgt = u_irn;
1226
1227                                 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1228
1229                                 /* add the edge to the DVG */
1230                                 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", u_irn, v_irn));
1231                                 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1232                         }
1233                 }
1234 #endif /* if 0 */
1235         }
1236
1237         if (rss->opts->dump_flags & RSS_DUMP_DVG)
1238                 debug_vcg_dump_dvg(rss, dvg);
1239 }
1240
1241 /**
1242  * Updates the dvg structure when a serialization edge from src -> tgt is added.
1243  */
1244 static void update_dvg(rss_t *rss, dvg_t *dvg, rss_irn_t *src, rss_irn_t *tgt) {
1245         int i;
1246
1247         add_dvg_edge(rss, dvg, tgt->irn, src->irn, 0);
1248
1249         for (i = ARR_LEN_SAFE(src->descendants) - 1; i >= 0; --i) {
1250                 add_dvg_edge(rss, dvg, tgt->irn, src->descendants[i], 1);
1251         }
1252 }
1253
1254 #if 0
1255 /**
1256  * Accumulate all descendants for root into list.
1257  */
1258 static void accumulate_dvg_descendant_values(rss_t *rss, rss_irn_t *root, plist_t *list) {
1259         if (plist_count(root->dvg_user_list) > 0) {
1260                 plist_element_t *el;
1261
1262                 foreach_plist(root->dvg_user_list, el) {
1263                         ir_node   *v_irn = plist_element_get_value(el);
1264                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
1265
1266                         /* add v as descendant */
1267                         if (! plist_has_value(list, v_irn)) {
1268                                 plist_insert_back(list, v_irn);
1269                                 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG descendant %+F\n", v_irn));
1270                         }
1271
1272                         /* accumulate v's descendants */
1273                         accumulate_dvg_descendant_values(rss, v, list);
1274                 }
1275         }
1276 }
1277
1278 /**
1279  * Builds the list of potential killers for each node
1280  * in the given DVG.
1281  * Needs the descendant list for all user as sorted array.
1282  */
1283 static void build_dvg_pkiller_list(rss_t *rss, dvg_t *dvg) {
1284         ir_node *irn;
1285
1286         foreach_nodeset(dvg->nodes, irn) {
1287                 rss_irn_t       *node = get_rss_irn(rss, irn);
1288                 plist_element_t *el, *el2;
1289
1290                 DBG((rss->dbg, DEBUG_DVG, "\t\tbuilding pkiller list for %+F\n", irn));
1291
1292                 /* check each user */
1293                 foreach_plist(node->dvg_user_list, el) {
1294                         ir_node *u_irn = plist_element_get_value(el);
1295
1296                         /* is the current user u_irn not a descendant of any other user -> pkiller */
1297                         foreach_plist(node->dvg_user_list, el2) {
1298                                 ir_node   *v_irn = plist_element_get_value(el2);
1299                                 rss_irn_t *v     = get_rss_irn(rss, v_irn);
1300
1301                                 if (el != el2                             &&
1302                                         ! BSEARCH_IRN_ARR(u_irn, v->dvg_desc) &&
1303                                         ! plist_has_value(node->dvg_pkiller_list, u_irn))
1304                                 {
1305                                         plist_insert_back(node->dvg_pkiller_list, u_irn);
1306                                         DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG pkiller %+F\n", u_irn));
1307                                 }
1308                         }
1309                 }
1310
1311                 node->dvg_pkiller = build_sorted_array_from_list(node->dvg_pkiller_list, phase_obst(&rss->ph));
1312         }
1313
1314         DEBUG_ONLY(
1315                 if (firm_dbg_get_mask(rss->dbg) & DEBUG_DVG)
1316                         debug_vcg_dump_dvg_pkiller(rss, dvg);
1317         );
1318 }
1319
1320 #endif /* if 0 */
1321
1322 /**
1323  * Compute the maximal antichain of the current DVG.
1324  * This is a reimplementation of the MAXIMAL_ANTI_CHAIN function
1325  * from the DDG library 1.1 (DAG.cpp).
1326  */
1327 static nodeset *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration) {
1328         int         n               = nodeset_count(dvg->nodes);
1329         int         *assignment     = alloca(n * sizeof(assignment[0]));
1330         int         *assignment_rev = alloca(n * sizeof(assignment_rev[0]));
1331         int         *idx_map        = alloca(n * sizeof(idx_map[0]));
1332         hungarian_problem_t *bp;
1333         nodeset     *values, *temp;
1334         ir_node     *u_irn;
1335         int         i, j, cost, cur_chain, res;
1336         rss_edge_t  *dvg_edge;
1337
1338 #define MAP_IDX(irn) bsearch_for_index(get_irn_idx(irn), idx_map,  n,  1)
1339
1340         if (pset_count(dvg->edges) == 0)
1341                 return NULL;
1342
1343         bp = hungarian_new(n, n, 1, HUNGARIAN_MATCH_NORMAL);
1344
1345         /*
1346                 At first, we build an index map for the nodes in the DVG,
1347                 because we cannot use the irn idx therefore as the resulting
1348                 bipartite data structure would have around 1.2GB.
1349                 So we limit the size to the number of nodes we have in the DVG
1350                 and build a sorted index map for their irn indices.
1351         */
1352         i = 0;
1353         foreach_nodeset(dvg->nodes, u_irn) {
1354                 idx_map[i++] = get_irn_idx(u_irn);
1355         }
1356         qsort(idx_map, n, sizeof(idx_map[0]), cmp_int);
1357
1358         foreach_pset(dvg->edges, dvg_edge) {
1359                 int idx_u = MAP_IDX(dvg_edge->src);
1360                 int idx_v = MAP_IDX(dvg_edge->tgt);
1361
1362                 /* add the entry to the bipartite data structure */
1363                 hungarian_add(bp, idx_u, idx_v, 1);
1364                 DBG((rss->dbg, LEVEL_3, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1365                         idx_u, dvg_edge->src, idx_v, dvg_edge->tgt));
1366         }
1367 #if 0
1368         /*
1369                 Add a bipartite entry for each pair of nodes (u, v), where exists a
1370                 path in the DVG from u to v, ie. connect all descendants(v) to v.
1371                 desc_dvg(v) = accumulated descendants of all z in dvg_user_list(v)
1372         */
1373         foreach_nodeset(dvg->nodes, u_irn) {
1374                 rss_irn_t *u        = get_rss_irn(rss, u_irn);
1375                 int       idx_u_irn = MAP_IDX(u_irn);
1376
1377                 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG descendants of %+F:\n", u_irn));
1378
1379                 //plist_clear(u->dvg_desc_list);
1380                 //accumulate_dvg_descendant_values(rss, u, u->dvg_desc_list);
1381
1382                 /*
1383                         FIXME: The array is build on the phase obstack and we cannot free the data.
1384                         So the array get as many times allocated as this function is called.
1385                 */
1386
1387                 /* build the sorted array for faster searches */
1388                 //u->dvg_desc = build_sorted_array_from_list(u->dvg_desc_list, phase_obst(&rss->ph));
1389
1390                 DBG((rss->dbg, DEBUG_MAX_AC, "\t\tadding bipartite entries of %+F:\n", u_irn));
1391
1392                 /* add the bipartite entries for all descendants */
1393                 for (i = ARR_LEN_SAFE(u->dvg_desc) - 1; i >= 0; --i) {
1394                         rss_irn_t *desc    = get_rss_irn(rss, u->dvg_desc[i]);
1395                         int       idx_desc = MAP_IDX(u->dvg_desc[i]);
1396
1397                         /* add the entry to the bipartite data structure */
1398                         hungarian_add(bp, idx_u_irn, idx_desc, 1);
1399                         DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1400                                 idx_u_irn, u_irn, idx_desc, u->dvg_desc[i]));
1401
1402                         need_matching = 1;
1403                 }
1404         }
1405 #endif
1406
1407         /* We want maximum cardinality matching */
1408         hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);
1409
1410 #if 0
1411         DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG pkiller:\n"));
1412         /* beware: the following function needs the dvg_desc array */
1413         build_dvg_pkiller_list(rss, dvg);
1414 #endif
1415
1416         DBG((rss->dbg, LEVEL_2, "\t\tcomputing bipartite matching\n"));
1417         /*
1418                 The maximum cardinality bipartite matching gives us the minimal
1419                 chain partition, which corresponds to the maximum anti chains.
1420         */
1421         res = hungarian_solve(bp, assignment, &cost, 1);
1422         assert(res == 0 && "Bipartite matching failed!");
1423
1424         hungarian_free(bp);
1425         memset(assignment_rev, -1, n * sizeof(assignment_rev[0]));
1426
1427         /* build the reverse assignment, ie. foreach i -> j, add j -> i */
1428         for (i = 0; i < n; ++i) {
1429                 if (assignment[i] >= 0) {
1430                         int j = assignment[i];
1431                         assignment_rev[j] = i;
1432                 }
1433         }
1434
1435         DBG((rss->dbg, LEVEL_2, "\t\tgot assignment with cost %d\n", cost));
1436         DBG((rss->dbg, LEVEL_3, "\t\t\tassignment   ---   reverse assignment\n", cost));
1437         for (i = 0; i < n; ++i) {
1438                 DBG((rss->dbg, LEVEL_3, "\t\t\t%3d -> %3d         %3d -> %3d\n", i, assignment[i], i, assignment_rev[i]));
1439         }
1440
1441         values    = new_nodeset(10);
1442         cur_chain = 0;
1443         /* Construction of the minimal chain partition */
1444         for (j = 0; j < n; ++j) {
1445                 /* check nodes, which did not occur as target */
1446                 if (assignment_rev[j] == -1) {
1447                         int       xj      = idx_map[j];
1448                         ir_node   *xj_irn = get_idx_irn(rss->irg, xj);
1449                         rss_irn_t *xj_rss = get_rss_irn(rss, xj_irn);
1450                         chain_t   *c      = obstack_alloc(phase_obst(&rss->ph), sizeof(*c));
1451                         int       source;
1452
1453                         /* there was no source for j -> we have a source of a new chain */
1454                         nodeset_insert(values, xj_irn);
1455
1456                         c->elements = plist_obstack_new(phase_obst(&rss->ph));
1457                         c->nr       = cur_chain++;
1458                         plist_insert_back(c->elements, xj_irn);
1459
1460                         xj_rss->chain = c;
1461
1462                         DBG((rss->dbg, LEVEL_2, "\t\tstarting chain %d:\n", c->nr));
1463                         DBG((rss->dbg, LEVEL_2, "\t\t\t%+F (%d)", xj_irn, j));
1464
1465                         /* follow chain, having j as source */
1466                         source = j;
1467                         while (assignment[source] >= 0) {
1468                                 int       target  = assignment[source];
1469                                 int       irn_idx = idx_map[target];
1470                                 ir_node   *irn    = get_idx_irn(rss->irg, irn_idx);
1471                                 rss_irn_t *node   = get_rss_irn(rss, irn);
1472
1473                                 plist_insert_back(c->elements, irn);
1474                                 node->chain = c;
1475
1476                                 DB((rss->dbg, LEVEL_2, " -> %+F (%d)", irn, target));
1477
1478                                 /* new source = last target */
1479                                 source = target;
1480                         }
1481
1482                         DB((rss->dbg, LEVEL_2, "\n"));
1483                 }
1484         }
1485
1486         /*
1487                 Computing the maximal antichain: Select an element from each
1488                 chain such, such it is parallel with the others.
1489         */
1490         DBG((rss->dbg, LEVEL_1, "\t\tcomputing set of saturation values (MAX AC)\n"));
1491         DBG((rss->dbg, LEVEL_3, "\t\tstarting with:\n"));
1492
1493         DEBUG_ONLY(
1494                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_3)
1495                         dump_nodeset(values, "\t\t\t");
1496         )
1497
1498         temp = NULL;
1499         do {
1500                 /*
1501                         We need an explicit array for the values as
1502                         we cannot iterate multiple times over the same
1503                         set at the same time. :-(((((
1504                 */
1505                 int     n         = nodeset_count(values);
1506                 int     i         = 0;
1507                 ir_node **val_arr = NEW_ARR_F(ir_node *, n);
1508
1509                 foreach_nodeset(values, u_irn)
1510                         val_arr[i++] = u_irn;
1511
1512                 if (temp)
1513                         del_nodeset(temp);
1514
1515                 temp = new_nodeset(10);
1516
1517                 /* Select all nodes from current value set, having another node in the set as descendant. */
1518                 for (i = 0; i < n; ++i) {
1519                         rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1520
1521                         for (j = 0; j < n; ++j) {
1522                                 if (i != j) {
1523                                         rss_edge_t key;
1524
1525                                         key.src = val_arr[i];
1526                                         key.tgt = val_arr[j];
1527
1528                                         if (pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1529                                                 /* v[j] is descendant of u -> remove u and break */
1530                                                 nodeset_insert(temp, u->irn);
1531                                                 nodeset_remove(values, u->irn);
1532
1533                                                 DBG((rss->dbg, LEVEL_3, "\t\t\tremoving %+F from values, adding it to temp\n", u->irn));
1534
1535                                                 break;
1536                                         }
1537                                 }
1538                         }
1539                 }
1540
1541                 /* Try to insert the chain predecessor of all selected u's */
1542                 foreach_nodeset(temp, u_irn) {
1543                         rss_irn_t       *u  = get_rss_irn(rss, u_irn);
1544                         chain_t         *c  = u->chain;
1545                         plist_element_t *el = plist_find_value(c->elements, u_irn);
1546
1547                         assert(el && "Missing element in chain!");
1548
1549                         /* If u has predecessor in chain: insert the predecessor */
1550                         if (el == plist_element_get_prev(el)) {
1551                                 nodeset_insert(values, plist_element_get_value(el));
1552                                 DBG((rss->dbg, LEVEL_3, "\t\t\tadding %+F to values\n", plist_element_get_value(el)));
1553                         }
1554                 }
1555
1556
1557                 DEL_ARR_F(val_arr);
1558         } while (nodeset_count(temp) > 0);
1559
1560         DBG((rss->dbg, LEVEL_2, "\t\tfinal set:\n"));
1561         DEBUG_ONLY(
1562                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2) {
1563                         dump_nodeset(values, "\t\t\t");
1564                 }
1565         );
1566
1567         if (rss->opts->dump_flags & RSS_DUMP_MAXAC)
1568                 debug_vcg_dump_pkg(rss, values, iteration);
1569
1570         del_nodeset(temp);
1571
1572         return values;
1573
1574 #undef MAP_IDX
1575 }
1576
1577 /**
1578  * Computes the best serialization between two nodes of sat_vals.
1579  */
1580 static serialization_t *compute_best_admissible_serialization(rss_t *rss, nodeset *sat_vals, serialization_t *ser, int num_regs) {
1581         int        n                    = nodeset_count(sat_vals);
1582         int        n_idx                = ARR_LEN_SAFE(rss->idx_map);
1583         int        i                    = 0;
1584         ir_node    **val_arr            = alloca(n * sizeof(val_arr[0]));
1585         bitset_t   *bs_sv               = bitset_alloca(n_idx);
1586         bitset_t   *bs_vdesc            = bitset_alloca(n_idx);
1587         bitset_t   *bs_tmp              = bitset_alloca(n_idx);
1588         bitset_t   *bs_ukilldesc        = bitset_alloca(n_idx);
1589         int        best_benefit         = INT_MAX;
1590         int        best_omega2          = INT_MAX;
1591         int        best_benefit_omega20 = INT_MAX;
1592         int        has_omega1           = 0;
1593         int        j, k;
1594         ir_node    *irn;
1595         rss_edge_t min_benefit_edge;
1596         rss_edge_t min_omega20_edge;
1597         rss_irn_t  *ser_u_omega1, *ser_v_omega1;
1598         rss_irn_t  *ser_u_omega20, *ser_v_omega20;
1599
1600         DBG((rss->dbg, LEVEL_1, "\tcomputing admissible serializations:\n"));
1601
1602         /*
1603                 We need an explicit array for the values as
1604                 we cannot iterate multiple times over the same
1605                 set at the same time. :-(((((
1606         */
1607
1608         foreach_nodeset(sat_vals, irn) {
1609                 val_arr[i++] = irn;
1610                 bitset_set(bs_sv, BLOCK_IDX_MAP(rss, irn));
1611         }
1612
1613         /*
1614                 We build all admissible serializations and remember the best found so far.
1615                 For u in sat_vals:
1616                  For v in sat_val:
1617                    if v in pkiller(u): add edge from v to all other pkiller(u)
1618                    else: for all vv in pkiller(u): add edge from v to vv if there exists no path from vv to v
1619         */
1620
1621 /*
1622         A node is unserializable if:
1623         - it has only one killer and this one is Sink
1624     - it kills no other values
1625         In this case there is no serialization which could
1626         reduce the registerpressure
1627 */
1628 #define IS_UNSERIALIZABLE_NODE(rss_node)           \
1629         ((plist_count(rss_node->pkiller_list) == 1) && \
1630         is_Sink(rss_node->killer)                   && \
1631         (rss_node->kill_count                 == 0))
1632
1633         /* for all u in sat_vals */
1634         for (i = 0; i < n; ++i) {
1635                 rss_irn_t       *u = get_rss_irn(rss, val_arr[i]);
1636                 plist_element_t *el;
1637
1638                 /* ignore nodes where serialization does not help */
1639                 if (IS_UNSERIALIZABLE_NODE(u)) {
1640                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", u->irn));
1641                         continue;
1642                 }
1643
1644                 /* accumulate all descendants of all pkiller(u) */
1645                 bitset_clear_all(bs_ukilldesc);
1646                 foreach_plist(u->pkiller_list, el) {
1647                         ir_node   *irn  = plist_element_get_value(el);
1648                         rss_irn_t *node = get_rss_irn(rss, irn);
1649
1650                         if (! is_Sink(irn))
1651                                 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, irn));
1652                         else
1653                                 continue;
1654
1655                         for (k = ARR_LEN_SAFE(node->descendants) - 1; k >= 0; --k) {
1656                                 if (! is_Sink(node->descendants[k]))
1657                                         bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, node->descendants[k]));
1658                         }
1659                 }
1660
1661                 /* for all v in sat_vals */
1662                 for (j = 0; j < n; ++j) {
1663                         ir_node   *v_irn   = val_arr[j];
1664                         rss_irn_t *v       = get_rss_irn(rss, v_irn);
1665                         unsigned  v_height = get_irn_height(rss->h, v_irn);
1666                         int       omega1, omega2, is_pkiller;
1667
1668                         /* v cannot be serialized with itself
1669                          * ignore nodes where serialization does not help */
1670                         if (i == j || IS_UNSERIALIZABLE_NODE(v)) {
1671                                 if (i != j)
1672                                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", v->irn));
1673                                 continue;
1674                         }
1675
1676                         /* get descendants of v */
1677                         bitset_clear_all(bs_vdesc);
1678                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v_irn));
1679                         for (k = ARR_LEN_SAFE(v->descendants) - 1; k >= 0; --k) {
1680                                 if (! is_Sink(v->descendants[k]))
1681                                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v->descendants[k]));
1682                         }
1683
1684                         /* if v is in pkiller(u) */
1685                         is_pkiller = plist_has_value(u->pkiller_list, v_irn);
1686
1687                         /* for all vv in pkiller(u) */
1688                         foreach_plist(u->pkiller_list, el) {
1689                                 ir_node *vv_irn  = plist_element_get_value(el);
1690                                 int     add_edge;
1691
1692                                 if (is_Sink(vv_irn))
1693                                         continue;
1694
1695                                 if (is_pkiller)
1696                                         add_edge = vv_irn != v_irn && skip_Proj(vv_irn) != skip_Proj(v_irn);
1697                                 else
1698                                         add_edge = ! heights_reachable_in_block(rss->h, skip_Proj(vv_irn), skip_Proj(v_irn));
1699
1700                                 /*
1701                                         As we add an edge from vv -> v, we have to make sure,
1702                                         that there exists no path from v to vv.
1703                                 */
1704
1705                                 if (add_edge) {
1706                                         int vv_height = get_irn_height(rss->h, vv_irn);
1707                                         int mu1, mu2, critical_path_cost;
1708
1709                                         /*
1710                                                 mu1 = | descendants(v) cut sat_vals |
1711                                                 the number of saturating values which cannot
1712                                                 be simultaneously alive with u
1713                                         */
1714                                         bitset_copy(bs_tmp, bs_vdesc);
1715                                         mu1 = bitset_popcnt(bitset_and(bs_tmp, bs_sv));
1716
1717                                         /*
1718                                                 mu2 = | accum_desc_all_pkiller(u) without descendants(v) |
1719                                         */
1720                                         if (is_pkiller) {
1721                                                 bitset_copy(bs_tmp, bs_ukilldesc);
1722                                                 mu2 = bitset_popcnt(bitset_andnot(bs_tmp, bs_vdesc));
1723                                         }
1724                                         else {
1725                                                 mu2 = 0;
1726                                         }
1727
1728                                         /* omega1 = mu1 - mu2 */
1729                                         omega1 = mu1 - mu2;
1730
1731                                         if (omega1 != 0)
1732                                                 has_omega1 = 1;
1733
1734                                         /* omega2 = increase of critical path */
1735                                         critical_path_cost =
1736                                                 v_height                        /* longest path from v to sink */
1737                                                 + rss->max_height - vv_height   /* longest path from source to vv */
1738                                                 + 1;                            /* edge */
1739
1740                                         /*
1741                                                 If critical_path_cost > max_height -> the new edge
1742                                                 would increase the longest critical path by the difference.
1743                                         */
1744                                         omega2 = critical_path_cost > rss->max_height ? critical_path_cost - rss->max_height : 0;
1745
1746                                         /* this keeps track of the edge with the best benefit */
1747                                         if (omega1 >= num_regs - n && omega1 < best_benefit) {
1748                                                 min_benefit_edge.src = v_irn;
1749                                                 min_benefit_edge.tgt = vv_irn;
1750
1751                                                 ser_u_omega1 = u;
1752                                                 ser_v_omega1 = v;
1753
1754                                                 best_benefit    = omega1;
1755                                                 ser->new_killer = is_pkiller;
1756                                         }
1757
1758                                         /* this keeps track of the edge with the best omega1 costs where omega2 == 0 */
1759                                         if (omega2 == 0 && omega1 >= num_regs - n && omega1 < best_benefit_omega20) {
1760                                                 min_omega20_edge.src = v_irn;
1761                                                 min_omega20_edge.tgt = vv_irn;
1762
1763                                                 ser_u_omega20 = u;
1764                                                 ser_v_omega20 = v;
1765
1766                                                 best_benefit_omega20 = omega1;
1767                                                 ser->new_killer      = is_pkiller;
1768                                         }
1769
1770                                         best_omega2 = MIN(best_omega2, omega2);
1771
1772                                         DBG((rss->dbg, LEVEL_2, "\t\tfound %+F -> %+F (w1 %d, w2 %d)\n",
1773                                                 v_irn, vv_irn, omega1, omega2));
1774                                 } /* if add_edge */
1775                         } /* for all vv in pkiller(u) */
1776                 } /* for all v in sat_vals */
1777         } /* for all u in sat_vals */
1778
1779         if (! has_omega1)
1780                 return NULL;
1781
1782         if (best_omega2 == 0) {
1783                 ser->u         = ser_u_omega20;
1784                 ser->v         = ser_v_omega20;
1785                 ser->edge->src = min_omega20_edge.src;
1786                 ser->edge->tgt = min_omega20_edge.tgt;
1787                 ser->omega1    = best_benefit_omega20;
1788                 ser->omega2    = best_omega2;
1789         }
1790         else {
1791                 ser->u         = ser_u_omega1;
1792                 ser->v         = ser_v_omega1;
1793                 ser->edge->src = min_benefit_edge.src;
1794                 ser->edge->tgt = min_benefit_edge.tgt;
1795                 ser->omega1    = best_benefit;
1796                 ser->omega2    = best_omega2;
1797         }
1798
1799         return ser;
1800
1801 #undef IS_UNSERIALIZABLE_NODE
1802 }
1803
1804 /**
1805  * Perform the value serialization heuristic and add all
1806  * computed serialization edges as dependencies to the irg.
1807  */
1808 static void perform_value_serialization_heuristic(rss_t *rss) {
1809         bitset_t *arch_nonign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1810         bitset_t *abi_ign_bs     = bitset_alloca(arch_register_class_n_regs(rss->cls));
1811         int      available_regs, iteration;
1812         dvg_t    dvg;
1813         nodeset  *sat_vals;
1814         pset *ser_set = new_pset(cmp_rss_edges, 20);
1815
1816         /* available_regs = R = |arch_non_ignore_regs cut ~abi_ignore_regs| */
1817         arch_put_non_ignore_regs(rss->arch_env, rss->cls, arch_nonign_bs);
1818         be_abi_put_ignore_regs(rss->abi, rss->cls, abi_ign_bs);
1819         bitset_andnot(arch_nonign_bs, abi_ign_bs);
1820         available_regs = bitset_popcnt(arch_nonign_bs) - 1;
1821
1822         DBG((rss->dbg, LEVEL_1, "\n\t#available regs: %d\n\n", available_regs));
1823
1824         /*
1825                 At first we need to compute the disjoint value DAG (DVG = {V, E_dv}).
1826                 V    = set of all nodes we are currently interested in
1827                 E_dv = there is an edge from u to v iff v is a descendant of killer(u), forall u, v in V
1828         */
1829         dvg.nodes = new_nodeset(plist_count(rss->nodes));
1830         dvg.edges = new_pset(cmp_rss_edges, plist_count(rss->nodes) * 5);
1831         compute_dvg(rss, &dvg);
1832
1833         /*
1834                 Then we perform the heuristic serialization algorithm
1835                 on the DVG which gives us all necessary serialization
1836                 edges.
1837         */
1838         DBG((rss->dbg, LEVEL_1, "\tcomputing maximal antichain:\n"));
1839         iteration = 0;
1840         sat_vals  = compute_maximal_antichain(rss, &dvg, iteration++);
1841         while (sat_vals && (nodeset_count(sat_vals) > available_regs)) {
1842                 serialization_t *ser, best_ser;
1843                 rss_edge_t      *edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*edge));
1844                 ir_node         *dep_src, *dep_tgt;
1845
1846                 best_ser.edge = edge;
1847                 ser = compute_best_admissible_serialization(rss, sat_vals, &best_ser, available_regs);
1848
1849                 DBG((rss->dbg, LEVEL_1, "\tcurrent register saturation %d, target %d\n", nodeset_count(sat_vals), available_regs));
1850
1851                 if (! ser) {
1852                         DBG((rss->dbg, LEVEL_1, "\tno RS improving serialization found, breaking at iteration %d\n", iteration));
1853                         break;
1854                 }
1855
1856                 /* Insert the serialization as dependency edge into the irg. */
1857                 DBG((rss->dbg, LEVEL_1, "\tserializing %+F -> %+F with edge %+F -> %+F and cost %d, %d\n",
1858                         ser->u->irn, ser->v->irn, ser->edge->src, ser->edge->tgt, ser->omega1, ser->omega2));
1859
1860                 if (pset_find(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge)))
1861                         ir_printf("WARNING: serialization %+F -> %+F computed twice!\n", ser->edge->src, ser->edge->tgt);
1862
1863
1864                 pset_insert(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge));
1865
1866                 /* update the dvg */
1867                 update_dvg(rss, &dvg, ser->v, ser->u);
1868                 update_dvg(rss, &dvg, get_rss_irn(rss, ser->edge->src), get_rss_irn(rss, ser->edge->tgt));
1869                 del_nodeset(sat_vals);
1870
1871                 dep_src = skip_Proj(ser->edge->src);
1872                 dep_tgt = ser->edge->tgt;
1873                 add_irn_dep(dep_src, dep_tgt);
1874
1875                 /* Update descendants, consumer and pkillers of the target */
1876                 update_node_info(rss, ser->edge->tgt, ser->edge->src);
1877
1878                 /* TODO: try to find a cheaper way for updating height information */
1879                 rss->max_height = heights_recompute_block(rss->h, rss->block);
1880
1881                 /* Recompute the antichain for next serialization */
1882                 DBG((rss->dbg, LEVEL_1, "\tre-computing maximal antichain:\n"));
1883                 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
1884         }
1885
1886         del_nodeset(dvg.nodes);
1887         del_pset(dvg.edges);
1888 }
1889
1890 /**
1891  * Do initial calculations for a block.
1892  */
1893 static void process_block(ir_node *block, void *env) {
1894         rss_t *rss = env;
1895         int   i, n;
1896         const ir_edge_t *edge;
1897
1898         phase_init(&rss->ph, "rss block preprocessor", rss->irg, PHASE_DEFAULT_GROWTH, init_rss_irn);
1899
1900         DBG((rss->dbg, LEVEL_1, "preprocessing block %+F\n", block));
1901         rss->block = block;
1902
1903         /* build an index map for all nodes in the current block */
1904         i            = 0;
1905         n            = get_irn_n_edges(block);
1906         NEW_ARR_A(int *, rss->idx_map, n);
1907         foreach_out_edge(block, edge) {
1908                 ir_node *irn      = get_edge_src_irn(edge);
1909                 rss->idx_map[i++] = get_irn_idx(irn);
1910         }
1911         qsort(rss->idx_map, n, sizeof(rss->idx_map[0]), cmp_int);
1912         rss->max_height = heights_recompute_block(rss->h, block);
1913
1914         /* loop over all register classes */
1915         for (i = arch_isa_get_n_reg_class(rss->arch_env->isa) - 1; i >= 0; --i) {
1916                 const arch_register_class_t *cls = arch_isa_get_reg_class(rss->arch_env->isa, i);
1917
1918                 rss->cls = cls;
1919                 DBG((rss->dbg, LEVEL_1, "register class %s\n", arch_register_class_name(cls)));
1920
1921                 /* reset the list of interesting nodes */
1922                 plist_clear(rss->nodes);
1923                 plist_insert_back(rss->nodes, _sink);
1924
1925                 /* collect all nodes having a certain register class */
1926                 foreach_out_edge(block, edge) {
1927                         ir_node *irn = get_edge_src_irn(edge);
1928
1929                         /*
1930                                 We skip:
1931                                 - mode_T nodes (the projs are asked)
1932                                 - mode_X nodes (control flow nodes are always scheduled last)
1933                                 - Keeps (they are always immediately scheduled)
1934                                 - Phi (same as Keep)
1935                         */
1936                         if (get_irn_mode(irn) == mode_T || be_is_Keep(irn) || is_Phi(irn))
1937                                 continue;
1938
1939                         /*
1940                                 In case of a proj, we skip
1941                                 - Barrier (they are a Barrier :)
1942                                 - Start
1943                                 - the Proj itself, as it's scheduled always with it's super node
1944                         */
1945                         if (is_Proj(irn)) {
1946                                 ir_node *pred = get_Proj_pred(irn);
1947                                 if (be_is_Barrier(pred) || is_Start(pred))
1948                                         continue;
1949                         }
1950
1951                         if (! arch_irn_is(rss->arch_env, irn, ignore) && arch_get_irn_reg_class(rss->arch_env, irn, -1) == cls) {
1952                                 plist_insert_back(rss->nodes, skip_Proj(irn));
1953
1954                                 /* calculate the descendants and consumer for each node in the block */
1955                                 collect_node_info(rss, skip_Proj(irn));
1956                         }
1957                 }
1958
1959                 /* compute the potential killing set PK(G) */
1960                 compute_pkill_set(rss);
1961
1962                 /* compute the killing function k* */
1963                 compute_killing_function(rss);
1964
1965                 /*
1966                         Compute the heuristic value serialization and
1967                         add the necessary dependencies to the irg.
1968                 */
1969                 perform_value_serialization_heuristic(rss);
1970         }
1971
1972         phase_free(&rss->ph);
1973 }
1974
1975 #ifdef WITH_LIBCORE
1976 /**
1977  * Register the options.
1978  */
1979 void rss_register_options(lc_opt_entry_t *grp) {
1980         static int     run_once = 0;
1981         lc_opt_entry_t *rss_grp;
1982
1983         if (! run_once) {
1984                 run_once = 1;
1985                 rss_grp  = lc_opt_get_grp(grp, "rss");
1986
1987                 lc_opt_add_table(rss_grp, rss_option_table);
1988         }
1989 }
1990 #endif /* WITH_LIBCORE */
1991
1992 /**
1993  * Preprocess the irg for scheduling.
1994  */
1995 void rss_schedule_preparation(const be_irg_t *birg) {
1996         rss_t rss;
1997
1998         FIRM_DBG_REGISTER(rss.dbg, "firm.be.sched.rss");
1999
2000         //firm_dbg_set_mask(rss.dbg, LEVEL_1 | LEVEL_2 | LEVEL_3);
2001
2002         init_rss_special_nodes(birg->irg);
2003
2004         rss.irg      = birg->irg;
2005         rss.arch_env = birg->main_env->arch_env;
2006         rss.abi      = birg->abi;
2007         rss.h        = heights_new(birg->irg);
2008         rss.nodes    = plist_new();
2009         rss.opts     = &rss_options;
2010         irg_block_walk_graph(birg->irg, NULL, process_block, &rss);
2011         heights_free(rss.h);
2012         plist_free(rss.nodes);
2013
2014         if (birg->main_env->options->dump_flags & DUMP_SCHED)
2015                 be_dump(rss.irg, "-rss", dump_ir_block_graph);
2016 }