- renamed access offset functions
[libfirm] / ir / be / beschedrss.c
1 /**
2  * Implementation of a register saturating list scheduler
3  * as described in: Sid-Ahmed-Ali Touati
4  * Register Saturation in Superscalar and VLIW Codes
5  *
6  * @license This file protected by GPL -  GNU GENERAL PUBLIC LICENSE.
7  * @author  Christian Wuerdig
8  * @date    29.08.2006
9  * @cvs-id  $Id$
10  */
11 #ifdef HAVE_CONFIG_H
12 #include "config.h"
13 #endif
14
15 #include <limits.h>
16
17 #include "obst.h"
18 #include "debug.h"
19
20 #include "irgraph_t.h"
21 #include "irnode_t.h"
22 #include "iredges_t.h"
23 #include "ircons_t.h"
24 #include "irphase_t.h"
25 #include "irgwalk.h"
26 #include "irdump.h"
27 #include "irtools.h"
28 #include "irbitset.h"
29 #include "irprintf.h"
30 #include "bipartite.h"
31 #include "hungarian.h"
32 #include "plist.h"
33
34 #include "height.h"
35
36 #include "beabi.h"
37 #include "benode_t.h"
38 #include "besched_t.h"
39
40 #ifdef WITH_LIBCORE
41 #include <libcore/lc_opts.h>
42 #include <libcore/lc_opts_enum.h>
43 #endif /* WITH_LIBCORE */
44
45
46 #define ARR_LEN_SAFE(arr) ((arr) != NULL ? ARR_LEN((arr)) : 0)
47
48 #define HASH_RSS_EDGE(edge) ((get_irn_node_nr((edge)->src) << 16) | (get_irn_node_nr((edge)->tgt) & 0xFFFF))
49 #define BSEARCH_IRN_ARR(val, arr) \
50         bsearch(&(val), (arr), ARR_LEN_SAFE((arr)), sizeof((arr)[0]), cmp_irn_idx)
51
52 #define BLOCK_IDX_MAP(rss, irn) bsearch_for_index(get_irn_idx((irn)), (rss)->idx_map, ARR_LEN_SAFE((rss)->idx_map), 1)
53
54 /* the rss options */
55 typedef struct _rss_opts_t {
56         int dump_flags;
57 } rss_opts_t;
58
59 /* Represents a child with associated costs */
60 typedef struct _child {
61         ir_node *irn;
62         float   cost;
63 } child_t;
64
65 /* We need edges for several purposes. */
66 typedef struct _rss_edge {
67         ir_node *src;
68         ir_node *tgt;
69         void    *next;
70 } rss_edge_t;
71
72 /* Represents a connected bipartite component. */
73 typedef struct _cbc {
74         nodeset *parents;       /**< = S  a set of value producers */
75         nodeset *children;      /**< = T  a set of value consumers */
76         pset    *kill_edges;    /**< = E  a set of edges (t in T, s in S) such as each s in S gets killed by at least one t in T */
77         int     nr;             /**< a deterministic index for set insertion (used as hash) */
78 } cbc_t;
79
80 /* Represents a disjoint value DAG. */
81 typedef struct _dvg {
82         nodeset *nodes;
83         pset    *edges;
84 } dvg_t;
85
86 /* Represents a chain of nodes. */
87 typedef struct _chain {
88         plist_t *elements;   /**< List of chain elements */
89         int     nr;          /**< a deterministic index for set insertion (used as hash) */
90 } chain_t;
91
92 typedef struct _rss_irn {
93         plist_t  *consumer_list;    /**< List of consumers */
94         ir_node **consumer;         /**< Sorted consumer array (needed for faster access) */
95
96         plist_t  *parent_list;      /**< List of parents */
97         plist_t  *pkiller_list;     /**< List of potential killers */
98
99         plist_t  *descendant_list;  /**< List of descendants */
100         ir_node **descendants;      /**< Sorted descendant array (needed for faster access) */
101
102         ir_node  *killer;           /**< The selected unique killer */
103         ir_node  *irn;              /**< The corresponding firm node to this rss_irn */
104
105         chain_t  *chain;            /**< The chain, this node is associated to */
106
107         unsigned desc_walk;         /**< visited flag for collecting descendants */
108         unsigned kill_count;        /**< number of nodes killed by this one */
109
110         unsigned live_out : 1;      /**< irn has consumers outside of it's block */
111         unsigned visited  : 1;      /**< visited flag for bipartite decomposition */
112         unsigned havecons : 1;      /**< visited flag collect consumer */
113         unsigned handled  : 1;      /**< flag indicating whether or not the list structures have been build */
114         unsigned dumped   : 1;      /**< flag indication whether or not this node was dumped */
115 } rss_irn_t;
116
117 /* Represents a serialization edge with associated costs. */
118 typedef struct _serialization {
119         rss_irn_t  *u;       /* the top node */
120         rss_irn_t  *v;       /* the node about to be serialized after u */
121         rss_edge_t *edge;    /* the edge selected for the serialization */
122         int        omega1;   /* estimated: available regs - RS reduction */
123         int        omega2;   /* increase in critical path length */
124         int        new_killer;
125 } serialization_t;
126
127 typedef struct _rss {
128         phase_t          ph;              /**< Phase to hold some data */
129         heights_t        *h;              /**< The current height object */
130         ir_graph         *irg;            /**< The irg to preprocess */
131         plist_t          *nodes;          /**< The list of interesting nodes */
132         const arch_env_t *arch_env;       /**< The architecture environment */
133         be_abi_irg_t     *abi;            /**< The abi for this irg */
134         pset             *cbc_set;        /**< A set of connected bipartite components */
135         ir_node          *block;          /**< The current block in progress. */
136         int              *idx_map;        /**< Mapping irn indices to per block indices */
137         unsigned         max_height;      /**< maximum height in the current block */
138         rss_opts_t       *opts;           /**< The options */
139         be_lv_t          *liveness;       /**< The liveness information for this irg */
140         pset             *live_block;     /**< Values alive at end of block */
141         const arch_register_class_t *cls; /**< The current register class */
142         DEBUG_ONLY(firm_dbg_module_t *dbg);
143 } rss_t;
144
145 #define get_rss_irn(rss, irn)  (phase_get_or_set_irn_data(&rss->ph, irn))
146
147 /**
148  * We need some special nodes:
149  * a source and a sink for all live-in and live-out values of a block
150  */
151
152 enum {
153         iro_rss_Source,
154         iro_rss_Sink,
155         iro_rss_last
156 };
157
158 static ir_node *_source = NULL;
159 static ir_node *_sink   = NULL;
160
161 #define is_Source(irn) ((irn) == _source)
162 #define is_Sink(irn)   ((irn) == _sink)
163
164 enum {
165         RSS_DUMP_NONE  = 0,
166         RSS_DUMP_CBC   = 1 << 0,
167         RSS_DUMP_PKG   = 1 << 1,
168         RSS_DUMP_KILL  = 1 << 2,
169         RSS_DUMP_DVG   = 1 << 3,
170         RSS_DUMP_MAXAC = 1 << 4,
171         RSS_DUMP_ALL   = (RSS_DUMP_MAXAC << 1) - 1,
172 };
173
174 static rss_opts_t rss_options = {
175         RSS_DUMP_NONE,
176 };
177
178 #ifdef WITH_LIBCORE
179 static const lc_opt_enum_int_items_t dump_items[] = {
180         { "none",  RSS_DUMP_NONE  },
181         { "cbc",   RSS_DUMP_CBC   },
182         { "pkg",   RSS_DUMP_PKG   },
183         { "kill",  RSS_DUMP_KILL  },
184         { "dvg",   RSS_DUMP_DVG   },
185         { "maxac", RSS_DUMP_MAXAC },
186         { "all",   RSS_DUMP_ALL   },
187         { NULL, 0 }
188 };
189
190 static lc_opt_enum_int_var_t dump_var = {
191         &rss_options.dump_flags, dump_items
192 };
193
194 static const lc_opt_table_entry_t rss_option_table[] = {
195         LC_OPT_ENT_ENUM_MASK("dump", "dump phases", &dump_var),
196         { NULL }
197 };
198 #endif /* WITH_LIBCORE */
199
200 /******************************************************************************
201  *  _          _                    __                  _   _
202  * | |        | |                  / _|                | | (_)
203  * | |__   ___| |_ __   ___ _ __  | |_ _   _ _ __   ___| |_ _  ___  _ __  ___
204  * | '_ \ / _ \ | '_ \ / _ \ '__| |  _| | | | '_ \ / __| __| |/ _ \| '_ \/ __|
205  * | | | |  __/ | |_) |  __/ |    | | | |_| | | | | (__| |_| | (_) | | | \__ \
206  * |_| |_|\___|_| .__/ \___|_|    |_|  \__,_|_| |_|\___|\__|_|\___/|_| |_|___/
207  *            | |
208  *            |_|
209  ******************************************************************************/
210
211 /**
212  * Acquire opcodes and create source and sink nodes.
213  */
214 static void init_rss_special_nodes(ir_graph *irg) {
215         ir_node *block         = get_irg_start_block(irg);
216         int     iro_rss_base   = get_next_ir_opcodes(iro_rss_last);
217         ir_op   *op_rss_Source = new_ir_op(iro_rss_base + iro_rss_Source, "rss_Source", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
218         ir_op   *op_rss_Sink   = new_ir_op(iro_rss_base + iro_rss_Sink,   "rss_Sink",   op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
219         _source                = new_ir_node(NULL, irg, block, op_rss_Source, mode_ANY, 0, NULL);
220         _sink                  = new_ir_node(NULL, irg, block, op_rss_Sink, mode_ANY, 0, NULL);
221 }
222
223 static int cmp_int(const void *a, const void *b) {
224         const int *i1 = a;
225         const int *i2 = b;
226
227         return QSORT_CMP(*i1, *i2);
228 }
229
230 static int cmp_child_costs(const void *a, const void *b) {
231         const child_t *c1 = a;
232         const child_t *c2 = b;
233
234         return QSORT_CMP(c1->cost, c2->cost);
235 }
236
237 static int cmp_irn_idx(const void *a, const void *b) {
238         const ir_node *n1 = *(ir_node **)a;
239         const ir_node *n2 = *(ir_node **)b;
240
241         return QSORT_CMP(get_irn_idx(n1), get_irn_idx(n2));
242 }
243
244 static int cmp_rss_edges(const void *a, const void *b) {
245         const rss_edge_t *e1 = a;
246         const rss_edge_t *e2 = b;
247
248         return (e1->src != e2->src) || (e1->tgt != e2->tgt);
249 }
250
251 static int bsearch_for_index(int key, int *arr, size_t len, int force) {
252         int left = 0;
253         int right = len;
254
255         while (right >= left) {
256                 int idx = (left + right) / 2;
257
258                 if (key < arr[idx])
259                         right = idx - 1;
260                 else if (key > arr[idx])
261                         left = idx + 1;
262                 else
263                         return idx;
264         }
265
266         if (force)
267                 assert(0 && "Something is wrong, key not found.");
268         return -1;
269 }
270
271 static ir_node **build_sorted_array_from_list(plist_t *irn_list, struct obstack *obst) {
272         plist_element_t *el;
273         int     i     = 0;
274         int     len   = plist_count(irn_list);
275         ir_node **arr = NEW_ARR_D(ir_node *, obst, len);
276
277         /* copy the list into the array */
278         foreach_plist(irn_list, el) {
279                 arr[i++] = plist_element_get_value(el);
280         }
281
282         /* sort the array by node index */
283         qsort(arr, len, sizeof(arr[0]), cmp_irn_idx);
284
285         return arr;
286 }
287
288 /*****************************************************
289  *      _      _                       _
290  *     | |    | |                     (_)
291  *   __| | ___| |__  _   _  __ _  __ _ _ _ __   __ _
292  *  / _` |/ _ \ '_ \| | | |/ _` |/ _` | | '_ \ / _` |
293  * | (_| |  __/ |_) | |_| | (_| | (_| | | | | | (_| |
294  *  \__,_|\___|_.__/ \__,_|\__, |\__, |_|_| |_|\__, |
295  *                          __/ | __/ |         __/ |
296  *                         |___/ |___/         |___/
297  *
298  *****************************************************/
299
300 #ifdef DEBUG_libfirm
301 static void dump_nodeset(nodeset *ns, const char *prefix) {
302         ir_node *irn;
303         foreach_nodeset(ns, irn) {
304                 ir_fprintf(stderr, "%s%+F\n", prefix, irn);
305         }
306 }
307 #endif
308
309 static void build_file_name(rss_t *rss, const char *suffix, size_t suf_len, char *buf, size_t len) {
310         const char *irg_name;
311
312         memset(buf, 0, len);
313         irg_name = get_entity_name(get_irg_entity(rss->irg));
314         snprintf(buf, len - suf_len, "%s-%s-block-%ld",
315                 irg_name, arch_register_class_name(rss->cls), get_irn_node_nr(rss->block));
316         strcat(buf, suffix);
317 }
318
319 /* Dumps all collected bipartite components of current irg as vcg. */
320 static void debug_vcg_dump_bipartite(rss_t *rss) {
321         cbc_t *cbc;
322         FILE  *f;
323         char  file_name[256];
324         static const char suffix[] = "-RSS-CBC.vcg";
325
326         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
327         f = fopen(file_name, "w");
328
329         ir_fprintf(f, "graph: { title: \"connected bipartite component graph of %+F\"\n", rss->irg);
330         fprintf(f, "display_edge_labels: no\n");
331         fprintf(f, "layoutalgorithm: mindepth\n");
332         fprintf(f, "manhattan_edges: yes\n\n");
333
334         foreach_pset(rss->cbc_set, cbc) {
335                 ir_node    *n;
336                 rss_edge_t *ke;
337
338                 fprintf(f, "graph: { titel: \"cbc %d\" label: \"cbc %d\" status:clustered color:yellow\n", cbc->nr, cbc->nr);
339                 foreach_nodeset(cbc->parents, n) {
340                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
341                 }
342                 foreach_nodeset(cbc->children, n) {
343                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
344                 }
345                 foreach_pset(cbc->kill_edges, ke) {
346                         ir_fprintf(f, "edge: { sourcename: \"n%d_%d\" targetname: \"n%d_%d\" }\n",
347                                 get_irn_node_nr(ke->src), cbc->nr, get_irn_node_nr(ke->tgt), cbc->nr);
348                 }
349                 fprintf(f, "}\n\n");
350         }
351         fprintf(f, "}\n");
352         fclose(f);
353 }
354
355 /* Dump the computed killing function as vcg. */
356 static void debug_vcg_dump_kill(rss_t *rss) {
357         FILE            *f;
358         char            file_name[256];
359         plist_element_t *el;
360         static const char suffix[] = "-RSS-KILL.vcg";
361
362         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
363         f = fopen(file_name, "w");
364
365         ir_fprintf(f, "graph: { title: \"computed kill graph of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
366         fprintf(f, "display_edge_labels: no\n");
367         fprintf(f, "layoutalgorithm: mindepth\n");
368         fprintf(f, "manhattan_edges: yes\n\n");
369
370         {
371                 /* reset dump_flag */
372                 ir_node *irn;
373                 foreach_phase_irn(&rss->ph, irn) {
374                         rss_irn_t *node = get_rss_irn(rss, irn);
375                         node->dumped = 0;
376                 }
377         }
378
379         /* dump all nodes and their killers */
380         foreach_plist(rss->nodes, el) {
381                 ir_node   *irn     = plist_element_get_value(el);
382                 rss_irn_t *rirn    = get_rss_irn(rss, irn);
383                 rss_irn_t *pk_rirn = get_rss_irn(rss, rirn->killer);
384
385                 if (! rirn->dumped) {
386                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
387                         rirn->dumped = 1;
388                 }
389
390                 if (! pk_rirn->dumped) {
391                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(rirn->killer), rirn->killer);
392                         pk_rirn->dumped = 1;
393                 }
394
395                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
396                         get_irn_node_nr(rirn->killer), get_irn_node_nr(irn));
397         }
398
399         fprintf(f, "}\n");
400         fclose(f);
401 }
402
403 /* Dumps the potential killing DAG (PKG) as vcg. */
404 static void debug_vcg_dump_pkg(rss_t *rss, nodeset *max_ac, int iteration) {
405         FILE              *f;
406         char              file_name[256];
407         char              *suffix   = alloca(32);
408         static const char suffix1[] = "-RSS-PKG.vcg";
409         static const char suffix2[] = "-RSS-PKG-MAXAC.vcg";
410         plist_element_t   *el;
411
412         if (! max_ac) {
413                 snprintf(suffix, 32, "%s", suffix1);
414         }
415         else {
416                 snprintf(suffix, 32, "-%02d%s", iteration, suffix2);
417         }
418
419         build_file_name(rss, suffix, strlen(suffix) + 1, file_name, sizeof(file_name));
420         f = fopen(file_name, "w");
421
422         ir_fprintf(f, "graph: { title: \"potential killing DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
423         fprintf(f, "display_edge_labels: no\n");
424         fprintf(f, "layoutalgorithm: mindepth\n");
425         fprintf(f, "manhattan_edges: yes\n\n");
426
427         {
428                 /* reset dump_flag */
429                 ir_node *irn;
430                 foreach_phase_irn(&rss->ph, irn) {
431                         rss_irn_t *node = get_rss_irn(rss, irn);
432                         node->dumped = 0;
433                 }
434         }
435
436         foreach_plist(rss->nodes, el) {
437                 ir_node   *irn  = plist_element_get_value(el);
438                 rss_irn_t *rirn = get_rss_irn(rss, irn);
439                 char      *c1   = "";
440                 plist_element_t *k_el;
441
442                 /* dump selected saturating values in yellow */
443                 if (max_ac && nodeset_find(max_ac, irn))
444                         c1 = "color:yellow";
445
446                 if (! rirn->dumped) {
447                         if (rirn->chain)
448                                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(irn), irn, rirn->chain->nr, c1);
449                         else
450                                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(irn), irn, c1);
451                         rirn->dumped = 1;
452                 }
453
454                 foreach_plist(rirn->pkiller_list, k_el) {
455                         ir_node   *pkiller = plist_element_get_value(k_el);
456                         rss_irn_t *pk_rirn = get_rss_irn(rss, pkiller);
457                         char      *c2      = "";
458
459                         if (max_ac && nodeset_find(max_ac, pkiller))
460                                 c2 = "color:yellow";
461
462                         if (! pk_rirn->dumped) {
463                                 if (pk_rirn->chain)
464                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(pkiller), pkiller, pk_rirn->chain->nr, c2);
465                                 else
466                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(pkiller), pkiller, c2);
467                                 pk_rirn->dumped = 1;
468                         }
469                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
470                                 get_irn_node_nr(pkiller), get_irn_node_nr(irn));
471                 }
472         }
473         fprintf(f, "}\n");
474         fclose(f);
475 }
476
477 /* Dumps the disjoint value DAG (DVG) as vcg. */
478 static void debug_vcg_dump_dvg(rss_t *rss, dvg_t *dvg) {
479         static const char suffix[] = "-RSS-DVG.vcg";
480         FILE       *f;
481         char       file_name[256];
482         ir_node    *irn;
483         rss_edge_t *edge;
484
485         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
486         f = fopen(file_name, "w");
487
488         ir_fprintf(f, "graph: { title: \"disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
489         fprintf(f, "display_edge_labels: no\n");
490         fprintf(f, "layoutalgorithm: mindepth\n");
491         fprintf(f, "manhattan_edges: yes\n\n");
492
493         /* dump all nodes */
494         foreach_nodeset(dvg->nodes, irn) {
495                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
496         }
497
498         /* dump all edges */
499         foreach_pset(dvg->edges, edge) {
500                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
501                         get_irn_node_nr(edge->src), get_irn_node_nr(edge->tgt));
502         }
503
504         fprintf(f, "}\n");
505         fclose(f);
506 }
507
508 #if 0
509 /* Dumps the PKG(DVG). */
510 static void debug_vcg_dump_dvg_pkiller(rss_t *rss, dvg_t *dvg) {
511         static const char suffix[] = "-RSS-DVG-PKG.vcg";
512         FILE       *f;
513         char       file_name[256];
514         ir_node    *irn;
515
516         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
517         f = fopen(file_name, "w");
518
519         ir_fprintf(f, "graph: { title: \"PKG of disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
520         fprintf(f, "display_edge_labels: no\n");
521         fprintf(f, "layoutalgorithm: mindepth\n");
522         fprintf(f, "manhattan_edges: yes\n\n");
523
524         /* dump all nodes */
525         foreach_nodeset(dvg->nodes, irn) {
526                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
527         }
528
529         /* dump all edges */
530         foreach_nodeset(dvg->nodes, irn) {
531                 rss_irn_t       *node = get_rss_irn(rss, irn);
532                 plist_element_t *el;
533
534                 foreach_plist(node->dvg_pkiller_list, el) {
535                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
536                                 get_irn_node_nr(plist_element_get_value(el)), get_irn_node_nr(irn));
537                 }
538         }
539
540         fprintf(f, "}\n");
541         fclose(f);
542 }
543 #endif /* if 0 */
544
545 /**
546  * In case there is no rss information for irn, initialize it.
547  */
548 static void *init_rss_irn(phase_t *ph, ir_node *irn, void *old) {
549         rss_irn_t *res = old ? old : phase_alloc(ph, sizeof(res[0]));
550
551         res->descendant_list = plist_obstack_new(phase_obst(ph));
552         res->descendants     = NULL;
553
554         res->consumer_list   = plist_obstack_new(phase_obst(ph));
555         res->consumer        = NULL;
556
557         res->pkiller_list    = plist_obstack_new(phase_obst(ph));
558
559         res->parent_list     = plist_obstack_new(phase_obst(ph));
560
561         res->killer          = NULL;
562         res->irn             = irn;
563         res->chain           = NULL;
564
565         res->kill_count      = 0;
566         res->desc_walk       = 0;
567         res->live_out        = 0;
568         res->visited         = 0;
569         res->handled         = 0;
570         res->dumped          = 0;
571         res->havecons        = 0;
572
573         return res;
574 }
575
576 /**
577  * Collect all nodes data dependent on current node.
578  */
579 static void collect_descendants(rss_t *rss, rss_irn_t *rirn, ir_node *irn, int *got_sink, unsigned cur_desc_walk) {
580         const ir_edge_t *edge;
581         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
582         ir_node         *block    = rss->block;
583         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
584         int             i;
585
586         if (cur_node->desc_walk >= cur_desc_walk)
587                 return;
588         cur_node->desc_walk = cur_desc_walk;
589
590         /* Stop at Barriers */
591         if (be_is_Barrier(irn))
592                 return;
593
594         /* loop over normal and dependency edges */
595         for (i = 0; i < 2; ++i) {
596                 foreach_out_edge_kind(irn, edge, ekind[i]) {
597                         ir_node *user = get_edge_src_irn(edge);
598
599                         /* skip ignore nodes as they do not really contribute to register pressure */
600                         if (arch_irn_is(rss->arch_env, user, ignore))
601                                 continue;
602
603                         /*
604                                 (a) mode_X means Jump            -> out_edge
605                                 (b) Phi as user of a normal node -> out-edge
606                         */
607                         if (get_irn_mode(user) == mode_X || is_Phi(user)) {
608                                 if (! *got_sink)
609                                         goto force_sink;
610                                 else
611                                         continue;
612                         }
613
614                         if (is_Proj(user)) {
615
616                                 //if (arch_get_irn_reg_class(rss->arch_env, user, -1) == rss->cls)
617                                         collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
618                         }
619                         else {
620                                 /* check if user lives in block and is not a control flow node */
621                                 if (get_nodes_block(user) == block) {
622                                         if (! plist_has_value(rirn->descendant_list, user)) {
623                                                 plist_insert_back(rirn->descendant_list, user);
624                                                 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", user));
625                                         }
626                                         collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
627                                 }
628                                 else if (! *got_sink) {
629 force_sink:
630                                         /* user lives out of block: add sink as descendant if not already done */
631                                         plist_insert_back(rirn->descendant_list, _sink);
632                                         *got_sink = 1;
633                                         DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", _sink));
634                                 }
635                         }
636                 }
637         }
638 }
639
640 /**
641  * Handles a single consumer.
642  */
643 static void collect_single_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *consumer, int *got_sink) {
644         ir_node *block = rss->block;
645
646         assert(! is_Proj(consumer) && "Cannot handle Projs");
647
648         if (! is_Phi(consumer) && ! is_Block(consumer) && get_nodes_block(consumer) == block) {
649                 if (! arch_irn_is(rss->arch_env, consumer, ignore) && ! plist_has_value(rss_irn->consumer_list, consumer)) {
650                         plist_insert_back(rss_irn->consumer_list, consumer);
651                         DBG((rss->dbg, LEVEL_2, "\t\tconsumer %+F\n", consumer));
652                 }
653         }
654         else {
655                 rss_irn->live_out = 1;
656                 DBG((rss->dbg, LEVEL_2, "\t\tlive out %+F", consumer));
657                 if (! *got_sink) {
658                         plist_insert_back(rss_irn->consumer_list, _sink);
659                         *got_sink = 1;
660                         DB((rss->dbg, LEVEL_2, ", %+F added instead", _sink));
661                 }
662                 DB((rss->dbg, LEVEL_2, "\n"));
663         }
664 }
665
666 /**
667  * Collect all nodes consuming the value(s) produced by current node.
668  */
669 static void collect_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *irn, int *got_sink) {
670         const ir_edge_t *edge;
671         int             i;
672         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
673         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
674
675         if (cur_node->havecons)
676                 return;
677         cur_node->havecons = 1;
678
679         for (i = 0; i < 2; ++i) {
680                 foreach_out_edge_kind(irn, edge, ekind[i]) {
681                         ir_node *consumer = get_edge_src_irn(edge);
682
683                         if (is_Proj(consumer)) {
684                                 //if (arch_get_irn_reg_class(rss->arch_env, consumer, -1) == rss->cls)
685                                         collect_consumer(rss, rss_irn, consumer, got_sink);
686                         }
687                         else
688                                 collect_single_consumer(rss, rss_irn, consumer, got_sink);
689                 }
690         }
691 }
692
693 /**
694  * Collects all consumer and descendant of a irn.
695  */
696 static void collect_node_info(rss_t *rss, ir_node *irn) {
697         static unsigned cur_desc_walk = 0;
698         rss_irn_t       *rss_irn      = get_rss_irn(rss, irn);
699         int             got_sink;
700
701         assert(! is_Proj(irn) && "Cannot handle Projs.");
702
703         /* check if node info is already available */
704         if (rss_irn->handled)
705                 return;
706                 //phase_reinit_single_irn_data(&rss->ph, irn);
707
708         DBG((rss->dbg, LEVEL_1, "\tcomputing consumers of %+F:\n", irn));
709
710         /* collect all consumer */
711         got_sink = 0;
712         collect_consumer(rss, rss_irn, irn, &got_sink);
713
714         /* build sorted consumer array */
715         rss_irn->consumer = build_sorted_array_from_list(rss_irn->consumer_list, phase_obst(&rss->ph));
716
717         DBG((rss->dbg, LEVEL_1, "\tcompute descendants of %+F:\n", irn));
718
719         /* collect descendants */
720         got_sink = 0;
721         collect_descendants(rss, rss_irn, irn, &got_sink, ++cur_desc_walk);
722
723         /* build sorted descendant array */
724         rss_irn->descendants = build_sorted_array_from_list(rss_irn->descendant_list, phase_obst(&rss->ph));
725
726         rss_irn->handled = 1;
727 }
728
729 /**
730  * Checks if v is a potential killer of u.
731  * v is in pkill(u) iff descendants(v) cut consumer(u) is v
732  *
733  * @param rss   The rss object
734  * @param v      The node to check for killer
735  * @param u      The potentially killed value
736  * @return 1 if v is in pkill(u), 0 otherwise
737  */
738 static int is_potential_killer(rss_t *rss, rss_irn_t *v, rss_irn_t *u) {
739         plist_t *list;
740         ir_node **arr;
741         plist_element_t *el;
742
743         assert(is_Sink(v->irn) || ((plist_count(v->descendant_list) > 0 && v->descendants) || 1));
744         assert(is_Sink(u->irn) || ((plist_count(u->consumer_list)   > 0 && u->consumer)    || 1));
745
746         /* as we loop over the list: loop over the shorter one */
747         if (plist_count(v->descendant_list) > plist_count(u->consumer_list)) {
748                 list = u->consumer_list;
749                 arr  = v->descendants;
750         }
751         else {
752                 list = v->descendant_list;
753                 arr  = u->consumer;
754         }
755
756         /* for each list element: try to find element in array */
757         foreach_plist(list, el) {
758                 ir_node *irn   = plist_element_get_value(el);
759                 ir_node *match = BSEARCH_IRN_ARR(irn, arr);
760
761                 if (match && match != irn)
762                         return 0;
763         }
764
765         return 1;
766 }
767
768 /**
769  * Update descendants, consumer and pkiller list for the given irn.
770  */
771 static void update_node_info(rss_t *rss, ir_node *irn, ir_node *pk_irn) {
772         rss_irn_t *node    = get_rss_irn(rss, irn);
773         rss_irn_t *pkiller = get_rss_irn(rss, pk_irn);
774
775         /* add consumer and rebuild the consumer array */
776         if (! plist_has_value(node->consumer_list, pk_irn)) {
777                 plist_insert_back(node->consumer_list, pk_irn);
778                 node->consumer = build_sorted_array_from_list(node->consumer_list, phase_obst(&rss->ph));
779         }
780
781         /* add pk_irn as descendant, add it's descendants to irn's and rebuild array */
782         if (! plist_has_value(node->descendant_list, pk_irn)) {
783                 plist_element_t *el;
784
785                 plist_insert_back(node->descendant_list, pk_irn);
786
787                 foreach_plist(pkiller->descendant_list, el) {
788                         ir_node *desc = plist_element_get_value(el);
789
790                         if (! plist_has_value(node->descendant_list, desc))
791                                 plist_insert_back(node->descendant_list, desc);
792                 }
793
794                 node->descendants = build_sorted_array_from_list(node->descendant_list, phase_obst(&rss->ph));
795         }
796
797 }
798
799 /**
800  * Compute the potential killing set PK.
801  */
802 static void compute_pkill_set(rss_t *rss) {
803         plist_element_t *u_el, *v_el;
804
805         foreach_plist(rss->nodes, u_el) {
806                 ir_node   *u_irn = plist_element_get_value(u_el);
807                 rss_irn_t *u     = get_rss_irn(rss, u_irn);
808
809                 DBG((rss->dbg, LEVEL_1, "\tcomputing potential killers of %+F:\n", u_irn));
810
811                 /* check each consumer if it is a potential killer  */
812                 foreach_plist(u->consumer_list, v_el) {
813                         ir_node   *v_irn = plist_element_get_value(v_el);
814                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
815
816                         /* check, if v is a potential killer of u */
817                         if (is_potential_killer(rss, v, u)) {
818                                 if (! plist_has_value(u->pkiller_list, v_irn))
819                                         plist_insert_back(u->pkiller_list, v_irn);
820                                 v->kill_count++;
821                                 DBG((rss->dbg, LEVEL_2, "\t\tpotential killer %+F\n", v_irn));
822                         }
823                 }
824
825                 u->killer = _sink;
826         }
827
828         if (rss->opts->dump_flags & RSS_DUMP_PKG)
829                 debug_vcg_dump_pkg(rss, NULL, 0);
830 }
831
832 /**
833  * Build set of killing edges (from values to their potential killers)
834  */
835 static void build_kill_edges(rss_t *rss, pset *epk) {
836         plist_element_t *el, *k_el;
837
838         foreach_plist(rss->nodes, el) {
839                 ir_node    *irn  = plist_element_get_value(el);
840                 rss_irn_t *rirn = get_rss_irn(rss, irn);
841
842                 DBG((rss->dbg, LEVEL_3, "\t\tbuilding kill edges for %+F:\n", irn));
843
844                 foreach_plist(rirn->pkiller_list, k_el) {
845                         ir_node    *pkiller = plist_element_get_value(k_el);
846                         rss_edge_t *ke      = obstack_alloc(phase_obst(&rss->ph), sizeof(*ke));
847
848                         ke->src  = irn;
849                         ke->tgt  = pkiller;
850                         ke->next = NULL;
851
852                         DBG((rss->dbg, LEVEL_3, "\t\t\ttarget %+F\n", pkiller));
853
854                         pset_insert(epk, ke, HASH_RSS_EDGE(ke));
855                 }
856         }
857 }
858
859 #ifdef DEBUG_libfirm
860 /* print the given cbc for debugging purpose */
861 static void debug_print_cbc(firm_dbg_module_t *mod, cbc_t *cbc) {
862         ir_node    *n;
863         rss_edge_t *ke;
864
865         DBG((mod, LEVEL_3, "\t\tS = set of parents:\n"));
866         foreach_nodeset(cbc->parents, n) {
867                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
868         }
869         DBG((mod, LEVEL_3, "\t\tT = set of children:\n"));
870         foreach_nodeset(cbc->children, n) {
871                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
872         }
873         DBG((mod, LEVEL_3, "\t\tE = Edges from producers to consumers\n"));
874         foreach_pset(cbc->kill_edges, ke) {
875                 DBG((mod, LEVEL_3, "\t\t\t%+F -> %+F\n", ke->src, ke->tgt));
876         }
877 }
878 #endif
879
880 /**
881  * Construct the bipartite decomposition.
882  * Sid-Ahmed-Ali Touati, Phd Thesis
883  * Register Pressure in Instruction Level Parallelism, p. 71
884  */
885 static void compute_bipartite_decomposition(rss_t *rss) {
886         pset *epk    = new_pset(cmp_rss_edges, 10);
887         int  cur_num = 0;
888
889         plist_element_t *el;
890
891         DBG((rss->dbg, LEVEL_1, "\tcomputing bipartite decomposition:\n"));
892
893         build_kill_edges(rss, epk);
894
895         foreach_plist(rss->nodes, el) {
896                 ir_node   *u_irn   = plist_element_get_value(el);
897                 rss_irn_t *u       = get_rss_irn(rss, u_irn);
898                 int       p_change = 1;
899                 int       c_change = 1;
900                 int       vrfy_ok  = 1;
901
902                 cbc_t           *cbc;
903                 plist_element_t *el2;
904                 rss_edge_t      *k_edge;
905                 rss_edge_t      *kedge_root = NULL;
906                 ir_node         *t_irn, *s_irn;
907
908                 if (u->visited || u_irn == _sink)
909                         continue;
910
911                 DBG((rss->dbg, LEVEL_2, "\t\t%+F choosen:\n", u_irn));
912
913                 cbc     = obstack_alloc(phase_obst(&rss->ph), sizeof(*cbc));
914                 cbc->nr = cur_num++;
915
916                 /* initialize S_cb */
917                 cbc->parents = new_nodeset(5);
918                 nodeset_insert(cbc->parents, u_irn);
919                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (init)\n", u_irn));
920
921                 /* E_cb = empty */
922                 cbc->kill_edges = new_pset(cmp_rss_edges, 5);
923
924                 /* each parent gets killed by at least one value from children */
925
926                 /* T_cb = PK_successors(u) */
927                 cbc->children = new_nodeset(5);
928                 foreach_plist(u->pkiller_list, el2) {
929                         nodeset_insert(cbc->children, plist_element_get_value(el2));
930                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (init)\n", plist_element_get_value(el2)));
931                 }
932
933                 /*
934                         Now: insert the parents of all children into the parent set
935                         and insert the children of all parents into the children set
936                         until the sets don't change any more
937                 */
938                 while (p_change || c_change) {
939                         p_change = c_change = 0;
940
941                         /* accumulate parents */
942                         foreach_nodeset(cbc->children, t_irn) {
943                                 foreach_pset(epk, k_edge) {
944                                         ir_node *val = k_edge->src;
945
946                                         if (k_edge->tgt == t_irn && ! nodeset_find(cbc->parents, val)) {
947                                                 nodeset_insert(cbc->parents, val);
948                                                 p_change = 1;
949                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (killed by %+F)\n", val, t_irn));
950                                         }
951                                 }
952                         }
953
954                         /* accumulate children */
955                         foreach_nodeset(cbc->parents, s_irn) {
956                                 foreach_pset(epk, k_edge) {
957                                         ir_node *val = k_edge->tgt;
958
959                                         if (k_edge->src == s_irn  && ! nodeset_find(cbc->children, val)) {
960                                                 nodeset_insert(cbc->children, val);
961                                                 c_change = 1;
962                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (kills %+F)\n", val, s_irn));
963                                         }
964                                 }
965                         }
966                 }
967
968                 /* mark all parent values as visited */
969                 foreach_nodeset(cbc->parents, s_irn) {
970                         rss_irn_t *s = get_rss_irn(rss, s_irn);
971                         s->visited = 1;
972                         /* assure bipartite property */
973 #if 0
974                         if (nodeset_find(cbc->children, s_irn)) {
975                                 nodeset_remove(cbc->children, s_irn);
976                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F removed from to children\n", s_irn));
977                         }
978 #endif
979                 }
980
981                 /* update edges */
982                 foreach_pset(epk, k_edge) {
983                         if (nodeset_find(cbc->parents, k_edge->src) && nodeset_find(cbc->children, k_edge->tgt)) {
984                                 pset_insert(cbc->kill_edges, k_edge, HASH_RSS_EDGE(k_edge));
985                                 /*
986                                         Link all k_edges which are about to be removed together.
987                                         Beware: pset_remove kills the iterator.
988                                 */
989                                 k_edge->next = kedge_root;
990                                 kedge_root   = k_edge;
991                         }
992                 }
993
994                 /* remove all linked k_edges */
995                 for (; kedge_root; kedge_root = kedge_root->next) {
996                         pset_remove(epk, kedge_root, HASH_RSS_EDGE(kedge_root));
997                 }
998
999                 /* verify the cbc */
1000                 foreach_nodeset(cbc->parents, s_irn) {
1001                         int is_killed = 0;
1002
1003                         foreach_pset(cbc->kill_edges, k_edge) {
1004                                 if (k_edge->src == s_irn) {
1005                                         is_killed = 1;
1006                                         pset_break(cbc->kill_edges);
1007                                         break;
1008                                 }
1009                         }
1010
1011                         if (! is_killed) {
1012                                 vrfy_ok = 0;
1013                                 ir_fprintf(stderr, "Warning: parent %+F is not killed in current cbc\n", s_irn);
1014                         }
1015                 }
1016                 assert(vrfy_ok && "Verification of CBC failed");
1017
1018                 /* add the connected bipartite component */
1019                 if (nodeset_count(cbc->parents) > 0 && nodeset_count(cbc->children) > 0 && pset_count(cbc->kill_edges) > 0)
1020                         pset_insert(rss->cbc_set, cbc, (unsigned)cbc->nr);
1021                 DBG((rss->dbg, LEVEL_2, "\tbipartite component %d inserted:\n", cbc->nr));
1022                 DEBUG_ONLY(
1023                         if (firm_dbg_get_mask(rss->dbg) & LEVEL_2)
1024                                 debug_print_cbc(rss->dbg, cbc);
1025                 );
1026         }
1027
1028         if (rss->opts->dump_flags & RSS_DUMP_CBC)
1029                 debug_vcg_dump_bipartite(rss);
1030
1031         del_pset(epk);
1032 }
1033
1034 /**
1035  * Select the child with the maximum cost.
1036  */
1037 static child_t *select_child_max_cost(rss_t *rss, nodeset *x, nodeset *y, child_t *t, cbc_t *cbc) {
1038         ir_node *child;
1039         float   max_cost = -1.0f;
1040
1041         DBG((rss->dbg, LEVEL_2, "\t\tcomputing children costs:\n"));
1042
1043         foreach_nodeset(cbc->children, child) {
1044                 rss_irn_t  *r_child             = get_rss_irn(rss, child);
1045                 int         num_unkilled_parents = 0;
1046                 int         num_descendants;
1047                 rss_edge_t *k_edge;
1048                 float       cost;
1049
1050                 /* get the number of unkilled parents */
1051                 foreach_pset(cbc->kill_edges, k_edge) {
1052                         if (k_edge->tgt == child && nodeset_find(x, k_edge->src))
1053                                 ++num_unkilled_parents;
1054                 }
1055
1056                 cost = (float)num_unkilled_parents;
1057
1058                 num_descendants = plist_count(r_child->descendant_list) + nodeset_count(y);
1059
1060                 if (num_descendants > 0)
1061                         cost /= (float)num_descendants;
1062
1063                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F, #desc %d, cost %.3f\n", child, num_descendants, cost));
1064
1065                 if (cost > max_cost) {
1066                         t->irn   = child;
1067                         t->cost  = cost;
1068                         max_cost = cost;
1069                 }
1070         }
1071
1072         return t;
1073 }
1074
1075 /**
1076  * Remove all parents from x which are killed by t_irn.
1077  */
1078 static void remove_covered_parents(rss_t *rss, nodeset *x, ir_node *t_irn, cbc_t *cbc) {
1079         rss_irn_t  *t = get_rss_irn(rss, t_irn);
1080         rss_edge_t *k_edge;
1081
1082         DBG((rss->dbg, LEVEL_2, "\t\tremoving parents covered by %+F:\n", t_irn));
1083
1084         foreach_pset(cbc->kill_edges, k_edge) {
1085                 if (k_edge->tgt == t_irn && nodeset_find(x, k_edge->src)) {
1086                         nodeset_remove(x, k_edge->src);
1087                         plist_insert_back(t->parent_list, k_edge->src);
1088                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", k_edge->src));
1089                 }
1090         }
1091 }
1092
1093 static void update_cumulated_descendent_values(rss_t *rss, nodeset *y, ir_node *t_irn) {
1094         rss_irn_t *t = get_rss_irn(rss, t_irn);
1095         plist_element_t *el;
1096
1097         DBG((rss->dbg, LEVEL_2, "\t\tupdating cumulated descendant value of %+F:\n", t_irn));
1098
1099         foreach_plist(t->descendant_list, el) {
1100                 nodeset_insert(y, plist_element_get_value(el));
1101                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", plist_element_get_value(el)));
1102         }
1103 }
1104
1105 /**
1106  * Greedy-k: a heuristics for the MMA problem
1107  */
1108 static void compute_killing_function(rss_t *rss) {
1109         cbc_t *cbc;
1110         struct obstack obst;
1111
1112         obstack_init(&obst);
1113
1114         rss->cbc_set = pset_new_ptr(5);
1115         compute_bipartite_decomposition(rss);
1116
1117         /* for all bipartite components do: */
1118         foreach_pset(rss->cbc_set, cbc) {
1119                 ir_node *p;
1120                 nodeset *x       = new_nodeset(10);
1121                 nodeset *y       = new_nodeset(10);
1122                 child_t **sks    = NEW_ARR_F(child_t *, 20);
1123                 int     cur_len  = 0;
1124                 int     cur_size = 20;
1125                 int     i;
1126
1127                 DBG((rss->dbg, LEVEL_1, "\tcomputing SKS for cbc %d:\n", cbc->nr));
1128                 DBG((rss->dbg, LEVEL_2, "\t\tinitializing parents X:\n"));
1129
1130                 /* X = S_cb (all parents are initially uncovered) */
1131                 foreach_nodeset(cbc->parents, p) {
1132                         nodeset_insert(x, p);
1133                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", p));
1134                 }
1135
1136                 /* while X not empty */
1137                 while (nodeset_count(x) > 0) {
1138                         child_t *t = obstack_alloc(&obst, sizeof(*t));
1139                         memset(t, 0, sizeof(*t));
1140
1141                         t = select_child_max_cost(rss, x, y, t, cbc);
1142
1143                         if (cur_len >= cur_size) {
1144                                 ARR_EXTO(child_t *, sks, cur_size * 2);
1145                                 cur_size *= 2;
1146                         }
1147
1148                         DBG((rss->dbg, LEVEL_2, "\t\tinsert child %+F (%.3f) into SKS at pos %d\n", t->irn, t->cost, cur_len));
1149
1150                         sks[cur_len++] = t;
1151                         remove_covered_parents(rss, x, t->irn, cbc);
1152                         update_cumulated_descendent_values(rss, y, t->irn);
1153                 }
1154
1155                 ARR_SHRINKLEN(sks, cur_len);
1156
1157                 /* sort SKS in increasing cost order */
1158                 qsort(sks, cur_len, sizeof(sks[0]), cmp_child_costs);
1159
1160                 DBG((rss->dbg, LEVEL_2, "\tprocessing SKS for cbc %d:\n", cbc->nr));
1161
1162                 /* build killing function */
1163                 for (i = cur_len - 1; i >= 0; --i) { /* loop over sks in decreasing cost order */
1164                         child_t         *t = sks[i];
1165                         rss_irn_t       *rt = get_rss_irn(rss, t->irn);
1166                         plist_element_t *p_el;
1167
1168                         DBG((rss->dbg, LEVEL_3, "\t\t\tkiller %+F (%.3f):\n", t->irn, t->cost));
1169
1170                         /* kill all unkilled parents of t */
1171                         foreach_plist(rt->parent_list, p_el) {
1172                                 ir_node    *par = plist_element_get_value(p_el);
1173                                 rss_irn_t *rpar = get_rss_irn(rss, par);
1174
1175                                 if (is_Sink(rpar->killer)) {
1176                                         rpar->killer = t->irn;
1177                                         DBG((rss->dbg, LEVEL_2, "\t\tkill %+F\n", rpar->irn));
1178                                 }
1179                                 else {
1180                                         DBG((rss->dbg, LEVEL_3, "\t\t\tkeeping %+F as killer for %+F\n", rpar->killer, rpar->irn));
1181                                 }
1182                         }
1183                 }
1184
1185                 del_nodeset(x);
1186                 del_nodeset(y);
1187                 DEL_ARR_F(sks);
1188         }
1189
1190         if (rss->opts->dump_flags & RSS_DUMP_KILL)
1191                 debug_vcg_dump_kill(rss);
1192
1193         del_pset(rss->cbc_set);
1194         obstack_free(&obst, NULL);
1195 }
1196
1197 /**
1198  * Adds the edge src -> tgt to the dvg. Checks if reverse edge is already there (asserts).
1199  */
1200 static INLINE void add_dvg_edge(rss_t *rss, dvg_t *dvg, ir_node *src, ir_node *tgt, int have_source) {
1201         rss_edge_t *dvg_edge;
1202         rss_edge_t key;
1203
1204         if (! have_source)
1205                 nodeset_insert(dvg->nodes, src);
1206         else
1207                 assert(nodeset_find(dvg->nodes, src) != NULL && "Wrong assumption");
1208
1209         nodeset_insert(dvg->nodes, tgt);
1210
1211         key.src = tgt;
1212         key.tgt = src;
1213         assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1214
1215         key.src = src;
1216         key.tgt = tgt;
1217         if (NULL != pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1218                 /* add the edge to the DVG */
1219                 dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1220
1221                 dvg_edge->src  = src;
1222                 dvg_edge->tgt  = tgt;
1223                 dvg_edge->next = NULL;
1224
1225                 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", src, tgt));
1226                 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1227         }
1228 }
1229
1230 /**
1231  * Computes the disjoint value DAG (DVG).
1232  * BEWARE: It is not made explicitly clear in the Touati paper,
1233  *         but the DVG is meant to be build from the KILLING DAG
1234  */
1235 static void compute_dvg(rss_t *rss, dvg_t *dvg) {
1236         plist_element_t *el;
1237
1238         DBG((rss->dbg, LEVEL_1, "\tcomputing DVG:\n"));
1239
1240         foreach_plist(rss->nodes, el) {
1241                 ir_node    *u_irn    = plist_element_get_value(el);
1242                 rss_irn_t  *u        = get_rss_irn(rss, u_irn);
1243                 rss_irn_t  *u_killer = get_rss_irn(rss, u->killer);
1244                 int        i;
1245
1246                 /* TODO: omit nodes only having sink as pkiller and killing no one */
1247
1248                 /* add an edge to killer */
1249                 add_dvg_edge(rss, dvg, u_irn, u->killer, 0);
1250
1251                 if (is_Sink(u->killer))
1252                         continue;
1253
1254                 /* We add an edge to every killer, from where we could be reached. */
1255                 for (i = ARR_LEN_SAFE(u_killer->descendants) - 1; i >= 0; --i) {
1256                         add_dvg_edge(rss, dvg, u_irn, u_killer->descendants[i], 1);
1257                 }
1258
1259 #if 0
1260
1261                 foreach_plist(rss->nodes, el2) {
1262                         ir_node *v_irn = plist_element_get_value(el2);
1263
1264                         /*
1265                                 There is an edge (u, v) in the DVG iff v is a descendant of the killer(u).
1266                         */
1267                         if (BSEARCH_IRN_ARR(v_irn, u_kill->descendants)) {
1268                                 rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1269                                 rss_edge_t key;
1270
1271                                 /* insert the user into the DVG and append it to the user list of u */
1272                                 nodeset_insert(dvg->nodes, v_irn);
1273                                 if (! plist_has_value(u->dvg_user_list, v_irn))
1274                                         plist_insert_back(u->dvg_user_list, v_irn);
1275
1276                                 dvg_edge->src  = u_irn;
1277                                 dvg_edge->tgt  = v_irn;
1278                                 dvg_edge->next = NULL;
1279
1280                                 key.src = v_irn;
1281                                 key.tgt = u_irn;
1282
1283                                 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1284
1285                                 /* add the edge to the DVG */
1286                                 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", u_irn, v_irn));
1287                                 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1288                         }
1289                 }
1290 #endif /* if 0 */
1291         }
1292
1293         if (rss->opts->dump_flags & RSS_DUMP_DVG)
1294                 debug_vcg_dump_dvg(rss, dvg);
1295 }
1296
1297 /**
1298  * Updates the dvg structure when a serialization edge from src -> tgt is added.
1299  */
1300 static void update_dvg(rss_t *rss, dvg_t *dvg, rss_irn_t *src, rss_irn_t *tgt) {
1301         int i, j, idx;
1302         rss_edge_t *edge;
1303         rss_edge_t **arr = alloca(pset_count(dvg->edges) * sizeof(arr[0]));
1304
1305         /*
1306                 Add an edge from serialization target to serialization src:
1307                 src cannot be alive with target
1308         */
1309         add_dvg_edge(rss, dvg, tgt->irn, src->irn, 0);
1310
1311         /* Add edges to src's descendants as well, they also getting serialized. */
1312         for (i = ARR_LEN_SAFE(src->descendants) - 1; i >= 0; --i) {
1313                 add_dvg_edge(rss, dvg, tgt->irn, src->descendants[i], 1);
1314         }
1315
1316         /* We also have to add edges from targets predecessors in dvg */
1317         idx = 0;
1318         /* We cannot insert elements into set over which we iterate ... */
1319         foreach_pset(dvg->edges, edge) {
1320                 if (edge->tgt == tgt->irn) {
1321                         arr[idx++] = edge;
1322                 }
1323         }
1324
1325         for (i = 0; i < idx; ++i) {
1326                 edge = arr[i];
1327                 add_dvg_edge(rss, dvg, edge->src, src->irn, 1);
1328                 for (j = ARR_LEN_SAFE(src->descendants) - 1; j >= 0; --j) {
1329                         add_dvg_edge(rss, dvg, edge->src, src->descendants[j], 1);
1330                 }
1331         }
1332 }
1333
1334 #if 0
1335 /**
1336  * Accumulate all descendants for root into list.
1337  */
1338 static void accumulate_dvg_descendant_values(rss_t *rss, rss_irn_t *root, plist_t *list) {
1339         if (plist_count(root->dvg_user_list) > 0) {
1340                 plist_element_t *el;
1341
1342                 foreach_plist(root->dvg_user_list, el) {
1343                         ir_node   *v_irn = plist_element_get_value(el);
1344                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
1345
1346                         /* add v as descendant */
1347                         if (! plist_has_value(list, v_irn)) {
1348                                 plist_insert_back(list, v_irn);
1349                                 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG descendant %+F\n", v_irn));
1350                         }
1351
1352                         /* accumulate v's descendants */
1353                         accumulate_dvg_descendant_values(rss, v, list);
1354                 }
1355         }
1356 }
1357
1358 /**
1359  * Builds the list of potential killers for each node
1360  * in the given DVG.
1361  * Needs the descendant list for all user as sorted array.
1362  */
1363 static void build_dvg_pkiller_list(rss_t *rss, dvg_t *dvg) {
1364         ir_node *irn;
1365
1366         foreach_nodeset(dvg->nodes, irn) {
1367                 rss_irn_t       *node = get_rss_irn(rss, irn);
1368                 plist_element_t *el, *el2;
1369
1370                 DBG((rss->dbg, DEBUG_DVG, "\t\tbuilding pkiller list for %+F\n", irn));
1371
1372                 /* check each user */
1373                 foreach_plist(node->dvg_user_list, el) {
1374                         ir_node *u_irn = plist_element_get_value(el);
1375
1376                         /* is the current user u_irn not a descendant of any other user -> pkiller */
1377                         foreach_plist(node->dvg_user_list, el2) {
1378                                 ir_node   *v_irn = plist_element_get_value(el2);
1379                                 rss_irn_t *v     = get_rss_irn(rss, v_irn);
1380
1381                                 if (el != el2                             &&
1382                                         ! BSEARCH_IRN_ARR(u_irn, v->dvg_desc) &&
1383                                         ! plist_has_value(node->dvg_pkiller_list, u_irn))
1384                                 {
1385                                         plist_insert_back(node->dvg_pkiller_list, u_irn);
1386                                         DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG pkiller %+F\n", u_irn));
1387                                 }
1388                         }
1389                 }
1390
1391                 node->dvg_pkiller = build_sorted_array_from_list(node->dvg_pkiller_list, phase_obst(&rss->ph));
1392         }
1393
1394         DEBUG_ONLY(
1395                 if (firm_dbg_get_mask(rss->dbg) & DEBUG_DVG)
1396                         debug_vcg_dump_dvg_pkiller(rss, dvg);
1397         );
1398 }
1399
1400 #endif /* if 0 */
1401
1402 /**
1403  * Compute the maximal antichain of the current DVG.
1404  * This is a reimplementation of the MAXIMAL_ANTI_CHAIN function
1405  * from the DDG library 1.1 (DAG.cpp).
1406  */
1407 static nodeset *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration) {
1408         int         n               = nodeset_count(dvg->nodes);
1409         int         *assignment     = alloca(n * sizeof(assignment[0]));
1410         int         *assignment_rev = alloca(n * sizeof(assignment_rev[0]));
1411         int         *idx_map        = alloca(n * sizeof(idx_map[0]));
1412         hungarian_problem_t *bp;
1413         nodeset     *values, *temp;
1414         ir_node     *u_irn;
1415         int         i, j, cost, cur_chain, res;
1416         rss_edge_t  *dvg_edge;
1417
1418 #define MAP_IDX(irn) bsearch_for_index(get_irn_idx(irn), idx_map,  n,  1)
1419
1420         if (pset_count(dvg->edges) == 0)
1421                 return NULL;
1422
1423         bp = hungarian_new(n, n, 1, HUNGARIAN_MATCH_NORMAL);
1424
1425         /*
1426                 At first, we build an index map for the nodes in the DVG,
1427                 because we cannot use the irn idx therefore as the resulting
1428                 bipartite data structure would have around 1.2GB.
1429                 So we limit the size to the number of nodes we have in the DVG
1430                 and build a sorted index map for their irn indices.
1431         */
1432         i = 0;
1433         foreach_nodeset(dvg->nodes, u_irn) {
1434                 idx_map[i++] = get_irn_idx(u_irn);
1435         }
1436         qsort(idx_map, n, sizeof(idx_map[0]), cmp_int);
1437
1438         foreach_pset(dvg->edges, dvg_edge) {
1439                 int idx_u = MAP_IDX(dvg_edge->src);
1440                 int idx_v = MAP_IDX(dvg_edge->tgt);
1441
1442                 /* add the entry to the bipartite data structure */
1443                 hungarian_add(bp, idx_u, idx_v, 1);
1444                 DBG((rss->dbg, LEVEL_3, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1445                         idx_u, dvg_edge->src, idx_v, dvg_edge->tgt));
1446         }
1447 #if 0
1448         /*
1449                 Add a bipartite entry for each pair of nodes (u, v), where exists a
1450                 path in the DVG from u to v, ie. connect all descendants(v) to v.
1451                 desc_dvg(v) = accumulated descendants of all z in dvg_user_list(v)
1452         */
1453         foreach_nodeset(dvg->nodes, u_irn) {
1454                 rss_irn_t *u        = get_rss_irn(rss, u_irn);
1455                 int       idx_u_irn = MAP_IDX(u_irn);
1456
1457                 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG descendants of %+F:\n", u_irn));
1458
1459                 //plist_clear(u->dvg_desc_list);
1460                 //accumulate_dvg_descendant_values(rss, u, u->dvg_desc_list);
1461
1462                 /*
1463                         FIXME: The array is build on the phase obstack and we cannot free the data.
1464                         So the array get as many times allocated as this function is called.
1465                 */
1466
1467                 /* build the sorted array for faster searches */
1468                 //u->dvg_desc = build_sorted_array_from_list(u->dvg_desc_list, phase_obst(&rss->ph));
1469
1470                 DBG((rss->dbg, DEBUG_MAX_AC, "\t\tadding bipartite entries of %+F:\n", u_irn));
1471
1472                 /* add the bipartite entries for all descendants */
1473                 for (i = ARR_LEN_SAFE(u->dvg_desc) - 1; i >= 0; --i) {
1474                         rss_irn_t *desc    = get_rss_irn(rss, u->dvg_desc[i]);
1475                         int       idx_desc = MAP_IDX(u->dvg_desc[i]);
1476
1477                         /* add the entry to the bipartite data structure */
1478                         hungarian_add(bp, idx_u_irn, idx_desc, 1);
1479                         DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1480                                 idx_u_irn, u_irn, idx_desc, u->dvg_desc[i]));
1481
1482                         need_matching = 1;
1483                 }
1484         }
1485 #endif
1486
1487         /* We want maximum cardinality matching */
1488         hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);
1489
1490 #if 0
1491         DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG pkiller:\n"));
1492         /* beware: the following function needs the dvg_desc array */
1493         build_dvg_pkiller_list(rss, dvg);
1494 #endif
1495
1496         DBG((rss->dbg, LEVEL_2, "\t\tcomputing bipartite matching\n"));
1497         /*
1498                 The maximum cardinality bipartite matching gives us the minimal
1499                 chain partition, which corresponds to the maximum anti chains.
1500         */
1501         res = hungarian_solve(bp, assignment, &cost, 1);
1502         assert(res == 0 && "Bipartite matching failed!");
1503
1504         hungarian_free(bp);
1505         memset(assignment_rev, -1, n * sizeof(assignment_rev[0]));
1506
1507         /* build the reverse assignment, ie. foreach i -> j, add j -> i */
1508         for (i = 0; i < n; ++i) {
1509                 if (assignment[i] >= 0) {
1510                         int j = assignment[i];
1511                         assignment_rev[j] = i;
1512                 }
1513         }
1514
1515         DBG((rss->dbg, LEVEL_2, "\t\tgot assignment with cost %d\n", cost));
1516         DBG((rss->dbg, LEVEL_3, "\t\t\tassignment   ---   reverse assignment\n", cost));
1517         for (i = 0; i < n; ++i) {
1518                 DBG((rss->dbg, LEVEL_3, "\t\t\t%3d -> %3d         %3d -> %3d\n", i, assignment[i], i, assignment_rev[i]));
1519         }
1520
1521         values    = new_nodeset(10);
1522         cur_chain = 0;
1523         /* Construction of the minimal chain partition */
1524         for (j = 0; j < n; ++j) {
1525                 /* check nodes, which did not occur as target */
1526                 if (assignment_rev[j] == -1) {
1527                         int       xj      = idx_map[j];
1528                         ir_node   *xj_irn = get_idx_irn(rss->irg, xj);
1529                         rss_irn_t *xj_rss = get_rss_irn(rss, xj_irn);
1530                         chain_t   *c      = obstack_alloc(phase_obst(&rss->ph), sizeof(*c));
1531                         int       source;
1532
1533                         /* there was no source for j -> we have a source of a new chain */
1534                         nodeset_insert(values, xj_irn);
1535
1536                         c->elements = plist_obstack_new(phase_obst(&rss->ph));
1537                         c->nr       = cur_chain++;
1538                         plist_insert_back(c->elements, xj_irn);
1539
1540                         xj_rss->chain = c;
1541
1542                         DBG((rss->dbg, LEVEL_2, "\t\tstarting chain %d:\n", c->nr));
1543                         DBG((rss->dbg, LEVEL_2, "\t\t\t%+F (%d)", xj_irn, j));
1544
1545                         /* follow chain, having j as source */
1546                         source = j;
1547                         while (assignment[source] >= 0) {
1548                                 int       target  = assignment[source];
1549                                 int       irn_idx = idx_map[target];
1550                                 ir_node   *irn    = get_idx_irn(rss->irg, irn_idx);
1551                                 rss_irn_t *node   = get_rss_irn(rss, irn);
1552
1553                                 plist_insert_back(c->elements, irn);
1554                                 node->chain = c;
1555
1556                                 DB((rss->dbg, LEVEL_2, " -> %+F (%d)", irn, target));
1557
1558                                 /* new source = last target */
1559                                 source = target;
1560                         }
1561
1562                         DB((rss->dbg, LEVEL_2, "\n"));
1563                 }
1564         }
1565
1566         /*
1567                 Computing the maximal antichain: Select an element from each
1568                 chain such, such it is parallel with the others.
1569         */
1570         DBG((rss->dbg, LEVEL_1, "\t\tcomputing set of saturation values (MAX AC)\n"));
1571         DBG((rss->dbg, LEVEL_3, "\t\tstarting with:\n"));
1572
1573         DEBUG_ONLY(
1574                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_3)
1575                         dump_nodeset(values, "\t\t\t");
1576         )
1577
1578         temp = NULL;
1579         do {
1580                 /*
1581                         We need an explicit array for the values as
1582                         we cannot iterate multiple times over the same
1583                         set at the same time. :-(((((
1584                 */
1585                 int     n         = nodeset_count(values);
1586                 int     i         = 0;
1587                 ir_node **val_arr = NEW_ARR_F(ir_node *, n);
1588
1589                 foreach_nodeset(values, u_irn)
1590                         val_arr[i++] = u_irn;
1591
1592                 if (temp)
1593                         del_nodeset(temp);
1594
1595                 temp = new_nodeset(10);
1596
1597                 /* Select all nodes from current value set, having another node in the set as descendant. */
1598                 for (i = 0; i < n; ++i) {
1599                         rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1600
1601                         for (j = 0; j < n; ++j) {
1602                                 if (i != j) {
1603                                         rss_edge_t key;
1604
1605                                         key.src = val_arr[i];
1606                                         key.tgt = val_arr[j];
1607
1608                                         if (pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1609                                                 /* v[j] is descendant of u -> remove u and break */
1610                                                 nodeset_insert(temp, u->irn);
1611                                                 nodeset_remove(values, u->irn);
1612
1613                                                 DBG((rss->dbg, LEVEL_3, "\t\t\tremoving %+F from values, adding it to temp\n", u->irn));
1614
1615                                                 break;
1616                                         }
1617                                 }
1618                         }
1619                 }
1620
1621                 /* Try to insert the chain predecessor of all selected u's */
1622                 foreach_nodeset(temp, u_irn) {
1623                         rss_irn_t       *u  = get_rss_irn(rss, u_irn);
1624                         chain_t         *c  = u->chain;
1625                         plist_element_t *el = plist_find_value(c->elements, u_irn);
1626
1627                         assert(el && "Missing element in chain!");
1628
1629                         /* If u has predecessor in chain: insert the predecessor */
1630                         if (el == plist_element_get_prev(el)) {
1631                                 nodeset_insert(values, plist_element_get_value(el));
1632                                 DBG((rss->dbg, LEVEL_3, "\t\t\tadding %+F to values\n", plist_element_get_value(el)));
1633                         }
1634                 }
1635
1636
1637                 DEL_ARR_F(val_arr);
1638         } while (nodeset_count(temp) > 0);
1639
1640         DBG((rss->dbg, LEVEL_2, "\t\tfinal set:\n"));
1641         DEBUG_ONLY(
1642                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2) {
1643                         dump_nodeset(values, "\t\t\t");
1644                 }
1645         );
1646
1647         if (rss->opts->dump_flags & RSS_DUMP_MAXAC)
1648                 debug_vcg_dump_pkg(rss, values, iteration);
1649
1650         del_nodeset(temp);
1651
1652         return values;
1653
1654 #undef MAP_IDX
1655 }
1656
1657 /**
1658  * Computes the best serialization between two nodes of sat_vals.
1659  */
1660 static serialization_t *compute_best_admissible_serialization(rss_t *rss, nodeset *sat_vals, serialization_t *ser, int num_regs) {
1661         int        n                    = nodeset_count(sat_vals);
1662         int        n_idx                = ARR_LEN_SAFE(rss->idx_map);
1663         int        i                    = 0;
1664         ir_node    **val_arr            = alloca(n * sizeof(val_arr[0]));
1665         bitset_t   *bs_sv               = bitset_alloca(n_idx);
1666         bitset_t   *bs_vdesc            = bitset_alloca(n_idx);
1667         bitset_t   *bs_tmp              = bitset_alloca(n_idx);
1668         bitset_t   *bs_ukilldesc        = bitset_alloca(n_idx);
1669         int        best_benefit         = INT_MAX;
1670         int        best_omega2          = INT_MAX;
1671         int        best_benefit_omega20 = INT_MAX;
1672         int        has_omega1           = 0;
1673         int        j, k;
1674         ir_node    *irn;
1675         rss_edge_t min_benefit_edge;
1676         rss_edge_t min_omega20_edge;
1677         rss_irn_t  *ser_u_omega1 = NULL, *ser_v_omega1 = NULL;
1678         rss_irn_t  *ser_u_omega20 = NULL, *ser_v_omega20 = NULL;
1679
1680         DBG((rss->dbg, LEVEL_1, "\tcomputing admissible serializations:\n"));
1681
1682         /*
1683                 We need an explicit array for the values as
1684                 we cannot iterate multiple times over the same
1685                 set at the same time. :-(((((
1686         */
1687
1688         foreach_nodeset(sat_vals, irn) {
1689                 val_arr[i++] = irn;
1690                 bitset_set(bs_sv, BLOCK_IDX_MAP(rss, irn));
1691         }
1692
1693         /*
1694                 We build all admissible serializations and remember the best found so far.
1695                 For u in sat_vals:
1696                  For v in sat_val:
1697                    if v in pkiller(u): add edge from v to all other pkiller(u)
1698                    else: for all vv in pkiller(u): add edge from v to vv if there exists no path from vv to v
1699         */
1700
1701 /*
1702         A node is unserializable if:
1703         - it has only one killer and this one is Sink
1704     - it kills no other values
1705         In this case there is no serialization which could
1706         reduce the registerpressure
1707 */
1708 #define IS_UNSERIALIZABLE_NODE(rss_node)                  \
1709         (                                                     \
1710                 (                                                 \
1711                         (plist_count(rss_node->pkiller_list) == 1) && \
1712                         is_Sink(rss_node->killer)                  && \
1713                         (rss_node->kill_count                == 0)    \
1714                 )                            ||                   \
1715                 be_is_Barrier(rss_node->irn) ||                   \
1716                 be_is_Keep(rss_node->irn)                         \
1717         )
1718
1719         /* for all u in sat_vals */
1720         for (i = 0; i < n; ++i) {
1721                 rss_irn_t       *u = get_rss_irn(rss, val_arr[i]);
1722                 plist_element_t *el;
1723
1724                 /* ignore nodes where serialization does not help */
1725                 if (IS_UNSERIALIZABLE_NODE(u)) {
1726                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", u->irn));
1727                         continue;
1728                 }
1729
1730                 /* accumulate all descendants of all pkiller(u) */
1731                 bitset_clear_all(bs_ukilldesc);
1732                 foreach_plist(u->pkiller_list, el) {
1733                         ir_node   *irn  = plist_element_get_value(el);
1734                         rss_irn_t *node = get_rss_irn(rss, irn);
1735
1736                         if (! is_Sink(irn))
1737                                 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, irn));
1738                         else
1739                                 continue;
1740
1741                         for (k = ARR_LEN_SAFE(node->descendants) - 1; k >= 0; --k) {
1742                                 if (! is_Sink(node->descendants[k]))
1743                                         bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, node->descendants[k]));
1744                         }
1745                 }
1746
1747                 /* for all v in sat_vals */
1748                 for (j = 0; j < n; ++j) {
1749                         ir_node   *v_irn   = val_arr[j];
1750                         rss_irn_t *v       = get_rss_irn(rss, v_irn);
1751                         unsigned  v_height = get_irn_height(rss->h, v_irn);
1752                         int       omega1, omega2, is_pkiller;
1753
1754                         /* v cannot be serialized with itself
1755                          * ignore nodes where serialization does not help */
1756                         if (i == j || IS_UNSERIALIZABLE_NODE(v)) {
1757                                 if (i != j)
1758                                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", v->irn));
1759                                 continue;
1760                         }
1761
1762                         /* get descendants of v */
1763                         bitset_clear_all(bs_vdesc);
1764                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v_irn));
1765                         for (k = ARR_LEN_SAFE(v->descendants) - 1; k >= 0; --k) {
1766                                 if (! is_Sink(v->descendants[k]))
1767                                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v->descendants[k]));
1768                         }
1769
1770                         /* if v is in pkiller(u) */
1771                         is_pkiller = plist_has_value(u->pkiller_list, v_irn);
1772
1773                         /* for all vv in pkiller(u) */
1774                         foreach_plist(u->pkiller_list, el) {
1775                                 ir_node *vv_irn  = plist_element_get_value(el);
1776                                 int     add_edge;
1777
1778                                 if (is_Sink(vv_irn) || is_cfop(vv_irn))
1779                                         continue;
1780
1781                                 if (is_pkiller)
1782                                         add_edge = vv_irn != v_irn && skip_Proj(vv_irn) != skip_Proj(v_irn);
1783                                 else
1784                                         add_edge = ! heights_reachable_in_block(rss->h, skip_Proj(vv_irn), skip_Proj(v_irn));
1785
1786                                 /*
1787                                         As we add an edge from vv -> v, we have to make sure,
1788                                         that there exists no path from v to vv.
1789                                 */
1790
1791                                 if (add_edge) {
1792                                         int vv_height = get_irn_height(rss->h, vv_irn);
1793                                         int mu1, mu2, critical_path_cost;
1794
1795                                         /*
1796                                                 mu1 = | descendants(v) cut sat_vals |
1797                                                 the number of saturating values which cannot
1798                                                 be simultaneously alive with u
1799                                         */
1800                                         bitset_copy(bs_tmp, bs_vdesc);
1801                                         mu1 = bitset_popcnt(bitset_and(bs_tmp, bs_sv));
1802
1803                                         /*
1804                                                 mu2 = | accum_desc_all_pkiller(u) without descendants(v) |
1805                                         */
1806                                         if (is_pkiller) {
1807                                                 bitset_copy(bs_tmp, bs_ukilldesc);
1808                                                 mu2 = bitset_popcnt(bitset_andnot(bs_tmp, bs_vdesc));
1809                                         }
1810                                         else {
1811                                                 mu2 = 0;
1812                                         }
1813
1814                                         /* omega1 = mu1 - mu2 */
1815                                         omega1 = mu1 - mu2;
1816
1817                                         if (omega1 != 0)
1818                                                 has_omega1 = 1;
1819
1820                                         /* omega2 = increase of critical path */
1821                                         critical_path_cost =
1822                                                 v_height                        /* longest path from v to sink */
1823                                                 + rss->max_height - vv_height   /* longest path from source to vv */
1824                                                 + 1;                            /* edge */
1825
1826                                         /*
1827                                                 If critical_path_cost > max_height -> the new edge
1828                                                 would increase the longest critical path by the difference.
1829                                         */
1830                                         omega2 = critical_path_cost > rss->max_height ? critical_path_cost - rss->max_height : 0;
1831
1832                                         /* this keeps track of the edge with the best benefit */
1833                                         if (omega1 >= num_regs - n && omega1 < best_benefit) {
1834                                                 min_benefit_edge.src = v_irn;
1835                                                 min_benefit_edge.tgt = vv_irn;
1836
1837                                                 ser_u_omega1 = u;
1838                                                 ser_v_omega1 = v;
1839
1840                                                 best_benefit    = omega1;
1841                                                 ser->new_killer = is_pkiller;
1842                                         }
1843
1844                                         /* this keeps track of the edge with the best omega1 costs where omega2 == 0 */
1845                                         if (omega2 == 0 && omega1 >= num_regs - n && omega1 < best_benefit_omega20) {
1846                                                 min_omega20_edge.src = v_irn;
1847                                                 min_omega20_edge.tgt = vv_irn;
1848
1849                                                 ser_u_omega20 = u;
1850                                                 ser_v_omega20 = v;
1851
1852                                                 best_benefit_omega20 = omega1;
1853                                                 ser->new_killer      = is_pkiller;
1854                                         }
1855
1856                                         best_omega2 = MIN(best_omega2, omega2);
1857
1858                                         DBG((rss->dbg, LEVEL_2, "\t\tfound %+F -> %+F (w1 %d, w2 %d)\n",
1859                                                 v_irn, vv_irn, omega1, omega2));
1860                                 } /* if add_edge */
1861                         } /* for all vv in pkiller(u) */
1862                 } /* for all v in sat_vals */
1863         } /* for all u in sat_vals */
1864
1865         if (! has_omega1)
1866                 return NULL;
1867
1868         if (best_omega2 == 0) {
1869                 ser->u         = ser_u_omega20;
1870                 ser->v         = ser_v_omega20;
1871                 ser->edge->src = min_omega20_edge.src;
1872                 ser->edge->tgt = min_omega20_edge.tgt;
1873                 ser->omega1    = best_benefit_omega20;
1874                 ser->omega2    = best_omega2;
1875         }
1876         else {
1877                 ser->u         = ser_u_omega1;
1878                 ser->v         = ser_v_omega1;
1879                 ser->edge->src = min_benefit_edge.src;
1880                 ser->edge->tgt = min_benefit_edge.tgt;
1881                 ser->omega1    = best_benefit;
1882                 ser->omega2    = best_omega2;
1883         }
1884
1885         return ser;
1886
1887 #undef IS_UNSERIALIZABLE_NODE
1888 }
1889
1890 /**
1891  * Perform the value serialization heuristic and add all
1892  * computed serialization edges as dependencies to the irg.
1893  */
1894 static void perform_value_serialization_heuristic(rss_t *rss) {
1895         bitset_t *arch_nonign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1896         bitset_t *abi_ign_bs     = bitset_alloca(arch_register_class_n_regs(rss->cls));
1897         int      available_regs, iteration;
1898         dvg_t    dvg;
1899         nodeset  *sat_vals;
1900         pset *ser_set = new_pset(cmp_rss_edges, 20);
1901
1902         /* available_regs = R = |arch_non_ignore_regs cut ~abi_ignore_regs| */
1903         arch_put_non_ignore_regs(rss->arch_env, rss->cls, arch_nonign_bs);
1904         be_abi_put_ignore_regs(rss->abi, rss->cls, abi_ign_bs);
1905         bitset_andnot(arch_nonign_bs, abi_ign_bs);
1906         available_regs  = bitset_popcnt(arch_nonign_bs);
1907         //num_live = pset_count(rss->live_block);
1908         //available_regs -= num_live < available_regs ? num_live : 0;
1909
1910         DBG((rss->dbg, LEVEL_1, "\n\t#available regs: %d\n\n", available_regs));
1911
1912         /*
1913                 At first we need to compute the disjoint value DAG (DVG = {V, E_dv}).
1914                 V    = set of all nodes we are currently interested in
1915                 E_dv = there is an edge from u to v iff v is a descendant of killer(u), forall u, v in V
1916         */
1917         dvg.nodes = new_nodeset(plist_count(rss->nodes));
1918         dvg.edges = new_pset(cmp_rss_edges, plist_count(rss->nodes) * 5);
1919         compute_dvg(rss, &dvg);
1920
1921         /*
1922                 Then we perform the heuristic serialization algorithm
1923                 on the DVG which gives us all necessary serialization
1924                 edges.
1925         */
1926         DBG((rss->dbg, LEVEL_1, "\tcomputing maximal antichain:\n"));
1927         iteration = 0;
1928         sat_vals  = compute_maximal_antichain(rss, &dvg, iteration++);
1929         while (sat_vals && (nodeset_count(sat_vals) > available_regs)) {
1930                 serialization_t *ser, best_ser;
1931                 rss_edge_t      *edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*edge));
1932                 ir_node         *dep_src, *dep_tgt;
1933
1934                 best_ser.edge = edge;
1935                 ser = compute_best_admissible_serialization(rss, sat_vals, &best_ser, available_regs);
1936
1937                 DBG((rss->dbg, LEVEL_1, "\tcurrent register saturation %d, target %d\n", nodeset_count(sat_vals), available_regs));
1938
1939                 if (! ser) {
1940                         DBG((rss->dbg, LEVEL_1, "\tno RS improving serialization found, breaking at iteration %d\n", iteration));
1941                         break;
1942                 }
1943
1944                 /* Insert the serialization as dependency edge into the irg. */
1945                 DBG((rss->dbg, LEVEL_1, "\tserializing %+F -> %+F with edge %+F -> %+F and cost %d, %d\n",
1946                         ser->u->irn, ser->v->irn, ser->edge->src, ser->edge->tgt, ser->omega1, ser->omega2));
1947
1948                 if (pset_find(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge)))
1949                         ir_printf("WARNING: serialization %+F -> %+F computed twice!\n", ser->edge->src, ser->edge->tgt);
1950
1951
1952                 pset_insert(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge));
1953
1954                 /* update the dvg */
1955                 update_dvg(rss, &dvg, ser->v, ser->u);
1956                 update_dvg(rss, &dvg, get_rss_irn(rss, ser->edge->src), get_rss_irn(rss, ser->edge->tgt));
1957                 del_nodeset(sat_vals);
1958
1959                 dep_src = skip_Proj(ser->edge->src);
1960                 dep_tgt = ser->edge->tgt;
1961                 add_irn_dep(dep_src, dep_tgt);
1962
1963                 /* Update descendants, consumer and pkillers of the target */
1964                 update_node_info(rss, ser->edge->tgt, ser->edge->src);
1965
1966                 /* TODO: try to find a cheaper way for updating height information */
1967                 rss->max_height = heights_recompute_block(rss->h, rss->block);
1968
1969                 /* Recompute the antichain for next serialization */
1970                 DBG((rss->dbg, LEVEL_1, "\tre-computing maximal antichain:\n"));
1971                 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
1972         }
1973
1974         del_nodeset(dvg.nodes);
1975         del_pset(dvg.edges);
1976 }
1977
1978 /**
1979  * Do initial calculations for a block.
1980  */
1981 static void process_block(ir_node *block, void *env) {
1982         rss_t *rss = env;
1983         int   i, n;
1984         const ir_edge_t *edge;
1985
1986         phase_init(&rss->ph, "rss block preprocessor", rss->irg, PHASE_DEFAULT_GROWTH, init_rss_irn);
1987
1988         DBG((rss->dbg, LEVEL_1, "preprocessing block %+F\n", block));
1989         rss->block = block;
1990
1991         /* build an index map for all nodes in the current block */
1992         i            = 0;
1993         n            = get_irn_n_edges(block);
1994         NEW_ARR_A(int *, rss->idx_map, n);
1995         foreach_out_edge(block, edge) {
1996                 ir_node *irn      = get_edge_src_irn(edge);
1997                 rss->idx_map[i++] = get_irn_idx(irn);
1998         }
1999         qsort(rss->idx_map, n, sizeof(rss->idx_map[0]), cmp_int);
2000         rss->max_height = heights_recompute_block(rss->h, block);
2001
2002         /* loop over all register classes */
2003         for (i = arch_isa_get_n_reg_class(rss->arch_env->isa) - 1; i >= 0; --i) {
2004                 const arch_register_class_t *cls = arch_isa_get_reg_class(rss->arch_env->isa, i);
2005
2006                 rss->cls = cls;
2007                 DBG((rss->dbg, LEVEL_1, "register class %s\n", arch_register_class_name(cls)));
2008
2009                 /* Get all live value at end of Block having current register class */
2010                 rss->live_block = pset_new_ptr(10);
2011                 be_liveness_end_of_block(rss->liveness, rss->arch_env, rss->cls, rss->block, rss->live_block);
2012
2013                 /* reset the list of interesting nodes */
2014                 plist_clear(rss->nodes);
2015                 plist_insert_back(rss->nodes, _sink);
2016
2017                 /* collect all nodes having a certain register class */
2018                 foreach_out_edge(block, edge) {
2019                         ir_node *irn  = get_edge_src_irn(edge);
2020                         ir_mode *mode = get_irn_mode(irn);
2021
2022                         /*
2023                                 We skip:
2024                                 - mode_T nodes (the projs are asked)
2025                                 - mode_X nodes (control flow nodes are always scheduled last)
2026                                 - Keeps (they are always immediately scheduled)
2027                                 - Phi (same as Keep)
2028                         */
2029                         if (mode == mode_T || mode == mode_X || is_Phi(irn))
2030                                 continue;
2031
2032                         /*
2033                                 In case of a proj, we skip
2034                                 - Barrier (they are a Barrier :)
2035                                 - Start
2036                                 - the Proj itself, as it's scheduled always with it's super node
2037                         */
2038                         if (is_Proj(irn)) {
2039                                 ir_node *pred = get_Proj_pred(irn);
2040                                 if (be_is_Barrier(pred) || is_Start(pred))
2041                                         continue;
2042                         }
2043
2044                         /* calculate the descendants and consumer for each node in the block */
2045                         collect_node_info(rss, skip_Proj(irn));
2046
2047                         if (be_is_Keep(irn))
2048                                 continue;
2049
2050                         if (! arch_irn_is(rss->arch_env, irn, ignore) && arch_get_irn_reg_class(rss->arch_env, irn, -1) == cls) {
2051                                 plist_insert_back(rss->nodes, skip_Proj(irn));
2052                         }
2053                         //}
2054                 }
2055
2056                 /* compute the potential killing set PK(G) */
2057                 compute_pkill_set(rss);
2058
2059                 /* compute the killing function k* */
2060                 compute_killing_function(rss);
2061
2062                 /*
2063                         Compute the heuristic value serialization and
2064                         add the necessary dependencies to the irg.
2065                 */
2066                 perform_value_serialization_heuristic(rss);
2067
2068                 del_pset(rss->live_block);
2069         }
2070
2071         phase_free(&rss->ph);
2072 }
2073
2074 #ifdef WITH_LIBCORE
2075 /**
2076  * Register the options.
2077  */
2078 void rss_register_options(lc_opt_entry_t *grp) {
2079         static int     run_once = 0;
2080         lc_opt_entry_t *rss_grp;
2081
2082         if (! run_once) {
2083                 run_once = 1;
2084                 rss_grp  = lc_opt_get_grp(grp, "rss");
2085
2086                 lc_opt_add_table(rss_grp, rss_option_table);
2087         }
2088 }
2089 #endif /* WITH_LIBCORE */
2090
2091 /**
2092  * Preprocess the irg for scheduling.
2093  */
2094 void rss_schedule_preparation(const be_irg_t *birg) {
2095         rss_t rss;
2096
2097         FIRM_DBG_REGISTER(rss.dbg, "firm.be.sched.rss");
2098
2099         //firm_dbg_set_mask(rss.dbg, LEVEL_1 | LEVEL_2 | LEVEL_3);
2100
2101         init_rss_special_nodes(birg->irg);
2102
2103         rss.irg      = birg->irg;
2104         rss.arch_env = birg->main_env->arch_env;
2105         rss.abi      = birg->abi;
2106         rss.h        = heights_new(birg->irg);
2107         rss.nodes    = plist_new();
2108         rss.opts     = &rss_options;
2109         rss.liveness = be_liveness(birg->irg);
2110         irg_block_walk_graph(birg->irg, NULL, process_block, &rss);
2111         heights_free(rss.h);
2112         plist_free(rss.nodes);
2113         be_liveness_free(rss.liveness);
2114
2115         if (birg->main_env->options->dump_flags & DUMP_SCHED)
2116                 be_dump(rss.irg, "-rss", dump_ir_block_graph);
2117 }