start register allocator again, fix typo
[libfirm] / ir / be / beschedrss.c
1 /**
2  * Implementation of a register saturating list scheduler
3  * as described in: Sid-Ahmed-Ali Touati
4  * Register Saturation in Superscalar and VLIW Codes
5  *
6  * @license This file protected by GPL -  GNU GENERAL PUBLIC LICENSE.
7  * @author  Christian Wuerdig
8  * @date    29.08.2006
9  * @cvs-id  $Id$
10  */
11 #ifdef HAVE_CONFIG_H
12 #include "config.h"
13 #endif
14
15 #include <limits.h>
16
17 #include "obst.h"
18 #include "debug.h"
19
20 #include "irgraph_t.h"
21 #include "irnode_t.h"
22 #include "iredges_t.h"
23 #include "ircons_t.h"
24 #include "irphase_t.h"
25 #include "irgwalk.h"
26 #include "irdump.h"
27 #include "irtools.h"
28 #include "irbitset.h"
29 #include "irprintf.h"
30 #include "bipartite.h"
31 #include "hungarian.h"
32 #include "plist.h"
33
34 #include "height.h"
35
36 #include "beabi.h"
37 #include "bemodule.h"
38 #include "benode_t.h"
39 #include "besched_t.h"
40
41 #ifdef WITH_LIBCORE
42 #include <libcore/lc_opts.h>
43 #include <libcore/lc_opts_enum.h>
44 #endif /* WITH_LIBCORE */
45
46
47 #define ARR_LEN_SAFE(arr) ((arr) != NULL ? ARR_LEN((arr)) : 0)
48
49 #define HASH_RSS_EDGE(edge) ((get_irn_node_nr((edge)->src) << 16) | (get_irn_node_nr((edge)->tgt) & 0xFFFF))
50 #define BSEARCH_IRN_ARR(val, arr) \
51         bsearch(&(val), (arr), ARR_LEN_SAFE((arr)), sizeof((arr)[0]), cmp_irn_idx)
52
53 #define BLOCK_IDX_MAP(rss, irn) bsearch_for_index(get_irn_idx((irn)), (rss)->idx_map, ARR_LEN_SAFE((rss)->idx_map), 1)
54
55 /* the rss options */
56 typedef struct _rss_opts_t {
57         int dump_flags;
58 } rss_opts_t;
59
60 /* Represents a child with associated costs */
61 typedef struct _child {
62         ir_node *irn;
63         float   cost;
64 } child_t;
65
66 /* We need edges for several purposes. */
67 typedef struct _rss_edge {
68         ir_node *src;
69         ir_node *tgt;
70         void    *next;
71 } rss_edge_t;
72
73 /* Represents a connected bipartite component. */
74 typedef struct _cbc {
75         nodeset *parents;       /**< = S  a set of value producers */
76         nodeset *children;      /**< = T  a set of value consumers */
77         pset    *kill_edges;    /**< = E  a set of edges (t in T, s in S) such as each s in S gets killed by at least one t in T */
78         int     nr;             /**< a deterministic index for set insertion (used as hash) */
79 } cbc_t;
80
81 /* Represents a disjoint value DAG. */
82 typedef struct _dvg {
83         nodeset *nodes;
84         pset    *edges;
85 } dvg_t;
86
87 /* Represents a chain of nodes. */
88 typedef struct _chain {
89         plist_t *elements;   /**< List of chain elements */
90         int     nr;          /**< a deterministic index for set insertion (used as hash) */
91 } chain_t;
92
93 typedef struct _rss_irn {
94         plist_t  *consumer_list;    /**< List of consumers */
95         ir_node **consumer;         /**< Sorted consumer array (needed for faster access) */
96
97         plist_t  *parent_list;      /**< List of parents */
98         plist_t  *pkiller_list;     /**< List of potential killers */
99
100         plist_t  *descendant_list;  /**< List of descendants */
101         ir_node **descendants;      /**< Sorted descendant array (needed for faster access) */
102
103         ir_node  *killer;           /**< The selected unique killer */
104         ir_node  *irn;              /**< The corresponding firm node to this rss_irn */
105
106         chain_t  *chain;            /**< The chain, this node is associated to */
107
108         unsigned desc_walk;         /**< visited flag for collecting descendants */
109         unsigned kill_count;        /**< number of nodes killed by this one */
110
111         unsigned live_out : 1;      /**< irn has consumers outside of it's block */
112         unsigned visited  : 1;      /**< visited flag for bipartite decomposition */
113         unsigned havecons : 1;      /**< visited flag collect consumer */
114         unsigned handled  : 1;      /**< flag indicating whether or not the list structures have been build */
115         unsigned dumped   : 1;      /**< flag indication whether or not this node was dumped */
116 } rss_irn_t;
117
118 /* Represents a serialization edge with associated costs. */
119 typedef struct _serialization {
120         rss_irn_t  *u;       /* the top node */
121         rss_irn_t  *v;       /* the node about to be serialized after u */
122         rss_edge_t *edge;    /* the edge selected for the serialization */
123         int        omega1;   /* estimated: available regs - RS reduction */
124         int        omega2;   /* increase in critical path length */
125         int        new_killer;
126 } serialization_t;
127
128 typedef struct _rss {
129         phase_t          ph;              /**< Phase to hold some data */
130         heights_t        *h;              /**< The current height object */
131         ir_graph         *irg;            /**< The irg to preprocess */
132         plist_t          *nodes;          /**< The list of interesting nodes */
133         const arch_env_t *arch_env;       /**< The architecture environment */
134         be_abi_irg_t     *abi;            /**< The abi for this irg */
135         pset             *cbc_set;        /**< A set of connected bipartite components */
136         ir_node          *block;          /**< The current block in progress. */
137         int              *idx_map;        /**< Mapping irn indices to per block indices */
138         unsigned         max_height;      /**< maximum height in the current block */
139         rss_opts_t       *opts;           /**< The options */
140         be_lv_t          *liveness;       /**< The liveness information for this irg */
141         pset             *live_block;     /**< Values alive at end of block */
142         const arch_register_class_t *cls; /**< The current register class */
143         DEBUG_ONLY(firm_dbg_module_t *dbg);
144 } rss_t;
145
146 #define get_rss_irn(rss, irn)  (phase_get_or_set_irn_data(&rss->ph, irn))
147
148 /**
149  * We need some special nodes:
150  * a source and a sink for all live-in and live-out values of a block
151  */
152
153 enum {
154         iro_rss_Source,
155         iro_rss_Sink,
156         iro_rss_last
157 };
158
159 static ir_node *_source = NULL;
160 static ir_node *_sink   = NULL;
161
162 #define is_Source(irn) ((irn) == _source)
163 #define is_Sink(irn)   ((irn) == _sink)
164
165 enum {
166         RSS_DUMP_NONE  = 0,
167         RSS_DUMP_CBC   = 1 << 0,
168         RSS_DUMP_PKG   = 1 << 1,
169         RSS_DUMP_KILL  = 1 << 2,
170         RSS_DUMP_DVG   = 1 << 3,
171         RSS_DUMP_MAXAC = 1 << 4,
172         RSS_DUMP_ALL   = (RSS_DUMP_MAXAC << 1) - 1,
173 };
174
175 static rss_opts_t rss_options = {
176         RSS_DUMP_NONE,
177 };
178
179 #ifdef WITH_LIBCORE
180 static const lc_opt_enum_int_items_t dump_items[] = {
181         { "none",  RSS_DUMP_NONE  },
182         { "cbc",   RSS_DUMP_CBC   },
183         { "pkg",   RSS_DUMP_PKG   },
184         { "kill",  RSS_DUMP_KILL  },
185         { "dvg",   RSS_DUMP_DVG   },
186         { "maxac", RSS_DUMP_MAXAC },
187         { "all",   RSS_DUMP_ALL   },
188         { NULL, 0 }
189 };
190
191 static lc_opt_enum_int_var_t dump_var = {
192         &rss_options.dump_flags, dump_items
193 };
194
195 static const lc_opt_table_entry_t rss_option_table[] = {
196         LC_OPT_ENT_ENUM_MASK("dump", "dump phases", &dump_var),
197         { NULL }
198 };
199 #endif /* WITH_LIBCORE */
200
201 /******************************************************************************
202  *  _          _                    __                  _   _
203  * | |        | |                  / _|                | | (_)
204  * | |__   ___| |_ __   ___ _ __  | |_ _   _ _ __   ___| |_ _  ___  _ __  ___
205  * | '_ \ / _ \ | '_ \ / _ \ '__| |  _| | | | '_ \ / __| __| |/ _ \| '_ \/ __|
206  * | | | |  __/ | |_) |  __/ |    | | | |_| | | | | (__| |_| | (_) | | | \__ \
207  * |_| |_|\___|_| .__/ \___|_|    |_|  \__,_|_| |_|\___|\__|_|\___/|_| |_|___/
208  *            | |
209  *            |_|
210  ******************************************************************************/
211
212 /**
213  * Acquire opcodes and create source and sink nodes.
214  */
215 static void init_rss_special_nodes(ir_graph *irg) {
216         ir_node *block         = get_irg_start_block(irg);
217         int     iro_rss_base   = get_next_ir_opcodes(iro_rss_last);
218         ir_op   *op_rss_Source = new_ir_op(iro_rss_base + iro_rss_Source, "rss_Source", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
219         ir_op   *op_rss_Sink   = new_ir_op(iro_rss_base + iro_rss_Sink,   "rss_Sink",   op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
220         _source                = new_ir_node(NULL, irg, block, op_rss_Source, mode_ANY, 0, NULL);
221         _sink                  = new_ir_node(NULL, irg, block, op_rss_Sink, mode_ANY, 0, NULL);
222 }
223
224 static int cmp_int(const void *a, const void *b) {
225         const int *i1 = a;
226         const int *i2 = b;
227
228         return QSORT_CMP(*i1, *i2);
229 }
230
231 static int cmp_child_costs(const void *a, const void *b) {
232         const child_t *c1 = a;
233         const child_t *c2 = b;
234
235         return QSORT_CMP(c1->cost, c2->cost);
236 }
237
238 static int cmp_irn_idx(const void *a, const void *b) {
239         const ir_node *n1 = *(ir_node **)a;
240         const ir_node *n2 = *(ir_node **)b;
241
242         return QSORT_CMP(get_irn_idx(n1), get_irn_idx(n2));
243 }
244
245 static int cmp_rss_edges(const void *a, const void *b) {
246         const rss_edge_t *e1 = a;
247         const rss_edge_t *e2 = b;
248
249         return (e1->src != e2->src) || (e1->tgt != e2->tgt);
250 }
251
252 static int bsearch_for_index(int key, int *arr, size_t len, int force) {
253         int left = 0;
254         int right = len;
255
256         while (right >= left) {
257                 int idx = (left + right) / 2;
258
259                 if (key < arr[idx])
260                         right = idx - 1;
261                 else if (key > arr[idx])
262                         left = idx + 1;
263                 else
264                         return idx;
265         }
266
267         if (force)
268                 assert(0 && "Something is wrong, key not found.");
269         return -1;
270 }
271
272 static ir_node **build_sorted_array_from_list(plist_t *irn_list, struct obstack *obst) {
273         plist_element_t *el;
274         int     i     = 0;
275         int     len   = plist_count(irn_list);
276         ir_node **arr = NEW_ARR_D(ir_node *, obst, len);
277
278         /* copy the list into the array */
279         foreach_plist(irn_list, el) {
280                 arr[i++] = plist_element_get_value(el);
281         }
282
283         /* sort the array by node index */
284         qsort(arr, len, sizeof(arr[0]), cmp_irn_idx);
285
286         return arr;
287 }
288
289 /*****************************************************
290  *      _      _                       _
291  *     | |    | |                     (_)
292  *   __| | ___| |__  _   _  __ _  __ _ _ _ __   __ _
293  *  / _` |/ _ \ '_ \| | | |/ _` |/ _` | | '_ \ / _` |
294  * | (_| |  __/ |_) | |_| | (_| | (_| | | | | | (_| |
295  *  \__,_|\___|_.__/ \__,_|\__, |\__, |_|_| |_|\__, |
296  *                          __/ | __/ |         __/ |
297  *                         |___/ |___/         |___/
298  *
299  *****************************************************/
300
301 #ifdef DEBUG_libfirm
302 static void dump_nodeset(nodeset *ns, const char *prefix) {
303         ir_node *irn;
304         foreach_nodeset(ns, irn) {
305                 ir_fprintf(stderr, "%s%+F\n", prefix, irn);
306         }
307 }
308 #endif
309
310 static void build_file_name(rss_t *rss, const char *suffix, size_t suf_len, char *buf, size_t len) {
311         const char *irg_name;
312
313         memset(buf, 0, len);
314         irg_name = get_entity_name(get_irg_entity(rss->irg));
315         snprintf(buf, len - suf_len, "%s-%s-block-%ld",
316                 irg_name, arch_register_class_name(rss->cls), get_irn_node_nr(rss->block));
317         strcat(buf, suffix);
318 }
319
320 /* Dumps all collected bipartite components of current irg as vcg. */
321 static void debug_vcg_dump_bipartite(rss_t *rss) {
322         cbc_t *cbc;
323         FILE  *f;
324         char  file_name[256];
325         static const char suffix[] = "-RSS-CBC.vcg";
326
327         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
328         f = fopen(file_name, "w");
329
330         ir_fprintf(f, "graph: { title: \"connected bipartite component graph of %+F\"\n", rss->irg);
331         fprintf(f, "display_edge_labels: no\n");
332         fprintf(f, "layoutalgorithm: mindepth\n");
333         fprintf(f, "manhattan_edges: yes\n\n");
334
335         foreach_pset(rss->cbc_set, cbc) {
336                 ir_node    *n;
337                 rss_edge_t *ke;
338
339                 fprintf(f, "graph: { titel: \"cbc %d\" label: \"cbc %d\" status:clustered color:yellow\n", cbc->nr, cbc->nr);
340                 foreach_nodeset(cbc->parents, n) {
341                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
342                 }
343                 foreach_nodeset(cbc->children, n) {
344                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
345                 }
346                 foreach_pset(cbc->kill_edges, ke) {
347                         ir_fprintf(f, "edge: { sourcename: \"n%d_%d\" targetname: \"n%d_%d\" }\n",
348                                 get_irn_node_nr(ke->src), cbc->nr, get_irn_node_nr(ke->tgt), cbc->nr);
349                 }
350                 fprintf(f, "}\n\n");
351         }
352         fprintf(f, "}\n");
353         fclose(f);
354 }
355
356 /* Dump the computed killing function as vcg. */
357 static void debug_vcg_dump_kill(rss_t *rss) {
358         FILE            *f;
359         char            file_name[256];
360         plist_element_t *el;
361         static const char suffix[] = "-RSS-KILL.vcg";
362
363         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
364         f = fopen(file_name, "w");
365
366         ir_fprintf(f, "graph: { title: \"computed kill graph of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
367         fprintf(f, "display_edge_labels: no\n");
368         fprintf(f, "layoutalgorithm: mindepth\n");
369         fprintf(f, "manhattan_edges: yes\n\n");
370
371         {
372                 /* reset dump_flag */
373                 ir_node *irn;
374                 foreach_phase_irn(&rss->ph, irn) {
375                         rss_irn_t *node = get_rss_irn(rss, irn);
376                         node->dumped = 0;
377                 }
378         }
379
380         /* dump all nodes and their killers */
381         foreach_plist(rss->nodes, el) {
382                 ir_node   *irn     = plist_element_get_value(el);
383                 rss_irn_t *rirn    = get_rss_irn(rss, irn);
384                 rss_irn_t *pk_rirn = get_rss_irn(rss, rirn->killer);
385
386                 if (! rirn->dumped) {
387                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
388                         rirn->dumped = 1;
389                 }
390
391                 if (! pk_rirn->dumped) {
392                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(rirn->killer), rirn->killer);
393                         pk_rirn->dumped = 1;
394                 }
395
396                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
397                         get_irn_node_nr(rirn->killer), get_irn_node_nr(irn));
398         }
399
400         fprintf(f, "}\n");
401         fclose(f);
402 }
403
404 /* Dumps the potential killing DAG (PKG) as vcg. */
405 static void debug_vcg_dump_pkg(rss_t *rss, nodeset *max_ac, int iteration) {
406         FILE              *f;
407         char              file_name[256];
408         char              *suffix   = alloca(32);
409         static const char suffix1[] = "-RSS-PKG.vcg";
410         static const char suffix2[] = "-RSS-PKG-MAXAC.vcg";
411         plist_element_t   *el;
412
413         if (! max_ac) {
414                 snprintf(suffix, 32, "%s", suffix1);
415         }
416         else {
417                 snprintf(suffix, 32, "-%02d%s", iteration, suffix2);
418         }
419
420         build_file_name(rss, suffix, strlen(suffix) + 1, file_name, sizeof(file_name));
421         f = fopen(file_name, "w");
422
423         ir_fprintf(f, "graph: { title: \"potential killing DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
424         fprintf(f, "display_edge_labels: no\n");
425         fprintf(f, "layoutalgorithm: mindepth\n");
426         fprintf(f, "manhattan_edges: yes\n\n");
427
428         {
429                 /* reset dump_flag */
430                 ir_node *irn;
431                 foreach_phase_irn(&rss->ph, irn) {
432                         rss_irn_t *node = get_rss_irn(rss, irn);
433                         node->dumped = 0;
434                 }
435         }
436
437         foreach_plist(rss->nodes, el) {
438                 ir_node   *irn  = plist_element_get_value(el);
439                 rss_irn_t *rirn = get_rss_irn(rss, irn);
440                 char      *c1   = "";
441                 plist_element_t *k_el;
442
443                 /* dump selected saturating values in yellow */
444                 if (max_ac && nodeset_find(max_ac, irn))
445                         c1 = "color:yellow";
446
447                 if (! rirn->dumped) {
448                         if (rirn->chain)
449                                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(irn), irn, rirn->chain->nr, c1);
450                         else
451                                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(irn), irn, c1);
452                         rirn->dumped = 1;
453                 }
454
455                 foreach_plist(rirn->pkiller_list, k_el) {
456                         ir_node   *pkiller = plist_element_get_value(k_el);
457                         rss_irn_t *pk_rirn = get_rss_irn(rss, pkiller);
458                         char      *c2      = "";
459
460                         if (max_ac && nodeset_find(max_ac, pkiller))
461                                 c2 = "color:yellow";
462
463                         if (! pk_rirn->dumped) {
464                                 if (pk_rirn->chain)
465                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(pkiller), pkiller, pk_rirn->chain->nr, c2);
466                                 else
467                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(pkiller), pkiller, c2);
468                                 pk_rirn->dumped = 1;
469                         }
470                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
471                                 get_irn_node_nr(pkiller), get_irn_node_nr(irn));
472                 }
473         }
474         fprintf(f, "}\n");
475         fclose(f);
476 }
477
478 /* Dumps the disjoint value DAG (DVG) as vcg. */
479 static void debug_vcg_dump_dvg(rss_t *rss, dvg_t *dvg) {
480         static const char suffix[] = "-RSS-DVG.vcg";
481         FILE       *f;
482         char       file_name[256];
483         ir_node    *irn;
484         rss_edge_t *edge;
485
486         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
487         f = fopen(file_name, "w");
488
489         ir_fprintf(f, "graph: { title: \"disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
490         fprintf(f, "display_edge_labels: no\n");
491         fprintf(f, "layoutalgorithm: mindepth\n");
492         fprintf(f, "manhattan_edges: yes\n\n");
493
494         /* dump all nodes */
495         foreach_nodeset(dvg->nodes, irn) {
496                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
497         }
498
499         /* dump all edges */
500         foreach_pset(dvg->edges, edge) {
501                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
502                         get_irn_node_nr(edge->src), get_irn_node_nr(edge->tgt));
503         }
504
505         fprintf(f, "}\n");
506         fclose(f);
507 }
508
509 #if 0
510 /* Dumps the PKG(DVG). */
511 static void debug_vcg_dump_dvg_pkiller(rss_t *rss, dvg_t *dvg) {
512         static const char suffix[] = "-RSS-DVG-PKG.vcg";
513         FILE       *f;
514         char       file_name[256];
515         ir_node    *irn;
516
517         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
518         f = fopen(file_name, "w");
519
520         ir_fprintf(f, "graph: { title: \"PKG of disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
521         fprintf(f, "display_edge_labels: no\n");
522         fprintf(f, "layoutalgorithm: mindepth\n");
523         fprintf(f, "manhattan_edges: yes\n\n");
524
525         /* dump all nodes */
526         foreach_nodeset(dvg->nodes, irn) {
527                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
528         }
529
530         /* dump all edges */
531         foreach_nodeset(dvg->nodes, irn) {
532                 rss_irn_t       *node = get_rss_irn(rss, irn);
533                 plist_element_t *el;
534
535                 foreach_plist(node->dvg_pkiller_list, el) {
536                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
537                                 get_irn_node_nr(plist_element_get_value(el)), get_irn_node_nr(irn));
538                 }
539         }
540
541         fprintf(f, "}\n");
542         fclose(f);
543 }
544 #endif /* if 0 */
545
546 /**
547  * In case there is no rss information for irn, initialize it.
548  */
549 static void *init_rss_irn(phase_t *ph, ir_node *irn, void *old) {
550         rss_irn_t *res = old ? old : phase_alloc(ph, sizeof(res[0]));
551
552         res->descendant_list = plist_obstack_new(phase_obst(ph));
553         res->descendants     = NULL;
554
555         res->consumer_list   = plist_obstack_new(phase_obst(ph));
556         res->consumer        = NULL;
557
558         res->pkiller_list    = plist_obstack_new(phase_obst(ph));
559
560         res->parent_list     = plist_obstack_new(phase_obst(ph));
561
562         res->killer          = NULL;
563         res->irn             = irn;
564         res->chain           = NULL;
565
566         res->kill_count      = 0;
567         res->desc_walk       = 0;
568         res->live_out        = 0;
569         res->visited         = 0;
570         res->handled         = 0;
571         res->dumped          = 0;
572         res->havecons        = 0;
573
574         return res;
575 }
576
577 /**
578  * Collect all nodes data dependent on current node.
579  */
580 static void collect_descendants(rss_t *rss, rss_irn_t *rirn, ir_node *irn, int *got_sink, unsigned cur_desc_walk) {
581         const ir_edge_t *edge;
582         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
583         ir_node         *block    = rss->block;
584         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
585         int             i;
586
587         if (cur_node->desc_walk >= cur_desc_walk)
588                 return;
589         cur_node->desc_walk = cur_desc_walk;
590
591         /* Stop at Barriers */
592         if (be_is_Barrier(irn))
593                 return;
594
595         /* loop over normal and dependency edges */
596         for (i = 0; i < 2; ++i) {
597                 foreach_out_edge_kind(irn, edge, ekind[i]) {
598                         ir_node *user = get_edge_src_irn(edge);
599
600                         /* skip ignore nodes as they do not really contribute to register pressure */
601                         if (arch_irn_is(rss->arch_env, user, ignore))
602                                 continue;
603
604                         /*
605                                 (a) mode_X means Jump            -> out_edge
606                                 (b) Phi as user of a normal node -> out-edge
607                         */
608                         if (get_irn_mode(user) == mode_X || is_Phi(user)) {
609                                 if (! *got_sink)
610                                         goto force_sink;
611                                 else
612                                         continue;
613                         }
614
615                         if (is_Proj(user)) {
616
617                                 //if (arch_get_irn_reg_class(rss->arch_env, user, -1) == rss->cls)
618                                         collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
619                         }
620                         else {
621                                 /* check if user lives in block and is not a control flow node */
622                                 if (get_nodes_block(user) == block) {
623                                         if (! plist_has_value(rirn->descendant_list, user)) {
624                                                 plist_insert_back(rirn->descendant_list, user);
625                                                 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", user));
626                                         }
627                                         collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
628                                 }
629                                 else if (! *got_sink) {
630 force_sink:
631                                         /* user lives out of block: add sink as descendant if not already done */
632                                         plist_insert_back(rirn->descendant_list, _sink);
633                                         *got_sink = 1;
634                                         DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", _sink));
635                                 }
636                         }
637                 }
638         }
639 }
640
641 /**
642  * Handles a single consumer.
643  */
644 static void collect_single_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *consumer, int *got_sink) {
645         ir_node *block = rss->block;
646
647         assert(! is_Proj(consumer) && "Cannot handle Projs");
648
649         if (! is_Phi(consumer) && ! is_Block(consumer) && get_nodes_block(consumer) == block) {
650                 if (! arch_irn_is(rss->arch_env, consumer, ignore) && ! plist_has_value(rss_irn->consumer_list, consumer)) {
651                         plist_insert_back(rss_irn->consumer_list, consumer);
652                         DBG((rss->dbg, LEVEL_2, "\t\tconsumer %+F\n", consumer));
653                 }
654         }
655         else {
656                 rss_irn->live_out = 1;
657                 DBG((rss->dbg, LEVEL_2, "\t\tlive out %+F", consumer));
658                 if (! *got_sink) {
659                         plist_insert_back(rss_irn->consumer_list, _sink);
660                         *got_sink = 1;
661                         DB((rss->dbg, LEVEL_2, ", %+F added instead", _sink));
662                 }
663                 DB((rss->dbg, LEVEL_2, "\n"));
664         }
665 }
666
667 /**
668  * Collect all nodes consuming the value(s) produced by current node.
669  */
670 static void collect_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *irn, int *got_sink) {
671         const ir_edge_t *edge;
672         int             i;
673         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
674         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
675
676         if (cur_node->havecons)
677                 return;
678         cur_node->havecons = 1;
679
680         for (i = 0; i < 2; ++i) {
681                 foreach_out_edge_kind(irn, edge, ekind[i]) {
682                         ir_node *consumer = get_edge_src_irn(edge);
683
684                         if (is_Proj(consumer)) {
685                                 //if (arch_get_irn_reg_class(rss->arch_env, consumer, -1) == rss->cls)
686                                         collect_consumer(rss, rss_irn, consumer, got_sink);
687                         }
688                         else
689                                 collect_single_consumer(rss, rss_irn, consumer, got_sink);
690                 }
691         }
692 }
693
694 /**
695  * Collects all consumer and descendant of a irn.
696  */
697 static void collect_node_info(rss_t *rss, ir_node *irn) {
698         static unsigned cur_desc_walk = 0;
699         rss_irn_t       *rss_irn      = get_rss_irn(rss, irn);
700         int             got_sink;
701
702         assert(! is_Proj(irn) && "Cannot handle Projs.");
703
704         /* check if node info is already available */
705         if (rss_irn->handled)
706                 return;
707                 //phase_reinit_single_irn_data(&rss->ph, irn);
708
709         DBG((rss->dbg, LEVEL_1, "\tcomputing consumers of %+F:\n", irn));
710
711         /* collect all consumer */
712         got_sink = 0;
713         collect_consumer(rss, rss_irn, irn, &got_sink);
714
715         /* build sorted consumer array */
716         rss_irn->consumer = build_sorted_array_from_list(rss_irn->consumer_list, phase_obst(&rss->ph));
717
718         DBG((rss->dbg, LEVEL_1, "\tcompute descendants of %+F:\n", irn));
719
720         /* collect descendants */
721         got_sink = 0;
722         collect_descendants(rss, rss_irn, irn, &got_sink, ++cur_desc_walk);
723
724         /* build sorted descendant array */
725         rss_irn->descendants = build_sorted_array_from_list(rss_irn->descendant_list, phase_obst(&rss->ph));
726
727         rss_irn->handled = 1;
728 }
729
730 /**
731  * Checks if v is a potential killer of u.
732  * v is in pkill(u) iff descendants(v) cut consumer(u) is v
733  *
734  * @param rss   The rss object
735  * @param v      The node to check for killer
736  * @param u      The potentially killed value
737  * @return 1 if v is in pkill(u), 0 otherwise
738  */
739 static int is_potential_killer(rss_t *rss, rss_irn_t *v, rss_irn_t *u) {
740         plist_t *list;
741         ir_node **arr;
742         plist_element_t *el;
743
744         assert(is_Sink(v->irn) || ((plist_count(v->descendant_list) > 0 && v->descendants) || 1));
745         assert(is_Sink(u->irn) || ((plist_count(u->consumer_list)   > 0 && u->consumer)    || 1));
746
747         /* as we loop over the list: loop over the shorter one */
748         if (plist_count(v->descendant_list) > plist_count(u->consumer_list)) {
749                 list = u->consumer_list;
750                 arr  = v->descendants;
751         }
752         else {
753                 list = v->descendant_list;
754                 arr  = u->consumer;
755         }
756
757         /* for each list element: try to find element in array */
758         foreach_plist(list, el) {
759                 ir_node *irn   = plist_element_get_value(el);
760                 ir_node *match = BSEARCH_IRN_ARR(irn, arr);
761
762                 if (match && match != irn)
763                         return 0;
764         }
765
766         return 1;
767 }
768
769 /**
770  * Update descendants, consumer and pkiller list for the given irn.
771  */
772 static void update_node_info(rss_t *rss, ir_node *irn, ir_node *pk_irn) {
773         rss_irn_t *node    = get_rss_irn(rss, irn);
774         rss_irn_t *pkiller = get_rss_irn(rss, pk_irn);
775
776         /* add consumer and rebuild the consumer array */
777         if (! plist_has_value(node->consumer_list, pk_irn)) {
778                 plist_insert_back(node->consumer_list, pk_irn);
779                 node->consumer = build_sorted_array_from_list(node->consumer_list, phase_obst(&rss->ph));
780         }
781
782         /* add pk_irn as descendant, add it's descendants to irn's and rebuild array */
783         if (! plist_has_value(node->descendant_list, pk_irn)) {
784                 plist_element_t *el;
785
786                 plist_insert_back(node->descendant_list, pk_irn);
787
788                 foreach_plist(pkiller->descendant_list, el) {
789                         ir_node *desc = plist_element_get_value(el);
790
791                         if (! plist_has_value(node->descendant_list, desc))
792                                 plist_insert_back(node->descendant_list, desc);
793                 }
794
795                 node->descendants = build_sorted_array_from_list(node->descendant_list, phase_obst(&rss->ph));
796         }
797
798 }
799
800 /**
801  * Compute the potential killing set PK.
802  */
803 static void compute_pkill_set(rss_t *rss) {
804         plist_element_t *u_el, *v_el;
805
806         foreach_plist(rss->nodes, u_el) {
807                 ir_node   *u_irn = plist_element_get_value(u_el);
808                 rss_irn_t *u     = get_rss_irn(rss, u_irn);
809
810                 DBG((rss->dbg, LEVEL_1, "\tcomputing potential killers of %+F:\n", u_irn));
811
812                 /* check each consumer if it is a potential killer  */
813                 foreach_plist(u->consumer_list, v_el) {
814                         ir_node   *v_irn = plist_element_get_value(v_el);
815                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
816
817                         /* check, if v is a potential killer of u */
818                         if (is_potential_killer(rss, v, u)) {
819                                 if (! plist_has_value(u->pkiller_list, v_irn))
820                                         plist_insert_back(u->pkiller_list, v_irn);
821                                 v->kill_count++;
822                                 DBG((rss->dbg, LEVEL_2, "\t\tpotential killer %+F\n", v_irn));
823                         }
824                 }
825
826                 u->killer = _sink;
827         }
828
829         if (rss->opts->dump_flags & RSS_DUMP_PKG)
830                 debug_vcg_dump_pkg(rss, NULL, 0);
831 }
832
833 /**
834  * Build set of killing edges (from values to their potential killers)
835  */
836 static void build_kill_edges(rss_t *rss, pset *epk) {
837         plist_element_t *el, *k_el;
838
839         foreach_plist(rss->nodes, el) {
840                 ir_node    *irn  = plist_element_get_value(el);
841                 rss_irn_t *rirn = get_rss_irn(rss, irn);
842
843                 DBG((rss->dbg, LEVEL_3, "\t\tbuilding kill edges for %+F:\n", irn));
844
845                 foreach_plist(rirn->pkiller_list, k_el) {
846                         ir_node    *pkiller = plist_element_get_value(k_el);
847                         rss_edge_t *ke      = obstack_alloc(phase_obst(&rss->ph), sizeof(*ke));
848
849                         ke->src  = irn;
850                         ke->tgt  = pkiller;
851                         ke->next = NULL;
852
853                         DBG((rss->dbg, LEVEL_3, "\t\t\ttarget %+F\n", pkiller));
854
855                         pset_insert(epk, ke, HASH_RSS_EDGE(ke));
856                 }
857         }
858 }
859
860 #ifdef DEBUG_libfirm
861 /* print the given cbc for debugging purpose */
862 static void debug_print_cbc(firm_dbg_module_t *mod, cbc_t *cbc) {
863         ir_node    *n;
864         rss_edge_t *ke;
865
866         DBG((mod, LEVEL_3, "\t\tS = set of parents:\n"));
867         foreach_nodeset(cbc->parents, n) {
868                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
869         }
870         DBG((mod, LEVEL_3, "\t\tT = set of children:\n"));
871         foreach_nodeset(cbc->children, n) {
872                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
873         }
874         DBG((mod, LEVEL_3, "\t\tE = Edges from producers to consumers\n"));
875         foreach_pset(cbc->kill_edges, ke) {
876                 DBG((mod, LEVEL_3, "\t\t\t%+F -> %+F\n", ke->src, ke->tgt));
877         }
878 }
879 #endif
880
881 /**
882  * Construct the bipartite decomposition.
883  * Sid-Ahmed-Ali Touati, Phd Thesis
884  * Register Pressure in Instruction Level Parallelism, p. 71
885  */
886 static void compute_bipartite_decomposition(rss_t *rss) {
887         pset *epk    = new_pset(cmp_rss_edges, 10);
888         int  cur_num = 0;
889
890         plist_element_t *el;
891
892         DBG((rss->dbg, LEVEL_1, "\tcomputing bipartite decomposition:\n"));
893
894         build_kill_edges(rss, epk);
895
896         foreach_plist(rss->nodes, el) {
897                 ir_node   *u_irn   = plist_element_get_value(el);
898                 rss_irn_t *u       = get_rss_irn(rss, u_irn);
899                 int       p_change = 1;
900                 int       c_change = 1;
901                 int       vrfy_ok  = 1;
902
903                 cbc_t           *cbc;
904                 plist_element_t *el2;
905                 rss_edge_t      *k_edge;
906                 rss_edge_t      *kedge_root = NULL;
907                 ir_node         *t_irn, *s_irn;
908
909                 if (u->visited || u_irn == _sink)
910                         continue;
911
912                 DBG((rss->dbg, LEVEL_2, "\t\t%+F choosen:\n", u_irn));
913
914                 cbc     = obstack_alloc(phase_obst(&rss->ph), sizeof(*cbc));
915                 cbc->nr = cur_num++;
916
917                 /* initialize S_cb */
918                 cbc->parents = new_nodeset(5);
919                 nodeset_insert(cbc->parents, u_irn);
920                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (init)\n", u_irn));
921
922                 /* E_cb = empty */
923                 cbc->kill_edges = new_pset(cmp_rss_edges, 5);
924
925                 /* each parent gets killed by at least one value from children */
926
927                 /* T_cb = PK_successors(u) */
928                 cbc->children = new_nodeset(5);
929                 foreach_plist(u->pkiller_list, el2) {
930                         nodeset_insert(cbc->children, plist_element_get_value(el2));
931                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (init)\n", plist_element_get_value(el2)));
932                 }
933
934                 /*
935                         Now: insert the parents of all children into the parent set
936                         and insert the children of all parents into the children set
937                         until the sets don't change any more
938                 */
939                 while (p_change || c_change) {
940                         p_change = c_change = 0;
941
942                         /* accumulate parents */
943                         foreach_nodeset(cbc->children, t_irn) {
944                                 foreach_pset(epk, k_edge) {
945                                         ir_node *val = k_edge->src;
946
947                                         if (k_edge->tgt == t_irn && ! nodeset_find(cbc->parents, val)) {
948                                                 nodeset_insert(cbc->parents, val);
949                                                 p_change = 1;
950                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (killed by %+F)\n", val, t_irn));
951                                         }
952                                 }
953                         }
954
955                         /* accumulate children */
956                         foreach_nodeset(cbc->parents, s_irn) {
957                                 foreach_pset(epk, k_edge) {
958                                         ir_node *val = k_edge->tgt;
959
960                                         if (k_edge->src == s_irn  && ! nodeset_find(cbc->children, val)) {
961                                                 nodeset_insert(cbc->children, val);
962                                                 c_change = 1;
963                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (kills %+F)\n", val, s_irn));
964                                         }
965                                 }
966                         }
967                 }
968
969                 /* mark all parent values as visited */
970                 foreach_nodeset(cbc->parents, s_irn) {
971                         rss_irn_t *s = get_rss_irn(rss, s_irn);
972                         s->visited = 1;
973                         /* assure bipartite property */
974 #if 0
975                         if (nodeset_find(cbc->children, s_irn)) {
976                                 nodeset_remove(cbc->children, s_irn);
977                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F removed from to children\n", s_irn));
978                         }
979 #endif
980                 }
981
982                 /* update edges */
983                 foreach_pset(epk, k_edge) {
984                         if (nodeset_find(cbc->parents, k_edge->src) && nodeset_find(cbc->children, k_edge->tgt)) {
985                                 pset_insert(cbc->kill_edges, k_edge, HASH_RSS_EDGE(k_edge));
986                                 /*
987                                         Link all k_edges which are about to be removed together.
988                                         Beware: pset_remove kills the iterator.
989                                 */
990                                 k_edge->next = kedge_root;
991                                 kedge_root   = k_edge;
992                         }
993                 }
994
995                 /* remove all linked k_edges */
996                 for (; kedge_root; kedge_root = kedge_root->next) {
997                         pset_remove(epk, kedge_root, HASH_RSS_EDGE(kedge_root));
998                 }
999
1000                 /* verify the cbc */
1001                 foreach_nodeset(cbc->parents, s_irn) {
1002                         int is_killed = 0;
1003
1004                         foreach_pset(cbc->kill_edges, k_edge) {
1005                                 if (k_edge->src == s_irn) {
1006                                         is_killed = 1;
1007                                         pset_break(cbc->kill_edges);
1008                                         break;
1009                                 }
1010                         }
1011
1012                         if (! is_killed) {
1013                                 vrfy_ok = 0;
1014                                 ir_fprintf(stderr, "Warning: parent %+F is not killed in current cbc\n", s_irn);
1015                         }
1016                 }
1017                 assert(vrfy_ok && "Verification of CBC failed");
1018
1019                 /* add the connected bipartite component */
1020                 if (nodeset_count(cbc->parents) > 0 && nodeset_count(cbc->children) > 0 && pset_count(cbc->kill_edges) > 0)
1021                         pset_insert(rss->cbc_set, cbc, (unsigned)cbc->nr);
1022                 DBG((rss->dbg, LEVEL_2, "\tbipartite component %d inserted:\n", cbc->nr));
1023                 DEBUG_ONLY(
1024                         if (firm_dbg_get_mask(rss->dbg) & LEVEL_2)
1025                                 debug_print_cbc(rss->dbg, cbc);
1026                 );
1027         }
1028
1029         if (rss->opts->dump_flags & RSS_DUMP_CBC)
1030                 debug_vcg_dump_bipartite(rss);
1031
1032         del_pset(epk);
1033 }
1034
1035 /**
1036  * Select the child with the maximum cost.
1037  */
1038 static child_t *select_child_max_cost(rss_t *rss, nodeset *x, nodeset *y, child_t *t, cbc_t *cbc) {
1039         ir_node *child;
1040         float   max_cost = -1.0f;
1041
1042         DBG((rss->dbg, LEVEL_2, "\t\tcomputing children costs:\n"));
1043
1044         foreach_nodeset(cbc->children, child) {
1045                 rss_irn_t  *r_child             = get_rss_irn(rss, child);
1046                 int         num_unkilled_parents = 0;
1047                 int         num_descendants;
1048                 rss_edge_t *k_edge;
1049                 float       cost;
1050
1051                 /* get the number of unkilled parents */
1052                 foreach_pset(cbc->kill_edges, k_edge) {
1053                         if (k_edge->tgt == child && nodeset_find(x, k_edge->src))
1054                                 ++num_unkilled_parents;
1055                 }
1056
1057                 cost = (float)num_unkilled_parents;
1058
1059                 num_descendants = plist_count(r_child->descendant_list) + nodeset_count(y);
1060
1061                 if (num_descendants > 0)
1062                         cost /= (float)num_descendants;
1063
1064                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F, #desc %d, cost %.3f\n", child, num_descendants, cost));
1065
1066                 if (cost > max_cost) {
1067                         t->irn   = child;
1068                         t->cost  = cost;
1069                         max_cost = cost;
1070                 }
1071         }
1072
1073         return t;
1074 }
1075
1076 /**
1077  * Remove all parents from x which are killed by t_irn.
1078  */
1079 static void remove_covered_parents(rss_t *rss, nodeset *x, ir_node *t_irn, cbc_t *cbc) {
1080         rss_irn_t  *t = get_rss_irn(rss, t_irn);
1081         rss_edge_t *k_edge;
1082
1083         DBG((rss->dbg, LEVEL_2, "\t\tremoving parents covered by %+F:\n", t_irn));
1084
1085         foreach_pset(cbc->kill_edges, k_edge) {
1086                 if (k_edge->tgt == t_irn && nodeset_find(x, k_edge->src)) {
1087                         nodeset_remove(x, k_edge->src);
1088                         plist_insert_back(t->parent_list, k_edge->src);
1089                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", k_edge->src));
1090                 }
1091         }
1092 }
1093
1094 static void update_cumulated_descendent_values(rss_t *rss, nodeset *y, ir_node *t_irn) {
1095         rss_irn_t *t = get_rss_irn(rss, t_irn);
1096         plist_element_t *el;
1097
1098         DBG((rss->dbg, LEVEL_2, "\t\tupdating cumulated descendant value of %+F:\n", t_irn));
1099
1100         foreach_plist(t->descendant_list, el) {
1101                 nodeset_insert(y, plist_element_get_value(el));
1102                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", plist_element_get_value(el)));
1103         }
1104 }
1105
1106 /**
1107  * Greedy-k: a heuristics for the MMA problem
1108  */
1109 static void compute_killing_function(rss_t *rss) {
1110         cbc_t *cbc;
1111         struct obstack obst;
1112
1113         obstack_init(&obst);
1114
1115         rss->cbc_set = pset_new_ptr(5);
1116         compute_bipartite_decomposition(rss);
1117
1118         /* for all bipartite components do: */
1119         foreach_pset(rss->cbc_set, cbc) {
1120                 ir_node *p;
1121                 nodeset *x       = new_nodeset(10);
1122                 nodeset *y       = new_nodeset(10);
1123                 child_t **sks    = NEW_ARR_F(child_t *, 20);
1124                 int     cur_len  = 0;
1125                 int     cur_size = 20;
1126                 int     i;
1127
1128                 DBG((rss->dbg, LEVEL_1, "\tcomputing SKS for cbc %d:\n", cbc->nr));
1129                 DBG((rss->dbg, LEVEL_2, "\t\tinitializing parents X:\n"));
1130
1131                 /* X = S_cb (all parents are initially uncovered) */
1132                 foreach_nodeset(cbc->parents, p) {
1133                         nodeset_insert(x, p);
1134                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", p));
1135                 }
1136
1137                 /* while X not empty */
1138                 while (nodeset_count(x) > 0) {
1139                         child_t *t = obstack_alloc(&obst, sizeof(*t));
1140                         memset(t, 0, sizeof(*t));
1141
1142                         t = select_child_max_cost(rss, x, y, t, cbc);
1143
1144                         if (cur_len >= cur_size) {
1145                                 ARR_EXTO(child_t *, sks, cur_size * 2);
1146                                 cur_size *= 2;
1147                         }
1148
1149                         DBG((rss->dbg, LEVEL_2, "\t\tinsert child %+F (%.3f) into SKS at pos %d\n", t->irn, t->cost, cur_len));
1150
1151                         sks[cur_len++] = t;
1152                         remove_covered_parents(rss, x, t->irn, cbc);
1153                         update_cumulated_descendent_values(rss, y, t->irn);
1154                 }
1155
1156                 ARR_SHRINKLEN(sks, cur_len);
1157
1158                 /* sort SKS in increasing cost order */
1159                 qsort(sks, cur_len, sizeof(sks[0]), cmp_child_costs);
1160
1161                 DBG((rss->dbg, LEVEL_2, "\tprocessing SKS for cbc %d:\n", cbc->nr));
1162
1163                 /* build killing function */
1164                 for (i = cur_len - 1; i >= 0; --i) { /* loop over sks in decreasing cost order */
1165                         child_t         *t = sks[i];
1166                         rss_irn_t       *rt = get_rss_irn(rss, t->irn);
1167                         plist_element_t *p_el;
1168
1169                         DBG((rss->dbg, LEVEL_3, "\t\t\tkiller %+F (%.3f):\n", t->irn, t->cost));
1170
1171                         /* kill all unkilled parents of t */
1172                         foreach_plist(rt->parent_list, p_el) {
1173                                 ir_node    *par = plist_element_get_value(p_el);
1174                                 rss_irn_t *rpar = get_rss_irn(rss, par);
1175
1176                                 if (is_Sink(rpar->killer)) {
1177                                         rpar->killer = t->irn;
1178                                         DBG((rss->dbg, LEVEL_2, "\t\tkill %+F\n", rpar->irn));
1179                                 }
1180                                 else {
1181                                         DBG((rss->dbg, LEVEL_3, "\t\t\tkeeping %+F as killer for %+F\n", rpar->killer, rpar->irn));
1182                                 }
1183                         }
1184                 }
1185
1186                 del_nodeset(x);
1187                 del_nodeset(y);
1188                 DEL_ARR_F(sks);
1189         }
1190
1191         if (rss->opts->dump_flags & RSS_DUMP_KILL)
1192                 debug_vcg_dump_kill(rss);
1193
1194         del_pset(rss->cbc_set);
1195         obstack_free(&obst, NULL);
1196 }
1197
1198 /**
1199  * Adds the edge src -> tgt to the dvg. Checks if reverse edge is already there (asserts).
1200  */
1201 static INLINE void add_dvg_edge(rss_t *rss, dvg_t *dvg, ir_node *src, ir_node *tgt, int have_source) {
1202         rss_edge_t *dvg_edge;
1203         rss_edge_t key;
1204
1205         if (! have_source)
1206                 nodeset_insert(dvg->nodes, src);
1207         else
1208                 assert(nodeset_find(dvg->nodes, src) != NULL && "Wrong assumption");
1209
1210         nodeset_insert(dvg->nodes, tgt);
1211
1212         key.src = tgt;
1213         key.tgt = src;
1214         assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1215
1216         key.src = src;
1217         key.tgt = tgt;
1218         if (NULL != pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1219                 /* add the edge to the DVG */
1220                 dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1221
1222                 dvg_edge->src  = src;
1223                 dvg_edge->tgt  = tgt;
1224                 dvg_edge->next = NULL;
1225
1226                 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", src, tgt));
1227                 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1228         }
1229 }
1230
1231 /**
1232  * Computes the disjoint value DAG (DVG).
1233  * BEWARE: It is not made explicitly clear in the Touati paper,
1234  *         but the DVG is meant to be build from the KILLING DAG
1235  */
1236 static void compute_dvg(rss_t *rss, dvg_t *dvg) {
1237         plist_element_t *el;
1238
1239         DBG((rss->dbg, LEVEL_1, "\tcomputing DVG:\n"));
1240
1241         foreach_plist(rss->nodes, el) {
1242                 ir_node    *u_irn    = plist_element_get_value(el);
1243                 rss_irn_t  *u        = get_rss_irn(rss, u_irn);
1244                 rss_irn_t  *u_killer = get_rss_irn(rss, u->killer);
1245                 int        i;
1246
1247                 /* TODO: omit nodes only having sink as pkiller and killing no one */
1248
1249                 /* add an edge to killer */
1250                 add_dvg_edge(rss, dvg, u_irn, u->killer, 0);
1251
1252                 if (is_Sink(u->killer))
1253                         continue;
1254
1255                 /* We add an edge to every killer, from where we could be reached. */
1256                 for (i = ARR_LEN_SAFE(u_killer->descendants) - 1; i >= 0; --i) {
1257                         add_dvg_edge(rss, dvg, u_irn, u_killer->descendants[i], 1);
1258                 }
1259
1260 #if 0
1261
1262                 foreach_plist(rss->nodes, el2) {
1263                         ir_node *v_irn = plist_element_get_value(el2);
1264
1265                         /*
1266                                 There is an edge (u, v) in the DVG iff v is a descendant of the killer(u).
1267                         */
1268                         if (BSEARCH_IRN_ARR(v_irn, u_kill->descendants)) {
1269                                 rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1270                                 rss_edge_t key;
1271
1272                                 /* insert the user into the DVG and append it to the user list of u */
1273                                 nodeset_insert(dvg->nodes, v_irn);
1274                                 if (! plist_has_value(u->dvg_user_list, v_irn))
1275                                         plist_insert_back(u->dvg_user_list, v_irn);
1276
1277                                 dvg_edge->src  = u_irn;
1278                                 dvg_edge->tgt  = v_irn;
1279                                 dvg_edge->next = NULL;
1280
1281                                 key.src = v_irn;
1282                                 key.tgt = u_irn;
1283
1284                                 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1285
1286                                 /* add the edge to the DVG */
1287                                 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", u_irn, v_irn));
1288                                 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1289                         }
1290                 }
1291 #endif /* if 0 */
1292         }
1293
1294         if (rss->opts->dump_flags & RSS_DUMP_DVG)
1295                 debug_vcg_dump_dvg(rss, dvg);
1296 }
1297
1298 /**
1299  * Updates the dvg structure when a serialization edge from src -> tgt is added.
1300  */
1301 static void update_dvg(rss_t *rss, dvg_t *dvg, rss_irn_t *src, rss_irn_t *tgt) {
1302         int i, j, idx;
1303         rss_edge_t *edge;
1304         rss_edge_t **arr = alloca(pset_count(dvg->edges) * sizeof(arr[0]));
1305
1306         /*
1307                 Add an edge from serialization target to serialization src:
1308                 src cannot be alive with target
1309         */
1310         add_dvg_edge(rss, dvg, tgt->irn, src->irn, 0);
1311
1312         /* Add edges to src's descendants as well, they also getting serialized. */
1313         for (i = ARR_LEN_SAFE(src->descendants) - 1; i >= 0; --i) {
1314                 add_dvg_edge(rss, dvg, tgt->irn, src->descendants[i], 1);
1315         }
1316
1317         /* We also have to add edges from targets predecessors in dvg */
1318         idx = 0;
1319         /* We cannot insert elements into set over which we iterate ... */
1320         foreach_pset(dvg->edges, edge) {
1321                 if (edge->tgt == tgt->irn) {
1322                         arr[idx++] = edge;
1323                 }
1324         }
1325
1326         for (i = 0; i < idx; ++i) {
1327                 edge = arr[i];
1328                 add_dvg_edge(rss, dvg, edge->src, src->irn, 1);
1329                 for (j = ARR_LEN_SAFE(src->descendants) - 1; j >= 0; --j) {
1330                         add_dvg_edge(rss, dvg, edge->src, src->descendants[j], 1);
1331                 }
1332         }
1333 }
1334
1335 #if 0
1336 /**
1337  * Accumulate all descendants for root into list.
1338  */
1339 static void accumulate_dvg_descendant_values(rss_t *rss, rss_irn_t *root, plist_t *list) {
1340         if (plist_count(root->dvg_user_list) > 0) {
1341                 plist_element_t *el;
1342
1343                 foreach_plist(root->dvg_user_list, el) {
1344                         ir_node   *v_irn = plist_element_get_value(el);
1345                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
1346
1347                         /* add v as descendant */
1348                         if (! plist_has_value(list, v_irn)) {
1349                                 plist_insert_back(list, v_irn);
1350                                 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG descendant %+F\n", v_irn));
1351                         }
1352
1353                         /* accumulate v's descendants */
1354                         accumulate_dvg_descendant_values(rss, v, list);
1355                 }
1356         }
1357 }
1358
1359 /**
1360  * Builds the list of potential killers for each node
1361  * in the given DVG.
1362  * Needs the descendant list for all user as sorted array.
1363  */
1364 static void build_dvg_pkiller_list(rss_t *rss, dvg_t *dvg) {
1365         ir_node *irn;
1366
1367         foreach_nodeset(dvg->nodes, irn) {
1368                 rss_irn_t       *node = get_rss_irn(rss, irn);
1369                 plist_element_t *el, *el2;
1370
1371                 DBG((rss->dbg, DEBUG_DVG, "\t\tbuilding pkiller list for %+F\n", irn));
1372
1373                 /* check each user */
1374                 foreach_plist(node->dvg_user_list, el) {
1375                         ir_node *u_irn = plist_element_get_value(el);
1376
1377                         /* is the current user u_irn not a descendant of any other user -> pkiller */
1378                         foreach_plist(node->dvg_user_list, el2) {
1379                                 ir_node   *v_irn = plist_element_get_value(el2);
1380                                 rss_irn_t *v     = get_rss_irn(rss, v_irn);
1381
1382                                 if (el != el2                             &&
1383                                         ! BSEARCH_IRN_ARR(u_irn, v->dvg_desc) &&
1384                                         ! plist_has_value(node->dvg_pkiller_list, u_irn))
1385                                 {
1386                                         plist_insert_back(node->dvg_pkiller_list, u_irn);
1387                                         DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG pkiller %+F\n", u_irn));
1388                                 }
1389                         }
1390                 }
1391
1392                 node->dvg_pkiller = build_sorted_array_from_list(node->dvg_pkiller_list, phase_obst(&rss->ph));
1393         }
1394
1395         DEBUG_ONLY(
1396                 if (firm_dbg_get_mask(rss->dbg) & DEBUG_DVG)
1397                         debug_vcg_dump_dvg_pkiller(rss, dvg);
1398         );
1399 }
1400
1401 #endif /* if 0 */
1402
1403 /**
1404  * Compute the maximal antichain of the current DVG.
1405  * This is a reimplementation of the MAXIMAL_ANTI_CHAIN function
1406  * from the DDG library 1.1 (DAG.cpp).
1407  */
1408 static nodeset *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration) {
1409         int         n               = nodeset_count(dvg->nodes);
1410         int         *assignment     = alloca(n * sizeof(assignment[0]));
1411         int         *assignment_rev = alloca(n * sizeof(assignment_rev[0]));
1412         int         *idx_map        = alloca(n * sizeof(idx_map[0]));
1413         hungarian_problem_t *bp;
1414         nodeset     *values, *temp;
1415         ir_node     *u_irn;
1416         int         i, j, cost, cur_chain, res;
1417         rss_edge_t  *dvg_edge;
1418
1419 #define MAP_IDX(irn) bsearch_for_index(get_irn_idx(irn), idx_map,  n,  1)
1420
1421         if (pset_count(dvg->edges) == 0)
1422                 return NULL;
1423
1424         bp = hungarian_new(n, n, 1, HUNGARIAN_MATCH_NORMAL);
1425
1426         /*
1427                 At first, we build an index map for the nodes in the DVG,
1428                 because we cannot use the irn idx therefore as the resulting
1429                 bipartite data structure would have around 1.2GB.
1430                 So we limit the size to the number of nodes we have in the DVG
1431                 and build a sorted index map for their irn indices.
1432         */
1433         i = 0;
1434         foreach_nodeset(dvg->nodes, u_irn) {
1435                 idx_map[i++] = get_irn_idx(u_irn);
1436         }
1437         qsort(idx_map, n, sizeof(idx_map[0]), cmp_int);
1438
1439         foreach_pset(dvg->edges, dvg_edge) {
1440                 int idx_u = MAP_IDX(dvg_edge->src);
1441                 int idx_v = MAP_IDX(dvg_edge->tgt);
1442
1443                 /* add the entry to the bipartite data structure */
1444                 hungarian_add(bp, idx_u, idx_v, 1);
1445                 DBG((rss->dbg, LEVEL_3, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1446                         idx_u, dvg_edge->src, idx_v, dvg_edge->tgt));
1447         }
1448 #if 0
1449         /*
1450                 Add a bipartite entry for each pair of nodes (u, v), where exists a
1451                 path in the DVG from u to v, ie. connect all descendants(v) to v.
1452                 desc_dvg(v) = accumulated descendants of all z in dvg_user_list(v)
1453         */
1454         foreach_nodeset(dvg->nodes, u_irn) {
1455                 rss_irn_t *u        = get_rss_irn(rss, u_irn);
1456                 int       idx_u_irn = MAP_IDX(u_irn);
1457
1458                 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG descendants of %+F:\n", u_irn));
1459
1460                 //plist_clear(u->dvg_desc_list);
1461                 //accumulate_dvg_descendant_values(rss, u, u->dvg_desc_list);
1462
1463                 /*
1464                         FIXME: The array is build on the phase obstack and we cannot free the data.
1465                         So the array get as many times allocated as this function is called.
1466                 */
1467
1468                 /* build the sorted array for faster searches */
1469                 //u->dvg_desc = build_sorted_array_from_list(u->dvg_desc_list, phase_obst(&rss->ph));
1470
1471                 DBG((rss->dbg, DEBUG_MAX_AC, "\t\tadding bipartite entries of %+F:\n", u_irn));
1472
1473                 /* add the bipartite entries for all descendants */
1474                 for (i = ARR_LEN_SAFE(u->dvg_desc) - 1; i >= 0; --i) {
1475                         rss_irn_t *desc    = get_rss_irn(rss, u->dvg_desc[i]);
1476                         int       idx_desc = MAP_IDX(u->dvg_desc[i]);
1477
1478                         /* add the entry to the bipartite data structure */
1479                         hungarian_add(bp, idx_u_irn, idx_desc, 1);
1480                         DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1481                                 idx_u_irn, u_irn, idx_desc, u->dvg_desc[i]));
1482
1483                         need_matching = 1;
1484                 }
1485         }
1486 #endif
1487
1488         /* We want maximum cardinality matching */
1489         hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);
1490
1491 #if 0
1492         DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG pkiller:\n"));
1493         /* beware: the following function needs the dvg_desc array */
1494         build_dvg_pkiller_list(rss, dvg);
1495 #endif
1496
1497         DBG((rss->dbg, LEVEL_2, "\t\tcomputing bipartite matching\n"));
1498         /*
1499                 The maximum cardinality bipartite matching gives us the minimal
1500                 chain partition, which corresponds to the maximum anti chains.
1501         */
1502         res = hungarian_solve(bp, assignment, &cost, 1);
1503         assert(res == 0 && "Bipartite matching failed!");
1504
1505         hungarian_free(bp);
1506         memset(assignment_rev, -1, n * sizeof(assignment_rev[0]));
1507
1508         /* build the reverse assignment, ie. foreach i -> j, add j -> i */
1509         for (i = 0; i < n; ++i) {
1510                 if (assignment[i] >= 0) {
1511                         int j = assignment[i];
1512                         assignment_rev[j] = i;
1513                 }
1514         }
1515
1516         DBG((rss->dbg, LEVEL_2, "\t\tgot assignment with cost %d\n", cost));
1517         DBG((rss->dbg, LEVEL_3, "\t\t\tassignment   ---   reverse assignment\n", cost));
1518         for (i = 0; i < n; ++i) {
1519                 DBG((rss->dbg, LEVEL_3, "\t\t\t%3d -> %3d         %3d -> %3d\n", i, assignment[i], i, assignment_rev[i]));
1520         }
1521
1522         values    = new_nodeset(10);
1523         cur_chain = 0;
1524         /* Construction of the minimal chain partition */
1525         for (j = 0; j < n; ++j) {
1526                 /* check nodes, which did not occur as target */
1527                 if (assignment_rev[j] == -1) {
1528                         int       xj      = idx_map[j];
1529                         ir_node   *xj_irn = get_idx_irn(rss->irg, xj);
1530                         rss_irn_t *xj_rss = get_rss_irn(rss, xj_irn);
1531                         chain_t   *c      = obstack_alloc(phase_obst(&rss->ph), sizeof(*c));
1532                         int       source;
1533
1534                         /* there was no source for j -> we have a source of a new chain */
1535                         nodeset_insert(values, xj_irn);
1536
1537                         c->elements = plist_obstack_new(phase_obst(&rss->ph));
1538                         c->nr       = cur_chain++;
1539                         plist_insert_back(c->elements, xj_irn);
1540
1541                         xj_rss->chain = c;
1542
1543                         DBG((rss->dbg, LEVEL_2, "\t\tstarting chain %d:\n", c->nr));
1544                         DBG((rss->dbg, LEVEL_2, "\t\t\t%+F (%d)", xj_irn, j));
1545
1546                         /* follow chain, having j as source */
1547                         source = j;
1548                         while (assignment[source] >= 0) {
1549                                 int       target  = assignment[source];
1550                                 int       irn_idx = idx_map[target];
1551                                 ir_node   *irn    = get_idx_irn(rss->irg, irn_idx);
1552                                 rss_irn_t *node   = get_rss_irn(rss, irn);
1553
1554                                 plist_insert_back(c->elements, irn);
1555                                 node->chain = c;
1556
1557                                 DB((rss->dbg, LEVEL_2, " -> %+F (%d)", irn, target));
1558
1559                                 /* new source = last target */
1560                                 source = target;
1561                         }
1562
1563                         DB((rss->dbg, LEVEL_2, "\n"));
1564                 }
1565         }
1566
1567         /*
1568                 Computing the maximal antichain: Select an element from each
1569                 chain such, such it is parallel with the others.
1570         */
1571         DBG((rss->dbg, LEVEL_1, "\t\tcomputing set of saturation values (MAX AC)\n"));
1572         DBG((rss->dbg, LEVEL_3, "\t\tstarting with:\n"));
1573
1574         DEBUG_ONLY(
1575                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_3)
1576                         dump_nodeset(values, "\t\t\t");
1577         )
1578
1579         temp = NULL;
1580         do {
1581                 /*
1582                         We need an explicit array for the values as
1583                         we cannot iterate multiple times over the same
1584                         set at the same time. :-(((((
1585                 */
1586                 int     n         = nodeset_count(values);
1587                 int     i         = 0;
1588                 ir_node **val_arr = NEW_ARR_F(ir_node *, n);
1589
1590                 foreach_nodeset(values, u_irn)
1591                         val_arr[i++] = u_irn;
1592
1593                 if (temp)
1594                         del_nodeset(temp);
1595
1596                 temp = new_nodeset(10);
1597
1598                 /* Select all nodes from current value set, having another node in the set as descendant. */
1599                 for (i = 0; i < n; ++i) {
1600                         rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1601
1602                         for (j = 0; j < n; ++j) {
1603                                 if (i != j) {
1604                                         rss_edge_t key;
1605
1606                                         key.src = val_arr[i];
1607                                         key.tgt = val_arr[j];
1608
1609                                         if (pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1610                                                 /* v[j] is descendant of u -> remove u and break */
1611                                                 nodeset_insert(temp, u->irn);
1612                                                 nodeset_remove(values, u->irn);
1613
1614                                                 DBG((rss->dbg, LEVEL_3, "\t\t\tremoving %+F from values, adding it to temp\n", u->irn));
1615
1616                                                 break;
1617                                         }
1618                                 }
1619                         }
1620                 }
1621
1622                 /* Try to insert the chain predecessor of all selected u's */
1623                 foreach_nodeset(temp, u_irn) {
1624                         rss_irn_t       *u  = get_rss_irn(rss, u_irn);
1625                         chain_t         *c  = u->chain;
1626                         plist_element_t *el = plist_find_value(c->elements, u_irn);
1627
1628                         assert(el && "Missing element in chain!");
1629
1630                         /* If u has predecessor in chain: insert the predecessor */
1631                         if (el == plist_element_get_prev(el)) {
1632                                 nodeset_insert(values, plist_element_get_value(el));
1633                                 DBG((rss->dbg, LEVEL_3, "\t\t\tadding %+F to values\n", plist_element_get_value(el)));
1634                         }
1635                 }
1636
1637
1638                 DEL_ARR_F(val_arr);
1639         } while (nodeset_count(temp) > 0);
1640
1641         DBG((rss->dbg, LEVEL_2, "\t\tfinal set:\n"));
1642         DEBUG_ONLY(
1643                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2) {
1644                         dump_nodeset(values, "\t\t\t");
1645                 }
1646         );
1647
1648         if (rss->opts->dump_flags & RSS_DUMP_MAXAC)
1649                 debug_vcg_dump_pkg(rss, values, iteration);
1650
1651         del_nodeset(temp);
1652
1653         return values;
1654
1655 #undef MAP_IDX
1656 }
1657
1658 /**
1659  * Computes the best serialization between two nodes of sat_vals.
1660  */
1661 static serialization_t *compute_best_admissible_serialization(rss_t *rss, nodeset *sat_vals, serialization_t *ser, int num_regs) {
1662         int        n                    = nodeset_count(sat_vals);
1663         int        n_idx                = ARR_LEN_SAFE(rss->idx_map);
1664         int        i                    = 0;
1665         ir_node    **val_arr            = alloca(n * sizeof(val_arr[0]));
1666         bitset_t   *bs_sv               = bitset_alloca(n_idx);
1667         bitset_t   *bs_vdesc            = bitset_alloca(n_idx);
1668         bitset_t   *bs_tmp              = bitset_alloca(n_idx);
1669         bitset_t   *bs_ukilldesc        = bitset_alloca(n_idx);
1670         int        best_benefit         = INT_MAX;
1671         int        best_omega2          = INT_MAX;
1672         int        best_benefit_omega20 = INT_MAX;
1673         int        has_omega1           = 0;
1674         int        j, k;
1675         ir_node    *irn;
1676         rss_edge_t min_benefit_edge;
1677         rss_edge_t min_omega20_edge;
1678         rss_irn_t  *ser_u_omega1 = NULL, *ser_v_omega1 = NULL;
1679         rss_irn_t  *ser_u_omega20 = NULL, *ser_v_omega20 = NULL;
1680
1681         DBG((rss->dbg, LEVEL_1, "\tcomputing admissible serializations:\n"));
1682
1683         /*
1684                 We need an explicit array for the values as
1685                 we cannot iterate multiple times over the same
1686                 set at the same time. :-(((((
1687         */
1688
1689         foreach_nodeset(sat_vals, irn) {
1690                 val_arr[i++] = irn;
1691                 bitset_set(bs_sv, BLOCK_IDX_MAP(rss, irn));
1692         }
1693
1694         /*
1695                 We build all admissible serializations and remember the best found so far.
1696                 For u in sat_vals:
1697                  For v in sat_val:
1698                    if v in pkiller(u): add edge from v to all other pkiller(u)
1699                    else: for all vv in pkiller(u): add edge from v to vv if there exists no path from vv to v
1700         */
1701
1702 /*
1703         A node is unserializable if:
1704         - it has only one killer and this one is Sink
1705     - it kills no other values
1706         In this case there is no serialization which could
1707         reduce the registerpressure
1708 */
1709 #define IS_UNSERIALIZABLE_NODE(rss_node)                  \
1710         (                                                     \
1711                 (                                                 \
1712                         (plist_count(rss_node->pkiller_list) == 1) && \
1713                         is_Sink(rss_node->killer)                  && \
1714                         (rss_node->kill_count                == 0)    \
1715                 )                            ||                   \
1716                 be_is_Barrier(rss_node->irn) ||                   \
1717                 be_is_Keep(rss_node->irn)                         \
1718         )
1719
1720         /* for all u in sat_vals */
1721         for (i = 0; i < n; ++i) {
1722                 rss_irn_t       *u = get_rss_irn(rss, val_arr[i]);
1723                 plist_element_t *el;
1724
1725                 /* ignore nodes where serialization does not help */
1726                 if (IS_UNSERIALIZABLE_NODE(u)) {
1727                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", u->irn));
1728                         continue;
1729                 }
1730
1731                 /* accumulate all descendants of all pkiller(u) */
1732                 bitset_clear_all(bs_ukilldesc);
1733                 foreach_plist(u->pkiller_list, el) {
1734                         ir_node   *irn  = plist_element_get_value(el);
1735                         rss_irn_t *node = get_rss_irn(rss, irn);
1736
1737                         if (! is_Sink(irn))
1738                                 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, irn));
1739                         else
1740                                 continue;
1741
1742                         for (k = ARR_LEN_SAFE(node->descendants) - 1; k >= 0; --k) {
1743                                 if (! is_Sink(node->descendants[k]))
1744                                         bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, node->descendants[k]));
1745                         }
1746                 }
1747
1748                 /* for all v in sat_vals */
1749                 for (j = 0; j < n; ++j) {
1750                         ir_node   *v_irn   = val_arr[j];
1751                         rss_irn_t *v       = get_rss_irn(rss, v_irn);
1752                         unsigned  v_height = get_irn_height(rss->h, v_irn);
1753                         int       omega1, omega2, is_pkiller;
1754
1755                         /* v cannot be serialized with itself
1756                          * ignore nodes where serialization does not help */
1757                         if (i == j || IS_UNSERIALIZABLE_NODE(v)) {
1758                                 if (i != j)
1759                                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", v->irn));
1760                                 continue;
1761                         }
1762
1763                         /* get descendants of v */
1764                         bitset_clear_all(bs_vdesc);
1765                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v_irn));
1766                         for (k = ARR_LEN_SAFE(v->descendants) - 1; k >= 0; --k) {
1767                                 if (! is_Sink(v->descendants[k]))
1768                                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v->descendants[k]));
1769                         }
1770
1771                         /* if v is in pkiller(u) */
1772                         is_pkiller = plist_has_value(u->pkiller_list, v_irn);
1773
1774                         /* for all vv in pkiller(u) */
1775                         foreach_plist(u->pkiller_list, el) {
1776                                 ir_node *vv_irn  = plist_element_get_value(el);
1777                                 int     add_edge;
1778
1779                                 if (is_Sink(vv_irn) || is_cfop(vv_irn))
1780                                         continue;
1781
1782                                 if (is_pkiller)
1783                                         add_edge = vv_irn != v_irn && skip_Proj(vv_irn) != skip_Proj(v_irn);
1784                                 else
1785                                         add_edge = ! heights_reachable_in_block(rss->h, skip_Proj(vv_irn), skip_Proj(v_irn));
1786
1787                                 /*
1788                                         As we add an edge from vv -> v, we have to make sure,
1789                                         that there exists no path from v to vv.
1790                                 */
1791
1792                                 if (add_edge) {
1793                                         int vv_height = get_irn_height(rss->h, vv_irn);
1794                                         int mu1, mu2, critical_path_cost;
1795
1796                                         /*
1797                                                 mu1 = | descendants(v) cut sat_vals |
1798                                                 the number of saturating values which cannot
1799                                                 be simultaneously alive with u
1800                                         */
1801                                         bitset_copy(bs_tmp, bs_vdesc);
1802                                         mu1 = bitset_popcnt(bitset_and(bs_tmp, bs_sv));
1803
1804                                         /*
1805                                                 mu2 = | accum_desc_all_pkiller(u) without descendants(v) |
1806                                         */
1807                                         if (is_pkiller) {
1808                                                 bitset_copy(bs_tmp, bs_ukilldesc);
1809                                                 mu2 = bitset_popcnt(bitset_andnot(bs_tmp, bs_vdesc));
1810                                         }
1811                                         else {
1812                                                 mu2 = 0;
1813                                         }
1814
1815                                         /* omega1 = mu1 - mu2 */
1816                                         omega1 = mu1 - mu2;
1817
1818                                         if (omega1 != 0)
1819                                                 has_omega1 = 1;
1820
1821                                         /* omega2 = increase of critical path */
1822                                         critical_path_cost =
1823                                                 v_height                        /* longest path from v to sink */
1824                                                 + rss->max_height - vv_height   /* longest path from source to vv */
1825                                                 + 1;                            /* edge */
1826
1827                                         /*
1828                                                 If critical_path_cost > max_height -> the new edge
1829                                                 would increase the longest critical path by the difference.
1830                                         */
1831                                         omega2 = critical_path_cost > rss->max_height ? critical_path_cost - rss->max_height : 0;
1832
1833                                         /* this keeps track of the edge with the best benefit */
1834                                         if (omega1 >= num_regs - n && omega1 < best_benefit) {
1835                                                 min_benefit_edge.src = v_irn;
1836                                                 min_benefit_edge.tgt = vv_irn;
1837
1838                                                 ser_u_omega1 = u;
1839                                                 ser_v_omega1 = v;
1840
1841                                                 best_benefit    = omega1;
1842                                                 ser->new_killer = is_pkiller;
1843                                         }
1844
1845                                         /* this keeps track of the edge with the best omega1 costs where omega2 == 0 */
1846                                         if (omega2 == 0 && omega1 >= num_regs - n && omega1 < best_benefit_omega20) {
1847                                                 min_omega20_edge.src = v_irn;
1848                                                 min_omega20_edge.tgt = vv_irn;
1849
1850                                                 ser_u_omega20 = u;
1851                                                 ser_v_omega20 = v;
1852
1853                                                 best_benefit_omega20 = omega1;
1854                                                 ser->new_killer      = is_pkiller;
1855                                         }
1856
1857                                         best_omega2 = MIN(best_omega2, omega2);
1858
1859                                         DBG((rss->dbg, LEVEL_2, "\t\tfound %+F -> %+F (w1 %d, w2 %d)\n",
1860                                                 v_irn, vv_irn, omega1, omega2));
1861                                 } /* if add_edge */
1862                         } /* for all vv in pkiller(u) */
1863                 } /* for all v in sat_vals */
1864         } /* for all u in sat_vals */
1865
1866         if (! has_omega1)
1867                 return NULL;
1868
1869         if (best_omega2 == 0) {
1870                 ser->u         = ser_u_omega20;
1871                 ser->v         = ser_v_omega20;
1872                 ser->edge->src = min_omega20_edge.src;
1873                 ser->edge->tgt = min_omega20_edge.tgt;
1874                 ser->omega1    = best_benefit_omega20;
1875                 ser->omega2    = best_omega2;
1876         }
1877         else {
1878                 ser->u         = ser_u_omega1;
1879                 ser->v         = ser_v_omega1;
1880                 ser->edge->src = min_benefit_edge.src;
1881                 ser->edge->tgt = min_benefit_edge.tgt;
1882                 ser->omega1    = best_benefit;
1883                 ser->omega2    = best_omega2;
1884         }
1885
1886         return ser;
1887
1888 #undef IS_UNSERIALIZABLE_NODE
1889 }
1890
1891 /**
1892  * Perform the value serialization heuristic and add all
1893  * computed serialization edges as dependencies to the irg.
1894  */
1895 static void perform_value_serialization_heuristic(rss_t *rss) {
1896         bitset_t *arch_nonign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1897         bitset_t *abi_ign_bs     = bitset_alloca(arch_register_class_n_regs(rss->cls));
1898         int      available_regs, iteration;
1899         dvg_t    dvg;
1900         nodeset  *sat_vals;
1901         pset *ser_set = new_pset(cmp_rss_edges, 20);
1902
1903         /* available_regs = R = |arch_non_ignore_regs cut ~abi_ignore_regs| */
1904         arch_put_non_ignore_regs(rss->arch_env, rss->cls, arch_nonign_bs);
1905         be_abi_put_ignore_regs(rss->abi, rss->cls, abi_ign_bs);
1906         bitset_andnot(arch_nonign_bs, abi_ign_bs);
1907         available_regs  = bitset_popcnt(arch_nonign_bs);
1908         //num_live = pset_count(rss->live_block);
1909         //available_regs -= num_live < available_regs ? num_live : 0;
1910
1911         DBG((rss->dbg, LEVEL_1, "\n\t#available regs: %d\n\n", available_regs));
1912
1913         /*
1914                 At first we need to compute the disjoint value DAG (DVG = {V, E_dv}).
1915                 V    = set of all nodes we are currently interested in
1916                 E_dv = there is an edge from u to v iff v is a descendant of killer(u), forall u, v in V
1917         */
1918         dvg.nodes = new_nodeset(plist_count(rss->nodes));
1919         dvg.edges = new_pset(cmp_rss_edges, plist_count(rss->nodes) * 5);
1920         compute_dvg(rss, &dvg);
1921
1922         /*
1923                 Then we perform the heuristic serialization algorithm
1924                 on the DVG which gives us all necessary serialization
1925                 edges.
1926         */
1927         DBG((rss->dbg, LEVEL_1, "\tcomputing maximal antichain:\n"));
1928         iteration = 0;
1929         sat_vals  = compute_maximal_antichain(rss, &dvg, iteration++);
1930         while (sat_vals && (nodeset_count(sat_vals) > available_regs)) {
1931                 serialization_t *ser, best_ser;
1932                 rss_edge_t      *edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*edge));
1933                 ir_node         *dep_src, *dep_tgt;
1934
1935                 best_ser.edge = edge;
1936                 ser = compute_best_admissible_serialization(rss, sat_vals, &best_ser, available_regs);
1937
1938                 DBG((rss->dbg, LEVEL_1, "\tcurrent register saturation %d, target %d\n", nodeset_count(sat_vals), available_regs));
1939
1940                 if (! ser) {
1941                         DBG((rss->dbg, LEVEL_1, "\tno RS improving serialization found, breaking at iteration %d\n", iteration));
1942                         break;
1943                 }
1944
1945                 /* Insert the serialization as dependency edge into the irg. */
1946                 DBG((rss->dbg, LEVEL_1, "\tserializing %+F -> %+F with edge %+F -> %+F and cost %d, %d\n",
1947                         ser->u->irn, ser->v->irn, ser->edge->src, ser->edge->tgt, ser->omega1, ser->omega2));
1948
1949                 if (pset_find(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge)))
1950                         ir_printf("WARNING: serialization %+F -> %+F computed twice!\n", ser->edge->src, ser->edge->tgt);
1951
1952
1953                 pset_insert(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge));
1954
1955                 /* update the dvg */
1956                 update_dvg(rss, &dvg, ser->v, ser->u);
1957                 update_dvg(rss, &dvg, get_rss_irn(rss, ser->edge->src), get_rss_irn(rss, ser->edge->tgt));
1958                 del_nodeset(sat_vals);
1959
1960                 dep_src = skip_Proj(ser->edge->src);
1961                 dep_tgt = ser->edge->tgt;
1962                 add_irn_dep(dep_src, dep_tgt);
1963
1964                 /* Update descendants, consumer and pkillers of the target */
1965                 update_node_info(rss, ser->edge->tgt, ser->edge->src);
1966
1967                 /* TODO: try to find a cheaper way for updating height information */
1968                 rss->max_height = heights_recompute_block(rss->h, rss->block);
1969
1970                 /* Recompute the antichain for next serialization */
1971                 DBG((rss->dbg, LEVEL_1, "\tre-computing maximal antichain:\n"));
1972                 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
1973         }
1974
1975         del_nodeset(dvg.nodes);
1976         del_pset(dvg.edges);
1977 }
1978
1979 /**
1980  * Do initial calculations for a block.
1981  */
1982 static void process_block(ir_node *block, void *env) {
1983         rss_t *rss = env;
1984         int   i, n;
1985         const ir_edge_t *edge;
1986
1987         phase_init(&rss->ph, "rss block preprocessor", rss->irg, PHASE_DEFAULT_GROWTH, init_rss_irn);
1988
1989         DBG((rss->dbg, LEVEL_1, "preprocessing block %+F\n", block));
1990         rss->block = block;
1991
1992         /* build an index map for all nodes in the current block */
1993         i            = 0;
1994         n            = get_irn_n_edges(block);
1995         NEW_ARR_A(int *, rss->idx_map, n);
1996         foreach_out_edge(block, edge) {
1997                 ir_node *irn      = get_edge_src_irn(edge);
1998                 rss->idx_map[i++] = get_irn_idx(irn);
1999         }
2000         qsort(rss->idx_map, n, sizeof(rss->idx_map[0]), cmp_int);
2001         rss->max_height = heights_recompute_block(rss->h, block);
2002
2003         /* loop over all register classes */
2004         for (i = arch_isa_get_n_reg_class(rss->arch_env->isa) - 1; i >= 0; --i) {
2005                 const arch_register_class_t *cls = arch_isa_get_reg_class(rss->arch_env->isa, i);
2006
2007                 rss->cls = cls;
2008                 DBG((rss->dbg, LEVEL_1, "register class %s\n", arch_register_class_name(cls)));
2009
2010                 /* Get all live value at end of Block having current register class */
2011                 rss->live_block = pset_new_ptr(10);
2012                 be_liveness_end_of_block(rss->liveness, rss->arch_env, rss->cls, rss->block, rss->live_block);
2013
2014                 /* reset the list of interesting nodes */
2015                 plist_clear(rss->nodes);
2016                 plist_insert_back(rss->nodes, _sink);
2017
2018                 /* collect all nodes having a certain register class */
2019                 foreach_out_edge(block, edge) {
2020                         ir_node *irn  = get_edge_src_irn(edge);
2021                         ir_mode *mode = get_irn_mode(irn);
2022
2023                         /*
2024                                 We skip:
2025                                 - mode_T nodes (the projs are asked)
2026                                 - mode_X nodes (control flow nodes are always scheduled last)
2027                                 - Keeps (they are always immediately scheduled)
2028                                 - Phi (same as Keep)
2029                         */
2030                         if (mode == mode_T || mode == mode_X || is_Phi(irn))
2031                                 continue;
2032
2033                         /*
2034                                 In case of a proj, we skip
2035                                 - Barrier (they are a Barrier :)
2036                                 - Start
2037                                 - the Proj itself, as it's scheduled always with it's super node
2038                         */
2039                         if (is_Proj(irn)) {
2040                                 ir_node *pred = get_Proj_pred(irn);
2041                                 if (be_is_Barrier(pred) || is_Start(pred))
2042                                         continue;
2043                         }
2044
2045                         /* calculate the descendants and consumer for each node in the block */
2046                         collect_node_info(rss, skip_Proj(irn));
2047
2048                         if (be_is_Keep(irn))
2049                                 continue;
2050
2051                         if (! arch_irn_is(rss->arch_env, irn, ignore) && arch_get_irn_reg_class(rss->arch_env, irn, -1) == cls) {
2052                                 plist_insert_back(rss->nodes, skip_Proj(irn));
2053                         }
2054                         //}
2055                 }
2056
2057                 /* compute the potential killing set PK(G) */
2058                 compute_pkill_set(rss);
2059
2060                 /* compute the killing function k* */
2061                 compute_killing_function(rss);
2062
2063                 /*
2064                         Compute the heuristic value serialization and
2065                         add the necessary dependencies to the irg.
2066                 */
2067                 perform_value_serialization_heuristic(rss);
2068
2069                 del_pset(rss->live_block);
2070         }
2071
2072         phase_free(&rss->ph);
2073 }
2074
2075 #ifdef WITH_LIBCORE
2076 /**
2077  * Register the options.
2078  */
2079 void be_init_schedrss(void) {
2080         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
2081         lc_opt_entry_t *sched_grp = lc_opt_get_grp(be_grp, "sched");
2082         lc_opt_entry_t *rss_grp = lc_opt_get_grp(sched_grp, "rss");
2083
2084         lc_opt_add_table(rss_grp, rss_option_table);
2085 }
2086
2087 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_schedrss);
2088 #endif /* WITH_LIBCORE */
2089
2090 /**
2091  * Preprocess the irg for scheduling.
2092  */
2093 void rss_schedule_preparation(const be_irg_t *birg) {
2094         rss_t rss;
2095
2096         FIRM_DBG_REGISTER(rss.dbg, "firm.be.sched.rss");
2097
2098         //firm_dbg_set_mask(rss.dbg, LEVEL_1 | LEVEL_2 | LEVEL_3);
2099
2100         init_rss_special_nodes(birg->irg);
2101
2102         rss.irg      = birg->irg;
2103         rss.arch_env = birg->main_env->arch_env;
2104         rss.abi      = birg->abi;
2105         rss.h        = heights_new(birg->irg);
2106         rss.nodes    = plist_new();
2107         rss.opts     = &rss_options;
2108         rss.liveness = be_liveness(birg->irg);
2109         irg_block_walk_graph(birg->irg, NULL, process_block, &rss);
2110         heights_free(rss.h);
2111         plist_free(rss.nodes);
2112         be_liveness_free(rss.liveness);
2113
2114         if (birg->main_env->options->dump_flags & DUMP_SCHED)
2115                 be_dump(rss.irg, "-rss", dump_ir_block_graph);
2116 }