61d84a6058372ea86db3ec7275099f88deea892d
[libfirm] / ir / be / beschedrss.c
1 /**
2  * Implementation of a register saturating list scheduler
3  * as described in: Sid-Ahmed-Ali Touati
4  * Register Saturation in Superscalar and VLIW Codes
5  *
6  * @license This file protected by GPL -  GNU GENERAL PUBLIC LICENSE.
7  * @author  Christian Wuerdig
8  * @date    29.08.2006
9  * @cvs-id  $Id$
10  */
11 #ifdef HAVE_CONFIG_H
12 #include "config.h"
13 #endif
14
15 #include <limits.h>
16
17 #include "obst.h"
18 #include "debug.h"
19
20 #include "irgraph_t.h"
21 #include "irnode_t.h"
22 #include "iredges_t.h"
23 #include "ircons_t.h"
24 #include "irphase_t.h"
25 #include "irgwalk.h"
26 #include "irdump.h"
27 #include "irtools.h"
28 #include "irbitset.h"
29 #include "irprintf.h"
30 #include "bipartite.h"
31 #include "hungarian.h"
32 #include "plist.h"
33
34 #include "height.h"
35
36 #include "beabi.h"
37 #include "bemodule.h"
38 #include "benode_t.h"
39 #include "besched_t.h"
40
41 #include <libcore/lc_opts.h>
42 #include <libcore/lc_opts_enum.h>
43
44
45 #define ARR_LEN_SAFE(arr) ((arr) != NULL ? ARR_LEN((arr)) : 0)
46
47 #define HASH_RSS_EDGE(edge) ((get_irn_node_nr((edge)->src) << 16) | (get_irn_node_nr((edge)->tgt) & 0xFFFF))
48 #define BSEARCH_IRN_ARR(val, arr) \
49         bsearch(&(val), (arr), ARR_LEN_SAFE((arr)), sizeof((arr)[0]), cmp_irn_idx)
50
51 #define BLOCK_IDX_MAP(rss, irn) bsearch_for_index(get_irn_idx((irn)), (rss)->idx_map, ARR_LEN_SAFE((rss)->idx_map), 1)
52
53 /* the rss options */
54 typedef struct _rss_opts_t {
55         int dump_flags;
56 } rss_opts_t;
57
58 /* Represents a child with associated costs */
59 typedef struct _child {
60         ir_node *irn;
61         float   cost;
62 } child_t;
63
64 /* We need edges for several purposes. */
65 typedef struct _rss_edge {
66         ir_node *src;
67         ir_node *tgt;
68         void    *next;
69 } rss_edge_t;
70
71 /* Represents a connected bipartite component. */
72 typedef struct _cbc {
73         ir_nodeset_t parents;       /**< = S  a set of value producers */
74         ir_nodeset_t children;      /**< = T  a set of value consumers */
75         pset    *kill_edges;    /**< = E  a set of edges (t in T, s in S) such as each s in S gets killed by at least one t in T */
76         int     nr;             /**< a deterministic index for set insertion (used as hash) */
77 } cbc_t;
78
79 /* Represents a disjoint value DAG. */
80 typedef struct _dvg {
81         ir_nodeset_t nodes;
82         pset    *edges;
83 } dvg_t;
84
85 /* Represents a chain of nodes. */
86 typedef struct _chain {
87         plist_t *elements;   /**< List of chain elements */
88         int     nr;          /**< a deterministic index for set insertion (used as hash) */
89 } chain_t;
90
91 typedef struct _rss_irn {
92         plist_t  *consumer_list;    /**< List of consumers */
93         ir_node **consumer;         /**< Sorted consumer array (needed for faster access) */
94
95         plist_t  *parent_list;      /**< List of parents */
96         plist_t  *pkiller_list;     /**< List of potential killers */
97
98         plist_t  *descendant_list;  /**< List of descendants */
99         ir_node **descendants;      /**< Sorted descendant array (needed for faster access) */
100
101         ir_node  *killer;           /**< The selected unique killer */
102         ir_node  *irn;              /**< The corresponding firm node to this rss_irn */
103
104         chain_t  *chain;            /**< The chain, this node is associated to */
105
106         unsigned desc_walk;         /**< visited flag for collecting descendants */
107         unsigned kill_count;        /**< number of nodes killed by this one */
108
109         unsigned live_out : 1;      /**< irn has consumers outside of it's block */
110         unsigned visited  : 1;      /**< visited flag for bipartite decomposition */
111         unsigned havecons : 1;      /**< visited flag collect consumer */
112         unsigned handled  : 1;      /**< flag indicating whether or not the list structures have been build */
113         unsigned dumped   : 1;      /**< flag indication whether or not this node was dumped */
114 } rss_irn_t;
115
116 /* Represents a serialization edge with associated costs. */
117 typedef struct _serialization {
118         rss_irn_t  *u;       /* the top node */
119         rss_irn_t  *v;       /* the node about to be serialized after u */
120         rss_edge_t *edge;    /* the edge selected for the serialization */
121         int        omega1;   /* estimated: available regs - RS reduction */
122         int        omega2;   /* increase in critical path length */
123         int        new_killer;
124 } serialization_t;
125
126 typedef struct _rss {
127         ir_phase          ph;              /**< Phase to hold some data */
128         heights_t        *h;              /**< The current height object */
129         ir_graph         *irg;            /**< The irg to preprocess */
130         plist_t          *nodes;          /**< The list of interesting nodes */
131         const arch_env_t *arch_env;       /**< The architecture environment */
132         be_abi_irg_t     *abi;            /**< The abi for this irg */
133         pset             *cbc_set;        /**< A set of connected bipartite components */
134         ir_node          *block;          /**< The current block in progress. */
135         int              *idx_map;        /**< Mapping irn indices to per block indices */
136         unsigned         max_height;      /**< maximum height in the current block */
137         rss_opts_t       *opts;           /**< The options */
138         be_lv_t          *liveness;       /**< The liveness information for this irg */
139         pset             *live_block;     /**< Values alive at end of block */
140         const arch_register_class_t *cls; /**< The current register class */
141         DEBUG_ONLY(firm_dbg_module_t *dbg);
142 } rss_t;
143
144 #define get_rss_irn(rss, irn)  (phase_get_or_set_irn_data(&rss->ph, irn))
145
146 /**
147  * We need some special nodes:
148  * a source and a sink for all live-in and live-out values of a block
149  */
150 enum {
151         iro_rss_Source,
152         iro_rss_Sink,
153         iro_rss_last
154 };
155
156 /** The opcode of the rss_Source node once allocated. */
157 static ir_op *op_rss_Source;
158 /** The opcode of the rss_Sink node once allocated. */
159 static ir_op *op_rss_Sink;
160
161 /** The rss_Source node of the current graph. */
162 static ir_node *_source = NULL;
163 /** The rss_Sink node of the current graph. */
164 static ir_node *_sink   = NULL;
165
166 #define is_Source(irn) ((irn) == _source)
167 #define is_Sink(irn)   ((irn) == _sink)
168
169 enum {
170         RSS_DUMP_NONE  = 0,
171         RSS_DUMP_CBC   = 1 << 0,
172         RSS_DUMP_PKG   = 1 << 1,
173         RSS_DUMP_KILL  = 1 << 2,
174         RSS_DUMP_DVG   = 1 << 3,
175         RSS_DUMP_MAXAC = 1 << 4,
176         RSS_DUMP_ALL   = (RSS_DUMP_MAXAC << 1) - 1,
177 };
178
179 static rss_opts_t rss_options = {
180         RSS_DUMP_NONE,
181 };
182
183 static const lc_opt_enum_int_items_t dump_items[] = {
184         { "none",  RSS_DUMP_NONE  },
185         { "cbc",   RSS_DUMP_CBC   },
186         { "pkg",   RSS_DUMP_PKG   },
187         { "kill",  RSS_DUMP_KILL  },
188         { "dvg",   RSS_DUMP_DVG   },
189         { "maxac", RSS_DUMP_MAXAC },
190         { "all",   RSS_DUMP_ALL   },
191         { NULL, 0 }
192 };
193
194 static lc_opt_enum_int_var_t dump_var = {
195         &rss_options.dump_flags, dump_items
196 };
197
198 static const lc_opt_table_entry_t rss_option_table[] = {
199         LC_OPT_ENT_ENUM_MASK("dump", "dump phases", &dump_var),
200         { NULL }
201 };
202
203 /******************************************************************************
204  *  _          _                    __                  _   _
205  * | |        | |                  / _|                | | (_)
206  * | |__   ___| |_ __   ___ _ __  | |_ _   _ _ __   ___| |_ _  ___  _ __  ___
207  * | '_ \ / _ \ | '_ \ / _ \ '__| |  _| | | | '_ \ / __| __| |/ _ \| '_ \/ __|
208  * | | | |  __/ | |_) |  __/ |    | | | |_| | | | | (__| |_| | (_) | | | \__ \
209  * |_| |_|\___|_| .__/ \___|_|    |_|  \__,_|_| |_|\___|\__|_|\___/|_| |_|___/
210  *            | |
211  *            |_|
212  ******************************************************************************/
213
214 /**
215  * Acquire opcodes if needed and create source and sink nodes.
216  */
217 static void init_rss_special_nodes(ir_graph *irg) {
218         ir_node *block;
219
220         if (op_rss_Source == NULL) {
221                 int iro_rss_base = get_next_ir_opcodes(iro_rss_last);
222                 op_rss_Source = new_ir_op(iro_rss_base + iro_rss_Source, "rss_Source", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
223                 op_rss_Sink   = new_ir_op(iro_rss_base + iro_rss_Sink,   "rss_Sink",   op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
224         }
225         block   = get_irg_start_block(irg);
226         _source = new_ir_node(NULL, irg, block, op_rss_Source, mode_ANY, 0, NULL);
227         _sink   = new_ir_node(NULL, irg, block, op_rss_Sink, mode_ANY, 0, NULL);
228 }
229
230 static int cmp_int(const void *a, const void *b) {
231         const int *i1 = a;
232         const int *i2 = b;
233
234         return QSORT_CMP(*i1, *i2);
235 }
236
237 static int cmp_child_costs(const void *a, const void *b) {
238         const child_t *c1 = a;
239         const child_t *c2 = b;
240
241         return QSORT_CMP(c1->cost, c2->cost);
242 }
243
244 static int cmp_irn_idx(const void *a, const void *b) {
245         const ir_node *n1 = *(ir_node **)a;
246         const ir_node *n2 = *(ir_node **)b;
247
248         return QSORT_CMP(get_irn_idx(n1), get_irn_idx(n2));
249 }
250
251 static int cmp_rss_edges(const void *a, const void *b) {
252         const rss_edge_t *e1 = a;
253         const rss_edge_t *e2 = b;
254
255         return (e1->src != e2->src) || (e1->tgt != e2->tgt);
256 }
257
258 static int bsearch_for_index(int key, int *arr, size_t len, int force) {
259         int left = 0;
260         int right = len;
261
262         while (right >= left) {
263                 int idx = (left + right) / 2;
264
265                 if (key < arr[idx])
266                         right = idx - 1;
267                 else if (key > arr[idx])
268                         left = idx + 1;
269                 else
270                         return idx;
271         }
272
273         if (force)
274                 assert(0 && "Something is wrong, key not found.");
275         return -1;
276 }
277
278 static ir_node **build_sorted_array_from_list(plist_t *irn_list, struct obstack *obst) {
279         plist_element_t *el;
280         int     i     = 0;
281         int     len   = plist_count(irn_list);
282         ir_node **arr = NEW_ARR_D(ir_node *, obst, len);
283
284         /* copy the list into the array */
285         foreach_plist(irn_list, el) {
286                 arr[i++] = plist_element_get_value(el);
287         }
288
289         /* sort the array by node index */
290         qsort(arr, len, sizeof(arr[0]), cmp_irn_idx);
291
292         return arr;
293 }
294
295 /*****************************************************
296  *      _      _                       _
297  *     | |    | |                     (_)
298  *   __| | ___| |__  _   _  __ _  __ _ _ _ __   __ _
299  *  / _` |/ _ \ '_ \| | | |/ _` |/ _` | | '_ \ / _` |
300  * | (_| |  __/ |_) | |_| | (_| | (_| | | | | | (_| |
301  *  \__,_|\___|_.__/ \__,_|\__, |\__, |_|_| |_|\__, |
302  *                          __/ | __/ |         __/ |
303  *                         |___/ |___/         |___/
304  *
305  *****************************************************/
306
307 #ifdef DEBUG_libfirm
308 static void dump_nodeset(ir_nodeset_t *ns, const char *prefix) {
309         ir_nodeset_iterator_t iter;
310         ir_node *irn;
311
312         ir_nodeset_iterator_init(&iter, ns);
313         while( (irn = ir_nodeset_iterator_next(&iter)) != NULL ) {
314                 ir_fprintf(stderr, "%s%+F\n", prefix, irn);
315         }
316 }
317 #endif
318
319 static void build_file_name(rss_t *rss, const char *suffix, size_t suf_len, char *buf, size_t len) {
320         const char *irg_name;
321
322         memset(buf, 0, len);
323         irg_name = get_entity_name(get_irg_entity(rss->irg));
324         snprintf(buf, len - suf_len, "%s-%s-block-%ld",
325                 irg_name, arch_register_class_name(rss->cls), get_irn_node_nr(rss->block));
326         strcat(buf, suffix);
327 }
328
329 /* Dumps all collected bipartite components of current irg as vcg. */
330 static void debug_vcg_dump_bipartite(rss_t *rss) {
331         cbc_t *cbc;
332         FILE  *f;
333         char  file_name[256];
334         static const char suffix[] = "-RSS-CBC.vcg";
335
336         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
337         f = fopen(file_name, "w");
338
339         ir_fprintf(f, "graph: { title: \"connected bipartite component graph of %+F\"\n", rss->irg);
340         fprintf(f, "display_edge_labels: no\n");
341         fprintf(f, "layoutalgorithm: mindepth\n");
342         fprintf(f, "manhattan_edges: yes\n\n");
343
344         foreach_pset(rss->cbc_set, cbc) {
345                 ir_nodeset_iterator_t iter;
346                 ir_node    *n;
347                 rss_edge_t *ke;
348
349                 fprintf(f, "graph: { titel: \"cbc %d\" label: \"cbc %d\" status:clustered color:yellow\n", cbc->nr, cbc->nr);
350                 foreach_ir_nodeset(&cbc->parents, n, iter) {
351                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
352                 }
353                 foreach_ir_nodeset(&cbc->children, n, iter) {
354                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
355                 }
356                 foreach_pset(cbc->kill_edges, ke) {
357                         ir_fprintf(f, "edge: { sourcename: \"n%d_%d\" targetname: \"n%d_%d\" }\n",
358                                 get_irn_node_nr(ke->src), cbc->nr, get_irn_node_nr(ke->tgt), cbc->nr);
359                 }
360                 fprintf(f, "}\n\n");
361         }
362         fprintf(f, "}\n");
363         fclose(f);
364 }
365
366 /* Dump the computed killing function as vcg. */
367 static void debug_vcg_dump_kill(rss_t *rss) {
368         FILE            *f;
369         char            file_name[256];
370         plist_element_t *el;
371         static const char suffix[] = "-RSS-KILL.vcg";
372
373         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
374         f = fopen(file_name, "w");
375
376         ir_fprintf(f, "graph: { title: \"computed kill graph of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
377         fprintf(f, "display_edge_labels: no\n");
378         fprintf(f, "layoutalgorithm: mindepth\n");
379         fprintf(f, "manhattan_edges: yes\n\n");
380
381         {
382                 /* reset dump_flag */
383                 ir_node *irn;
384                 foreach_phase_irn(&rss->ph, irn) {
385                         rss_irn_t *node = get_rss_irn(rss, irn);
386                         node->dumped = 0;
387                 }
388         }
389
390         /* dump all nodes and their killers */
391         foreach_plist(rss->nodes, el) {
392                 ir_node   *irn     = plist_element_get_value(el);
393                 rss_irn_t *rirn    = get_rss_irn(rss, irn);
394                 rss_irn_t *pk_rirn = get_rss_irn(rss, rirn->killer);
395
396                 if (! rirn->dumped) {
397                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
398                         rirn->dumped = 1;
399                 }
400
401                 if (! pk_rirn->dumped) {
402                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(rirn->killer), rirn->killer);
403                         pk_rirn->dumped = 1;
404                 }
405
406                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
407                         get_irn_node_nr(rirn->killer), get_irn_node_nr(irn));
408         }
409
410         fprintf(f, "}\n");
411         fclose(f);
412 }
413
414 /* Dumps the potential killing DAG (PKG) as vcg. */
415 static void debug_vcg_dump_pkg(rss_t *rss, ir_nodeset_t *max_ac, int iteration) {
416         FILE              *f;
417         char              file_name[256];
418         char              *suffix   = alloca(32);
419         static const char suffix1[] = "-RSS-PKG.vcg";
420         static const char suffix2[] = "-RSS-PKG-MAXAC.vcg";
421         plist_element_t   *el;
422
423         if (! max_ac) {
424                 snprintf(suffix, 32, "%s", suffix1);
425         }
426         else {
427                 snprintf(suffix, 32, "-%02d%s", iteration, suffix2);
428         }
429
430         build_file_name(rss, suffix, strlen(suffix) + 1, file_name, sizeof(file_name));
431         f = fopen(file_name, "w");
432
433         ir_fprintf(f, "graph: { title: \"potential killing DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
434         fprintf(f, "display_edge_labels: no\n");
435         fprintf(f, "layoutalgorithm: mindepth\n");
436         fprintf(f, "manhattan_edges: yes\n\n");
437
438         {
439                 /* reset dump_flag */
440                 ir_node *irn;
441                 foreach_phase_irn(&rss->ph, irn) {
442                         rss_irn_t *node = get_rss_irn(rss, irn);
443                         node->dumped = 0;
444                 }
445         }
446
447         foreach_plist(rss->nodes, el) {
448                 ir_node   *irn  = plist_element_get_value(el);
449                 rss_irn_t *rirn = get_rss_irn(rss, irn);
450                 char      *c1   = "";
451                 plist_element_t *k_el;
452
453                 /* dump selected saturating values in yellow */
454                 if (max_ac && ir_nodeset_contains(max_ac, irn))
455                         c1 = "color:yellow";
456
457                 if (! rirn->dumped) {
458                         if (rirn->chain)
459                                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(irn), irn, rirn->chain->nr, c1);
460                         else
461                                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(irn), irn, c1);
462                         rirn->dumped = 1;
463                 }
464
465                 foreach_plist(rirn->pkiller_list, k_el) {
466                         ir_node   *pkiller = plist_element_get_value(k_el);
467                         rss_irn_t *pk_rirn = get_rss_irn(rss, pkiller);
468                         char      *c2      = "";
469
470                         if (max_ac && ir_nodeset_contains(max_ac, pkiller))
471                                 c2 = "color:yellow";
472
473                         if (! pk_rirn->dumped) {
474                                 if (pk_rirn->chain)
475                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(pkiller), pkiller, pk_rirn->chain->nr, c2);
476                                 else
477                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(pkiller), pkiller, c2);
478                                 pk_rirn->dumped = 1;
479                         }
480                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
481                                 get_irn_node_nr(pkiller), get_irn_node_nr(irn));
482                 }
483         }
484         fprintf(f, "}\n");
485         fclose(f);
486 }
487
488 /* Dumps the disjoint value DAG (DVG) as vcg. */
489 static void debug_vcg_dump_dvg(rss_t *rss, dvg_t *dvg) {
490         static const char suffix[] = "-RSS-DVG.vcg";
491         FILE       *f;
492         char       file_name[256];
493         ir_nodeset_iterator_t iter;
494         ir_node    *irn;
495         rss_edge_t *edge;
496
497         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
498         f = fopen(file_name, "w");
499
500         ir_fprintf(f, "graph: { title: \"disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
501         fprintf(f, "display_edge_labels: no\n");
502         fprintf(f, "layoutalgorithm: mindepth\n");
503         fprintf(f, "manhattan_edges: yes\n\n");
504
505         /* dump all nodes */
506         foreach_ir_nodeset(&dvg->nodes, irn, iter) {
507                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
508         }
509
510         /* dump all edges */
511         foreach_pset(dvg->edges, edge) {
512                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
513                         get_irn_node_nr(edge->src), get_irn_node_nr(edge->tgt));
514         }
515
516         fprintf(f, "}\n");
517         fclose(f);
518 }
519
520 #if 0
521 /* Dumps the PKG(DVG). */
522 static void debug_vcg_dump_dvg_pkiller(rss_t *rss, dvg_t *dvg) {
523         static const char suffix[] = "-RSS-DVG-PKG.vcg";
524         FILE       *f;
525         char       file_name[256];
526         ir_nodeset_iterator_t iter;
527         ir_node    *irn;
528
529         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
530         f = fopen(file_name, "w");
531
532         ir_fprintf(f, "graph: { title: \"PKG of disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
533         fprintf(f, "display_edge_labels: no\n");
534         fprintf(f, "layoutalgorithm: mindepth\n");
535         fprintf(f, "manhattan_edges: yes\n\n");
536
537         /* dump all nodes */
538         foreach_ir_nodeset(&dvg->nodes, irn, iter) {
539                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
540         }
541
542         /* dump all edges */
543         foreach_ir_nodeset(&dvg->nodes, irn, iter) {
544                 rss_irn_t       *node = get_rss_irn(rss, irn);
545                 plist_element_t *el;
546
547                 foreach_plist(node->dvg_pkiller_list, el) {
548                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
549                                 get_irn_node_nr(plist_element_get_value(el)), get_irn_node_nr(irn));
550                 }
551         }
552
553         fprintf(f, "}\n");
554         fclose(f);
555 }
556 #endif /* if 0 */
557
558 /**
559  * In case there is no rss information for irn, initialize it.
560  */
561 static void *init_rss_irn(ir_phase *ph, ir_node *irn, void *old) {
562         rss_irn_t *res = old ? old : phase_alloc(ph, sizeof(res[0]));
563
564         res->descendant_list = plist_obstack_new(phase_obst(ph));
565         res->descendants     = NULL;
566
567         res->consumer_list   = plist_obstack_new(phase_obst(ph));
568         res->consumer        = NULL;
569
570         res->pkiller_list    = plist_obstack_new(phase_obst(ph));
571
572         res->parent_list     = plist_obstack_new(phase_obst(ph));
573
574         res->killer          = NULL;
575         res->irn             = irn;
576         res->chain           = NULL;
577
578         res->kill_count      = 0;
579         res->desc_walk       = 0;
580         res->live_out        = 0;
581         res->visited         = 0;
582         res->handled         = 0;
583         res->dumped          = 0;
584         res->havecons        = 0;
585
586         return res;
587 }
588
589 /**
590  * Collect all nodes data dependent on current node.
591  */
592 static void collect_descendants(rss_t *rss, rss_irn_t *rirn, ir_node *irn, int *got_sink, unsigned cur_desc_walk) {
593         const ir_edge_t *edge;
594         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
595         ir_node         *block    = rss->block;
596         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
597         int             i;
598
599         if (cur_node->desc_walk >= cur_desc_walk)
600                 return;
601         cur_node->desc_walk = cur_desc_walk;
602
603         /* Stop at Barriers */
604         if (be_is_Barrier(irn))
605                 return;
606
607         /* loop over normal and dependency edges */
608         for (i = 0; i < 2; ++i) {
609                 foreach_out_edge_kind(irn, edge, ekind[i]) {
610                         ir_node *user = get_edge_src_irn(edge);
611
612                         /* skip ignore nodes as they do not really contribute to register pressure */
613                         if (arch_irn_is(rss->arch_env, user, ignore))
614                                 continue;
615
616                         /*
617                                 (a) mode_X means Jump            -> out_edge
618                                 (b) Phi as user of a normal node -> out-edge
619                         */
620                         if (get_irn_mode(user) == mode_X || is_Phi(user)) {
621                                 if (! *got_sink)
622                                         goto force_sink;
623                                 else
624                                         continue;
625                         }
626
627                         if (is_Proj(user)) {
628
629                                 //if (arch_get_irn_reg_class(rss->arch_env, user, -1) == rss->cls)
630                                         collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
631                         }
632                         else {
633                                 /* check if user lives in block and is not a control flow node */
634                                 if (get_nodes_block(user) == block) {
635                                         if (! plist_has_value(rirn->descendant_list, user)) {
636                                                 plist_insert_back(rirn->descendant_list, user);
637                                                 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", user));
638                                         }
639                                         collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
640                                 }
641                                 else if (! *got_sink) {
642 force_sink:
643                                         /* user lives out of block: add sink as descendant if not already done */
644                                         plist_insert_back(rirn->descendant_list, _sink);
645                                         *got_sink = 1;
646                                         DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", _sink));
647                                 }
648                         }
649                 }
650         }
651 }
652
653 /**
654  * Handles a single consumer.
655  */
656 static void collect_single_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *consumer, int *got_sink) {
657         ir_node *block = rss->block;
658
659         assert(! is_Proj(consumer) && "Cannot handle Projs");
660
661         if (! is_Phi(consumer) && ! is_Block(consumer) && get_nodes_block(consumer) == block) {
662                 if (! arch_irn_is(rss->arch_env, consumer, ignore) && ! plist_has_value(rss_irn->consumer_list, consumer)) {
663                         plist_insert_back(rss_irn->consumer_list, consumer);
664                         DBG((rss->dbg, LEVEL_2, "\t\tconsumer %+F\n", consumer));
665                 }
666         }
667         else {
668                 rss_irn->live_out = 1;
669                 DBG((rss->dbg, LEVEL_2, "\t\tlive out %+F", consumer));
670                 if (! *got_sink) {
671                         plist_insert_back(rss_irn->consumer_list, _sink);
672                         *got_sink = 1;
673                         DB((rss->dbg, LEVEL_2, ", %+F added instead", _sink));
674                 }
675                 DB((rss->dbg, LEVEL_2, "\n"));
676         }
677 }
678
679 /**
680  * Collect all nodes consuming the value(s) produced by current node.
681  */
682 static void collect_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *irn, int *got_sink) {
683         const ir_edge_t *edge;
684         int             i;
685         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
686         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
687
688         if (cur_node->havecons)
689                 return;
690         cur_node->havecons = 1;
691
692         for (i = 0; i < 2; ++i) {
693                 foreach_out_edge_kind(irn, edge, ekind[i]) {
694                         ir_node *consumer = get_edge_src_irn(edge);
695
696                         if (is_Proj(consumer)) {
697                                 //if (arch_get_irn_reg_class(rss->arch_env, consumer, -1) == rss->cls)
698                                         collect_consumer(rss, rss_irn, consumer, got_sink);
699                         }
700                         else
701                                 collect_single_consumer(rss, rss_irn, consumer, got_sink);
702                 }
703         }
704 }
705
706 /**
707  * Collects all consumer and descendant of a irn.
708  */
709 static void collect_node_info(rss_t *rss, ir_node *irn) {
710         static unsigned cur_desc_walk = 0;
711         rss_irn_t       *rss_irn      = get_rss_irn(rss, irn);
712         int             got_sink;
713
714         assert(! is_Proj(irn) && "Cannot handle Projs.");
715
716         /* check if node info is already available */
717         if (rss_irn->handled)
718                 return;
719                 //phase_reinit_single_irn_data(&rss->ph, irn);
720
721         DBG((rss->dbg, LEVEL_1, "\tcomputing consumers of %+F:\n", irn));
722
723         /* collect all consumer */
724         got_sink = 0;
725         collect_consumer(rss, rss_irn, irn, &got_sink);
726
727         /* build sorted consumer array */
728         rss_irn->consumer = build_sorted_array_from_list(rss_irn->consumer_list, phase_obst(&rss->ph));
729
730         DBG((rss->dbg, LEVEL_1, "\tcompute descendants of %+F:\n", irn));
731
732         /* collect descendants */
733         got_sink = 0;
734         collect_descendants(rss, rss_irn, irn, &got_sink, ++cur_desc_walk);
735
736         /* build sorted descendant array */
737         rss_irn->descendants = build_sorted_array_from_list(rss_irn->descendant_list, phase_obst(&rss->ph));
738
739         rss_irn->handled = 1;
740 }
741
742 /**
743  * Checks if v is a potential killer of u.
744  * v is in pkill(u) iff descendants(v) cut consumer(u) is v
745  *
746  * @param rss   The rss object
747  * @param v      The node to check for killer
748  * @param u      The potentially killed value
749  * @return 1 if v is in pkill(u), 0 otherwise
750  */
751 static int is_potential_killer(rss_t *rss, rss_irn_t *v, rss_irn_t *u) {
752         plist_t *list;
753         ir_node **arr;
754         plist_element_t *el;
755
756         assert(is_Sink(v->irn) || ((plist_count(v->descendant_list) > 0 && v->descendants) || 1));
757         assert(is_Sink(u->irn) || ((plist_count(u->consumer_list)   > 0 && u->consumer)    || 1));
758
759         /* as we loop over the list: loop over the shorter one */
760         if (plist_count(v->descendant_list) > plist_count(u->consumer_list)) {
761                 list = u->consumer_list;
762                 arr  = v->descendants;
763         }
764         else {
765                 list = v->descendant_list;
766                 arr  = u->consumer;
767         }
768
769         /* for each list element: try to find element in array */
770         foreach_plist(list, el) {
771                 ir_node *irn   = plist_element_get_value(el);
772                 ir_node *match = BSEARCH_IRN_ARR(irn, arr);
773
774                 if (match && match != irn)
775                         return 0;
776         }
777
778         return 1;
779 }
780
781 /**
782  * Update descendants, consumer and pkiller list for the given irn.
783  */
784 static void update_node_info(rss_t *rss, ir_node *irn, ir_node *pk_irn) {
785         rss_irn_t *node    = get_rss_irn(rss, irn);
786         rss_irn_t *pkiller = get_rss_irn(rss, pk_irn);
787
788         /* add consumer and rebuild the consumer array */
789         if (! plist_has_value(node->consumer_list, pk_irn)) {
790                 plist_insert_back(node->consumer_list, pk_irn);
791                 node->consumer = build_sorted_array_from_list(node->consumer_list, phase_obst(&rss->ph));
792         }
793
794         /* add pk_irn as descendant, add it's descendants to irn's and rebuild array */
795         if (! plist_has_value(node->descendant_list, pk_irn)) {
796                 plist_element_t *el;
797
798                 plist_insert_back(node->descendant_list, pk_irn);
799
800                 foreach_plist(pkiller->descendant_list, el) {
801                         ir_node *desc = plist_element_get_value(el);
802
803                         if (! plist_has_value(node->descendant_list, desc))
804                                 plist_insert_back(node->descendant_list, desc);
805                 }
806
807                 node->descendants = build_sorted_array_from_list(node->descendant_list, phase_obst(&rss->ph));
808         }
809
810 }
811
812 /**
813  * Compute the potential killing set PK.
814  */
815 static void compute_pkill_set(rss_t *rss) {
816         plist_element_t *u_el, *v_el;
817
818         foreach_plist(rss->nodes, u_el) {
819                 ir_node   *u_irn = plist_element_get_value(u_el);
820                 rss_irn_t *u     = get_rss_irn(rss, u_irn);
821
822                 DBG((rss->dbg, LEVEL_1, "\tcomputing potential killers of %+F:\n", u_irn));
823
824                 /* check each consumer if it is a potential killer  */
825                 foreach_plist(u->consumer_list, v_el) {
826                         ir_node   *v_irn = plist_element_get_value(v_el);
827                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
828
829                         /* check, if v is a potential killer of u */
830                         if (is_potential_killer(rss, v, u)) {
831                                 if (! plist_has_value(u->pkiller_list, v_irn))
832                                         plist_insert_back(u->pkiller_list, v_irn);
833                                 v->kill_count++;
834                                 DBG((rss->dbg, LEVEL_2, "\t\tpotential killer %+F\n", v_irn));
835                         }
836                 }
837
838                 u->killer = _sink;
839         }
840
841         if (rss->opts->dump_flags & RSS_DUMP_PKG)
842                 debug_vcg_dump_pkg(rss, NULL, 0);
843 }
844
845 /**
846  * Build set of killing edges (from values to their potential killers)
847  */
848 static void build_kill_edges(rss_t *rss, pset *epk) {
849         plist_element_t *el, *k_el;
850
851         foreach_plist(rss->nodes, el) {
852                 ir_node    *irn  = plist_element_get_value(el);
853                 rss_irn_t *rirn = get_rss_irn(rss, irn);
854
855                 DBG((rss->dbg, LEVEL_3, "\t\tbuilding kill edges for %+F:\n", irn));
856
857                 foreach_plist(rirn->pkiller_list, k_el) {
858                         ir_node    *pkiller = plist_element_get_value(k_el);
859                         rss_edge_t *ke      = obstack_alloc(phase_obst(&rss->ph), sizeof(*ke));
860
861                         ke->src  = irn;
862                         ke->tgt  = pkiller;
863                         ke->next = NULL;
864
865                         DBG((rss->dbg, LEVEL_3, "\t\t\ttarget %+F\n", pkiller));
866
867                         pset_insert(epk, ke, HASH_RSS_EDGE(ke));
868                 }
869         }
870 }
871
872 #ifdef DEBUG_libfirm
873 /* print the given cbc for debugging purpose */
874 static void debug_print_cbc(firm_dbg_module_t *mod, cbc_t *cbc) {
875         ir_nodeset_iterator_t iter;
876         ir_node    *n;
877         rss_edge_t *ke;
878
879         DBG((mod, LEVEL_3, "\t\tS = set of parents:\n"));
880         foreach_ir_nodeset(&cbc->parents, n, iter) {
881                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
882         }
883         DBG((mod, LEVEL_3, "\t\tT = set of children:\n"));
884         foreach_ir_nodeset(&cbc->children, n, iter) {
885                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
886         }
887         DBG((mod, LEVEL_3, "\t\tE = Edges from producers to consumers\n"));
888         foreach_pset(cbc->kill_edges, ke) {
889                 DBG((mod, LEVEL_3, "\t\t\t%+F -> %+F\n", ke->src, ke->tgt));
890         }
891 }
892 #endif
893
894 /**
895  * Construct the bipartite decomposition.
896  * Sid-Ahmed-Ali Touati, Phd Thesis
897  * Register Pressure in Instruction Level Parallelism, p. 71
898  */
899 static void compute_bipartite_decomposition(rss_t *rss) {
900         pset *epk    = new_pset(cmp_rss_edges, 10);
901         int  cur_num = 0;
902
903         plist_element_t *el;
904
905         DBG((rss->dbg, LEVEL_1, "\tcomputing bipartite decomposition:\n"));
906
907         build_kill_edges(rss, epk);
908
909         foreach_plist(rss->nodes, el) {
910                 ir_node   *u_irn   = plist_element_get_value(el);
911                 rss_irn_t *u       = get_rss_irn(rss, u_irn);
912                 int       p_change = 1;
913                 int       c_change = 1;
914                 int       vrfy_ok  = 1;
915
916                 cbc_t           *cbc;
917                 plist_element_t *el2;
918                 rss_edge_t      *k_edge;
919                 rss_edge_t      *kedge_root = NULL;
920                 ir_node         *t_irn, *s_irn;
921                 ir_nodeset_iterator_t iter;
922
923                 if (u->visited || u_irn == _sink)
924                         continue;
925
926                 DBG((rss->dbg, LEVEL_2, "\t\t%+F choosen:\n", u_irn));
927
928                 cbc     = obstack_alloc(phase_obst(&rss->ph), sizeof(*cbc));
929                 cbc->nr = cur_num++;
930
931                 /* initialize S_cb */
932                 ir_nodeset_init_size(&cbc->parents, 5);
933                 ir_nodeset_insert(&cbc->parents, u_irn);
934                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (init)\n", u_irn));
935
936                 /* E_cb = empty */
937                 cbc->kill_edges = new_pset(cmp_rss_edges, 5);
938
939                 /* each parent gets killed by at least one value from children */
940
941                 /* T_cb = PK_successors(u) */
942                 ir_nodeset_init_size(&cbc->children, 5);
943                 foreach_plist(u->pkiller_list, el2) {
944                         ir_nodeset_insert(&cbc->children, plist_element_get_value(el2));
945                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (init)\n", plist_element_get_value(el2)));
946                 }
947
948                 /*
949                         Now: insert the parents of all children into the parent set
950                         and insert the children of all parents into the children set
951                         until the sets don't change any more
952                 */
953                 while (p_change || c_change) {
954                         ir_nodeset_iterator_t iter;
955                         p_change = c_change = 0;
956
957                         /* accumulate parents */
958                         foreach_ir_nodeset(&cbc->children, t_irn, iter) {
959                                 foreach_pset(epk, k_edge) {
960                                         ir_node *val = k_edge->src;
961
962                                         if (k_edge->tgt == t_irn && ! ir_nodeset_contains(&cbc->parents, val)) {
963                                                 ir_nodeset_insert(&cbc->parents, val);
964                                                 p_change = 1;
965                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (killed by %+F)\n", val, t_irn));
966                                         }
967                                 }
968                         }
969
970                         /* accumulate children */
971                         foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
972                                 foreach_pset(epk, k_edge) {
973                                         ir_node *val = k_edge->tgt;
974
975                                         if (k_edge->src == s_irn  && ! ir_nodeset_contains(&cbc->children, val)) {
976                                                 ir_nodeset_insert(&cbc->children, val);
977                                                 c_change = 1;
978                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (kills %+F)\n", val, s_irn));
979                                         }
980                                 }
981                         }
982                 }
983
984                 /* mark all parent values as visited */
985                 foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
986                         rss_irn_t *s = get_rss_irn(rss, s_irn);
987                         s->visited = 1;
988                         /* assure bipartite property */
989 #if 0
990                         if (ir_nodeset_contains(&cbc->children, s_irn)) {
991                                 ir_nodeset_remove(&cbc->children, s_irn);
992                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F removed from to children\n", s_irn));
993                         }
994 #endif
995                 }
996
997                 /* update edges */
998                 foreach_pset(epk, k_edge) {
999                         if (ir_nodeset_contains(&cbc->parents, k_edge->src) &&
1000                             ir_nodeset_contains(&cbc->children, k_edge->tgt)) {
1001                                 pset_insert(cbc->kill_edges, k_edge, HASH_RSS_EDGE(k_edge));
1002                                 /*
1003                                         Link all k_edges which are about to be removed together.
1004                                         Beware: pset_remove kills the iterator.
1005                                 */
1006                                 k_edge->next = kedge_root;
1007                                 kedge_root   = k_edge;
1008                         }
1009                 }
1010
1011                 /* remove all linked k_edges */
1012                 for (; kedge_root; kedge_root = kedge_root->next) {
1013                         pset_remove(epk, kedge_root, HASH_RSS_EDGE(kedge_root));
1014                 }
1015
1016                 /* verify the cbc */
1017                 foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
1018                         int is_killed = 0;
1019
1020                         foreach_pset(cbc->kill_edges, k_edge) {
1021                                 if (k_edge->src == s_irn) {
1022                                         is_killed = 1;
1023                                         pset_break(cbc->kill_edges);
1024                                         break;
1025                                 }
1026                         }
1027
1028                         if (! is_killed) {
1029                                 vrfy_ok = 0;
1030                                 ir_fprintf(stderr, "Warning: parent %+F is not killed in current cbc\n", s_irn);
1031                         }
1032                 }
1033                 assert(vrfy_ok && "Verification of CBC failed");
1034
1035                 /* add the connected bipartite component */
1036                 if (ir_nodeset_size(&cbc->parents) > 0 &&
1037                     ir_nodeset_size(&cbc->children) > 0 &&
1038                         pset_count(cbc->kill_edges) > 0)
1039                         pset_insert(rss->cbc_set, cbc, (unsigned)cbc->nr);
1040                 DBG((rss->dbg, LEVEL_2, "\tbipartite component %d inserted:\n", cbc->nr));
1041                 DEBUG_ONLY(
1042                         if (firm_dbg_get_mask(rss->dbg) & LEVEL_2)
1043                                 debug_print_cbc(rss->dbg, cbc);
1044                 );
1045         }
1046
1047         if (rss->opts->dump_flags & RSS_DUMP_CBC)
1048                 debug_vcg_dump_bipartite(rss);
1049
1050         del_pset(epk);
1051 }
1052
1053 /**
1054  * Select the child with the maximum cost.
1055  */
1056 static child_t *select_child_max_cost(rss_t *rss, ir_nodeset_t *x, ir_nodeset_t *y, child_t *t, cbc_t *cbc) {
1057         ir_node *child;
1058         ir_nodeset_iterator_t iter;
1059         float   max_cost = -1.0f;
1060
1061         DBG((rss->dbg, LEVEL_2, "\t\tcomputing children costs:\n"));
1062
1063         foreach_ir_nodeset(&cbc->children, child, iter) {
1064                 rss_irn_t  *r_child             = get_rss_irn(rss, child);
1065                 int         num_unkilled_parents = 0;
1066                 int         num_descendants;
1067                 rss_edge_t *k_edge;
1068                 float       cost;
1069
1070                 /* get the number of unkilled parents */
1071                 foreach_pset(cbc->kill_edges, k_edge) {
1072                         if (k_edge->tgt == child && ir_nodeset_contains(x, k_edge->src))
1073                                 ++num_unkilled_parents;
1074                 }
1075
1076                 cost = (float)num_unkilled_parents;
1077
1078                 num_descendants = plist_count(r_child->descendant_list) + ir_nodeset_size(y);
1079
1080                 if (num_descendants > 0)
1081                         cost /= (float)num_descendants;
1082
1083                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F, #desc %d, cost %.3f\n", child, num_descendants, cost));
1084
1085                 if (cost > max_cost) {
1086                         t->irn   = child;
1087                         t->cost  = cost;
1088                         max_cost = cost;
1089                 }
1090         }
1091
1092         return t;
1093 }
1094
1095 /**
1096  * Remove all parents from x which are killed by t_irn.
1097  */
1098 static void remove_covered_parents(rss_t *rss, ir_nodeset_t *x, ir_node *t_irn, cbc_t *cbc) {
1099         rss_irn_t  *t = get_rss_irn(rss, t_irn);
1100         rss_edge_t *k_edge;
1101
1102         DBG((rss->dbg, LEVEL_2, "\t\tremoving parents covered by %+F:\n", t_irn));
1103
1104         foreach_pset(cbc->kill_edges, k_edge) {
1105                 if (k_edge->tgt == t_irn && ir_nodeset_contains(x, k_edge->src)) {
1106                         ir_nodeset_remove(x, k_edge->src);
1107                         plist_insert_back(t->parent_list, k_edge->src);
1108                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", k_edge->src));
1109                 }
1110         }
1111 }
1112
1113 static void update_cumulated_descendent_values(rss_t *rss, ir_nodeset_t *y, ir_node *t_irn) {
1114         rss_irn_t *t = get_rss_irn(rss, t_irn);
1115         plist_element_t *el;
1116
1117         DBG((rss->dbg, LEVEL_2, "\t\tupdating cumulated descendant value of %+F:\n", t_irn));
1118
1119         foreach_plist(t->descendant_list, el) {
1120                 ir_nodeset_insert(y, plist_element_get_value(el));
1121                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", plist_element_get_value(el)));
1122         }
1123 }
1124
1125 /**
1126  * Greedy-k: a heuristics for the MMA problem
1127  */
1128 static void compute_killing_function(rss_t *rss) {
1129         cbc_t *cbc;
1130         struct obstack obst;
1131
1132         obstack_init(&obst);
1133
1134         rss->cbc_set = pset_new_ptr(5);
1135         compute_bipartite_decomposition(rss);
1136
1137         /* for all bipartite components do: */
1138         foreach_pset(rss->cbc_set, cbc) {
1139                 ir_node *p;
1140                 ir_nodeset_t x;
1141                 ir_nodeset_t y;
1142                 ir_nodeset_iterator_t iter;
1143                 child_t **sks    = NEW_ARR_F(child_t *, 20);
1144                 int     cur_len  = 0;
1145                 int     cur_size = 20;
1146                 int     i;
1147
1148                 ir_nodeset_init_size(&x, 10);
1149                 ir_nodeset_init_size(&y, 10);
1150
1151                 DBG((rss->dbg, LEVEL_1, "\tcomputing SKS for cbc %d:\n", cbc->nr));
1152                 DBG((rss->dbg, LEVEL_2, "\t\tinitializing parents X:\n"));
1153
1154                 /* X = S_cb (all parents are initially uncovered) */
1155                 foreach_ir_nodeset(&cbc->parents, p, iter) {
1156                         ir_nodeset_insert(&x, p);
1157                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", p));
1158                 }
1159
1160                 /* while X not empty */
1161                 while (ir_nodeset_size(&x) > 0) {
1162                         child_t *t = obstack_alloc(&obst, sizeof(*t));
1163                         memset(t, 0, sizeof(*t));
1164
1165                         t = select_child_max_cost(rss, &x, &y, t, cbc);
1166
1167                         if (cur_len >= cur_size) {
1168                                 ARR_EXTO(child_t *, sks, cur_size * 2);
1169                                 cur_size *= 2;
1170                         }
1171
1172                         DBG((rss->dbg, LEVEL_2, "\t\tinsert child %+F (%.3f) into SKS at pos %d\n", t->irn, t->cost, cur_len));
1173
1174                         sks[cur_len++] = t;
1175                         remove_covered_parents(rss, &x, t->irn, cbc);
1176                         update_cumulated_descendent_values(rss, &y, t->irn);
1177                 }
1178
1179                 ARR_SHRINKLEN(sks, cur_len);
1180
1181                 /* sort SKS in increasing cost order */
1182                 qsort(sks, cur_len, sizeof(sks[0]), cmp_child_costs);
1183
1184                 DBG((rss->dbg, LEVEL_2, "\tprocessing SKS for cbc %d:\n", cbc->nr));
1185
1186                 /* build killing function */
1187                 for (i = cur_len - 1; i >= 0; --i) { /* loop over sks in decreasing cost order */
1188                         child_t         *t = sks[i];
1189                         rss_irn_t       *rt = get_rss_irn(rss, t->irn);
1190                         plist_element_t *p_el;
1191
1192                         DBG((rss->dbg, LEVEL_3, "\t\t\tkiller %+F (%.3f):\n", t->irn, t->cost));
1193
1194                         /* kill all unkilled parents of t */
1195                         foreach_plist(rt->parent_list, p_el) {
1196                                 ir_node    *par = plist_element_get_value(p_el);
1197                                 rss_irn_t *rpar = get_rss_irn(rss, par);
1198
1199                                 if (is_Sink(rpar->killer)) {
1200                                         rpar->killer = t->irn;
1201                                         DBG((rss->dbg, LEVEL_2, "\t\tkill %+F\n", rpar->irn));
1202                                 }
1203                                 else {
1204                                         DBG((rss->dbg, LEVEL_3, "\t\t\tkeeping %+F as killer for %+F\n", rpar->killer, rpar->irn));
1205                                 }
1206                         }
1207                 }
1208
1209                 ir_nodeset_destroy(&x);
1210                 ir_nodeset_destroy(&y);
1211                 DEL_ARR_F(sks);
1212         }
1213
1214         if (rss->opts->dump_flags & RSS_DUMP_KILL)
1215                 debug_vcg_dump_kill(rss);
1216
1217         del_pset(rss->cbc_set);
1218         obstack_free(&obst, NULL);
1219 }
1220
1221 /**
1222  * Adds the edge src -> tgt to the dvg. Checks if reverse edge is already there (asserts).
1223  */
1224 static INLINE void add_dvg_edge(rss_t *rss, dvg_t *dvg, ir_node *src, ir_node *tgt, int have_source) {
1225         rss_edge_t *dvg_edge;
1226         rss_edge_t key;
1227
1228         if (! have_source)
1229                 ir_nodeset_insert(&dvg->nodes, src);
1230         else
1231                 assert(ir_nodeset_contains(&dvg->nodes, src) && "Wrong assumption");
1232
1233         ir_nodeset_insert(&dvg->nodes, tgt);
1234
1235         key.src = tgt;
1236         key.tgt = src;
1237         assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1238
1239         key.src = src;
1240         key.tgt = tgt;
1241         if (NULL != pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1242                 /* add the edge to the DVG */
1243                 dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1244
1245                 dvg_edge->src  = src;
1246                 dvg_edge->tgt  = tgt;
1247                 dvg_edge->next = NULL;
1248
1249                 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", src, tgt));
1250                 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1251         }
1252 }
1253
1254 /**
1255  * Computes the disjoint value DAG (DVG).
1256  * BEWARE: It is not made explicitly clear in the Touati paper,
1257  *         but the DVG is meant to be build from the KILLING DAG
1258  */
1259 static void compute_dvg(rss_t *rss, dvg_t *dvg) {
1260         plist_element_t *el;
1261
1262         DBG((rss->dbg, LEVEL_1, "\tcomputing DVG:\n"));
1263
1264         foreach_plist(rss->nodes, el) {
1265                 ir_node    *u_irn    = plist_element_get_value(el);
1266                 rss_irn_t  *u        = get_rss_irn(rss, u_irn);
1267                 rss_irn_t  *u_killer = get_rss_irn(rss, u->killer);
1268                 int        i;
1269
1270                 /* TODO: omit nodes only having sink as pkiller and killing no one */
1271
1272                 /* add an edge to killer */
1273                 add_dvg_edge(rss, dvg, u_irn, u->killer, 0);
1274
1275                 if (is_Sink(u->killer))
1276                         continue;
1277
1278                 /* We add an edge to every killer, from where we could be reached. */
1279                 for (i = ARR_LEN_SAFE(u_killer->descendants) - 1; i >= 0; --i) {
1280                         add_dvg_edge(rss, dvg, u_irn, u_killer->descendants[i], 1);
1281                 }
1282
1283 #if 0
1284
1285                 foreach_plist(rss->nodes, el2) {
1286                         ir_node *v_irn = plist_element_get_value(el2);
1287
1288                         /*
1289                                 There is an edge (u, v) in the DVG iff v is a descendant of the killer(u).
1290                         */
1291                         if (BSEARCH_IRN_ARR(v_irn, u_kill->descendants)) {
1292                                 rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1293                                 rss_edge_t key;
1294
1295                                 /* insert the user into the DVG and append it to the user list of u */
1296                                 ir_nodeset_insert(&dvg->nodes, v_irn);
1297                                 if (! plist_has_value(u->dvg_user_list, v_irn))
1298                                         plist_insert_back(u->dvg_user_list, v_irn);
1299
1300                                 dvg_edge->src  = u_irn;
1301                                 dvg_edge->tgt  = v_irn;
1302                                 dvg_edge->next = NULL;
1303
1304                                 key.src = v_irn;
1305                                 key.tgt = u_irn;
1306
1307                                 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1308
1309                                 /* add the edge to the DVG */
1310                                 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", u_irn, v_irn));
1311                                 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1312                         }
1313                 }
1314 #endif /* if 0 */
1315         }
1316
1317         if (rss->opts->dump_flags & RSS_DUMP_DVG)
1318                 debug_vcg_dump_dvg(rss, dvg);
1319 }
1320
1321 /**
1322  * Updates the dvg structure when a serialization edge from src -> tgt is added.
1323  */
1324 static void update_dvg(rss_t *rss, dvg_t *dvg, rss_irn_t *src, rss_irn_t *tgt) {
1325         int i, j, idx;
1326         rss_edge_t *edge;
1327         rss_edge_t **arr = alloca(pset_count(dvg->edges) * sizeof(arr[0]));
1328
1329         /*
1330                 Add an edge from serialization target to serialization src:
1331                 src cannot be alive with target
1332         */
1333         add_dvg_edge(rss, dvg, tgt->irn, src->irn, 0);
1334
1335         /* Add edges to src's descendants as well, they also getting serialized. */
1336         for (i = ARR_LEN_SAFE(src->descendants) - 1; i >= 0; --i) {
1337                 add_dvg_edge(rss, dvg, tgt->irn, src->descendants[i], 1);
1338         }
1339
1340         /* We also have to add edges from targets predecessors in dvg */
1341         idx = 0;
1342         /* We cannot insert elements into set over which we iterate ... */
1343         foreach_pset(dvg->edges, edge) {
1344                 if (edge->tgt == tgt->irn) {
1345                         arr[idx++] = edge;
1346                 }
1347         }
1348
1349         for (i = 0; i < idx; ++i) {
1350                 edge = arr[i];
1351                 add_dvg_edge(rss, dvg, edge->src, src->irn, 1);
1352                 for (j = ARR_LEN_SAFE(src->descendants) - 1; j >= 0; --j) {
1353                         add_dvg_edge(rss, dvg, edge->src, src->descendants[j], 1);
1354                 }
1355         }
1356 }
1357
1358 #if 0
1359 /**
1360  * Accumulate all descendants for root into list.
1361  */
1362 static void accumulate_dvg_descendant_values(rss_t *rss, rss_irn_t *root, plist_t *list) {
1363         if (plist_count(root->dvg_user_list) > 0) {
1364                 plist_element_t *el;
1365
1366                 foreach_plist(root->dvg_user_list, el) {
1367                         ir_node   *v_irn = plist_element_get_value(el);
1368                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
1369
1370                         /* add v as descendant */
1371                         if (! plist_has_value(list, v_irn)) {
1372                                 plist_insert_back(list, v_irn);
1373                                 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG descendant %+F\n", v_irn));
1374                         }
1375
1376                         /* accumulate v's descendants */
1377                         accumulate_dvg_descendant_values(rss, v, list);
1378                 }
1379         }
1380 }
1381
1382 /**
1383  * Builds the list of potential killers for each node
1384  * in the given DVG.
1385  * Needs the descendant list for all user as sorted array.
1386  */
1387 static void build_dvg_pkiller_list(rss_t *rss, dvg_t *dvg) {
1388         ir_nodeset_iterator_t iter;
1389         ir_node *irn;
1390
1391         foreach_nodeset(&dvg->nodes, irn, iter) {
1392                 rss_irn_t       *node = get_rss_irn(rss, irn);
1393                 plist_element_t *el, *el2;
1394
1395                 DBG((rss->dbg, DEBUG_DVG, "\t\tbuilding pkiller list for %+F\n", irn));
1396
1397                 /* check each user */
1398                 foreach_plist(node->dvg_user_list, el) {
1399                         ir_node *u_irn = plist_element_get_value(el);
1400
1401                         /* is the current user u_irn not a descendant of any other user -> pkiller */
1402                         foreach_plist(node->dvg_user_list, el2) {
1403                                 ir_node   *v_irn = plist_element_get_value(el2);
1404                                 rss_irn_t *v     = get_rss_irn(rss, v_irn);
1405
1406                                 if (el != el2                             &&
1407                                         ! BSEARCH_IRN_ARR(u_irn, v->dvg_desc) &&
1408                                         ! plist_has_value(node->dvg_pkiller_list, u_irn))
1409                                 {
1410                                         plist_insert_back(node->dvg_pkiller_list, u_irn);
1411                                         DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG pkiller %+F\n", u_irn));
1412                                 }
1413                         }
1414                 }
1415
1416                 node->dvg_pkiller = build_sorted_array_from_list(node->dvg_pkiller_list, phase_obst(&rss->ph));
1417         }
1418
1419         DEBUG_ONLY(
1420                 if (firm_dbg_get_mask(rss->dbg) & DEBUG_DVG)
1421                         debug_vcg_dump_dvg_pkiller(rss, dvg);
1422         );
1423 }
1424
1425 #endif /* if 0 */
1426
1427 /**
1428  * Compute the maximal antichain of the current DVG.
1429  * This is a reimplementation of the MAXIMAL_ANTI_CHAIN function
1430  * from the DDG library 1.1 (DAG.cpp).
1431  */
1432 static ir_nodeset_t *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration) {
1433         int         n               = ir_nodeset_size(&dvg->nodes);
1434         int         *assignment     = alloca(n * sizeof(assignment[0]));
1435         int         *assignment_rev = alloca(n * sizeof(assignment_rev[0]));
1436         int         *idx_map        = alloca(n * sizeof(idx_map[0]));
1437         hungarian_problem_t *bp;
1438         ir_nodeset_t *values, *temp;
1439         ir_nodeset_iterator_t iter;
1440         ir_node     *u_irn;
1441         int         i, j, cost, cur_chain, res;
1442         rss_edge_t  *dvg_edge;
1443
1444 #define MAP_IDX(irn) bsearch_for_index(get_irn_idx(irn), idx_map,  n,  1)
1445
1446         if (pset_count(dvg->edges) == 0)
1447                 return NULL;
1448
1449         bp = hungarian_new(n, n, 1, HUNGARIAN_MATCH_NORMAL);
1450
1451         /*
1452                 At first, we build an index map for the nodes in the DVG,
1453                 because we cannot use the irn idx therefore as the resulting
1454                 bipartite data structure would have around 1.2GB.
1455                 So we limit the size to the number of nodes we have in the DVG
1456                 and build a sorted index map for their irn indices.
1457         */
1458         i = 0;
1459         foreach_ir_nodeset(&dvg->nodes, u_irn, iter) {
1460                 idx_map[i++] = get_irn_idx(u_irn);
1461         }
1462         qsort(idx_map, n, sizeof(idx_map[0]), cmp_int);
1463
1464         foreach_pset(dvg->edges, dvg_edge) {
1465                 int idx_u = MAP_IDX(dvg_edge->src);
1466                 int idx_v = MAP_IDX(dvg_edge->tgt);
1467
1468                 /* add the entry to the bipartite data structure */
1469                 hungarian_add(bp, idx_u, idx_v, 1);
1470                 DBG((rss->dbg, LEVEL_3, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1471                         idx_u, dvg_edge->src, idx_v, dvg_edge->tgt));
1472         }
1473 #if 0
1474         /*
1475                 Add a bipartite entry for each pair of nodes (u, v), where exists a
1476                 path in the DVG from u to v, ie. connect all descendants(v) to v.
1477                 desc_dvg(v) = accumulated descendants of all z in dvg_user_list(v)
1478         */
1479         foreach_ir_nodeset(&dvg->nodes, u_irn, iter) {
1480                 rss_irn_t *u        = get_rss_irn(rss, u_irn);
1481                 int       idx_u_irn = MAP_IDX(u_irn);
1482
1483                 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG descendants of %+F:\n", u_irn));
1484
1485                 //plist_clear(u->dvg_desc_list);
1486                 //accumulate_dvg_descendant_values(rss, u, u->dvg_desc_list);
1487
1488                 /*
1489                         FIXME: The array is build on the phase obstack and we cannot free the data.
1490                         So the array get as many times allocated as this function is called.
1491                 */
1492
1493                 /* build the sorted array for faster searches */
1494                 //u->dvg_desc = build_sorted_array_from_list(u->dvg_desc_list, phase_obst(&rss->ph));
1495
1496                 DBG((rss->dbg, DEBUG_MAX_AC, "\t\tadding bipartite entries of %+F:\n", u_irn));
1497
1498                 /* add the bipartite entries for all descendants */
1499                 for (i = ARR_LEN_SAFE(u->dvg_desc) - 1; i >= 0; --i) {
1500                         rss_irn_t *desc    = get_rss_irn(rss, u->dvg_desc[i]);
1501                         int       idx_desc = MAP_IDX(u->dvg_desc[i]);
1502
1503                         /* add the entry to the bipartite data structure */
1504                         hungarian_add(bp, idx_u_irn, idx_desc, 1);
1505                         DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1506                                 idx_u_irn, u_irn, idx_desc, u->dvg_desc[i]));
1507
1508                         need_matching = 1;
1509                 }
1510         }
1511 #endif
1512
1513         /* We want maximum cardinality matching */
1514         hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);
1515
1516 #if 0
1517         DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG pkiller:\n"));
1518         /* beware: the following function needs the dvg_desc array */
1519         build_dvg_pkiller_list(rss, dvg);
1520 #endif
1521
1522         DBG((rss->dbg, LEVEL_2, "\t\tcomputing bipartite matching\n"));
1523         /*
1524                 The maximum cardinality bipartite matching gives us the minimal
1525                 chain partition, which corresponds to the maximum anti chains.
1526         */
1527         res = hungarian_solve(bp, assignment, &cost, 1);
1528         assert(res == 0 && "Bipartite matching failed!");
1529
1530         hungarian_free(bp);
1531         memset(assignment_rev, -1, n * sizeof(assignment_rev[0]));
1532
1533         /* build the reverse assignment, ie. foreach i -> j, add j -> i */
1534         for (i = 0; i < n; ++i) {
1535                 if (assignment[i] >= 0) {
1536                         int j = assignment[i];
1537                         assignment_rev[j] = i;
1538                 }
1539         }
1540
1541         DBG((rss->dbg, LEVEL_2, "\t\tgot assignment with cost %d\n", cost));
1542         DBG((rss->dbg, LEVEL_3, "\t\t\tassignment   ---   reverse assignment\n", cost));
1543         for (i = 0; i < n; ++i) {
1544                 DBG((rss->dbg, LEVEL_3, "\t\t\t%3d -> %3d         %3d -> %3d\n", i, assignment[i], i, assignment_rev[i]));
1545         }
1546
1547         values    = xmalloc(sizeof(values[0]));
1548         ir_nodeset_init_size(values, 10);
1549         cur_chain = 0;
1550         /* Construction of the minimal chain partition */
1551         for (j = 0; j < n; ++j) {
1552                 /* check nodes, which did not occur as target */
1553                 if (assignment_rev[j] == -1) {
1554                         int       xj      = idx_map[j];
1555                         ir_node   *xj_irn = get_idx_irn(rss->irg, xj);
1556                         rss_irn_t *xj_rss = get_rss_irn(rss, xj_irn);
1557                         chain_t   *c      = obstack_alloc(phase_obst(&rss->ph), sizeof(*c));
1558                         int       source;
1559
1560                         /* there was no source for j -> we have a source of a new chain */
1561                         ir_nodeset_insert(values, xj_irn);
1562
1563                         c->elements = plist_obstack_new(phase_obst(&rss->ph));
1564                         c->nr       = cur_chain++;
1565                         plist_insert_back(c->elements, xj_irn);
1566
1567                         xj_rss->chain = c;
1568
1569                         DBG((rss->dbg, LEVEL_2, "\t\tstarting chain %d:\n", c->nr));
1570                         DBG((rss->dbg, LEVEL_2, "\t\t\t%+F (%d)", xj_irn, j));
1571
1572                         /* follow chain, having j as source */
1573                         source = j;
1574                         while (assignment[source] >= 0) {
1575                                 int       target  = assignment[source];
1576                                 int       irn_idx = idx_map[target];
1577                                 ir_node   *irn    = get_idx_irn(rss->irg, irn_idx);
1578                                 rss_irn_t *node   = get_rss_irn(rss, irn);
1579
1580                                 plist_insert_back(c->elements, irn);
1581                                 node->chain = c;
1582
1583                                 DB((rss->dbg, LEVEL_2, " -> %+F (%d)", irn, target));
1584
1585                                 /* new source = last target */
1586                                 source = target;
1587                         }
1588
1589                         DB((rss->dbg, LEVEL_2, "\n"));
1590                 }
1591         }
1592
1593         /*
1594                 Computing the maximal antichain: Select an element from each
1595                 chain such, such it is parallel with the others.
1596         */
1597         DBG((rss->dbg, LEVEL_1, "\t\tcomputing set of saturation values (MAX AC)\n"));
1598         DBG((rss->dbg, LEVEL_3, "\t\tstarting with:\n"));
1599
1600         DEBUG_ONLY(
1601                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_3)
1602                         dump_nodeset(values, "\t\t\t");
1603         )
1604
1605         temp = NULL;
1606         do {
1607                 /*
1608                         We need an explicit array for the values as
1609                         we cannot iterate multiple times over the same
1610                         set at the same time. :-(((((
1611                         TODO Matze: now we can, so rewrite this...
1612                 */
1613                 int     n         = ir_nodeset_size(values);
1614                 int     i         = 0;
1615                 ir_node **val_arr = NEW_ARR_F(ir_node *, n);
1616
1617                 foreach_ir_nodeset(values, u_irn, iter)
1618                         val_arr[i++] = u_irn;
1619
1620                 if (temp) {
1621                         ir_nodeset_destroy(temp);
1622                         free(temp);
1623                 }
1624
1625                 temp = xmalloc(sizeof(temp[0]));
1626                 ir_nodeset_init_size(temp, 10);
1627
1628                 /* Select all nodes from current value set, having another node in the set as descendant. */
1629                 for (i = 0; i < n; ++i) {
1630                         rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1631
1632                         for (j = 0; j < n; ++j) {
1633                                 if (i != j) {
1634                                         rss_edge_t key;
1635
1636                                         key.src = val_arr[i];
1637                                         key.tgt = val_arr[j];
1638
1639                                         if (pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1640                                                 /* v[j] is descendant of u -> remove u and break */
1641                                                 ir_nodeset_insert(temp, u->irn);
1642                                                 ir_nodeset_remove(values, u->irn);
1643
1644                                                 DBG((rss->dbg, LEVEL_3, "\t\t\tremoving %+F from values, adding it to temp\n", u->irn));
1645
1646                                                 break;
1647                                         }
1648                                 }
1649                         }
1650                 }
1651
1652                 /* Try to insert the chain predecessor of all selected u's */
1653                 foreach_ir_nodeset(temp, u_irn, iter) {
1654                         rss_irn_t       *u  = get_rss_irn(rss, u_irn);
1655                         chain_t         *c  = u->chain;
1656                         plist_element_t *el = plist_find_value(c->elements, u_irn);
1657
1658                         assert(el && "Missing element in chain!");
1659
1660                         /* If u has predecessor in chain: insert the predecessor */
1661                         if (el == plist_element_get_prev(el)) {
1662                                 ir_nodeset_insert(values, plist_element_get_value(el));
1663                                 DBG((rss->dbg, LEVEL_3, "\t\t\tadding %+F to values\n", plist_element_get_value(el)));
1664                         }
1665                 }
1666
1667
1668                 DEL_ARR_F(val_arr);
1669         } while (ir_nodeset_size(temp) > 0);
1670
1671         DBG((rss->dbg, LEVEL_2, "\t\tfinal set:\n"));
1672         DEBUG_ONLY(
1673                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2) {
1674                         dump_nodeset(values, "\t\t\t");
1675                 }
1676         );
1677
1678         if (rss->opts->dump_flags & RSS_DUMP_MAXAC)
1679                 debug_vcg_dump_pkg(rss, values, iteration);
1680
1681         if(temp != NULL) {
1682                 ir_nodeset_destroy(temp);
1683                 free(temp);
1684         }
1685
1686         return values;
1687
1688 #undef MAP_IDX
1689 }
1690
1691 /**
1692  * Computes the best serialization between two nodes of sat_vals.
1693  */
1694 static serialization_t *compute_best_admissible_serialization(rss_t *rss, ir_nodeset_t *sat_vals, serialization_t *ser, int num_regs) {
1695         int        n                    = ir_nodeset_size(sat_vals);
1696         int        n_idx                = ARR_LEN_SAFE(rss->idx_map);
1697         int        i                    = 0;
1698         ir_node    **val_arr            = alloca(n * sizeof(val_arr[0]));
1699         bitset_t   *bs_sv               = bitset_alloca(n_idx);
1700         bitset_t   *bs_vdesc            = bitset_alloca(n_idx);
1701         bitset_t   *bs_tmp              = bitset_alloca(n_idx);
1702         bitset_t   *bs_ukilldesc        = bitset_alloca(n_idx);
1703         int        best_benefit         = INT_MAX;
1704         int        best_omega2          = INT_MAX;
1705         int        best_benefit_omega20 = INT_MAX;
1706         int        has_omega1           = 0;
1707         int        j, k;
1708         ir_node    *irn;
1709         ir_nodeset_iterator_t iter;
1710         rss_edge_t min_benefit_edge;
1711         rss_edge_t min_omega20_edge;
1712         rss_irn_t  *ser_u_omega1 = NULL, *ser_v_omega1 = NULL;
1713         rss_irn_t  *ser_u_omega20 = NULL, *ser_v_omega20 = NULL;
1714
1715         DBG((rss->dbg, LEVEL_1, "\tcomputing admissible serializations:\n"));
1716
1717         /*
1718                 We need an explicit array for the values as
1719                 we cannot iterate multiple times over the same
1720                 set at the same time. :-(((((
1721         */
1722
1723         foreach_ir_nodeset(sat_vals, irn, iter) {
1724                 val_arr[i++] = irn;
1725                 bitset_set(bs_sv, BLOCK_IDX_MAP(rss, irn));
1726         }
1727
1728         /*
1729                 We build all admissible serializations and remember the best found so far.
1730                 For u in sat_vals:
1731                  For v in sat_val:
1732                    if v in pkiller(u): add edge from v to all other pkiller(u)
1733                    else: for all vv in pkiller(u): add edge from v to vv if there exists no path from vv to v
1734         */
1735
1736 /*
1737         A node is unserializable if:
1738         - it has only one killer and this one is Sink
1739     - it kills no other values
1740         In this case there is no serialization which could
1741         reduce the registerpressure
1742 */
1743 #define IS_UNSERIALIZABLE_NODE(rss_node)                  \
1744         (                                                     \
1745                 (                                                 \
1746                         (plist_count(rss_node->pkiller_list) == 1) && \
1747                         is_Sink(rss_node->killer)                  && \
1748                         (rss_node->kill_count                == 0)    \
1749                 )                            ||                   \
1750                 be_is_Barrier(rss_node->irn) ||                   \
1751                 be_is_Keep(rss_node->irn)                         \
1752         )
1753
1754         /* for all u in sat_vals */
1755         for (i = 0; i < n; ++i) {
1756                 rss_irn_t       *u = get_rss_irn(rss, val_arr[i]);
1757                 plist_element_t *el;
1758
1759                 /* ignore nodes where serialization does not help */
1760                 if (IS_UNSERIALIZABLE_NODE(u)) {
1761                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", u->irn));
1762                         continue;
1763                 }
1764
1765                 /* accumulate all descendants of all pkiller(u) */
1766                 bitset_clear_all(bs_ukilldesc);
1767                 foreach_plist(u->pkiller_list, el) {
1768                         ir_node   *irn  = plist_element_get_value(el);
1769                         rss_irn_t *node = get_rss_irn(rss, irn);
1770
1771                         if (! is_Sink(irn))
1772                                 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, irn));
1773                         else
1774                                 continue;
1775
1776                         for (k = ARR_LEN_SAFE(node->descendants) - 1; k >= 0; --k) {
1777                                 if (! is_Sink(node->descendants[k]))
1778                                         bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, node->descendants[k]));
1779                         }
1780                 }
1781
1782                 /* for all v in sat_vals */
1783                 for (j = 0; j < n; ++j) {
1784                         ir_node   *v_irn   = val_arr[j];
1785                         rss_irn_t *v       = get_rss_irn(rss, v_irn);
1786                         unsigned  v_height = get_irn_height(rss->h, v_irn);
1787                         int       omega1, omega2, is_pkiller;
1788
1789                         /* v cannot be serialized with itself
1790                          * ignore nodes where serialization does not help */
1791                         if (i == j || IS_UNSERIALIZABLE_NODE(v)) {
1792                                 if (i != j)
1793                                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", v->irn));
1794                                 continue;
1795                         }
1796
1797                         /* get descendants of v */
1798                         bitset_clear_all(bs_vdesc);
1799                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v_irn));
1800                         for (k = ARR_LEN_SAFE(v->descendants) - 1; k >= 0; --k) {
1801                                 if (! is_Sink(v->descendants[k]))
1802                                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v->descendants[k]));
1803                         }
1804
1805                         /* if v is in pkiller(u) */
1806                         is_pkiller = plist_has_value(u->pkiller_list, v_irn);
1807
1808                         /* for all vv in pkiller(u) */
1809                         foreach_plist(u->pkiller_list, el) {
1810                                 ir_node *vv_irn  = plist_element_get_value(el);
1811                                 int     add_edge;
1812
1813                                 if (is_Sink(vv_irn) || is_cfop(vv_irn))
1814                                         continue;
1815
1816                                 if (is_pkiller)
1817                                         add_edge = vv_irn != v_irn && skip_Proj(vv_irn) != skip_Proj(v_irn);
1818                                 else
1819                                         add_edge = ! heights_reachable_in_block(rss->h, skip_Proj(vv_irn), skip_Proj(v_irn));
1820
1821                                 /*
1822                                         As we add an edge from vv -> v, we have to make sure,
1823                                         that there exists no path from v to vv.
1824                                 */
1825
1826                                 if (add_edge) {
1827                                         int vv_height = get_irn_height(rss->h, vv_irn);
1828                                         int mu1, mu2, critical_path_cost;
1829
1830                                         /*
1831                                                 mu1 = | descendants(v) cut sat_vals |
1832                                                 the number of saturating values which cannot
1833                                                 be simultaneously alive with u
1834                                         */
1835                                         bitset_copy(bs_tmp, bs_vdesc);
1836                                         mu1 = bitset_popcnt(bitset_and(bs_tmp, bs_sv));
1837
1838                                         /*
1839                                                 mu2 = | accum_desc_all_pkiller(u) without descendants(v) |
1840                                         */
1841                                         if (is_pkiller) {
1842                                                 bitset_copy(bs_tmp, bs_ukilldesc);
1843                                                 mu2 = bitset_popcnt(bitset_andnot(bs_tmp, bs_vdesc));
1844                                         }
1845                                         else {
1846                                                 mu2 = 0;
1847                                         }
1848
1849                                         /* omega1 = mu1 - mu2 */
1850                                         omega1 = mu1 - mu2;
1851
1852                                         if (omega1 != 0)
1853                                                 has_omega1 = 1;
1854
1855                                         /* omega2 = increase of critical path */
1856                                         critical_path_cost =
1857                                                 v_height                        /* longest path from v to sink */
1858                                                 + rss->max_height - vv_height   /* longest path from source to vv */
1859                                                 + 1;                            /* edge */
1860
1861                                         /*
1862                                                 If critical_path_cost > max_height -> the new edge
1863                                                 would increase the longest critical path by the difference.
1864                                         */
1865                                         omega2 = critical_path_cost > rss->max_height ? critical_path_cost - rss->max_height : 0;
1866
1867                                         /* this keeps track of the edge with the best benefit */
1868                                         if (omega1 >= num_regs - n && omega1 < best_benefit) {
1869                                                 min_benefit_edge.src = v_irn;
1870                                                 min_benefit_edge.tgt = vv_irn;
1871
1872                                                 ser_u_omega1 = u;
1873                                                 ser_v_omega1 = v;
1874
1875                                                 best_benefit    = omega1;
1876                                                 ser->new_killer = is_pkiller;
1877                                         }
1878
1879                                         /* this keeps track of the edge with the best omega1 costs where omega2 == 0 */
1880                                         if (omega2 == 0 && omega1 >= num_regs - n && omega1 < best_benefit_omega20) {
1881                                                 min_omega20_edge.src = v_irn;
1882                                                 min_omega20_edge.tgt = vv_irn;
1883
1884                                                 ser_u_omega20 = u;
1885                                                 ser_v_omega20 = v;
1886
1887                                                 best_benefit_omega20 = omega1;
1888                                                 ser->new_killer      = is_pkiller;
1889                                         }
1890
1891                                         best_omega2 = MIN(best_omega2, omega2);
1892
1893                                         DBG((rss->dbg, LEVEL_2, "\t\tfound %+F -> %+F (w1 %d, w2 %d)\n",
1894                                                 v_irn, vv_irn, omega1, omega2));
1895                                 } /* if add_edge */
1896                         } /* for all vv in pkiller(u) */
1897                 } /* for all v in sat_vals */
1898         } /* for all u in sat_vals */
1899
1900         if (! has_omega1)
1901                 return NULL;
1902
1903         if (best_omega2 == 0) {
1904                 ser->u         = ser_u_omega20;
1905                 ser->v         = ser_v_omega20;
1906                 ser->edge->src = min_omega20_edge.src;
1907                 ser->edge->tgt = min_omega20_edge.tgt;
1908                 ser->omega1    = best_benefit_omega20;
1909                 ser->omega2    = best_omega2;
1910         }
1911         else {
1912                 ser->u         = ser_u_omega1;
1913                 ser->v         = ser_v_omega1;
1914                 ser->edge->src = min_benefit_edge.src;
1915                 ser->edge->tgt = min_benefit_edge.tgt;
1916                 ser->omega1    = best_benefit;
1917                 ser->omega2    = best_omega2;
1918         }
1919
1920         return ser;
1921
1922 #undef IS_UNSERIALIZABLE_NODE
1923 }
1924
1925 /**
1926  * Perform the value serialization heuristic and add all
1927  * computed serialization edges as dependencies to the irg.
1928  */
1929 static void perform_value_serialization_heuristic(rss_t *rss) {
1930         bitset_t *arch_nonign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1931         bitset_t *abi_ign_bs     = bitset_alloca(arch_register_class_n_regs(rss->cls));
1932         int      available_regs, iteration;
1933         dvg_t    dvg;
1934         ir_nodeset_t *sat_vals;
1935         pset *ser_set = new_pset(cmp_rss_edges, 20);
1936
1937         /* available_regs = R = |arch_non_ignore_regs cut ~abi_ignore_regs| */
1938         arch_put_non_ignore_regs(rss->arch_env, rss->cls, arch_nonign_bs);
1939         be_abi_put_ignore_regs(rss->abi, rss->cls, abi_ign_bs);
1940         bitset_andnot(arch_nonign_bs, abi_ign_bs);
1941         available_regs  = bitset_popcnt(arch_nonign_bs);
1942         //num_live = pset_count(rss->live_block);
1943         //available_regs -= num_live < available_regs ? num_live : 0;
1944
1945         DBG((rss->dbg, LEVEL_1, "\n\t#available regs: %d\n\n", available_regs));
1946
1947         /*
1948                 At first we need to compute the disjoint value DAG (DVG = {V, E_dv}).
1949                 V    = set of all nodes we are currently interested in
1950                 E_dv = there is an edge from u to v iff v is a descendant of killer(u), forall u, v in V
1951         */
1952         ir_nodeset_init_size(&dvg.nodes, plist_count(rss->nodes));
1953         dvg.edges = new_pset(cmp_rss_edges, plist_count(rss->nodes) * 5);
1954         compute_dvg(rss, &dvg);
1955
1956         /*
1957                 Then we perform the heuristic serialization algorithm
1958                 on the DVG which gives us all necessary serialization
1959                 edges.
1960         */
1961         DBG((rss->dbg, LEVEL_1, "\tcomputing maximal antichain:\n"));
1962         iteration = 0;
1963         sat_vals  = compute_maximal_antichain(rss, &dvg, iteration++);
1964         while (sat_vals && (ir_nodeset_size(sat_vals) > available_regs)) {
1965                 serialization_t *ser, best_ser;
1966                 rss_edge_t      *edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*edge));
1967                 ir_node         *dep_src, *dep_tgt;
1968
1969                 best_ser.edge = edge;
1970                 ser = compute_best_admissible_serialization(rss, sat_vals, &best_ser, available_regs);
1971
1972                 DBG((rss->dbg, LEVEL_1, "\tcurrent register saturation %d, target %d\n", ir_nodeset_size(sat_vals), available_regs));
1973
1974                 if (! ser) {
1975                         DBG((rss->dbg, LEVEL_1, "\tno RS improving serialization found, breaking at iteration %d\n", iteration));
1976                         break;
1977                 }
1978
1979                 /* Insert the serialization as dependency edge into the irg. */
1980                 DBG((rss->dbg, LEVEL_1, "\tserializing %+F -> %+F with edge %+F -> %+F and cost %d, %d\n",
1981                         ser->u->irn, ser->v->irn, ser->edge->src, ser->edge->tgt, ser->omega1, ser->omega2));
1982
1983                 if (pset_find(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge)))
1984                         ir_printf("WARNING: serialization %+F -> %+F computed twice!\n", ser->edge->src, ser->edge->tgt);
1985
1986
1987                 pset_insert(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge));
1988
1989                 /* update the dvg */
1990                 update_dvg(rss, &dvg, ser->v, ser->u);
1991                 update_dvg(rss, &dvg, get_rss_irn(rss, ser->edge->src), get_rss_irn(rss, ser->edge->tgt));
1992                 if(sat_vals != NULL) {
1993                         ir_nodeset_destroy(sat_vals);
1994                         free(sat_vals);
1995                 }
1996
1997                 dep_src = skip_Proj(ser->edge->src);
1998                 dep_tgt = ser->edge->tgt;
1999                 add_irn_dep(dep_src, dep_tgt);
2000
2001                 /* Update descendants, consumer and pkillers of the target */
2002                 update_node_info(rss, ser->edge->tgt, ser->edge->src);
2003
2004                 /* TODO: try to find a cheaper way for updating height information */
2005                 rss->max_height = heights_recompute_block(rss->h, rss->block);
2006
2007                 /* Recompute the antichain for next serialization */
2008                 DBG((rss->dbg, LEVEL_1, "\tre-computing maximal antichain:\n"));
2009                 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
2010         }
2011
2012         ir_nodeset_destroy(&dvg.nodes);
2013         del_pset(dvg.edges);
2014 }
2015
2016 /**
2017  * Do initial calculations for a block.
2018  */
2019 static void process_block(ir_node *block, void *env) {
2020         rss_t *rss = env;
2021         int   i, n;
2022         const ir_edge_t *edge;
2023
2024         phase_init(&rss->ph, "rss block preprocessor", rss->irg, PHASE_DEFAULT_GROWTH, init_rss_irn);
2025
2026         DBG((rss->dbg, LEVEL_1, "preprocessing block %+F\n", block));
2027         rss->block = block;
2028
2029         /* build an index map for all nodes in the current block */
2030         i            = 0;
2031         n            = get_irn_n_edges(block);
2032         NEW_ARR_A(int *, rss->idx_map, n);
2033         foreach_out_edge(block, edge) {
2034                 ir_node *irn      = get_edge_src_irn(edge);
2035                 rss->idx_map[i++] = get_irn_idx(irn);
2036         }
2037         qsort(rss->idx_map, n, sizeof(rss->idx_map[0]), cmp_int);
2038         rss->max_height = heights_recompute_block(rss->h, block);
2039
2040         /* loop over all register classes */
2041         for (i = arch_isa_get_n_reg_class(rss->arch_env->isa) - 1; i >= 0; --i) {
2042                 const arch_register_class_t *cls = arch_isa_get_reg_class(rss->arch_env->isa, i);
2043
2044                 rss->cls = cls;
2045                 DBG((rss->dbg, LEVEL_1, "register class %s\n", arch_register_class_name(cls)));
2046
2047                 /* Get all live value at end of Block having current register class */
2048                 rss->live_block = pset_new_ptr(10);
2049                 be_liveness_end_of_block(rss->liveness, rss->arch_env, rss->cls, rss->block, rss->live_block);
2050
2051                 /* reset the list of interesting nodes */
2052                 plist_clear(rss->nodes);
2053                 plist_insert_back(rss->nodes, _sink);
2054
2055                 /* collect all nodes having a certain register class */
2056                 foreach_out_edge(block, edge) {
2057                         ir_node *irn  = get_edge_src_irn(edge);
2058                         ir_mode *mode = get_irn_mode(irn);
2059
2060                         /*
2061                                 We skip:
2062                                 - mode_T nodes (the projs are asked)
2063                                 - mode_X nodes (control flow nodes are always scheduled last)
2064                                 - Keeps (they are always immediately scheduled)
2065                                 - Phi (same as Keep)
2066                         */
2067                         if (mode == mode_T || mode == mode_X || is_Phi(irn))
2068                                 continue;
2069
2070                         /*
2071                                 In case of a proj, we skip
2072                                 - Barrier (they are a Barrier :)
2073                                 - Start
2074                                 - the Proj itself, as it's scheduled always with it's super node
2075                         */
2076                         if (is_Proj(irn)) {
2077                                 ir_node *pred = get_Proj_pred(irn);
2078                                 if (be_is_Barrier(pred) || is_Start(pred))
2079                                         continue;
2080                         }
2081
2082                         /* calculate the descendants and consumer for each node in the block */
2083                         collect_node_info(rss, skip_Proj(irn));
2084
2085                         if (be_is_Keep(irn))
2086                                 continue;
2087
2088                         if (! arch_irn_is(rss->arch_env, irn, ignore) && arch_get_irn_reg_class(rss->arch_env, irn, -1) == cls) {
2089                                 plist_insert_back(rss->nodes, skip_Proj(irn));
2090                         }
2091                         //}
2092                 }
2093
2094                 /* compute the potential killing set PK(G) */
2095                 compute_pkill_set(rss);
2096
2097                 /* compute the killing function k* */
2098                 compute_killing_function(rss);
2099
2100                 /*
2101                         Compute the heuristic value serialization and
2102                         add the necessary dependencies to the irg.
2103                 */
2104                 perform_value_serialization_heuristic(rss);
2105
2106                 del_pset(rss->live_block);
2107         }
2108
2109         phase_free(&rss->ph);
2110 }
2111
2112 /**
2113  * Register the options.
2114  */
2115 void be_init_schedrss(void) {
2116         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
2117         lc_opt_entry_t *sched_grp = lc_opt_get_grp(be_grp, "sched");
2118         lc_opt_entry_t *rss_grp = lc_opt_get_grp(sched_grp, "rss");
2119
2120         lc_opt_add_table(rss_grp, rss_option_table);
2121 }
2122
2123 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_schedrss);
2124
2125 /**
2126  * Preprocess the irg for scheduling.
2127  */
2128 void rss_schedule_preparation(const be_irg_t *birg) {
2129         rss_t rss;
2130
2131         FIRM_DBG_REGISTER(rss.dbg, "firm.be.sched.rss");
2132
2133         //firm_dbg_set_mask(rss.dbg, LEVEL_1 | LEVEL_2 | LEVEL_3);
2134
2135         init_rss_special_nodes(birg->irg);
2136
2137         rss.irg      = birg->irg;
2138         rss.arch_env = birg->main_env->arch_env;
2139         rss.abi      = birg->abi;
2140         rss.h        = heights_new(birg->irg);
2141         rss.nodes    = plist_new();
2142         rss.opts     = &rss_options;
2143         rss.liveness = be_liveness(birg->irg);
2144         irg_block_walk_graph(birg->irg, NULL, process_block, &rss);
2145         heights_free(rss.h);
2146         plist_free(rss.nodes);
2147         be_liveness_free(rss.liveness);
2148
2149         if (birg->main_env->options->dump_flags & DUMP_SCHED)
2150                 be_dump(rss.irg, "-rss", dump_ir_block_graph);
2151 }