ec7385a0a2cd3302f7fc65cf03a750a01e7cf097
[libfirm] / ir / be / beschedrss.c
1 /**
2  * Implementation of a register saturating list scheduler
3  * as described in: Sid-Ahmed-Ali Touati
4  * Register Saturation in Superscalar and VLIW Codes
5  *
6  * @license This file protected by GPL -  GNU GENERAL PUBLIC LICENSE.
7  * @author  Christian Wuerdig
8  * @date    29.08.2006
9  * @cvs-id  $Id$
10  */
11 #ifdef HAVE_CONFIG_H
12 #include "config.h"
13 #endif
14
15 #include <limits.h>
16
17 #include "obst.h"
18 #include "debug.h"
19
20 #include "irgraph_t.h"
21 #include "irnode_t.h"
22 #include "iredges_t.h"
23 #include "ircons_t.h"
24 #include "irphase_t.h"
25 #include "irgwalk.h"
26 #include "irdump.h"
27 #include "irtools.h"
28 #include "irbitset.h"
29 #include "irprintf.h"
30 #include "bipartite.h"
31 #include "hungarian.h"
32 #include "plist.h"
33
34 #include "height.h"
35
36 #include "beabi.h"
37 #include "bemodule.h"
38 #include "benode_t.h"
39 #include "besched_t.h"
40 #include "beirg_t.h"
41
42 #include <libcore/lc_opts.h>
43 #include <libcore/lc_opts_enum.h>
44
45
46 #define ARR_LEN_SAFE(arr) ((arr) != NULL ? ARR_LEN((arr)) : 0)
47
48 #define HASH_RSS_EDGE(edge) ((get_irn_node_nr((edge)->src) << 16) | (get_irn_node_nr((edge)->tgt) & 0xFFFF))
49 #define BSEARCH_IRN_ARR(val, arr) \
50         bsearch(&(val), (arr), ARR_LEN_SAFE((arr)), sizeof((arr)[0]), cmp_irn_idx)
51
52 #define BLOCK_IDX_MAP(rss, irn) bsearch_for_index(get_irn_idx((irn)), (rss)->idx_map, ARR_LEN_SAFE((rss)->idx_map), 1)
53
54 /* the rss options */
55 typedef struct _rss_opts_t {
56         int dump_flags;
57 } rss_opts_t;
58
59 /* Represents a child with associated costs */
60 typedef struct _child {
61         ir_node *irn;
62         float   cost;
63 } child_t;
64
65 /* We need edges for several purposes. */
66 typedef struct _rss_edge {
67         ir_node *src;
68         ir_node *tgt;
69         void    *next;
70 } rss_edge_t;
71
72 /* Represents a connected bipartite component. */
73 typedef struct _cbc {
74         ir_nodeset_t parents;       /**< = S  a set of value producers */
75         ir_nodeset_t children;      /**< = T  a set of value consumers */
76         pset    *kill_edges;    /**< = E  a set of edges (t in T, s in S) such as each s in S gets killed by at least one t in T */
77         int     nr;             /**< a deterministic index for set insertion (used as hash) */
78 } cbc_t;
79
80 /* Represents a disjoint value DAG. */
81 typedef struct _dvg {
82         ir_nodeset_t nodes;
83         pset    *edges;
84 } dvg_t;
85
86 /* Represents a chain of nodes. */
87 typedef struct _chain {
88         plist_t *elements;   /**< List of chain elements */
89         int     nr;          /**< a deterministic index for set insertion (used as hash) */
90 } chain_t;
91
92 typedef struct _rss_irn {
93         plist_t  *consumer_list;    /**< List of consumers */
94         ir_node **consumer;         /**< Sorted consumer array (needed for faster access) */
95
96         plist_t  *parent_list;      /**< List of parents */
97         plist_t  *pkiller_list;     /**< List of potential killers */
98
99         plist_t  *descendant_list;  /**< List of descendants */
100         ir_node **descendants;      /**< Sorted descendant array (needed for faster access) */
101
102         ir_node  *killer;           /**< The selected unique killer */
103         ir_node  *irn;              /**< The corresponding firm node to this rss_irn */
104
105         chain_t  *chain;            /**< The chain, this node is associated to */
106
107         unsigned desc_walk;         /**< visited flag for collecting descendants */
108         unsigned kill_count;        /**< number of nodes killed by this one */
109
110         unsigned live_out : 1;      /**< irn has consumers outside of it's block */
111         unsigned visited  : 1;      /**< visited flag for bipartite decomposition */
112         unsigned havecons : 1;      /**< visited flag collect consumer */
113         unsigned handled  : 1;      /**< flag indicating whether or not the list structures have been build */
114         unsigned dumped   : 1;      /**< flag indication whether or not this node was dumped */
115 } rss_irn_t;
116
117 /* Represents a serialization edge with associated costs. */
118 typedef struct _serialization {
119         rss_irn_t  *u;       /* the top node */
120         rss_irn_t  *v;       /* the node about to be serialized after u */
121         rss_edge_t *edge;    /* the edge selected for the serialization */
122         int        omega1;   /* estimated: available regs - RS reduction */
123         int        omega2;   /* increase in critical path length */
124         int        new_killer;
125 } serialization_t;
126
127 typedef struct _rss {
128         ir_phase          ph;              /**< Phase to hold some data */
129         heights_t        *h;              /**< The current height object */
130         ir_graph         *irg;            /**< The irg to preprocess */
131         plist_t          *nodes;          /**< The list of interesting nodes */
132         const arch_env_t *arch_env;       /**< The architecture environment */
133         be_abi_irg_t     *abi;            /**< The abi for this irg */
134         pset             *cbc_set;        /**< A set of connected bipartite components */
135         ir_node          *block;          /**< The current block in progress. */
136         int              *idx_map;        /**< Mapping irn indices to per block indices */
137         unsigned         max_height;      /**< maximum height in the current block */
138         rss_opts_t       *opts;           /**< The options */
139         be_lv_t          *liveness;       /**< The liveness information for this irg */
140         pset             *live_block;     /**< Values alive at end of block */
141         const arch_register_class_t *cls; /**< The current register class */
142         DEBUG_ONLY(firm_dbg_module_t *dbg);
143 } rss_t;
144
145 #define get_rss_irn(rss, irn)  (phase_get_or_set_irn_data(&rss->ph, irn))
146
147 /**
148  * We need some special nodes:
149  * a source and a sink for all live-in and live-out values of a block
150  */
151 enum {
152         iro_rss_Source,
153         iro_rss_Sink,
154         iro_rss_last
155 };
156
157 /** The opcode of the rss_Source node once allocated. */
158 static ir_op *op_rss_Source;
159 /** The opcode of the rss_Sink node once allocated. */
160 static ir_op *op_rss_Sink;
161
162 /** The rss_Source node of the current graph. */
163 static ir_node *_source = NULL;
164 /** The rss_Sink node of the current graph. */
165 static ir_node *_sink   = NULL;
166
167 #define is_Source(irn) ((irn) == _source)
168 #define is_Sink(irn)   ((irn) == _sink)
169
170 enum {
171         RSS_DUMP_NONE  = 0,
172         RSS_DUMP_CBC   = 1 << 0,
173         RSS_DUMP_PKG   = 1 << 1,
174         RSS_DUMP_KILL  = 1 << 2,
175         RSS_DUMP_DVG   = 1 << 3,
176         RSS_DUMP_MAXAC = 1 << 4,
177         RSS_DUMP_ALL   = (RSS_DUMP_MAXAC << 1) - 1,
178 };
179
180 static rss_opts_t rss_options = {
181         RSS_DUMP_NONE,
182 };
183
184 static const lc_opt_enum_int_items_t dump_items[] = {
185         { "none",  RSS_DUMP_NONE  },
186         { "cbc",   RSS_DUMP_CBC   },
187         { "pkg",   RSS_DUMP_PKG   },
188         { "kill",  RSS_DUMP_KILL  },
189         { "dvg",   RSS_DUMP_DVG   },
190         { "maxac", RSS_DUMP_MAXAC },
191         { "all",   RSS_DUMP_ALL   },
192         { NULL, 0 }
193 };
194
195 static lc_opt_enum_int_var_t dump_var = {
196         &rss_options.dump_flags, dump_items
197 };
198
199 static const lc_opt_table_entry_t rss_option_table[] = {
200         LC_OPT_ENT_ENUM_MASK("dump", "dump phases", &dump_var),
201         { NULL }
202 };
203
204 /******************************************************************************
205  *  _          _                    __                  _   _
206  * | |        | |                  / _|                | | (_)
207  * | |__   ___| |_ __   ___ _ __  | |_ _   _ _ __   ___| |_ _  ___  _ __  ___
208  * | '_ \ / _ \ | '_ \ / _ \ '__| |  _| | | | '_ \ / __| __| |/ _ \| '_ \/ __|
209  * | | | |  __/ | |_) |  __/ |    | | | |_| | | | | (__| |_| | (_) | | | \__ \
210  * |_| |_|\___|_| .__/ \___|_|    |_|  \__,_|_| |_|\___|\__|_|\___/|_| |_|___/
211  *            | |
212  *            |_|
213  ******************************************************************************/
214
215 /**
216  * Acquire opcodes if needed and create source and sink nodes.
217  */
218 static void init_rss_special_nodes(ir_graph *irg) {
219         ir_node *block;
220
221         if (op_rss_Source == NULL) {
222                 int iro_rss_base = get_next_ir_opcodes(iro_rss_last);
223                 op_rss_Source = new_ir_op(iro_rss_base + iro_rss_Source, "rss_Source", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
224                 op_rss_Sink   = new_ir_op(iro_rss_base + iro_rss_Sink,   "rss_Sink",   op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
225         }
226         block   = get_irg_start_block(irg);
227         _source = new_ir_node(NULL, irg, block, op_rss_Source, mode_ANY, 0, NULL);
228         _sink   = new_ir_node(NULL, irg, block, op_rss_Sink, mode_ANY, 0, NULL);
229 }
230
231 static int cmp_int(const void *a, const void *b) {
232         const int *i1 = a;
233         const int *i2 = b;
234
235         return QSORT_CMP(*i1, *i2);
236 }
237
238 static int cmp_child_costs(const void *a, const void *b) {
239         const child_t *c1 = a;
240         const child_t *c2 = b;
241
242         return QSORT_CMP(c1->cost, c2->cost);
243 }
244
245 static int cmp_irn_idx(const void *a, const void *b) {
246         const ir_node *n1 = *(ir_node **)a;
247         const ir_node *n2 = *(ir_node **)b;
248
249         return QSORT_CMP(get_irn_idx(n1), get_irn_idx(n2));
250 }
251
252 static int cmp_rss_edges(const void *a, const void *b) {
253         const rss_edge_t *e1 = a;
254         const rss_edge_t *e2 = b;
255
256         return (e1->src != e2->src) || (e1->tgt != e2->tgt);
257 }
258
259 static int bsearch_for_index(int key, int *arr, size_t len, int force) {
260         int left = 0;
261         int right = len;
262
263         while (right >= left) {
264                 int idx = (left + right) / 2;
265
266                 if (key < arr[idx])
267                         right = idx - 1;
268                 else if (key > arr[idx])
269                         left = idx + 1;
270                 else
271                         return idx;
272         }
273
274         if (force)
275                 assert(0 && "Something is wrong, key not found.");
276         return -1;
277 }
278
279 static ir_node **build_sorted_array_from_list(plist_t *irn_list, struct obstack *obst) {
280         plist_element_t *el;
281         int     i     = 0;
282         int     len   = plist_count(irn_list);
283         ir_node **arr = NEW_ARR_D(ir_node *, obst, len);
284
285         /* copy the list into the array */
286         foreach_plist(irn_list, el) {
287                 arr[i++] = plist_element_get_value(el);
288         }
289
290         /* sort the array by node index */
291         qsort(arr, len, sizeof(arr[0]), cmp_irn_idx);
292
293         return arr;
294 }
295
296 /*****************************************************
297  *      _      _                       _
298  *     | |    | |                     (_)
299  *   __| | ___| |__  _   _  __ _  __ _ _ _ __   __ _
300  *  / _` |/ _ \ '_ \| | | |/ _` |/ _` | | '_ \ / _` |
301  * | (_| |  __/ |_) | |_| | (_| | (_| | | | | | (_| |
302  *  \__,_|\___|_.__/ \__,_|\__, |\__, |_|_| |_|\__, |
303  *                          __/ | __/ |         __/ |
304  *                         |___/ |___/         |___/
305  *
306  *****************************************************/
307
308 #ifdef DEBUG_libfirm
309 static void dump_nodeset(ir_nodeset_t *ns, const char *prefix) {
310         ir_nodeset_iterator_t iter;
311         ir_node *irn;
312
313         ir_nodeset_iterator_init(&iter, ns);
314         while( (irn = ir_nodeset_iterator_next(&iter)) != NULL ) {
315                 ir_fprintf(stderr, "%s%+F\n", prefix, irn);
316         }
317 }
318 #endif
319
320 static void build_file_name(rss_t *rss, const char *suffix, size_t suf_len, char *buf, size_t len) {
321         const char *irg_name;
322
323         memset(buf, 0, len);
324         irg_name = get_entity_name(get_irg_entity(rss->irg));
325         snprintf(buf, len - suf_len, "%s-%s-block-%ld",
326                 irg_name, arch_register_class_name(rss->cls), get_irn_node_nr(rss->block));
327         strcat(buf, suffix);
328 }
329
330 /* Dumps all collected bipartite components of current irg as vcg. */
331 static void debug_vcg_dump_bipartite(rss_t *rss) {
332         cbc_t *cbc;
333         FILE  *f;
334         char  file_name[256];
335         static const char suffix[] = "-RSS-CBC.vcg";
336
337         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
338         f = fopen(file_name, "w");
339
340         ir_fprintf(f, "graph: { title: \"connected bipartite component graph of %+F\"\n", rss->irg);
341         fprintf(f, "display_edge_labels: no\n");
342         fprintf(f, "layoutalgorithm: mindepth\n");
343         fprintf(f, "manhattan_edges: yes\n\n");
344
345         foreach_pset(rss->cbc_set, cbc) {
346                 ir_nodeset_iterator_t iter;
347                 ir_node    *n;
348                 rss_edge_t *ke;
349
350                 fprintf(f, "graph: { titel: \"cbc %d\" label: \"cbc %d\" status:clustered color:yellow\n", cbc->nr, cbc->nr);
351                 foreach_ir_nodeset(&cbc->parents, n, iter) {
352                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
353                 }
354                 foreach_ir_nodeset(&cbc->children, n, iter) {
355                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
356                 }
357                 foreach_pset(cbc->kill_edges, ke) {
358                         ir_fprintf(f, "edge: { sourcename: \"n%d_%d\" targetname: \"n%d_%d\" }\n",
359                                 get_irn_node_nr(ke->src), cbc->nr, get_irn_node_nr(ke->tgt), cbc->nr);
360                 }
361                 fprintf(f, "}\n\n");
362         }
363         fprintf(f, "}\n");
364         fclose(f);
365 }
366
367 /* Dump the computed killing function as vcg. */
368 static void debug_vcg_dump_kill(rss_t *rss) {
369         FILE            *f;
370         char            file_name[256];
371         plist_element_t *el;
372         static const char suffix[] = "-RSS-KILL.vcg";
373
374         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
375         f = fopen(file_name, "w");
376
377         ir_fprintf(f, "graph: { title: \"computed kill graph of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
378         fprintf(f, "display_edge_labels: no\n");
379         fprintf(f, "layoutalgorithm: mindepth\n");
380         fprintf(f, "manhattan_edges: yes\n\n");
381
382         {
383                 /* reset dump_flag */
384                 ir_node *irn;
385                 foreach_phase_irn(&rss->ph, irn) {
386                         rss_irn_t *node = get_rss_irn(rss, irn);
387                         node->dumped = 0;
388                 }
389         }
390
391         /* dump all nodes and their killers */
392         foreach_plist(rss->nodes, el) {
393                 ir_node   *irn     = plist_element_get_value(el);
394                 rss_irn_t *rirn    = get_rss_irn(rss, irn);
395                 rss_irn_t *pk_rirn = get_rss_irn(rss, rirn->killer);
396
397                 if (! rirn->dumped) {
398                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
399                         rirn->dumped = 1;
400                 }
401
402                 if (! pk_rirn->dumped) {
403                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(rirn->killer), rirn->killer);
404                         pk_rirn->dumped = 1;
405                 }
406
407                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
408                         get_irn_node_nr(rirn->killer), get_irn_node_nr(irn));
409         }
410
411         fprintf(f, "}\n");
412         fclose(f);
413 }
414
415 /* Dumps the potential killing DAG (PKG) as vcg. */
416 static void debug_vcg_dump_pkg(rss_t *rss, ir_nodeset_t *max_ac, int iteration) {
417         FILE              *f;
418         char              file_name[256];
419         char              *suffix   = alloca(32);
420         static const char suffix1[] = "-RSS-PKG.vcg";
421         static const char suffix2[] = "-RSS-PKG-MAXAC.vcg";
422         plist_element_t   *el;
423
424         if (! max_ac) {
425                 snprintf(suffix, 32, "%s", suffix1);
426         }
427         else {
428                 snprintf(suffix, 32, "-%02d%s", iteration, suffix2);
429         }
430
431         build_file_name(rss, suffix, strlen(suffix) + 1, file_name, sizeof(file_name));
432         f = fopen(file_name, "w");
433
434         ir_fprintf(f, "graph: { title: \"potential killing DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
435         fprintf(f, "display_edge_labels: no\n");
436         fprintf(f, "layoutalgorithm: mindepth\n");
437         fprintf(f, "manhattan_edges: yes\n\n");
438
439         {
440                 /* reset dump_flag */
441                 ir_node *irn;
442                 foreach_phase_irn(&rss->ph, irn) {
443                         rss_irn_t *node = get_rss_irn(rss, irn);
444                         node->dumped = 0;
445                 }
446         }
447
448         foreach_plist(rss->nodes, el) {
449                 ir_node   *irn  = plist_element_get_value(el);
450                 rss_irn_t *rirn = get_rss_irn(rss, irn);
451                 char      *c1   = "";
452                 plist_element_t *k_el;
453
454                 /* dump selected saturating values in yellow */
455                 if (max_ac && ir_nodeset_contains(max_ac, irn))
456                         c1 = "color:yellow";
457
458                 if (! rirn->dumped) {
459                         if (rirn->chain)
460                                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(irn), irn, rirn->chain->nr, c1);
461                         else
462                                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(irn), irn, c1);
463                         rirn->dumped = 1;
464                 }
465
466                 foreach_plist(rirn->pkiller_list, k_el) {
467                         ir_node   *pkiller = plist_element_get_value(k_el);
468                         rss_irn_t *pk_rirn = get_rss_irn(rss, pkiller);
469                         char      *c2      = "";
470
471                         if (max_ac && ir_nodeset_contains(max_ac, pkiller))
472                                 c2 = "color:yellow";
473
474                         if (! pk_rirn->dumped) {
475                                 if (pk_rirn->chain)
476                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(pkiller), pkiller, pk_rirn->chain->nr, c2);
477                                 else
478                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(pkiller), pkiller, c2);
479                                 pk_rirn->dumped = 1;
480                         }
481                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
482                                 get_irn_node_nr(pkiller), get_irn_node_nr(irn));
483                 }
484         }
485         fprintf(f, "}\n");
486         fclose(f);
487 }
488
489 /* Dumps the disjoint value DAG (DVG) as vcg. */
490 static void debug_vcg_dump_dvg(rss_t *rss, dvg_t *dvg) {
491         static const char suffix[] = "-RSS-DVG.vcg";
492         FILE       *f;
493         char       file_name[256];
494         ir_nodeset_iterator_t iter;
495         ir_node    *irn;
496         rss_edge_t *edge;
497
498         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
499         f = fopen(file_name, "w");
500
501         ir_fprintf(f, "graph: { title: \"disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
502         fprintf(f, "display_edge_labels: no\n");
503         fprintf(f, "layoutalgorithm: mindepth\n");
504         fprintf(f, "manhattan_edges: yes\n\n");
505
506         /* dump all nodes */
507         foreach_ir_nodeset(&dvg->nodes, irn, iter) {
508                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
509         }
510
511         /* dump all edges */
512         foreach_pset(dvg->edges, edge) {
513                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
514                         get_irn_node_nr(edge->src), get_irn_node_nr(edge->tgt));
515         }
516
517         fprintf(f, "}\n");
518         fclose(f);
519 }
520
521 #if 0
522 /* Dumps the PKG(DVG). */
523 static void debug_vcg_dump_dvg_pkiller(rss_t *rss, dvg_t *dvg) {
524         static const char suffix[] = "-RSS-DVG-PKG.vcg";
525         FILE       *f;
526         char       file_name[256];
527         ir_nodeset_iterator_t iter;
528         ir_node    *irn;
529
530         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
531         f = fopen(file_name, "w");
532
533         ir_fprintf(f, "graph: { title: \"PKG of disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
534         fprintf(f, "display_edge_labels: no\n");
535         fprintf(f, "layoutalgorithm: mindepth\n");
536         fprintf(f, "manhattan_edges: yes\n\n");
537
538         /* dump all nodes */
539         foreach_ir_nodeset(&dvg->nodes, irn, iter) {
540                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
541         }
542
543         /* dump all edges */
544         foreach_ir_nodeset(&dvg->nodes, irn, iter) {
545                 rss_irn_t       *node = get_rss_irn(rss, irn);
546                 plist_element_t *el;
547
548                 foreach_plist(node->dvg_pkiller_list, el) {
549                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
550                                 get_irn_node_nr(plist_element_get_value(el)), get_irn_node_nr(irn));
551                 }
552         }
553
554         fprintf(f, "}\n");
555         fclose(f);
556 }
557 #endif /* if 0 */
558
559 /**
560  * In case there is no rss information for irn, initialize it.
561  */
562 static void *init_rss_irn(ir_phase *ph, ir_node *irn, void *old) {
563         rss_irn_t *res = old ? old : phase_alloc(ph, sizeof(res[0]));
564
565         res->descendant_list = plist_obstack_new(phase_obst(ph));
566         res->descendants     = NULL;
567
568         res->consumer_list   = plist_obstack_new(phase_obst(ph));
569         res->consumer        = NULL;
570
571         res->pkiller_list    = plist_obstack_new(phase_obst(ph));
572
573         res->parent_list     = plist_obstack_new(phase_obst(ph));
574
575         res->killer          = NULL;
576         res->irn             = irn;
577         res->chain           = NULL;
578
579         res->kill_count      = 0;
580         res->desc_walk       = 0;
581         res->live_out        = 0;
582         res->visited         = 0;
583         res->handled         = 0;
584         res->dumped          = 0;
585         res->havecons        = 0;
586
587         return res;
588 }
589
590 /**
591  * Collect all nodes data dependent on current node.
592  */
593 static void collect_descendants(rss_t *rss, rss_irn_t *rirn, ir_node *irn, int *got_sink, unsigned cur_desc_walk) {
594         const ir_edge_t *edge;
595         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
596         ir_node         *block    = rss->block;
597         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
598         int             i;
599
600         if (cur_node->desc_walk >= cur_desc_walk)
601                 return;
602         cur_node->desc_walk = cur_desc_walk;
603
604         /* Stop at Barriers */
605         if (be_is_Barrier(irn))
606                 return;
607
608         /* loop over normal and dependency edges */
609         for (i = 0; i < 2; ++i) {
610                 foreach_out_edge_kind(irn, edge, ekind[i]) {
611                         ir_node *user = get_edge_src_irn(edge);
612
613                         /* skip ignore nodes as they do not really contribute to register pressure */
614                         if (arch_irn_is(rss->arch_env, user, ignore))
615                                 continue;
616
617                         /*
618                                 (a) mode_X means Jump            -> out_edge
619                                 (b) Phi as user of a normal node -> out-edge
620                         */
621                         if (get_irn_mode(user) == mode_X || is_Phi(user)) {
622                                 if (! *got_sink)
623                                         goto force_sink;
624                                 else
625                                         continue;
626                         }
627
628                         if (is_Proj(user)) {
629
630                                 //if (arch_get_irn_reg_class(rss->arch_env, user, -1) == rss->cls)
631                                         collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
632                         }
633                         else {
634                                 /* check if user lives in block and is not a control flow node */
635                                 if (get_nodes_block(user) == block) {
636                                         if (! plist_has_value(rirn->descendant_list, user)) {
637                                                 plist_insert_back(rirn->descendant_list, user);
638                                                 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", user));
639                                         }
640                                         collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
641                                 }
642                                 else if (! *got_sink) {
643 force_sink:
644                                         /* user lives out of block: add sink as descendant if not already done */
645                                         plist_insert_back(rirn->descendant_list, _sink);
646                                         *got_sink = 1;
647                                         DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", _sink));
648                                 }
649                         }
650                 }
651         }
652 }
653
654 /**
655  * Handles a single consumer.
656  */
657 static void collect_single_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *consumer, int *got_sink) {
658         ir_node *block = rss->block;
659
660         assert(! is_Proj(consumer) && "Cannot handle Projs");
661
662         if (! is_Phi(consumer) && ! is_Block(consumer) && get_nodes_block(consumer) == block) {
663                 if (! arch_irn_is(rss->arch_env, consumer, ignore) && ! plist_has_value(rss_irn->consumer_list, consumer)) {
664                         plist_insert_back(rss_irn->consumer_list, consumer);
665                         DBG((rss->dbg, LEVEL_2, "\t\tconsumer %+F\n", consumer));
666                 }
667         }
668         else {
669                 rss_irn->live_out = 1;
670                 DBG((rss->dbg, LEVEL_2, "\t\tlive out %+F", consumer));
671                 if (! *got_sink) {
672                         plist_insert_back(rss_irn->consumer_list, _sink);
673                         *got_sink = 1;
674                         DB((rss->dbg, LEVEL_2, ", %+F added instead", _sink));
675                 }
676                 DB((rss->dbg, LEVEL_2, "\n"));
677         }
678 }
679
680 /**
681  * Collect all nodes consuming the value(s) produced by current node.
682  */
683 static void collect_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *irn, int *got_sink) {
684         const ir_edge_t *edge;
685         int             i;
686         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
687         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
688
689         if (cur_node->havecons)
690                 return;
691         cur_node->havecons = 1;
692
693         for (i = 0; i < 2; ++i) {
694                 foreach_out_edge_kind(irn, edge, ekind[i]) {
695                         ir_node *consumer = get_edge_src_irn(edge);
696
697                         if (is_Proj(consumer)) {
698                                 //if (arch_get_irn_reg_class(rss->arch_env, consumer, -1) == rss->cls)
699                                         collect_consumer(rss, rss_irn, consumer, got_sink);
700                         }
701                         else
702                                 collect_single_consumer(rss, rss_irn, consumer, got_sink);
703                 }
704         }
705 }
706
707 /**
708  * Collects all consumer and descendant of a irn.
709  */
710 static void collect_node_info(rss_t *rss, ir_node *irn) {
711         static unsigned cur_desc_walk = 0;
712         rss_irn_t       *rss_irn      = get_rss_irn(rss, irn);
713         int             got_sink;
714
715         assert(! is_Proj(irn) && "Cannot handle Projs.");
716
717         /* check if node info is already available */
718         if (rss_irn->handled)
719                 return;
720                 //phase_reinit_single_irn_data(&rss->ph, irn);
721
722         DBG((rss->dbg, LEVEL_1, "\tcomputing consumers of %+F:\n", irn));
723
724         /* collect all consumer */
725         got_sink = 0;
726         collect_consumer(rss, rss_irn, irn, &got_sink);
727
728         /* build sorted consumer array */
729         rss_irn->consumer = build_sorted_array_from_list(rss_irn->consumer_list, phase_obst(&rss->ph));
730
731         DBG((rss->dbg, LEVEL_1, "\tcompute descendants of %+F:\n", irn));
732
733         /* collect descendants */
734         got_sink = 0;
735         collect_descendants(rss, rss_irn, irn, &got_sink, ++cur_desc_walk);
736
737         /* build sorted descendant array */
738         rss_irn->descendants = build_sorted_array_from_list(rss_irn->descendant_list, phase_obst(&rss->ph));
739
740         rss_irn->handled = 1;
741 }
742
743 /**
744  * Checks if v is a potential killer of u.
745  * v is in pkill(u) iff descendants(v) cut consumer(u) is v
746  *
747  * @param rss   The rss object
748  * @param v      The node to check for killer
749  * @param u      The potentially killed value
750  * @return 1 if v is in pkill(u), 0 otherwise
751  */
752 static int is_potential_killer(rss_t *rss, rss_irn_t *v, rss_irn_t *u) {
753         plist_t *list;
754         ir_node **arr;
755         plist_element_t *el;
756
757         assert(is_Sink(v->irn) || ((plist_count(v->descendant_list) > 0 && v->descendants) || 1));
758         assert(is_Sink(u->irn) || ((plist_count(u->consumer_list)   > 0 && u->consumer)    || 1));
759
760         /* as we loop over the list: loop over the shorter one */
761         if (plist_count(v->descendant_list) > plist_count(u->consumer_list)) {
762                 list = u->consumer_list;
763                 arr  = v->descendants;
764         }
765         else {
766                 list = v->descendant_list;
767                 arr  = u->consumer;
768         }
769
770         /* for each list element: try to find element in array */
771         foreach_plist(list, el) {
772                 ir_node *irn   = plist_element_get_value(el);
773                 ir_node *match = BSEARCH_IRN_ARR(irn, arr);
774
775                 if (match && match != irn)
776                         return 0;
777         }
778
779         return 1;
780 }
781
782 /**
783  * Update descendants, consumer and pkiller list for the given irn.
784  */
785 static void update_node_info(rss_t *rss, ir_node *irn, ir_node *pk_irn) {
786         rss_irn_t *node    = get_rss_irn(rss, irn);
787         rss_irn_t *pkiller = get_rss_irn(rss, pk_irn);
788
789         /* add consumer and rebuild the consumer array */
790         if (! plist_has_value(node->consumer_list, pk_irn)) {
791                 plist_insert_back(node->consumer_list, pk_irn);
792                 node->consumer = build_sorted_array_from_list(node->consumer_list, phase_obst(&rss->ph));
793         }
794
795         /* add pk_irn as descendant, add it's descendants to irn's and rebuild array */
796         if (! plist_has_value(node->descendant_list, pk_irn)) {
797                 plist_element_t *el;
798
799                 plist_insert_back(node->descendant_list, pk_irn);
800
801                 foreach_plist(pkiller->descendant_list, el) {
802                         ir_node *desc = plist_element_get_value(el);
803
804                         if (! plist_has_value(node->descendant_list, desc))
805                                 plist_insert_back(node->descendant_list, desc);
806                 }
807
808                 node->descendants = build_sorted_array_from_list(node->descendant_list, phase_obst(&rss->ph));
809         }
810
811 }
812
813 /**
814  * Compute the potential killing set PK.
815  */
816 static void compute_pkill_set(rss_t *rss) {
817         plist_element_t *u_el, *v_el;
818
819         foreach_plist(rss->nodes, u_el) {
820                 ir_node   *u_irn = plist_element_get_value(u_el);
821                 rss_irn_t *u     = get_rss_irn(rss, u_irn);
822
823                 DBG((rss->dbg, LEVEL_1, "\tcomputing potential killers of %+F:\n", u_irn));
824
825                 /* check each consumer if it is a potential killer  */
826                 foreach_plist(u->consumer_list, v_el) {
827                         ir_node   *v_irn = plist_element_get_value(v_el);
828                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
829
830                         /* check, if v is a potential killer of u */
831                         if (is_potential_killer(rss, v, u)) {
832                                 if (! plist_has_value(u->pkiller_list, v_irn))
833                                         plist_insert_back(u->pkiller_list, v_irn);
834                                 v->kill_count++;
835                                 DBG((rss->dbg, LEVEL_2, "\t\tpotential killer %+F\n", v_irn));
836                         }
837                 }
838
839                 u->killer = _sink;
840         }
841
842         if (rss->opts->dump_flags & RSS_DUMP_PKG)
843                 debug_vcg_dump_pkg(rss, NULL, 0);
844 }
845
846 /**
847  * Build set of killing edges (from values to their potential killers)
848  */
849 static void build_kill_edges(rss_t *rss, pset *epk) {
850         plist_element_t *el, *k_el;
851
852         foreach_plist(rss->nodes, el) {
853                 ir_node    *irn  = plist_element_get_value(el);
854                 rss_irn_t *rirn = get_rss_irn(rss, irn);
855
856                 DBG((rss->dbg, LEVEL_3, "\t\tbuilding kill edges for %+F:\n", irn));
857
858                 foreach_plist(rirn->pkiller_list, k_el) {
859                         ir_node    *pkiller = plist_element_get_value(k_el);
860                         rss_edge_t *ke      = obstack_alloc(phase_obst(&rss->ph), sizeof(*ke));
861
862                         ke->src  = irn;
863                         ke->tgt  = pkiller;
864                         ke->next = NULL;
865
866                         DBG((rss->dbg, LEVEL_3, "\t\t\ttarget %+F\n", pkiller));
867
868                         pset_insert(epk, ke, HASH_RSS_EDGE(ke));
869                 }
870         }
871 }
872
873 #ifdef DEBUG_libfirm
874 /* print the given cbc for debugging purpose */
875 static void debug_print_cbc(firm_dbg_module_t *mod, cbc_t *cbc) {
876         ir_nodeset_iterator_t iter;
877         ir_node    *n;
878         rss_edge_t *ke;
879
880         DBG((mod, LEVEL_3, "\t\tS = set of parents:\n"));
881         foreach_ir_nodeset(&cbc->parents, n, iter) {
882                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
883         }
884         DBG((mod, LEVEL_3, "\t\tT = set of children:\n"));
885         foreach_ir_nodeset(&cbc->children, n, iter) {
886                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
887         }
888         DBG((mod, LEVEL_3, "\t\tE = Edges from producers to consumers\n"));
889         foreach_pset(cbc->kill_edges, ke) {
890                 DBG((mod, LEVEL_3, "\t\t\t%+F -> %+F\n", ke->src, ke->tgt));
891         }
892 }
893 #endif
894
895 /**
896  * Construct the bipartite decomposition.
897  * Sid-Ahmed-Ali Touati, Phd Thesis
898  * Register Pressure in Instruction Level Parallelism, p. 71
899  */
900 static void compute_bipartite_decomposition(rss_t *rss) {
901         pset *epk    = new_pset(cmp_rss_edges, 10);
902         int  cur_num = 0;
903
904         plist_element_t *el;
905
906         DBG((rss->dbg, LEVEL_1, "\tcomputing bipartite decomposition:\n"));
907
908         build_kill_edges(rss, epk);
909
910         foreach_plist(rss->nodes, el) {
911                 ir_node   *u_irn   = plist_element_get_value(el);
912                 rss_irn_t *u       = get_rss_irn(rss, u_irn);
913                 int       p_change = 1;
914                 int       c_change = 1;
915                 int       vrfy_ok  = 1;
916
917                 cbc_t           *cbc;
918                 plist_element_t *el2;
919                 rss_edge_t      *k_edge;
920                 rss_edge_t      *kedge_root = NULL;
921                 ir_node         *t_irn, *s_irn;
922                 ir_nodeset_iterator_t iter;
923
924                 if (u->visited || u_irn == _sink)
925                         continue;
926
927                 DBG((rss->dbg, LEVEL_2, "\t\t%+F choosen:\n", u_irn));
928
929                 cbc     = obstack_alloc(phase_obst(&rss->ph), sizeof(*cbc));
930                 cbc->nr = cur_num++;
931
932                 /* initialize S_cb */
933                 ir_nodeset_init_size(&cbc->parents, 5);
934                 ir_nodeset_insert(&cbc->parents, u_irn);
935                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (init)\n", u_irn));
936
937                 /* E_cb = empty */
938                 cbc->kill_edges = new_pset(cmp_rss_edges, 5);
939
940                 /* each parent gets killed by at least one value from children */
941
942                 /* T_cb = PK_successors(u) */
943                 ir_nodeset_init_size(&cbc->children, 5);
944                 foreach_plist(u->pkiller_list, el2) {
945                         ir_nodeset_insert(&cbc->children, plist_element_get_value(el2));
946                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (init)\n", plist_element_get_value(el2)));
947                 }
948
949                 /*
950                         Now: insert the parents of all children into the parent set
951                         and insert the children of all parents into the children set
952                         until the sets don't change any more
953                 */
954                 while (p_change || c_change) {
955                         ir_nodeset_iterator_t iter;
956                         p_change = c_change = 0;
957
958                         /* accumulate parents */
959                         foreach_ir_nodeset(&cbc->children, t_irn, iter) {
960                                 foreach_pset(epk, k_edge) {
961                                         ir_node *val = k_edge->src;
962
963                                         if (k_edge->tgt == t_irn && ! ir_nodeset_contains(&cbc->parents, val)) {
964                                                 ir_nodeset_insert(&cbc->parents, val);
965                                                 p_change = 1;
966                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (killed by %+F)\n", val, t_irn));
967                                         }
968                                 }
969                         }
970
971                         /* accumulate children */
972                         foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
973                                 foreach_pset(epk, k_edge) {
974                                         ir_node *val = k_edge->tgt;
975
976                                         if (k_edge->src == s_irn  && ! ir_nodeset_contains(&cbc->children, val)) {
977                                                 ir_nodeset_insert(&cbc->children, val);
978                                                 c_change = 1;
979                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (kills %+F)\n", val, s_irn));
980                                         }
981                                 }
982                         }
983                 }
984
985                 /* mark all parent values as visited */
986                 foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
987                         rss_irn_t *s = get_rss_irn(rss, s_irn);
988                         s->visited = 1;
989                         /* assure bipartite property */
990 #if 0
991                         if (ir_nodeset_contains(&cbc->children, s_irn)) {
992                                 ir_nodeset_remove(&cbc->children, s_irn);
993                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F removed from to children\n", s_irn));
994                         }
995 #endif
996                 }
997
998                 /* update edges */
999                 foreach_pset(epk, k_edge) {
1000                         if (ir_nodeset_contains(&cbc->parents, k_edge->src) &&
1001                             ir_nodeset_contains(&cbc->children, k_edge->tgt)) {
1002                                 pset_insert(cbc->kill_edges, k_edge, HASH_RSS_EDGE(k_edge));
1003                                 /*
1004                                         Link all k_edges which are about to be removed together.
1005                                         Beware: pset_remove kills the iterator.
1006                                 */
1007                                 k_edge->next = kedge_root;
1008                                 kedge_root   = k_edge;
1009                         }
1010                 }
1011
1012                 /* remove all linked k_edges */
1013                 for (; kedge_root; kedge_root = kedge_root->next) {
1014                         pset_remove(epk, kedge_root, HASH_RSS_EDGE(kedge_root));
1015                 }
1016
1017                 /* verify the cbc */
1018                 foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
1019                         int is_killed = 0;
1020
1021                         foreach_pset(cbc->kill_edges, k_edge) {
1022                                 if (k_edge->src == s_irn) {
1023                                         is_killed = 1;
1024                                         pset_break(cbc->kill_edges);
1025                                         break;
1026                                 }
1027                         }
1028
1029                         if (! is_killed) {
1030                                 vrfy_ok = 0;
1031                                 ir_fprintf(stderr, "Warning: parent %+F is not killed in current cbc\n", s_irn);
1032                         }
1033                 }
1034                 assert(vrfy_ok && "Verification of CBC failed");
1035
1036                 /* add the connected bipartite component */
1037                 if (ir_nodeset_size(&cbc->parents) > 0 &&
1038                     ir_nodeset_size(&cbc->children) > 0 &&
1039                         pset_count(cbc->kill_edges) > 0)
1040                         pset_insert(rss->cbc_set, cbc, (unsigned)cbc->nr);
1041                 DBG((rss->dbg, LEVEL_2, "\tbipartite component %d inserted:\n", cbc->nr));
1042                 DEBUG_ONLY(
1043                         if (firm_dbg_get_mask(rss->dbg) & LEVEL_2)
1044                                 debug_print_cbc(rss->dbg, cbc);
1045                 );
1046         }
1047
1048         if (rss->opts->dump_flags & RSS_DUMP_CBC)
1049                 debug_vcg_dump_bipartite(rss);
1050
1051         del_pset(epk);
1052 }
1053
1054 /**
1055  * Select the child with the maximum cost.
1056  */
1057 static child_t *select_child_max_cost(rss_t *rss, ir_nodeset_t *x, ir_nodeset_t *y, child_t *t, cbc_t *cbc) {
1058         ir_node *child;
1059         ir_nodeset_iterator_t iter;
1060         float   max_cost = -1.0f;
1061
1062         DBG((rss->dbg, LEVEL_2, "\t\tcomputing children costs:\n"));
1063
1064         foreach_ir_nodeset(&cbc->children, child, iter) {
1065                 rss_irn_t  *r_child             = get_rss_irn(rss, child);
1066                 int         num_unkilled_parents = 0;
1067                 int         num_descendants;
1068                 rss_edge_t *k_edge;
1069                 float       cost;
1070
1071                 /* get the number of unkilled parents */
1072                 foreach_pset(cbc->kill_edges, k_edge) {
1073                         if (k_edge->tgt == child && ir_nodeset_contains(x, k_edge->src))
1074                                 ++num_unkilled_parents;
1075                 }
1076
1077                 cost = (float)num_unkilled_parents;
1078
1079                 num_descendants = plist_count(r_child->descendant_list) + ir_nodeset_size(y);
1080
1081                 if (num_descendants > 0)
1082                         cost /= (float)num_descendants;
1083
1084                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F, #desc %d, cost %.3f\n", child, num_descendants, cost));
1085
1086                 if (cost > max_cost) {
1087                         t->irn   = child;
1088                         t->cost  = cost;
1089                         max_cost = cost;
1090                 }
1091         }
1092
1093         return t;
1094 }
1095
1096 /**
1097  * Remove all parents from x which are killed by t_irn.
1098  */
1099 static void remove_covered_parents(rss_t *rss, ir_nodeset_t *x, ir_node *t_irn, cbc_t *cbc) {
1100         rss_irn_t  *t = get_rss_irn(rss, t_irn);
1101         rss_edge_t *k_edge;
1102
1103         DBG((rss->dbg, LEVEL_2, "\t\tremoving parents covered by %+F:\n", t_irn));
1104
1105         foreach_pset(cbc->kill_edges, k_edge) {
1106                 if (k_edge->tgt == t_irn && ir_nodeset_contains(x, k_edge->src)) {
1107                         ir_nodeset_remove(x, k_edge->src);
1108                         plist_insert_back(t->parent_list, k_edge->src);
1109                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", k_edge->src));
1110                 }
1111         }
1112 }
1113
1114 static void update_cumulated_descendent_values(rss_t *rss, ir_nodeset_t *y, ir_node *t_irn) {
1115         rss_irn_t *t = get_rss_irn(rss, t_irn);
1116         plist_element_t *el;
1117
1118         DBG((rss->dbg, LEVEL_2, "\t\tupdating cumulated descendant value of %+F:\n", t_irn));
1119
1120         foreach_plist(t->descendant_list, el) {
1121                 ir_nodeset_insert(y, plist_element_get_value(el));
1122                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", plist_element_get_value(el)));
1123         }
1124 }
1125
1126 /**
1127  * Greedy-k: a heuristics for the MMA problem
1128  */
1129 static void compute_killing_function(rss_t *rss) {
1130         cbc_t *cbc;
1131         struct obstack obst;
1132
1133         obstack_init(&obst);
1134
1135         rss->cbc_set = pset_new_ptr(5);
1136         compute_bipartite_decomposition(rss);
1137
1138         /* for all bipartite components do: */
1139         foreach_pset(rss->cbc_set, cbc) {
1140                 ir_node *p;
1141                 ir_nodeset_t x;
1142                 ir_nodeset_t y;
1143                 ir_nodeset_iterator_t iter;
1144                 child_t **sks    = NEW_ARR_F(child_t *, 20);
1145                 int     cur_len  = 0;
1146                 int     cur_size = 20;
1147                 int     i;
1148
1149                 ir_nodeset_init_size(&x, 10);
1150                 ir_nodeset_init_size(&y, 10);
1151
1152                 DBG((rss->dbg, LEVEL_1, "\tcomputing SKS for cbc %d:\n", cbc->nr));
1153                 DBG((rss->dbg, LEVEL_2, "\t\tinitializing parents X:\n"));
1154
1155                 /* X = S_cb (all parents are initially uncovered) */
1156                 foreach_ir_nodeset(&cbc->parents, p, iter) {
1157                         ir_nodeset_insert(&x, p);
1158                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", p));
1159                 }
1160
1161                 /* while X not empty */
1162                 while (ir_nodeset_size(&x) > 0) {
1163                         child_t *t = obstack_alloc(&obst, sizeof(*t));
1164                         memset(t, 0, sizeof(*t));
1165
1166                         t = select_child_max_cost(rss, &x, &y, t, cbc);
1167
1168                         if (cur_len >= cur_size) {
1169                                 ARR_EXTO(child_t *, sks, cur_size * 2);
1170                                 cur_size *= 2;
1171                         }
1172
1173                         DBG((rss->dbg, LEVEL_2, "\t\tinsert child %+F (%.3f) into SKS at pos %d\n", t->irn, t->cost, cur_len));
1174
1175                         sks[cur_len++] = t;
1176                         remove_covered_parents(rss, &x, t->irn, cbc);
1177                         update_cumulated_descendent_values(rss, &y, t->irn);
1178                 }
1179
1180                 ARR_SHRINKLEN(sks, cur_len);
1181
1182                 /* sort SKS in increasing cost order */
1183                 qsort(sks, cur_len, sizeof(sks[0]), cmp_child_costs);
1184
1185                 DBG((rss->dbg, LEVEL_2, "\tprocessing SKS for cbc %d:\n", cbc->nr));
1186
1187                 /* build killing function */
1188                 for (i = cur_len - 1; i >= 0; --i) { /* loop over sks in decreasing cost order */
1189                         child_t         *t = sks[i];
1190                         rss_irn_t       *rt = get_rss_irn(rss, t->irn);
1191                         plist_element_t *p_el;
1192
1193                         DBG((rss->dbg, LEVEL_3, "\t\t\tkiller %+F (%.3f):\n", t->irn, t->cost));
1194
1195                         /* kill all unkilled parents of t */
1196                         foreach_plist(rt->parent_list, p_el) {
1197                                 ir_node    *par = plist_element_get_value(p_el);
1198                                 rss_irn_t *rpar = get_rss_irn(rss, par);
1199
1200                                 if (is_Sink(rpar->killer)) {
1201                                         rpar->killer = t->irn;
1202                                         DBG((rss->dbg, LEVEL_2, "\t\tkill %+F\n", rpar->irn));
1203                                 }
1204                                 else {
1205                                         DBG((rss->dbg, LEVEL_3, "\t\t\tkeeping %+F as killer for %+F\n", rpar->killer, rpar->irn));
1206                                 }
1207                         }
1208                 }
1209
1210                 ir_nodeset_destroy(&x);
1211                 ir_nodeset_destroy(&y);
1212                 DEL_ARR_F(sks);
1213         }
1214
1215         if (rss->opts->dump_flags & RSS_DUMP_KILL)
1216                 debug_vcg_dump_kill(rss);
1217
1218         del_pset(rss->cbc_set);
1219         obstack_free(&obst, NULL);
1220 }
1221
1222 /**
1223  * Adds the edge src -> tgt to the dvg. Checks if reverse edge is already there (asserts).
1224  */
1225 static INLINE void add_dvg_edge(rss_t *rss, dvg_t *dvg, ir_node *src, ir_node *tgt, int have_source) {
1226         rss_edge_t *dvg_edge;
1227         rss_edge_t key;
1228
1229         if (! have_source)
1230                 ir_nodeset_insert(&dvg->nodes, src);
1231         else
1232                 assert(ir_nodeset_contains(&dvg->nodes, src) && "Wrong assumption");
1233
1234         ir_nodeset_insert(&dvg->nodes, tgt);
1235
1236         key.src = tgt;
1237         key.tgt = src;
1238         assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1239
1240         key.src = src;
1241         key.tgt = tgt;
1242         if (NULL != pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1243                 /* add the edge to the DVG */
1244                 dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1245
1246                 dvg_edge->src  = src;
1247                 dvg_edge->tgt  = tgt;
1248                 dvg_edge->next = NULL;
1249
1250                 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", src, tgt));
1251                 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1252         }
1253 }
1254
1255 /**
1256  * Computes the disjoint value DAG (DVG).
1257  * BEWARE: It is not made explicitly clear in the Touati paper,
1258  *         but the DVG is meant to be build from the KILLING DAG
1259  */
1260 static void compute_dvg(rss_t *rss, dvg_t *dvg) {
1261         plist_element_t *el;
1262
1263         DBG((rss->dbg, LEVEL_1, "\tcomputing DVG:\n"));
1264
1265         foreach_plist(rss->nodes, el) {
1266                 ir_node    *u_irn    = plist_element_get_value(el);
1267                 rss_irn_t  *u        = get_rss_irn(rss, u_irn);
1268                 rss_irn_t  *u_killer = get_rss_irn(rss, u->killer);
1269                 int        i;
1270
1271                 /* TODO: omit nodes only having sink as pkiller and killing no one */
1272
1273                 /* add an edge to killer */
1274                 add_dvg_edge(rss, dvg, u_irn, u->killer, 0);
1275
1276                 if (is_Sink(u->killer))
1277                         continue;
1278
1279                 /* We add an edge to every killer, from where we could be reached. */
1280                 for (i = ARR_LEN_SAFE(u_killer->descendants) - 1; i >= 0; --i) {
1281                         add_dvg_edge(rss, dvg, u_irn, u_killer->descendants[i], 1);
1282                 }
1283
1284 #if 0
1285
1286                 foreach_plist(rss->nodes, el2) {
1287                         ir_node *v_irn = plist_element_get_value(el2);
1288
1289                         /*
1290                                 There is an edge (u, v) in the DVG iff v is a descendant of the killer(u).
1291                         */
1292                         if (BSEARCH_IRN_ARR(v_irn, u_kill->descendants)) {
1293                                 rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1294                                 rss_edge_t key;
1295
1296                                 /* insert the user into the DVG and append it to the user list of u */
1297                                 ir_nodeset_insert(&dvg->nodes, v_irn);
1298                                 if (! plist_has_value(u->dvg_user_list, v_irn))
1299                                         plist_insert_back(u->dvg_user_list, v_irn);
1300
1301                                 dvg_edge->src  = u_irn;
1302                                 dvg_edge->tgt  = v_irn;
1303                                 dvg_edge->next = NULL;
1304
1305                                 key.src = v_irn;
1306                                 key.tgt = u_irn;
1307
1308                                 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1309
1310                                 /* add the edge to the DVG */
1311                                 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", u_irn, v_irn));
1312                                 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1313                         }
1314                 }
1315 #endif /* if 0 */
1316         }
1317
1318         if (rss->opts->dump_flags & RSS_DUMP_DVG)
1319                 debug_vcg_dump_dvg(rss, dvg);
1320 }
1321
1322 /**
1323  * Updates the dvg structure when a serialization edge from src -> tgt is added.
1324  */
1325 static void update_dvg(rss_t *rss, dvg_t *dvg, rss_irn_t *src, rss_irn_t *tgt) {
1326         int i, j, idx;
1327         rss_edge_t *edge;
1328         rss_edge_t **arr = alloca(pset_count(dvg->edges) * sizeof(arr[0]));
1329
1330         /*
1331                 Add an edge from serialization target to serialization src:
1332                 src cannot be alive with target
1333         */
1334         add_dvg_edge(rss, dvg, tgt->irn, src->irn, 0);
1335
1336         /* Add edges to src's descendants as well, they also getting serialized. */
1337         for (i = ARR_LEN_SAFE(src->descendants) - 1; i >= 0; --i) {
1338                 add_dvg_edge(rss, dvg, tgt->irn, src->descendants[i], 1);
1339         }
1340
1341         /* We also have to add edges from targets predecessors in dvg */
1342         idx = 0;
1343         /* We cannot insert elements into set over which we iterate ... */
1344         foreach_pset(dvg->edges, edge) {
1345                 if (edge->tgt == tgt->irn) {
1346                         arr[idx++] = edge;
1347                 }
1348         }
1349
1350         for (i = 0; i < idx; ++i) {
1351                 edge = arr[i];
1352                 add_dvg_edge(rss, dvg, edge->src, src->irn, 1);
1353                 for (j = ARR_LEN_SAFE(src->descendants) - 1; j >= 0; --j) {
1354                         add_dvg_edge(rss, dvg, edge->src, src->descendants[j], 1);
1355                 }
1356         }
1357 }
1358
1359 #if 0
1360 /**
1361  * Accumulate all descendants for root into list.
1362  */
1363 static void accumulate_dvg_descendant_values(rss_t *rss, rss_irn_t *root, plist_t *list) {
1364         if (plist_count(root->dvg_user_list) > 0) {
1365                 plist_element_t *el;
1366
1367                 foreach_plist(root->dvg_user_list, el) {
1368                         ir_node   *v_irn = plist_element_get_value(el);
1369                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
1370
1371                         /* add v as descendant */
1372                         if (! plist_has_value(list, v_irn)) {
1373                                 plist_insert_back(list, v_irn);
1374                                 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG descendant %+F\n", v_irn));
1375                         }
1376
1377                         /* accumulate v's descendants */
1378                         accumulate_dvg_descendant_values(rss, v, list);
1379                 }
1380         }
1381 }
1382
1383 /**
1384  * Builds the list of potential killers for each node
1385  * in the given DVG.
1386  * Needs the descendant list for all user as sorted array.
1387  */
1388 static void build_dvg_pkiller_list(rss_t *rss, dvg_t *dvg) {
1389         ir_nodeset_iterator_t iter;
1390         ir_node *irn;
1391
1392         foreach_nodeset(&dvg->nodes, irn, iter) {
1393                 rss_irn_t       *node = get_rss_irn(rss, irn);
1394                 plist_element_t *el, *el2;
1395
1396                 DBG((rss->dbg, DEBUG_DVG, "\t\tbuilding pkiller list for %+F\n", irn));
1397
1398                 /* check each user */
1399                 foreach_plist(node->dvg_user_list, el) {
1400                         ir_node *u_irn = plist_element_get_value(el);
1401
1402                         /* is the current user u_irn not a descendant of any other user -> pkiller */
1403                         foreach_plist(node->dvg_user_list, el2) {
1404                                 ir_node   *v_irn = plist_element_get_value(el2);
1405                                 rss_irn_t *v     = get_rss_irn(rss, v_irn);
1406
1407                                 if (el != el2                             &&
1408                                         ! BSEARCH_IRN_ARR(u_irn, v->dvg_desc) &&
1409                                         ! plist_has_value(node->dvg_pkiller_list, u_irn))
1410                                 {
1411                                         plist_insert_back(node->dvg_pkiller_list, u_irn);
1412                                         DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG pkiller %+F\n", u_irn));
1413                                 }
1414                         }
1415                 }
1416
1417                 node->dvg_pkiller = build_sorted_array_from_list(node->dvg_pkiller_list, phase_obst(&rss->ph));
1418         }
1419
1420         DEBUG_ONLY(
1421                 if (firm_dbg_get_mask(rss->dbg) & DEBUG_DVG)
1422                         debug_vcg_dump_dvg_pkiller(rss, dvg);
1423         );
1424 }
1425
1426 #endif /* if 0 */
1427
1428 /**
1429  * Compute the maximal antichain of the current DVG.
1430  * This is a reimplementation of the MAXIMAL_ANTI_CHAIN function
1431  * from the DDG library 1.1 (DAG.cpp).
1432  */
1433 static ir_nodeset_t *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration) {
1434         int         n               = ir_nodeset_size(&dvg->nodes);
1435         int         *assignment     = alloca(n * sizeof(assignment[0]));
1436         int         *assignment_rev = alloca(n * sizeof(assignment_rev[0]));
1437         int         *idx_map        = alloca(n * sizeof(idx_map[0]));
1438         hungarian_problem_t *bp;
1439         ir_nodeset_t *values, *temp;
1440         ir_nodeset_iterator_t iter;
1441         ir_node     *u_irn;
1442         int         i, j, cost, cur_chain, res;
1443         rss_edge_t  *dvg_edge;
1444
1445 #define MAP_IDX(irn) bsearch_for_index(get_irn_idx(irn), idx_map,  n,  1)
1446
1447         if (pset_count(dvg->edges) == 0)
1448                 return NULL;
1449
1450         bp = hungarian_new(n, n, 1, HUNGARIAN_MATCH_NORMAL);
1451
1452         /*
1453                 At first, we build an index map for the nodes in the DVG,
1454                 because we cannot use the irn idx therefore as the resulting
1455                 bipartite data structure would have around 1.2GB.
1456                 So we limit the size to the number of nodes we have in the DVG
1457                 and build a sorted index map for their irn indices.
1458         */
1459         i = 0;
1460         foreach_ir_nodeset(&dvg->nodes, u_irn, iter) {
1461                 idx_map[i++] = get_irn_idx(u_irn);
1462         }
1463         qsort(idx_map, n, sizeof(idx_map[0]), cmp_int);
1464
1465         foreach_pset(dvg->edges, dvg_edge) {
1466                 int idx_u = MAP_IDX(dvg_edge->src);
1467                 int idx_v = MAP_IDX(dvg_edge->tgt);
1468
1469                 /* add the entry to the bipartite data structure */
1470                 hungarian_add(bp, idx_u, idx_v, 1);
1471                 DBG((rss->dbg, LEVEL_3, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1472                         idx_u, dvg_edge->src, idx_v, dvg_edge->tgt));
1473         }
1474 #if 0
1475         /*
1476                 Add a bipartite entry for each pair of nodes (u, v), where exists a
1477                 path in the DVG from u to v, ie. connect all descendants(v) to v.
1478                 desc_dvg(v) = accumulated descendants of all z in dvg_user_list(v)
1479         */
1480         foreach_ir_nodeset(&dvg->nodes, u_irn, iter) {
1481                 rss_irn_t *u        = get_rss_irn(rss, u_irn);
1482                 int       idx_u_irn = MAP_IDX(u_irn);
1483
1484                 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG descendants of %+F:\n", u_irn));
1485
1486                 //plist_clear(u->dvg_desc_list);
1487                 //accumulate_dvg_descendant_values(rss, u, u->dvg_desc_list);
1488
1489                 /*
1490                         FIXME: The array is build on the phase obstack and we cannot free the data.
1491                         So the array get as many times allocated as this function is called.
1492                 */
1493
1494                 /* build the sorted array for faster searches */
1495                 //u->dvg_desc = build_sorted_array_from_list(u->dvg_desc_list, phase_obst(&rss->ph));
1496
1497                 DBG((rss->dbg, DEBUG_MAX_AC, "\t\tadding bipartite entries of %+F:\n", u_irn));
1498
1499                 /* add the bipartite entries for all descendants */
1500                 for (i = ARR_LEN_SAFE(u->dvg_desc) - 1; i >= 0; --i) {
1501                         rss_irn_t *desc    = get_rss_irn(rss, u->dvg_desc[i]);
1502                         int       idx_desc = MAP_IDX(u->dvg_desc[i]);
1503
1504                         /* add the entry to the bipartite data structure */
1505                         hungarian_add(bp, idx_u_irn, idx_desc, 1);
1506                         DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1507                                 idx_u_irn, u_irn, idx_desc, u->dvg_desc[i]));
1508
1509                         need_matching = 1;
1510                 }
1511         }
1512 #endif
1513
1514         /* We want maximum cardinality matching */
1515         hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);
1516
1517 #if 0
1518         DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG pkiller:\n"));
1519         /* beware: the following function needs the dvg_desc array */
1520         build_dvg_pkiller_list(rss, dvg);
1521 #endif
1522
1523         DBG((rss->dbg, LEVEL_2, "\t\tcomputing bipartite matching\n"));
1524         /*
1525                 The maximum cardinality bipartite matching gives us the minimal
1526                 chain partition, which corresponds to the maximum anti chains.
1527         */
1528         res = hungarian_solve(bp, assignment, &cost, 1);
1529         assert(res == 0 && "Bipartite matching failed!");
1530
1531         hungarian_free(bp);
1532         memset(assignment_rev, -1, n * sizeof(assignment_rev[0]));
1533
1534         /* build the reverse assignment, ie. foreach i -> j, add j -> i */
1535         for (i = 0; i < n; ++i) {
1536                 if (assignment[i] >= 0) {
1537                         int j = assignment[i];
1538                         assignment_rev[j] = i;
1539                 }
1540         }
1541
1542         DBG((rss->dbg, LEVEL_2, "\t\tgot assignment with cost %d\n", cost));
1543         DBG((rss->dbg, LEVEL_3, "\t\t\tassignment   ---   reverse assignment\n", cost));
1544         for (i = 0; i < n; ++i) {
1545                 DBG((rss->dbg, LEVEL_3, "\t\t\t%3d -> %3d         %3d -> %3d\n", i, assignment[i], i, assignment_rev[i]));
1546         }
1547
1548         values    = xmalloc(sizeof(values[0]));
1549         ir_nodeset_init_size(values, 10);
1550         cur_chain = 0;
1551         /* Construction of the minimal chain partition */
1552         for (j = 0; j < n; ++j) {
1553                 /* check nodes, which did not occur as target */
1554                 if (assignment_rev[j] == -1) {
1555                         int       xj      = idx_map[j];
1556                         ir_node   *xj_irn = get_idx_irn(rss->irg, xj);
1557                         rss_irn_t *xj_rss = get_rss_irn(rss, xj_irn);
1558                         chain_t   *c      = obstack_alloc(phase_obst(&rss->ph), sizeof(*c));
1559                         int       source;
1560
1561                         /* there was no source for j -> we have a source of a new chain */
1562                         ir_nodeset_insert(values, xj_irn);
1563
1564                         c->elements = plist_obstack_new(phase_obst(&rss->ph));
1565                         c->nr       = cur_chain++;
1566                         plist_insert_back(c->elements, xj_irn);
1567
1568                         xj_rss->chain = c;
1569
1570                         DBG((rss->dbg, LEVEL_2, "\t\tstarting chain %d:\n", c->nr));
1571                         DBG((rss->dbg, LEVEL_2, "\t\t\t%+F (%d)", xj_irn, j));
1572
1573                         /* follow chain, having j as source */
1574                         source = j;
1575                         while (assignment[source] >= 0) {
1576                                 int       target  = assignment[source];
1577                                 int       irn_idx = idx_map[target];
1578                                 ir_node   *irn    = get_idx_irn(rss->irg, irn_idx);
1579                                 rss_irn_t *node   = get_rss_irn(rss, irn);
1580
1581                                 plist_insert_back(c->elements, irn);
1582                                 node->chain = c;
1583
1584                                 DB((rss->dbg, LEVEL_2, " -> %+F (%d)", irn, target));
1585
1586                                 /* new source = last target */
1587                                 source = target;
1588                         }
1589
1590                         DB((rss->dbg, LEVEL_2, "\n"));
1591                 }
1592         }
1593
1594         /*
1595                 Computing the maximal antichain: Select an element from each
1596                 chain such, such it is parallel with the others.
1597         */
1598         DBG((rss->dbg, LEVEL_1, "\t\tcomputing set of saturation values (MAX AC)\n"));
1599         DBG((rss->dbg, LEVEL_3, "\t\tstarting with:\n"));
1600
1601         DEBUG_ONLY(
1602                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_3)
1603                         dump_nodeset(values, "\t\t\t");
1604         )
1605
1606         temp = NULL;
1607         do {
1608                 /*
1609                         We need an explicit array for the values as
1610                         we cannot iterate multiple times over the same
1611                         set at the same time. :-(((((
1612                         TODO Matze: now we can, so rewrite this...
1613                 */
1614                 int     n         = ir_nodeset_size(values);
1615                 int     i         = 0;
1616                 ir_node **val_arr = NEW_ARR_F(ir_node *, n);
1617
1618                 foreach_ir_nodeset(values, u_irn, iter)
1619                         val_arr[i++] = u_irn;
1620
1621                 if (temp) {
1622                         ir_nodeset_destroy(temp);
1623                         free(temp);
1624                 }
1625
1626                 temp = xmalloc(sizeof(temp[0]));
1627                 ir_nodeset_init_size(temp, 10);
1628
1629                 /* Select all nodes from current value set, having another node in the set as descendant. */
1630                 for (i = 0; i < n; ++i) {
1631                         rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1632
1633                         for (j = 0; j < n; ++j) {
1634                                 if (i != j) {
1635                                         rss_edge_t key;
1636
1637                                         key.src = val_arr[i];
1638                                         key.tgt = val_arr[j];
1639
1640                                         if (pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1641                                                 /* v[j] is descendant of u -> remove u and break */
1642                                                 ir_nodeset_insert(temp, u->irn);
1643                                                 ir_nodeset_remove(values, u->irn);
1644
1645                                                 DBG((rss->dbg, LEVEL_3, "\t\t\tremoving %+F from values, adding it to temp\n", u->irn));
1646
1647                                                 break;
1648                                         }
1649                                 }
1650                         }
1651                 }
1652
1653                 /* Try to insert the chain predecessor of all selected u's */
1654                 foreach_ir_nodeset(temp, u_irn, iter) {
1655                         rss_irn_t       *u  = get_rss_irn(rss, u_irn);
1656                         chain_t         *c  = u->chain;
1657                         plist_element_t *el = plist_find_value(c->elements, u_irn);
1658
1659                         assert(el && "Missing element in chain!");
1660
1661                         /* If u has predecessor in chain: insert the predecessor */
1662                         if (el == plist_element_get_prev(el)) {
1663                                 ir_nodeset_insert(values, plist_element_get_value(el));
1664                                 DBG((rss->dbg, LEVEL_3, "\t\t\tadding %+F to values\n", plist_element_get_value(el)));
1665                         }
1666                 }
1667
1668
1669                 DEL_ARR_F(val_arr);
1670         } while (ir_nodeset_size(temp) > 0);
1671
1672         DBG((rss->dbg, LEVEL_2, "\t\tfinal set:\n"));
1673         DEBUG_ONLY(
1674                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2) {
1675                         dump_nodeset(values, "\t\t\t");
1676                 }
1677         );
1678
1679         if (rss->opts->dump_flags & RSS_DUMP_MAXAC)
1680                 debug_vcg_dump_pkg(rss, values, iteration);
1681
1682         if(temp != NULL) {
1683                 ir_nodeset_destroy(temp);
1684                 free(temp);
1685         }
1686
1687         return values;
1688
1689 #undef MAP_IDX
1690 }
1691
1692 /**
1693  * Computes the best serialization between two nodes of sat_vals.
1694  */
1695 static serialization_t *compute_best_admissible_serialization(rss_t *rss, ir_nodeset_t *sat_vals, serialization_t *ser, int num_regs) {
1696         int        n                    = ir_nodeset_size(sat_vals);
1697         int        n_idx                = ARR_LEN_SAFE(rss->idx_map);
1698         int        i                    = 0;
1699         ir_node    **val_arr            = alloca(n * sizeof(val_arr[0]));
1700         bitset_t   *bs_sv               = bitset_alloca(n_idx);
1701         bitset_t   *bs_vdesc            = bitset_alloca(n_idx);
1702         bitset_t   *bs_tmp              = bitset_alloca(n_idx);
1703         bitset_t   *bs_ukilldesc        = bitset_alloca(n_idx);
1704         int        best_benefit         = INT_MAX;
1705         int        best_omega2          = INT_MAX;
1706         int        best_benefit_omega20 = INT_MAX;
1707         int        has_omega1           = 0;
1708         int        j, k;
1709         ir_node    *irn;
1710         ir_nodeset_iterator_t iter;
1711         rss_edge_t min_benefit_edge;
1712         rss_edge_t min_omega20_edge;
1713         rss_irn_t  *ser_u_omega1 = NULL, *ser_v_omega1 = NULL;
1714         rss_irn_t  *ser_u_omega20 = NULL, *ser_v_omega20 = NULL;
1715
1716         DBG((rss->dbg, LEVEL_1, "\tcomputing admissible serializations:\n"));
1717
1718         /*
1719                 We need an explicit array for the values as
1720                 we cannot iterate multiple times over the same
1721                 set at the same time. :-(((((
1722         */
1723
1724         foreach_ir_nodeset(sat_vals, irn, iter) {
1725                 val_arr[i++] = irn;
1726                 bitset_set(bs_sv, BLOCK_IDX_MAP(rss, irn));
1727         }
1728
1729         /*
1730                 We build all admissible serializations and remember the best found so far.
1731                 For u in sat_vals:
1732                  For v in sat_val:
1733                    if v in pkiller(u): add edge from v to all other pkiller(u)
1734                    else: for all vv in pkiller(u): add edge from v to vv if there exists no path from vv to v
1735         */
1736
1737 /*
1738         A node is unserializable if:
1739         - it has only one killer and this one is Sink
1740     - it kills no other values
1741         In this case there is no serialization which could
1742         reduce the registerpressure
1743 */
1744 #define IS_UNSERIALIZABLE_NODE(rss_node)                  \
1745         (                                                     \
1746                 (                                                 \
1747                         (plist_count(rss_node->pkiller_list) == 1) && \
1748                         is_Sink(rss_node->killer)                  && \
1749                         (rss_node->kill_count                == 0)    \
1750                 )                            ||                   \
1751                 be_is_Barrier(rss_node->irn) ||                   \
1752                 be_is_Keep(rss_node->irn)                         \
1753         )
1754
1755         /* for all u in sat_vals */
1756         for (i = 0; i < n; ++i) {
1757                 rss_irn_t       *u = get_rss_irn(rss, val_arr[i]);
1758                 plist_element_t *el;
1759
1760                 /* ignore nodes where serialization does not help */
1761                 if (IS_UNSERIALIZABLE_NODE(u)) {
1762                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", u->irn));
1763                         continue;
1764                 }
1765
1766                 /* accumulate all descendants of all pkiller(u) */
1767                 bitset_clear_all(bs_ukilldesc);
1768                 foreach_plist(u->pkiller_list, el) {
1769                         ir_node   *irn  = plist_element_get_value(el);
1770                         rss_irn_t *node = get_rss_irn(rss, irn);
1771
1772                         if (! is_Sink(irn))
1773                                 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, irn));
1774                         else
1775                                 continue;
1776
1777                         for (k = ARR_LEN_SAFE(node->descendants) - 1; k >= 0; --k) {
1778                                 if (! is_Sink(node->descendants[k]))
1779                                         bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, node->descendants[k]));
1780                         }
1781                 }
1782
1783                 /* for all v in sat_vals */
1784                 for (j = 0; j < n; ++j) {
1785                         ir_node   *v_irn   = val_arr[j];
1786                         rss_irn_t *v       = get_rss_irn(rss, v_irn);
1787                         unsigned  v_height = get_irn_height(rss->h, v_irn);
1788                         int       omega1, omega2, is_pkiller;
1789
1790                         /* v cannot be serialized with itself
1791                          * ignore nodes where serialization does not help */
1792                         if (i == j || IS_UNSERIALIZABLE_NODE(v)) {
1793                                 if (i != j)
1794                                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", v->irn));
1795                                 continue;
1796                         }
1797
1798                         /* get descendants of v */
1799                         bitset_clear_all(bs_vdesc);
1800                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v_irn));
1801                         for (k = ARR_LEN_SAFE(v->descendants) - 1; k >= 0; --k) {
1802                                 if (! is_Sink(v->descendants[k]))
1803                                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v->descendants[k]));
1804                         }
1805
1806                         /* if v is in pkiller(u) */
1807                         is_pkiller = plist_has_value(u->pkiller_list, v_irn);
1808
1809                         /* for all vv in pkiller(u) */
1810                         foreach_plist(u->pkiller_list, el) {
1811                                 ir_node *vv_irn  = plist_element_get_value(el);
1812                                 int     add_edge;
1813
1814                                 if (is_Sink(vv_irn) || is_cfop(vv_irn))
1815                                         continue;
1816
1817                                 if (is_pkiller)
1818                                         add_edge = vv_irn != v_irn && skip_Proj(vv_irn) != skip_Proj(v_irn);
1819                                 else
1820                                         add_edge = ! heights_reachable_in_block(rss->h, skip_Proj(vv_irn), skip_Proj(v_irn));
1821
1822                                 /*
1823                                         As we add an edge from vv -> v, we have to make sure,
1824                                         that there exists no path from v to vv.
1825                                 */
1826
1827                                 if (add_edge) {
1828                                         int vv_height = get_irn_height(rss->h, vv_irn);
1829                                         int mu1, mu2, critical_path_cost;
1830
1831                                         /*
1832                                                 mu1 = | descendants(v) cut sat_vals |
1833                                                 the number of saturating values which cannot
1834                                                 be simultaneously alive with u
1835                                         */
1836                                         bitset_copy(bs_tmp, bs_vdesc);
1837                                         mu1 = bitset_popcnt(bitset_and(bs_tmp, bs_sv));
1838
1839                                         /*
1840                                                 mu2 = | accum_desc_all_pkiller(u) without descendants(v) |
1841                                         */
1842                                         if (is_pkiller) {
1843                                                 bitset_copy(bs_tmp, bs_ukilldesc);
1844                                                 mu2 = bitset_popcnt(bitset_andnot(bs_tmp, bs_vdesc));
1845                                         }
1846                                         else {
1847                                                 mu2 = 0;
1848                                         }
1849
1850                                         /* omega1 = mu1 - mu2 */
1851                                         omega1 = mu1 - mu2;
1852
1853                                         if (omega1 != 0)
1854                                                 has_omega1 = 1;
1855
1856                                         /* omega2 = increase of critical path */
1857                                         critical_path_cost =
1858                                                 v_height                        /* longest path from v to sink */
1859                                                 + rss->max_height - vv_height   /* longest path from source to vv */
1860                                                 + 1;                            /* edge */
1861
1862                                         /*
1863                                                 If critical_path_cost > max_height -> the new edge
1864                                                 would increase the longest critical path by the difference.
1865                                         */
1866                                         omega2 = critical_path_cost > rss->max_height ? critical_path_cost - rss->max_height : 0;
1867
1868                                         /* this keeps track of the edge with the best benefit */
1869                                         if (omega1 >= num_regs - n && omega1 < best_benefit) {
1870                                                 min_benefit_edge.src = v_irn;
1871                                                 min_benefit_edge.tgt = vv_irn;
1872
1873                                                 ser_u_omega1 = u;
1874                                                 ser_v_omega1 = v;
1875
1876                                                 best_benefit    = omega1;
1877                                                 ser->new_killer = is_pkiller;
1878                                         }
1879
1880                                         /* this keeps track of the edge with the best omega1 costs where omega2 == 0 */
1881                                         if (omega2 == 0 && omega1 >= num_regs - n && omega1 < best_benefit_omega20) {
1882                                                 min_omega20_edge.src = v_irn;
1883                                                 min_omega20_edge.tgt = vv_irn;
1884
1885                                                 ser_u_omega20 = u;
1886                                                 ser_v_omega20 = v;
1887
1888                                                 best_benefit_omega20 = omega1;
1889                                                 ser->new_killer      = is_pkiller;
1890                                         }
1891
1892                                         best_omega2 = MIN(best_omega2, omega2);
1893
1894                                         DBG((rss->dbg, LEVEL_2, "\t\tfound %+F -> %+F (w1 %d, w2 %d)\n",
1895                                                 v_irn, vv_irn, omega1, omega2));
1896                                 } /* if add_edge */
1897                         } /* for all vv in pkiller(u) */
1898                 } /* for all v in sat_vals */
1899         } /* for all u in sat_vals */
1900
1901         if (! has_omega1)
1902                 return NULL;
1903
1904         if (best_omega2 == 0) {
1905                 ser->u         = ser_u_omega20;
1906                 ser->v         = ser_v_omega20;
1907                 ser->edge->src = min_omega20_edge.src;
1908                 ser->edge->tgt = min_omega20_edge.tgt;
1909                 ser->omega1    = best_benefit_omega20;
1910                 ser->omega2    = best_omega2;
1911         }
1912         else {
1913                 ser->u         = ser_u_omega1;
1914                 ser->v         = ser_v_omega1;
1915                 ser->edge->src = min_benefit_edge.src;
1916                 ser->edge->tgt = min_benefit_edge.tgt;
1917                 ser->omega1    = best_benefit;
1918                 ser->omega2    = best_omega2;
1919         }
1920
1921         return ser;
1922
1923 #undef IS_UNSERIALIZABLE_NODE
1924 }
1925
1926 /**
1927  * Perform the value serialization heuristic and add all
1928  * computed serialization edges as dependencies to the irg.
1929  */
1930 static void perform_value_serialization_heuristic(rss_t *rss) {
1931         bitset_t *arch_nonign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1932         bitset_t *abi_ign_bs     = bitset_alloca(arch_register_class_n_regs(rss->cls));
1933         int      available_regs, iteration;
1934         dvg_t    dvg;
1935         ir_nodeset_t *sat_vals;
1936         pset *ser_set = new_pset(cmp_rss_edges, 20);
1937
1938         /* available_regs = R = |arch_non_ignore_regs cut ~abi_ignore_regs| */
1939         arch_put_non_ignore_regs(rss->arch_env, rss->cls, arch_nonign_bs);
1940         be_abi_put_ignore_regs(rss->abi, rss->cls, abi_ign_bs);
1941         bitset_andnot(arch_nonign_bs, abi_ign_bs);
1942         available_regs  = bitset_popcnt(arch_nonign_bs);
1943         //num_live = pset_count(rss->live_block);
1944         //available_regs -= num_live < available_regs ? num_live : 0;
1945
1946         DBG((rss->dbg, LEVEL_1, "\n\t#available regs: %d\n\n", available_regs));
1947
1948         /*
1949                 At first we need to compute the disjoint value DAG (DVG = {V, E_dv}).
1950                 V    = set of all nodes we are currently interested in
1951                 E_dv = there is an edge from u to v iff v is a descendant of killer(u), forall u, v in V
1952         */
1953         ir_nodeset_init_size(&dvg.nodes, plist_count(rss->nodes));
1954         dvg.edges = new_pset(cmp_rss_edges, plist_count(rss->nodes) * 5);
1955         compute_dvg(rss, &dvg);
1956
1957         /*
1958                 Then we perform the heuristic serialization algorithm
1959                 on the DVG which gives us all necessary serialization
1960                 edges.
1961         */
1962         DBG((rss->dbg, LEVEL_1, "\tcomputing maximal antichain:\n"));
1963         iteration = 0;
1964         sat_vals  = compute_maximal_antichain(rss, &dvg, iteration++);
1965         while (sat_vals && (ir_nodeset_size(sat_vals) > available_regs)) {
1966                 serialization_t *ser, best_ser;
1967                 rss_edge_t      *edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*edge));
1968                 ir_node         *dep_src, *dep_tgt;
1969
1970                 best_ser.edge = edge;
1971                 ser = compute_best_admissible_serialization(rss, sat_vals, &best_ser, available_regs);
1972
1973                 DBG((rss->dbg, LEVEL_1, "\tcurrent register saturation %d, target %d\n", ir_nodeset_size(sat_vals), available_regs));
1974
1975                 if (! ser) {
1976                         DBG((rss->dbg, LEVEL_1, "\tno RS improving serialization found, breaking at iteration %d\n", iteration));
1977                         break;
1978                 }
1979
1980                 /* Insert the serialization as dependency edge into the irg. */
1981                 DBG((rss->dbg, LEVEL_1, "\tserializing %+F -> %+F with edge %+F -> %+F and cost %d, %d\n",
1982                         ser->u->irn, ser->v->irn, ser->edge->src, ser->edge->tgt, ser->omega1, ser->omega2));
1983
1984                 if (pset_find(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge)))
1985                         ir_printf("WARNING: serialization %+F -> %+F computed twice!\n", ser->edge->src, ser->edge->tgt);
1986
1987
1988                 pset_insert(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge));
1989
1990                 /* update the dvg */
1991                 update_dvg(rss, &dvg, ser->v, ser->u);
1992                 update_dvg(rss, &dvg, get_rss_irn(rss, ser->edge->src), get_rss_irn(rss, ser->edge->tgt));
1993                 if(sat_vals != NULL) {
1994                         ir_nodeset_destroy(sat_vals);
1995                         free(sat_vals);
1996                 }
1997
1998                 dep_src = skip_Proj(ser->edge->src);
1999                 dep_tgt = ser->edge->tgt;
2000                 add_irn_dep(dep_src, dep_tgt);
2001
2002                 /* Update descendants, consumer and pkillers of the target */
2003                 update_node_info(rss, ser->edge->tgt, ser->edge->src);
2004
2005                 /* TODO: try to find a cheaper way for updating height information */
2006                 rss->max_height = heights_recompute_block(rss->h, rss->block);
2007
2008                 /* Recompute the antichain for next serialization */
2009                 DBG((rss->dbg, LEVEL_1, "\tre-computing maximal antichain:\n"));
2010                 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
2011         }
2012
2013         ir_nodeset_destroy(&dvg.nodes);
2014         del_pset(dvg.edges);
2015 }
2016
2017 /**
2018  * Do initial calculations for a block.
2019  */
2020 static void process_block(ir_node *block, void *env) {
2021         rss_t *rss = env;
2022         int   i, n;
2023         const ir_edge_t *edge;
2024
2025         phase_init(&rss->ph, "rss block preprocessor", rss->irg, PHASE_DEFAULT_GROWTH, init_rss_irn);
2026
2027         DBG((rss->dbg, LEVEL_1, "preprocessing block %+F\n", block));
2028         rss->block = block;
2029
2030         /* build an index map for all nodes in the current block */
2031         i            = 0;
2032         n            = get_irn_n_edges(block);
2033         NEW_ARR_A(int *, rss->idx_map, n);
2034         foreach_out_edge(block, edge) {
2035                 ir_node *irn      = get_edge_src_irn(edge);
2036                 rss->idx_map[i++] = get_irn_idx(irn);
2037         }
2038         qsort(rss->idx_map, n, sizeof(rss->idx_map[0]), cmp_int);
2039         rss->max_height = heights_recompute_block(rss->h, block);
2040
2041         /* loop over all register classes */
2042         for (i = arch_isa_get_n_reg_class(rss->arch_env->isa) - 1; i >= 0; --i) {
2043                 const arch_register_class_t *cls = arch_isa_get_reg_class(rss->arch_env->isa, i);
2044
2045                 rss->cls = cls;
2046                 DBG((rss->dbg, LEVEL_1, "register class %s\n", arch_register_class_name(cls)));
2047
2048                 /* Get all live value at end of Block having current register class */
2049                 rss->live_block = pset_new_ptr(10);
2050                 be_liveness_end_of_block(rss->liveness, rss->arch_env, rss->cls, rss->block, rss->live_block);
2051
2052                 /* reset the list of interesting nodes */
2053                 plist_clear(rss->nodes);
2054                 plist_insert_back(rss->nodes, _sink);
2055
2056                 /* collect all nodes having a certain register class */
2057                 foreach_out_edge(block, edge) {
2058                         ir_node *irn  = get_edge_src_irn(edge);
2059                         ir_mode *mode = get_irn_mode(irn);
2060
2061                         /*
2062                                 We skip:
2063                                 - mode_T nodes (the projs are asked)
2064                                 - mode_X nodes (control flow nodes are always scheduled last)
2065                                 - Keeps (they are always immediately scheduled)
2066                                 - Phi (same as Keep)
2067                         */
2068                         if (mode == mode_T || mode == mode_X || is_Phi(irn))
2069                                 continue;
2070
2071                         /*
2072                                 In case of a proj, we skip
2073                                 - Barrier (they are a Barrier :)
2074                                 - Start
2075                                 - the Proj itself, as it's scheduled always with it's super node
2076                         */
2077                         if (is_Proj(irn)) {
2078                                 ir_node *pred = get_Proj_pred(irn);
2079                                 if (be_is_Barrier(pred) || is_Start(pred))
2080                                         continue;
2081                         }
2082
2083                         /* calculate the descendants and consumer for each node in the block */
2084                         collect_node_info(rss, skip_Proj(irn));
2085
2086                         if (be_is_Keep(irn))
2087                                 continue;
2088
2089                         if (! arch_irn_is(rss->arch_env, irn, ignore) && arch_get_irn_reg_class(rss->arch_env, irn, -1) == cls) {
2090                                 plist_insert_back(rss->nodes, skip_Proj(irn));
2091                         }
2092                         //}
2093                 }
2094
2095                 /* compute the potential killing set PK(G) */
2096                 compute_pkill_set(rss);
2097
2098                 /* compute the killing function k* */
2099                 compute_killing_function(rss);
2100
2101                 /*
2102                         Compute the heuristic value serialization and
2103                         add the necessary dependencies to the irg.
2104                 */
2105                 perform_value_serialization_heuristic(rss);
2106
2107                 del_pset(rss->live_block);
2108         }
2109
2110         phase_free(&rss->ph);
2111 }
2112
2113 /**
2114  * Register the options.
2115  */
2116 void be_init_schedrss(void) {
2117         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
2118         lc_opt_entry_t *sched_grp = lc_opt_get_grp(be_grp, "sched");
2119         lc_opt_entry_t *rss_grp = lc_opt_get_grp(sched_grp, "rss");
2120
2121         lc_opt_add_table(rss_grp, rss_option_table);
2122 }
2123
2124 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_schedrss);
2125
2126 /**
2127  * Preprocess the irg for scheduling.
2128  */
2129 void rss_schedule_preparation(const be_irg_t *birg) {
2130         ir_graph *irg = be_get_birg_irg(birg);
2131         rss_t rss;
2132
2133         FIRM_DBG_REGISTER(rss.dbg, "firm.be.sched.rss");
2134
2135         //firm_dbg_set_mask(rss.dbg, LEVEL_1 | LEVEL_2 | LEVEL_3);
2136
2137         init_rss_special_nodes(irg);
2138
2139         rss.irg      = irg;
2140         rss.arch_env = be_get_birg_arch_env(birg);
2141         rss.abi      = birg->abi;
2142         rss.h        = heights_new(irg);
2143         rss.nodes    = plist_new();
2144         rss.opts     = &rss_options;
2145         rss.liveness = be_liveness(irg);
2146         irg_block_walk_graph(irg, NULL, process_block, &rss);
2147         heights_free(rss.h);
2148         plist_free(rss.nodes);
2149         be_liveness_free(rss.liveness);
2150
2151         if (birg->main_env->options->dump_flags & DUMP_SCHED)
2152                 be_dump(rss.irg, "-rss", dump_ir_block_graph);
2153 }