633e89776e2a1b72f5967003efd4cbcd03a52965
[libfirm] / ir / be / beschedrss.c
1 /*
2  * Copyright (C) 1995-2007 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Implementation of a register saturating list scheduler.
23  * @author      Christian Wuerdig
24  * @date        29.08.2006
25  * @version     $Id$
26  *
27  * Implementation of a register saturating list scheduler
28  * as described in: Sid-Ahmed-Ali Touati
29  * Register Saturation in Superscalar and VLIW Codes
30  */
31 #ifdef HAVE_CONFIG_H
32 #include "config.h"
33 #endif
34
35 #include <limits.h>
36
37 #include "obst.h"
38 #include "debug.h"
39
40 #include "irgraph_t.h"
41 #include "irnode_t.h"
42 #include "iredges_t.h"
43 #include "ircons_t.h"
44 #include "irphase_t.h"
45 #include "irgwalk.h"
46 #include "irdump.h"
47 #include "irtools.h"
48 #include "irbitset.h"
49 #include "irprintf.h"
50 #include "bipartite.h"
51 #include "hungarian.h"
52 #include "plist.h"
53
54 #include "height.h"
55
56 #include "beabi.h"
57 #include "bemodule.h"
58 #include "benode_t.h"
59 #include "besched_t.h"
60 #include "beirg_t.h"
61
62 #include <libcore/lc_opts.h>
63 #include <libcore/lc_opts_enum.h>
64
65
66 #define ARR_LEN_SAFE(arr) ((arr) != NULL ? ARR_LEN((arr)) : 0)
67
68 #define HASH_RSS_EDGE(edge) ((get_irn_node_nr((edge)->src) << 16) | (get_irn_node_nr((edge)->tgt) & 0xFFFF))
69 #define BSEARCH_IRN_ARR(val, arr) \
70         bsearch(&(val), (arr), ARR_LEN_SAFE((arr)), sizeof((arr)[0]), cmp_irn_idx)
71
72 #define BLOCK_IDX_MAP(rss, irn) bsearch_for_index(get_irn_idx((irn)), (rss)->idx_map, ARR_LEN_SAFE((rss)->idx_map), 1)
73
74 /* the rss options */
75 typedef struct _rss_opts_t {
76         int dump_flags;
77 } rss_opts_t;
78
79 /* Represents a child with associated costs */
80 typedef struct _child {
81         ir_node *irn;
82         float   cost;
83 } child_t;
84
85 /* We need edges for several purposes. */
86 typedef struct _rss_edge {
87         ir_node *src;
88         ir_node *tgt;
89         void    *next;
90 } rss_edge_t;
91
92 /* Represents a connected bipartite component. */
93 typedef struct _cbc {
94         ir_nodeset_t parents;       /**< = S  a set of value producers */
95         ir_nodeset_t children;      /**< = T  a set of value consumers */
96         pset    *kill_edges;    /**< = E  a set of edges (t in T, s in S) such as each s in S gets killed by at least one t in T */
97         int     nr;             /**< a deterministic index for set insertion (used as hash) */
98 } cbc_t;
99
100 /* Represents a disjoint value DAG. */
101 typedef struct _dvg {
102         ir_nodeset_t nodes;
103         pset    *edges;
104 } dvg_t;
105
106 /* Represents a chain of nodes. */
107 typedef struct _chain {
108         plist_t *elements;   /**< List of chain elements */
109         int     nr;          /**< a deterministic index for set insertion (used as hash) */
110 } chain_t;
111
112 typedef struct _rss_irn {
113         plist_t  *consumer_list;    /**< List of consumers */
114         ir_node **consumer;         /**< Sorted consumer array (needed for faster access) */
115
116         plist_t  *parent_list;      /**< List of parents */
117         plist_t  *pkiller_list;     /**< List of potential killers */
118
119         plist_t  *descendant_list;  /**< List of descendants */
120         ir_node **descendants;      /**< Sorted descendant array (needed for faster access) */
121
122         ir_node  *killer;           /**< The selected unique killer */
123         ir_node  *irn;              /**< The corresponding firm node to this rss_irn */
124
125         chain_t  *chain;            /**< The chain, this node is associated to */
126
127         unsigned desc_walk;         /**< visited flag for collecting descendants */
128         unsigned kill_count;        /**< number of nodes killed by this one */
129
130         unsigned live_out : 1;      /**< irn has consumers outside of it's block */
131         unsigned visited  : 1;      /**< visited flag for bipartite decomposition */
132         unsigned havecons : 1;      /**< visited flag collect consumer */
133         unsigned handled  : 1;      /**< flag indicating whether or not the list structures have been build */
134         unsigned dumped   : 1;      /**< flag indication whether or not this node was dumped */
135 } rss_irn_t;
136
137 /* Represents a serialization edge with associated costs. */
138 typedef struct _serialization {
139         rss_irn_t  *u;       /* the top node */
140         rss_irn_t  *v;       /* the node about to be serialized after u */
141         rss_edge_t *edge;    /* the edge selected for the serialization */
142         int        omega1;   /* estimated: available regs - RS reduction */
143         int        omega2;   /* increase in critical path length */
144         int        new_killer;
145 } serialization_t;
146
147 typedef struct _rss {
148         ir_phase          ph;              /**< Phase to hold some data */
149         heights_t        *h;              /**< The current height object */
150         ir_graph         *irg;            /**< The irg to preprocess */
151         plist_t          *nodes;          /**< The list of interesting nodes */
152         const arch_env_t *arch_env;       /**< The architecture environment */
153         be_abi_irg_t     *abi;            /**< The abi for this irg */
154         pset             *cbc_set;        /**< A set of connected bipartite components */
155         ir_node          *block;          /**< The current block in progress. */
156         int              *idx_map;        /**< Mapping irn indices to per block indices */
157         unsigned         max_height;      /**< maximum height in the current block */
158         rss_opts_t       *opts;           /**< The options */
159         be_lv_t          *liveness;       /**< The liveness information for this irg */
160         ir_nodeset_t      live_block;     /**< Values alive at end of block */
161         const arch_register_class_t *cls; /**< The current register class */
162         DEBUG_ONLY(firm_dbg_module_t *dbg);
163 } rss_t;
164
165 #define get_rss_irn(rss, irn)  (phase_get_or_set_irn_data(&rss->ph, irn))
166
167 /**
168  * We need some special nodes:
169  * a source and a sink for all live-in and live-out values of a block
170  */
171 enum {
172         iro_rss_Source,
173         iro_rss_Sink,
174         iro_rss_last
175 };
176
177 /** The opcode of the rss_Source node once allocated. */
178 static ir_op *op_rss_Source;
179 /** The opcode of the rss_Sink node once allocated. */
180 static ir_op *op_rss_Sink;
181
182 /** The rss_Source node of the current graph. */
183 static ir_node *_source = NULL;
184 /** The rss_Sink node of the current graph. */
185 static ir_node *_sink   = NULL;
186
187 #define is_Source(irn) ((irn) == _source)
188 #define is_Sink(irn)   ((irn) == _sink)
189
190 enum {
191         RSS_DUMP_NONE  = 0,
192         RSS_DUMP_CBC   = 1 << 0,
193         RSS_DUMP_PKG   = 1 << 1,
194         RSS_DUMP_KILL  = 1 << 2,
195         RSS_DUMP_DVG   = 1 << 3,
196         RSS_DUMP_MAXAC = 1 << 4,
197         RSS_DUMP_ALL   = (RSS_DUMP_MAXAC << 1) - 1,
198 };
199
200 static rss_opts_t rss_options = {
201         RSS_DUMP_NONE,
202 };
203
204 static const lc_opt_enum_int_items_t dump_items[] = {
205         { "none",  RSS_DUMP_NONE  },
206         { "cbc",   RSS_DUMP_CBC   },
207         { "pkg",   RSS_DUMP_PKG   },
208         { "kill",  RSS_DUMP_KILL  },
209         { "dvg",   RSS_DUMP_DVG   },
210         { "maxac", RSS_DUMP_MAXAC },
211         { "all",   RSS_DUMP_ALL   },
212         { NULL, 0 }
213 };
214
215 static lc_opt_enum_int_var_t dump_var = {
216         &rss_options.dump_flags, dump_items
217 };
218
219 static const lc_opt_table_entry_t rss_option_table[] = {
220         LC_OPT_ENT_ENUM_MASK("dump", "dump phases", &dump_var),
221         LC_OPT_LAST
222 };
223
224 /******************************************************************************
225  *  _          _                    __                  _   _
226  * | |        | |                  / _|                | | (_)
227  * | |__   ___| |_ __   ___ _ __  | |_ _   _ _ __   ___| |_ _  ___  _ __  ___
228  * | '_ \ / _ \ | '_ \ / _ \ '__| |  _| | | | '_ \ / __| __| |/ _ \| '_ \/ __|
229  * | | | |  __/ | |_) |  __/ |    | | | |_| | | | | (__| |_| | (_) | | | \__ \
230  * |_| |_|\___|_| .__/ \___|_|    |_|  \__,_|_| |_|\___|\__|_|\___/|_| |_|___/
231  *            | |
232  *            |_|
233  ******************************************************************************/
234
235 /**
236  * Acquire opcodes if needed and create source and sink nodes.
237  */
238 static void init_rss_special_nodes(ir_graph *irg) {
239         ir_node *block;
240
241         if (op_rss_Source == NULL) {
242                 int iro_rss_base = get_next_ir_opcodes(iro_rss_last);
243                 op_rss_Source = new_ir_op(iro_rss_base + iro_rss_Source, "rss_Source", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
244                 op_rss_Sink   = new_ir_op(iro_rss_base + iro_rss_Sink,   "rss_Sink",   op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
245         }
246         block   = get_irg_start_block(irg);
247         _source = new_ir_node(NULL, irg, block, op_rss_Source, mode_ANY, 0, NULL);
248         _sink   = new_ir_node(NULL, irg, block, op_rss_Sink, mode_ANY, 0, NULL);
249 }
250
251 static int cmp_int(const void *a, const void *b) {
252         const int *i1 = a;
253         const int *i2 = b;
254
255         return QSORT_CMP(*i1, *i2);
256 }
257
258 static int cmp_child_costs(const void *a, const void *b) {
259         const child_t *c1 = a;
260         const child_t *c2 = b;
261
262         return QSORT_CMP(c1->cost, c2->cost);
263 }
264
265 static int cmp_irn_idx(const void *a, const void *b) {
266         const ir_node *n1 = *(ir_node **)a;
267         const ir_node *n2 = *(ir_node **)b;
268
269         return QSORT_CMP(get_irn_idx(n1), get_irn_idx(n2));
270 }
271
272 static int cmp_rss_edges(const void *a, const void *b) {
273         const rss_edge_t *e1 = a;
274         const rss_edge_t *e2 = b;
275
276         return (e1->src != e2->src) || (e1->tgt != e2->tgt);
277 }
278
279 static int bsearch_for_index(int key, int *arr, size_t len, int force) {
280         int left = 0;
281         int right = len;
282
283         while (right >= left) {
284                 int idx = (left + right) / 2;
285
286                 if (key < arr[idx])
287                         right = idx - 1;
288                 else if (key > arr[idx])
289                         left = idx + 1;
290                 else
291                         return idx;
292         }
293
294         if (force)
295                 assert(0 && "Something is wrong, key not found.");
296         return -1;
297 }
298
299 static ir_node **build_sorted_array_from_list(plist_t *irn_list, struct obstack *obst) {
300         plist_element_t *el;
301         int     i     = 0;
302         int     len   = plist_count(irn_list);
303         ir_node **arr = NEW_ARR_D(ir_node *, obst, len);
304
305         /* copy the list into the array */
306         foreach_plist(irn_list, el) {
307                 arr[i++] = plist_element_get_value(el);
308         }
309
310         /* sort the array by node index */
311         qsort(arr, len, sizeof(arr[0]), cmp_irn_idx);
312
313         return arr;
314 }
315
316 /*****************************************************
317  *      _      _                       _
318  *     | |    | |                     (_)
319  *   __| | ___| |__  _   _  __ _  __ _ _ _ __   __ _
320  *  / _` |/ _ \ '_ \| | | |/ _` |/ _` | | '_ \ / _` |
321  * | (_| |  __/ |_) | |_| | (_| | (_| | | | | | (_| |
322  *  \__,_|\___|_.__/ \__,_|\__, |\__, |_|_| |_|\__, |
323  *                          __/ | __/ |         __/ |
324  *                         |___/ |___/         |___/
325  *
326  *****************************************************/
327
328 #ifdef DEBUG_libfirm
329 static void dump_nodeset(ir_nodeset_t *ns, const char *prefix) {
330         ir_nodeset_iterator_t iter;
331         ir_node *irn;
332
333         ir_nodeset_iterator_init(&iter, ns);
334         while( (irn = ir_nodeset_iterator_next(&iter)) != NULL ) {
335                 ir_fprintf(stderr, "%s%+F\n", prefix, irn);
336         }
337 }
338 #endif
339
340 static void build_file_name(rss_t *rss, const char *suffix, size_t suf_len, char *buf, size_t len) {
341         const char *irg_name;
342
343         memset(buf, 0, len);
344         irg_name = get_entity_name(get_irg_entity(rss->irg));
345         snprintf(buf, len - suf_len, "%s-%s-block-%ld",
346                 irg_name, arch_register_class_name(rss->cls), get_irn_node_nr(rss->block));
347         strcat(buf, suffix);
348 }
349
350 /* Dumps all collected bipartite components of current irg as vcg. */
351 static void debug_vcg_dump_bipartite(rss_t *rss) {
352         cbc_t *cbc;
353         FILE  *f;
354         char  file_name[256];
355         static const char suffix[] = "-RSS-CBC.vcg";
356
357         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
358         f = fopen(file_name, "w");
359
360         ir_fprintf(f, "graph: { title: \"connected bipartite component graph of %+F\"\n", rss->irg);
361         fprintf(f, "display_edge_labels: no\n");
362         fprintf(f, "layoutalgorithm: mindepth\n");
363         fprintf(f, "manhattan_edges: yes\n\n");
364
365         foreach_pset(rss->cbc_set, cbc) {
366                 ir_nodeset_iterator_t iter;
367                 ir_node    *n;
368                 rss_edge_t *ke;
369
370                 fprintf(f, "graph: { titel: \"cbc %d\" label: \"cbc %d\" status:clustered color:yellow\n", cbc->nr, cbc->nr);
371                 foreach_ir_nodeset(&cbc->parents, n, iter) {
372                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
373                 }
374                 foreach_ir_nodeset(&cbc->children, n, iter) {
375                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
376                 }
377                 foreach_pset(cbc->kill_edges, ke) {
378                         ir_fprintf(f, "edge: { sourcename: \"n%d_%d\" targetname: \"n%d_%d\" }\n",
379                                 get_irn_node_nr(ke->src), cbc->nr, get_irn_node_nr(ke->tgt), cbc->nr);
380                 }
381                 fprintf(f, "}\n\n");
382         }
383         fprintf(f, "}\n");
384         fclose(f);
385 }
386
387 /* Dump the computed killing function as vcg. */
388 static void debug_vcg_dump_kill(rss_t *rss) {
389         FILE            *f;
390         char            file_name[256];
391         plist_element_t *el;
392         static const char suffix[] = "-RSS-KILL.vcg";
393
394         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
395         f = fopen(file_name, "w");
396
397         ir_fprintf(f, "graph: { title: \"computed kill graph of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
398         fprintf(f, "display_edge_labels: no\n");
399         fprintf(f, "layoutalgorithm: mindepth\n");
400         fprintf(f, "manhattan_edges: yes\n\n");
401
402         {
403                 /* reset dump_flag */
404                 ir_node *irn;
405                 foreach_phase_irn(&rss->ph, irn) {
406                         rss_irn_t *node = get_rss_irn(rss, irn);
407                         node->dumped = 0;
408                 }
409         }
410
411         /* dump all nodes and their killers */
412         foreach_plist(rss->nodes, el) {
413                 ir_node   *irn     = plist_element_get_value(el);
414                 rss_irn_t *rirn    = get_rss_irn(rss, irn);
415                 rss_irn_t *pk_rirn = get_rss_irn(rss, rirn->killer);
416
417                 if (! rirn->dumped) {
418                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
419                         rirn->dumped = 1;
420                 }
421
422                 if (! pk_rirn->dumped) {
423                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(rirn->killer), rirn->killer);
424                         pk_rirn->dumped = 1;
425                 }
426
427                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
428                         get_irn_node_nr(rirn->killer), get_irn_node_nr(irn));
429         }
430
431         fprintf(f, "}\n");
432         fclose(f);
433 }
434
435 /* Dumps the potential killing DAG (PKG) as vcg. */
436 static void debug_vcg_dump_pkg(rss_t *rss, ir_nodeset_t *max_ac, int iteration) {
437         FILE              *f;
438         char              file_name[256];
439         char              *suffix   = alloca(32);
440         static const char suffix1[] = "-RSS-PKG.vcg";
441         static const char suffix2[] = "-RSS-PKG-MAXAC.vcg";
442         plist_element_t   *el;
443
444         if (! max_ac) {
445                 snprintf(suffix, 32, "%s", suffix1);
446         }
447         else {
448                 snprintf(suffix, 32, "-%02d%s", iteration, suffix2);
449         }
450
451         build_file_name(rss, suffix, strlen(suffix) + 1, file_name, sizeof(file_name));
452         f = fopen(file_name, "w");
453
454         ir_fprintf(f, "graph: { title: \"potential killing DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
455         fprintf(f, "display_edge_labels: no\n");
456         fprintf(f, "layoutalgorithm: mindepth\n");
457         fprintf(f, "manhattan_edges: yes\n\n");
458
459         {
460                 /* reset dump_flag */
461                 ir_node *irn;
462                 foreach_phase_irn(&rss->ph, irn) {
463                         rss_irn_t *node = get_rss_irn(rss, irn);
464                         node->dumped = 0;
465                 }
466         }
467
468         foreach_plist(rss->nodes, el) {
469                 ir_node   *irn  = plist_element_get_value(el);
470                 rss_irn_t *rirn = get_rss_irn(rss, irn);
471                 char      *c1   = "";
472                 plist_element_t *k_el;
473
474                 /* dump selected saturating values in yellow */
475                 if (max_ac && ir_nodeset_contains(max_ac, irn))
476                         c1 = "color:yellow";
477
478                 if (! rirn->dumped) {
479                         if (rirn->chain)
480                                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(irn), irn, rirn->chain->nr, c1);
481                         else
482                                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(irn), irn, c1);
483                         rirn->dumped = 1;
484                 }
485
486                 foreach_plist(rirn->pkiller_list, k_el) {
487                         ir_node   *pkiller = plist_element_get_value(k_el);
488                         rss_irn_t *pk_rirn = get_rss_irn(rss, pkiller);
489                         char      *c2      = "";
490
491                         if (max_ac && ir_nodeset_contains(max_ac, pkiller))
492                                 c2 = "color:yellow";
493
494                         if (! pk_rirn->dumped) {
495                                 if (pk_rirn->chain)
496                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(pkiller), pkiller, pk_rirn->chain->nr, c2);
497                                 else
498                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(pkiller), pkiller, c2);
499                                 pk_rirn->dumped = 1;
500                         }
501                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
502                                 get_irn_node_nr(pkiller), get_irn_node_nr(irn));
503                 }
504         }
505         fprintf(f, "}\n");
506         fclose(f);
507 }
508
509 /* Dumps the disjoint value DAG (DVG) as vcg. */
510 static void debug_vcg_dump_dvg(rss_t *rss, dvg_t *dvg) {
511         static const char suffix[] = "-RSS-DVG.vcg";
512         FILE       *f;
513         char       file_name[256];
514         ir_nodeset_iterator_t iter;
515         ir_node    *irn;
516         rss_edge_t *edge;
517
518         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
519         f = fopen(file_name, "w");
520
521         ir_fprintf(f, "graph: { title: \"disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
522         fprintf(f, "display_edge_labels: no\n");
523         fprintf(f, "layoutalgorithm: mindepth\n");
524         fprintf(f, "manhattan_edges: yes\n\n");
525
526         /* dump all nodes */
527         foreach_ir_nodeset(&dvg->nodes, irn, iter) {
528                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
529         }
530
531         /* dump all edges */
532         foreach_pset(dvg->edges, edge) {
533                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
534                         get_irn_node_nr(edge->src), get_irn_node_nr(edge->tgt));
535         }
536
537         fprintf(f, "}\n");
538         fclose(f);
539 }
540
541 #if 0
542 /* Dumps the PKG(DVG). */
543 static void debug_vcg_dump_dvg_pkiller(rss_t *rss, dvg_t *dvg) {
544         static const char suffix[] = "-RSS-DVG-PKG.vcg";
545         FILE       *f;
546         char       file_name[256];
547         ir_nodeset_iterator_t iter;
548         ir_node    *irn;
549
550         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
551         f = fopen(file_name, "w");
552
553         ir_fprintf(f, "graph: { title: \"PKG of disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
554         fprintf(f, "display_edge_labels: no\n");
555         fprintf(f, "layoutalgorithm: mindepth\n");
556         fprintf(f, "manhattan_edges: yes\n\n");
557
558         /* dump all nodes */
559         foreach_ir_nodeset(&dvg->nodes, irn, iter) {
560                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
561         }
562
563         /* dump all edges */
564         foreach_ir_nodeset(&dvg->nodes, irn, iter) {
565                 rss_irn_t       *node = get_rss_irn(rss, irn);
566                 plist_element_t *el;
567
568                 foreach_plist(node->dvg_pkiller_list, el) {
569                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
570                                 get_irn_node_nr(plist_element_get_value(el)), get_irn_node_nr(irn));
571                 }
572         }
573
574         fprintf(f, "}\n");
575         fclose(f);
576 }
577 #endif /* if 0 */
578
579 /**
580  * In case there is no rss information for irn, initialize it.
581  */
582 static void *init_rss_irn(ir_phase *ph, ir_node *irn, void *old) {
583         rss_irn_t *res = old ? old : phase_alloc(ph, sizeof(res[0]));
584
585         res->descendant_list = plist_obstack_new(phase_obst(ph));
586         res->descendants     = NULL;
587
588         res->consumer_list   = plist_obstack_new(phase_obst(ph));
589         res->consumer        = NULL;
590
591         res->pkiller_list    = plist_obstack_new(phase_obst(ph));
592
593         res->parent_list     = plist_obstack_new(phase_obst(ph));
594
595         res->killer          = NULL;
596         res->irn             = irn;
597         res->chain           = NULL;
598
599         res->kill_count      = 0;
600         res->desc_walk       = 0;
601         res->live_out        = 0;
602         res->visited         = 0;
603         res->handled         = 0;
604         res->dumped          = 0;
605         res->havecons        = 0;
606
607         return res;
608 }
609
610 /**
611  * Collect all nodes data dependent on current node.
612  */
613 static void collect_descendants(rss_t *rss, rss_irn_t *rirn, ir_node *irn, int *got_sink, unsigned cur_desc_walk) {
614         const ir_edge_t *edge;
615         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
616         ir_node         *block    = rss->block;
617         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
618         int             i;
619
620         if (cur_node->desc_walk >= cur_desc_walk)
621                 return;
622         cur_node->desc_walk = cur_desc_walk;
623
624         /* Stop at Barriers */
625         if (be_is_Barrier(irn))
626                 return;
627
628         /* loop over normal and dependency edges */
629         for (i = 0; i < 2; ++i) {
630                 foreach_out_edge_kind(irn, edge, ekind[i]) {
631                         ir_node *user = get_edge_src_irn(edge);
632
633                         /* skip ignore nodes as they do not really contribute to register pressure */
634                         if (arch_irn_is(rss->arch_env, user, ignore))
635                                 continue;
636
637                         /*
638                                 (a) mode_X means Jump            -> out_edge
639                                 (b) Phi as user of a normal node -> out-edge
640                         */
641                         if (get_irn_mode(user) == mode_X || is_Phi(user)) {
642                                 if (! *got_sink)
643                                         goto force_sink;
644                                 else
645                                         continue;
646                         }
647
648                         if (is_Proj(user)) {
649
650                                 //if (arch_get_irn_reg_class(rss->arch_env, user, -1) == rss->cls)
651                                         collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
652                         }
653                         else {
654                                 /* check if user lives in block and is not a control flow node */
655                                 if (get_nodes_block(user) == block) {
656                                         if (! plist_has_value(rirn->descendant_list, user)) {
657                                                 plist_insert_back(rirn->descendant_list, user);
658                                                 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", user));
659                                         }
660                                         collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
661                                 }
662                                 else if (! *got_sink) {
663 force_sink:
664                                         /* user lives out of block: add sink as descendant if not already done */
665                                         plist_insert_back(rirn->descendant_list, _sink);
666                                         *got_sink = 1;
667                                         DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", _sink));
668                                 }
669                         }
670                 }
671         }
672 }
673
674 /**
675  * Handles a single consumer.
676  */
677 static void collect_single_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *consumer, int *got_sink) {
678         ir_node *block = rss->block;
679
680         assert(! is_Proj(consumer) && "Cannot handle Projs");
681
682         if (! is_Phi(consumer) && ! is_Block(consumer) && get_nodes_block(consumer) == block) {
683                 if (! arch_irn_is(rss->arch_env, consumer, ignore) && ! plist_has_value(rss_irn->consumer_list, consumer)) {
684                         plist_insert_back(rss_irn->consumer_list, consumer);
685                         DBG((rss->dbg, LEVEL_2, "\t\tconsumer %+F\n", consumer));
686                 }
687         }
688         else {
689                 rss_irn->live_out = 1;
690                 DBG((rss->dbg, LEVEL_2, "\t\tlive out %+F", consumer));
691                 if (! *got_sink) {
692                         plist_insert_back(rss_irn->consumer_list, _sink);
693                         *got_sink = 1;
694                         DB((rss->dbg, LEVEL_2, ", %+F added instead", _sink));
695                 }
696                 DB((rss->dbg, LEVEL_2, "\n"));
697         }
698 }
699
700 /**
701  * Collect all nodes consuming the value(s) produced by current node.
702  */
703 static void collect_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *irn, int *got_sink) {
704         const ir_edge_t *edge;
705         int             i;
706         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
707         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
708
709         if (cur_node->havecons)
710                 return;
711         cur_node->havecons = 1;
712
713         for (i = 0; i < 2; ++i) {
714                 foreach_out_edge_kind(irn, edge, ekind[i]) {
715                         ir_node *consumer = get_edge_src_irn(edge);
716
717                         if (is_Proj(consumer)) {
718                                 //if (arch_get_irn_reg_class(rss->arch_env, consumer, -1) == rss->cls)
719                                         collect_consumer(rss, rss_irn, consumer, got_sink);
720                         }
721                         else
722                                 collect_single_consumer(rss, rss_irn, consumer, got_sink);
723                 }
724         }
725 }
726
727 /**
728  * Collects all consumer and descendant of a irn.
729  */
730 static void collect_node_info(rss_t *rss, ir_node *irn) {
731         static unsigned cur_desc_walk = 0;
732         rss_irn_t       *rss_irn      = get_rss_irn(rss, irn);
733         int             got_sink;
734
735         assert(! is_Proj(irn) && "Cannot handle Projs.");
736
737         /* check if node info is already available */
738         if (rss_irn->handled)
739                 return;
740                 //phase_reinit_single_irn_data(&rss->ph, irn);
741
742         DBG((rss->dbg, LEVEL_1, "\tcomputing consumers of %+F:\n", irn));
743
744         /* collect all consumer */
745         got_sink = 0;
746         collect_consumer(rss, rss_irn, irn, &got_sink);
747
748         /* build sorted consumer array */
749         rss_irn->consumer = build_sorted_array_from_list(rss_irn->consumer_list, phase_obst(&rss->ph));
750
751         DBG((rss->dbg, LEVEL_1, "\tcompute descendants of %+F:\n", irn));
752
753         /* collect descendants */
754         got_sink = 0;
755         collect_descendants(rss, rss_irn, irn, &got_sink, ++cur_desc_walk);
756
757         /* build sorted descendant array */
758         rss_irn->descendants = build_sorted_array_from_list(rss_irn->descendant_list, phase_obst(&rss->ph));
759
760         rss_irn->handled = 1;
761 }
762
763 /**
764  * Checks if v is a potential killer of u.
765  * v is in pkill(u) iff descendants(v) cut consumer(u) is v
766  *
767  * @param rss   The rss object
768  * @param v      The node to check for killer
769  * @param u      The potentially killed value
770  * @return 1 if v is in pkill(u), 0 otherwise
771  */
772 static int is_potential_killer(rss_t *rss, rss_irn_t *v, rss_irn_t *u) {
773         plist_t *list;
774         ir_node **arr;
775         plist_element_t *el;
776         (void) rss;
777
778         assert(is_Sink(v->irn) || ((plist_count(v->descendant_list) > 0 && v->descendants) || 1));
779         assert(is_Sink(u->irn) || ((plist_count(u->consumer_list)   > 0 && u->consumer)    || 1));
780
781         /* as we loop over the list: loop over the shorter one */
782         if (plist_count(v->descendant_list) > plist_count(u->consumer_list)) {
783                 list = u->consumer_list;
784                 arr  = v->descendants;
785         }
786         else {
787                 list = v->descendant_list;
788                 arr  = u->consumer;
789         }
790
791         /* for each list element: try to find element in array */
792         foreach_plist(list, el) {
793                 ir_node *irn   = plist_element_get_value(el);
794                 ir_node *match = BSEARCH_IRN_ARR(irn, arr);
795
796                 if (match && match != irn)
797                         return 0;
798         }
799
800         return 1;
801 }
802
803 /**
804  * Update descendants, consumer and pkiller list for the given irn.
805  */
806 static void update_node_info(rss_t *rss, ir_node *irn, ir_node *pk_irn) {
807         rss_irn_t *node    = get_rss_irn(rss, irn);
808         rss_irn_t *pkiller = get_rss_irn(rss, pk_irn);
809
810         /* add consumer and rebuild the consumer array */
811         if (! plist_has_value(node->consumer_list, pk_irn)) {
812                 plist_insert_back(node->consumer_list, pk_irn);
813                 node->consumer = build_sorted_array_from_list(node->consumer_list, phase_obst(&rss->ph));
814         }
815
816         /* add pk_irn as descendant, add it's descendants to irn's and rebuild array */
817         if (! plist_has_value(node->descendant_list, pk_irn)) {
818                 plist_element_t *el;
819
820                 plist_insert_back(node->descendant_list, pk_irn);
821
822                 foreach_plist(pkiller->descendant_list, el) {
823                         ir_node *desc = plist_element_get_value(el);
824
825                         if (! plist_has_value(node->descendant_list, desc))
826                                 plist_insert_back(node->descendant_list, desc);
827                 }
828
829                 node->descendants = build_sorted_array_from_list(node->descendant_list, phase_obst(&rss->ph));
830         }
831
832 }
833
834 /**
835  * Compute the potential killing set PK.
836  */
837 static void compute_pkill_set(rss_t *rss) {
838         plist_element_t *u_el, *v_el;
839
840         foreach_plist(rss->nodes, u_el) {
841                 ir_node   *u_irn = plist_element_get_value(u_el);
842                 rss_irn_t *u     = get_rss_irn(rss, u_irn);
843
844                 DBG((rss->dbg, LEVEL_1, "\tcomputing potential killers of %+F:\n", u_irn));
845
846                 /* check each consumer if it is a potential killer  */
847                 foreach_plist(u->consumer_list, v_el) {
848                         ir_node   *v_irn = plist_element_get_value(v_el);
849                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
850
851                         /* check, if v is a potential killer of u */
852                         if (is_potential_killer(rss, v, u)) {
853                                 if (! plist_has_value(u->pkiller_list, v_irn))
854                                         plist_insert_back(u->pkiller_list, v_irn);
855                                 v->kill_count++;
856                                 DBG((rss->dbg, LEVEL_2, "\t\tpotential killer %+F\n", v_irn));
857                         }
858                 }
859
860                 u->killer = _sink;
861         }
862
863         if (rss->opts->dump_flags & RSS_DUMP_PKG)
864                 debug_vcg_dump_pkg(rss, NULL, 0);
865 }
866
867 /**
868  * Build set of killing edges (from values to their potential killers)
869  */
870 static void build_kill_edges(rss_t *rss, pset *epk) {
871         plist_element_t *el, *k_el;
872
873         foreach_plist(rss->nodes, el) {
874                 ir_node    *irn  = plist_element_get_value(el);
875                 rss_irn_t *rirn = get_rss_irn(rss, irn);
876
877                 DBG((rss->dbg, LEVEL_3, "\t\tbuilding kill edges for %+F:\n", irn));
878
879                 foreach_plist(rirn->pkiller_list, k_el) {
880                         ir_node    *pkiller = plist_element_get_value(k_el);
881                         rss_edge_t *ke      = obstack_alloc(phase_obst(&rss->ph), sizeof(*ke));
882
883                         ke->src  = irn;
884                         ke->tgt  = pkiller;
885                         ke->next = NULL;
886
887                         DBG((rss->dbg, LEVEL_3, "\t\t\ttarget %+F\n", pkiller));
888
889                         pset_insert(epk, ke, HASH_RSS_EDGE(ke));
890                 }
891         }
892 }
893
894 #ifdef DEBUG_libfirm
895 /* print the given cbc for debugging purpose */
896 static void debug_print_cbc(firm_dbg_module_t *mod, cbc_t *cbc) {
897         ir_nodeset_iterator_t iter;
898         ir_node    *n;
899         rss_edge_t *ke;
900
901         DBG((mod, LEVEL_3, "\t\tS = set of parents:\n"));
902         foreach_ir_nodeset(&cbc->parents, n, iter) {
903                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
904         }
905         DBG((mod, LEVEL_3, "\t\tT = set of children:\n"));
906         foreach_ir_nodeset(&cbc->children, n, iter) {
907                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
908         }
909         DBG((mod, LEVEL_3, "\t\tE = Edges from producers to consumers\n"));
910         foreach_pset(cbc->kill_edges, ke) {
911                 DBG((mod, LEVEL_3, "\t\t\t%+F -> %+F\n", ke->src, ke->tgt));
912         }
913 }
914 #endif
915
916 /**
917  * Construct the bipartite decomposition.
918  * Sid-Ahmed-Ali Touati, Phd Thesis
919  * Register Pressure in Instruction Level Parallelism, p. 71
920  */
921 static void compute_bipartite_decomposition(rss_t *rss) {
922         pset *epk    = new_pset(cmp_rss_edges, 10);
923         int  cur_num = 0;
924
925         plist_element_t *el;
926
927         DBG((rss->dbg, LEVEL_1, "\tcomputing bipartite decomposition:\n"));
928
929         build_kill_edges(rss, epk);
930
931         foreach_plist(rss->nodes, el) {
932                 ir_node   *u_irn   = plist_element_get_value(el);
933                 rss_irn_t *u       = get_rss_irn(rss, u_irn);
934                 int       p_change = 1;
935                 int       c_change = 1;
936                 int       vrfy_ok  = 1;
937
938                 cbc_t           *cbc;
939                 plist_element_t *el2;
940                 rss_edge_t      *k_edge;
941                 rss_edge_t      *kedge_root = NULL;
942                 ir_node         *t_irn, *s_irn;
943                 ir_nodeset_iterator_t iter;
944
945                 if (u->visited || u_irn == _sink)
946                         continue;
947
948                 DBG((rss->dbg, LEVEL_2, "\t\t%+F choosen:\n", u_irn));
949
950                 cbc     = obstack_alloc(phase_obst(&rss->ph), sizeof(*cbc));
951                 cbc->nr = cur_num++;
952
953                 /* initialize S_cb */
954                 ir_nodeset_init_size(&cbc->parents, 5);
955                 ir_nodeset_insert(&cbc->parents, u_irn);
956                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (init)\n", u_irn));
957
958                 /* E_cb = empty */
959                 cbc->kill_edges = new_pset(cmp_rss_edges, 5);
960
961                 /* each parent gets killed by at least one value from children */
962
963                 /* T_cb = PK_successors(u) */
964                 ir_nodeset_init_size(&cbc->children, 5);
965                 foreach_plist(u->pkiller_list, el2) {
966                         ir_nodeset_insert(&cbc->children, plist_element_get_value(el2));
967                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (init)\n", plist_element_get_value(el2)));
968                 }
969
970                 /*
971                         Now: insert the parents of all children into the parent set
972                         and insert the children of all parents into the children set
973                         until the sets don't change any more
974                 */
975                 while (p_change || c_change) {
976                         ir_nodeset_iterator_t iter;
977                         p_change = c_change = 0;
978
979                         /* accumulate parents */
980                         foreach_ir_nodeset(&cbc->children, t_irn, iter) {
981                                 foreach_pset(epk, k_edge) {
982                                         ir_node *val = k_edge->src;
983
984                                         if (k_edge->tgt == t_irn && ! ir_nodeset_contains(&cbc->parents, val)) {
985                                                 ir_nodeset_insert(&cbc->parents, val);
986                                                 p_change = 1;
987                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (killed by %+F)\n", val, t_irn));
988                                         }
989                                 }
990                         }
991
992                         /* accumulate children */
993                         foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
994                                 foreach_pset(epk, k_edge) {
995                                         ir_node *val = k_edge->tgt;
996
997                                         if (k_edge->src == s_irn  && ! ir_nodeset_contains(&cbc->children, val)) {
998                                                 ir_nodeset_insert(&cbc->children, val);
999                                                 c_change = 1;
1000                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (kills %+F)\n", val, s_irn));
1001                                         }
1002                                 }
1003                         }
1004                 }
1005
1006                 /* mark all parent values as visited */
1007                 foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
1008                         rss_irn_t *s = get_rss_irn(rss, s_irn);
1009                         s->visited = 1;
1010                         /* assure bipartite property */
1011 #if 0
1012                         if (ir_nodeset_contains(&cbc->children, s_irn)) {
1013                                 ir_nodeset_remove(&cbc->children, s_irn);
1014                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F removed from to children\n", s_irn));
1015                         }
1016 #endif
1017                 }
1018
1019                 /* update edges */
1020                 foreach_pset(epk, k_edge) {
1021                         if (ir_nodeset_contains(&cbc->parents, k_edge->src) &&
1022                             ir_nodeset_contains(&cbc->children, k_edge->tgt)) {
1023                                 pset_insert(cbc->kill_edges, k_edge, HASH_RSS_EDGE(k_edge));
1024                                 /*
1025                                         Link all k_edges which are about to be removed together.
1026                                         Beware: pset_remove kills the iterator.
1027                                 */
1028                                 k_edge->next = kedge_root;
1029                                 kedge_root   = k_edge;
1030                         }
1031                 }
1032
1033                 /* remove all linked k_edges */
1034                 for (; kedge_root; kedge_root = kedge_root->next) {
1035                         pset_remove(epk, kedge_root, HASH_RSS_EDGE(kedge_root));
1036                 }
1037
1038                 /* verify the cbc */
1039                 foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
1040                         int is_killed = 0;
1041
1042                         foreach_pset(cbc->kill_edges, k_edge) {
1043                                 if (k_edge->src == s_irn) {
1044                                         is_killed = 1;
1045                                         pset_break(cbc->kill_edges);
1046                                         break;
1047                                 }
1048                         }
1049
1050                         if (! is_killed) {
1051                                 vrfy_ok = 0;
1052                                 ir_fprintf(stderr, "Warning: parent %+F is not killed in current cbc\n", s_irn);
1053                         }
1054                 }
1055                 assert(vrfy_ok && "Verification of CBC failed");
1056
1057                 /* add the connected bipartite component */
1058                 if (ir_nodeset_size(&cbc->parents) > 0 &&
1059                     ir_nodeset_size(&cbc->children) > 0 &&
1060                         pset_count(cbc->kill_edges) > 0)
1061                         pset_insert(rss->cbc_set, cbc, (unsigned)cbc->nr);
1062                 DBG((rss->dbg, LEVEL_2, "\tbipartite component %d inserted:\n", cbc->nr));
1063                 DEBUG_ONLY(
1064                         if (firm_dbg_get_mask(rss->dbg) & LEVEL_2)
1065                                 debug_print_cbc(rss->dbg, cbc);
1066                 );
1067         }
1068
1069         if (rss->opts->dump_flags & RSS_DUMP_CBC)
1070                 debug_vcg_dump_bipartite(rss);
1071
1072         del_pset(epk);
1073 }
1074
1075 /**
1076  * Select the child with the maximum cost.
1077  */
1078 static child_t *select_child_max_cost(rss_t *rss, ir_nodeset_t *x, ir_nodeset_t *y, child_t *t, cbc_t *cbc) {
1079         ir_node *child;
1080         ir_nodeset_iterator_t iter;
1081         float   max_cost = -1.0f;
1082
1083         DBG((rss->dbg, LEVEL_2, "\t\tcomputing children costs:\n"));
1084
1085         foreach_ir_nodeset(&cbc->children, child, iter) {
1086                 rss_irn_t  *r_child             = get_rss_irn(rss, child);
1087                 int         num_unkilled_parents = 0;
1088                 int         num_descendants;
1089                 rss_edge_t *k_edge;
1090                 float       cost;
1091
1092                 /* get the number of unkilled parents */
1093                 foreach_pset(cbc->kill_edges, k_edge) {
1094                         if (k_edge->tgt == child && ir_nodeset_contains(x, k_edge->src))
1095                                 ++num_unkilled_parents;
1096                 }
1097
1098                 cost = (float)num_unkilled_parents;
1099
1100                 num_descendants = plist_count(r_child->descendant_list) + ir_nodeset_size(y);
1101
1102                 if (num_descendants > 0)
1103                         cost /= (float)num_descendants;
1104
1105                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F, #desc %d, cost %.3f\n", child, num_descendants, cost));
1106
1107                 if (cost > max_cost) {
1108                         t->irn   = child;
1109                         t->cost  = cost;
1110                         max_cost = cost;
1111                 }
1112         }
1113
1114         return t;
1115 }
1116
1117 /**
1118  * Remove all parents from x which are killed by t_irn.
1119  */
1120 static void remove_covered_parents(rss_t *rss, ir_nodeset_t *x, ir_node *t_irn, cbc_t *cbc) {
1121         rss_irn_t  *t = get_rss_irn(rss, t_irn);
1122         rss_edge_t *k_edge;
1123
1124         DBG((rss->dbg, LEVEL_2, "\t\tremoving parents covered by %+F:\n", t_irn));
1125
1126         foreach_pset(cbc->kill_edges, k_edge) {
1127                 if (k_edge->tgt == t_irn && ir_nodeset_contains(x, k_edge->src)) {
1128                         ir_nodeset_remove(x, k_edge->src);
1129                         plist_insert_back(t->parent_list, k_edge->src);
1130                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", k_edge->src));
1131                 }
1132         }
1133 }
1134
1135 static void update_cumulated_descendent_values(rss_t *rss, ir_nodeset_t *y, ir_node *t_irn) {
1136         rss_irn_t *t = get_rss_irn(rss, t_irn);
1137         plist_element_t *el;
1138
1139         DBG((rss->dbg, LEVEL_2, "\t\tupdating cumulated descendant value of %+F:\n", t_irn));
1140
1141         foreach_plist(t->descendant_list, el) {
1142                 ir_nodeset_insert(y, plist_element_get_value(el));
1143                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", plist_element_get_value(el)));
1144         }
1145 }
1146
1147 /**
1148  * Greedy-k: a heuristics for the MMA problem
1149  */
1150 static void compute_killing_function(rss_t *rss) {
1151         cbc_t *cbc;
1152         struct obstack obst;
1153
1154         obstack_init(&obst);
1155
1156         rss->cbc_set = pset_new_ptr(5);
1157         compute_bipartite_decomposition(rss);
1158
1159         /* for all bipartite components do: */
1160         foreach_pset(rss->cbc_set, cbc) {
1161                 ir_node *p;
1162                 ir_nodeset_t x;
1163                 ir_nodeset_t y;
1164                 ir_nodeset_iterator_t iter;
1165                 child_t **sks    = NEW_ARR_F(child_t *, 20);
1166                 int     cur_len  = 0;
1167                 int     cur_size = 20;
1168                 int     i;
1169
1170                 ir_nodeset_init_size(&x, 10);
1171                 ir_nodeset_init_size(&y, 10);
1172
1173                 DBG((rss->dbg, LEVEL_1, "\tcomputing SKS for cbc %d:\n", cbc->nr));
1174                 DBG((rss->dbg, LEVEL_2, "\t\tinitializing parents X:\n"));
1175
1176                 /* X = S_cb (all parents are initially uncovered) */
1177                 foreach_ir_nodeset(&cbc->parents, p, iter) {
1178                         ir_nodeset_insert(&x, p);
1179                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", p));
1180                 }
1181
1182                 /* while X not empty */
1183                 while (ir_nodeset_size(&x) > 0) {
1184                         child_t *t = obstack_alloc(&obst, sizeof(*t));
1185                         memset(t, 0, sizeof(*t));
1186
1187                         t = select_child_max_cost(rss, &x, &y, t, cbc);
1188
1189                         if (cur_len >= cur_size) {
1190                                 ARR_EXTO(child_t *, sks, cur_size * 2);
1191                                 cur_size *= 2;
1192                         }
1193
1194                         DBG((rss->dbg, LEVEL_2, "\t\tinsert child %+F (%.3f) into SKS at pos %d\n", t->irn, t->cost, cur_len));
1195
1196                         sks[cur_len++] = t;
1197                         remove_covered_parents(rss, &x, t->irn, cbc);
1198                         update_cumulated_descendent_values(rss, &y, t->irn);
1199                 }
1200
1201                 ARR_SHRINKLEN(sks, cur_len);
1202
1203                 /* sort SKS in increasing cost order */
1204                 qsort(sks, cur_len, sizeof(sks[0]), cmp_child_costs);
1205
1206                 DBG((rss->dbg, LEVEL_2, "\tprocessing SKS for cbc %d:\n", cbc->nr));
1207
1208                 /* build killing function */
1209                 for (i = cur_len - 1; i >= 0; --i) { /* loop over sks in decreasing cost order */
1210                         child_t         *t = sks[i];
1211                         rss_irn_t       *rt = get_rss_irn(rss, t->irn);
1212                         plist_element_t *p_el;
1213
1214                         DBG((rss->dbg, LEVEL_3, "\t\t\tkiller %+F (%.3f):\n", t->irn, t->cost));
1215
1216                         /* kill all unkilled parents of t */
1217                         foreach_plist(rt->parent_list, p_el) {
1218                                 ir_node    *par = plist_element_get_value(p_el);
1219                                 rss_irn_t *rpar = get_rss_irn(rss, par);
1220
1221                                 if (is_Sink(rpar->killer)) {
1222                                         rpar->killer = t->irn;
1223                                         DBG((rss->dbg, LEVEL_2, "\t\tkill %+F\n", rpar->irn));
1224                                 }
1225                                 else {
1226                                         DBG((rss->dbg, LEVEL_3, "\t\t\tkeeping %+F as killer for %+F\n", rpar->killer, rpar->irn));
1227                                 }
1228                         }
1229                 }
1230
1231                 ir_nodeset_destroy(&x);
1232                 ir_nodeset_destroy(&y);
1233                 DEL_ARR_F(sks);
1234         }
1235
1236         if (rss->opts->dump_flags & RSS_DUMP_KILL)
1237                 debug_vcg_dump_kill(rss);
1238
1239         del_pset(rss->cbc_set);
1240         obstack_free(&obst, NULL);
1241 }
1242
1243 /**
1244  * Adds the edge src -> tgt to the dvg. Checks if reverse edge is already there (asserts).
1245  */
1246 static INLINE void add_dvg_edge(rss_t *rss, dvg_t *dvg, ir_node *src, ir_node *tgt, int have_source) {
1247         rss_edge_t *dvg_edge;
1248         rss_edge_t key;
1249
1250         if (! have_source)
1251                 ir_nodeset_insert(&dvg->nodes, src);
1252         else
1253                 assert(ir_nodeset_contains(&dvg->nodes, src) && "Wrong assumption");
1254
1255         ir_nodeset_insert(&dvg->nodes, tgt);
1256
1257         key.src = tgt;
1258         key.tgt = src;
1259         assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1260
1261         key.src = src;
1262         key.tgt = tgt;
1263         if (NULL != pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1264                 /* add the edge to the DVG */
1265                 dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1266
1267                 dvg_edge->src  = src;
1268                 dvg_edge->tgt  = tgt;
1269                 dvg_edge->next = NULL;
1270
1271                 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", src, tgt));
1272                 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1273         }
1274 }
1275
1276 /**
1277  * Computes the disjoint value DAG (DVG).
1278  * BEWARE: It is not made explicitly clear in the Touati paper,
1279  *         but the DVG is meant to be build from the KILLING DAG
1280  */
1281 static void compute_dvg(rss_t *rss, dvg_t *dvg) {
1282         plist_element_t *el;
1283
1284         DBG((rss->dbg, LEVEL_1, "\tcomputing DVG:\n"));
1285
1286         foreach_plist(rss->nodes, el) {
1287                 ir_node    *u_irn    = plist_element_get_value(el);
1288                 rss_irn_t  *u        = get_rss_irn(rss, u_irn);
1289                 rss_irn_t  *u_killer = get_rss_irn(rss, u->killer);
1290                 int        i;
1291
1292                 /* TODO: omit nodes only having sink as pkiller and killing no one */
1293
1294                 /* add an edge to killer */
1295                 add_dvg_edge(rss, dvg, u_irn, u->killer, 0);
1296
1297                 if (is_Sink(u->killer))
1298                         continue;
1299
1300                 /* We add an edge to every killer, from where we could be reached. */
1301                 for (i = ARR_LEN_SAFE(u_killer->descendants) - 1; i >= 0; --i) {
1302                         add_dvg_edge(rss, dvg, u_irn, u_killer->descendants[i], 1);
1303                 }
1304
1305 #if 0
1306
1307                 foreach_plist(rss->nodes, el2) {
1308                         ir_node *v_irn = plist_element_get_value(el2);
1309
1310                         /*
1311                                 There is an edge (u, v) in the DVG iff v is a descendant of the killer(u).
1312                         */
1313                         if (BSEARCH_IRN_ARR(v_irn, u_kill->descendants)) {
1314                                 rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1315                                 rss_edge_t key;
1316
1317                                 /* insert the user into the DVG and append it to the user list of u */
1318                                 ir_nodeset_insert(&dvg->nodes, v_irn);
1319                                 if (! plist_has_value(u->dvg_user_list, v_irn))
1320                                         plist_insert_back(u->dvg_user_list, v_irn);
1321
1322                                 dvg_edge->src  = u_irn;
1323                                 dvg_edge->tgt  = v_irn;
1324                                 dvg_edge->next = NULL;
1325
1326                                 key.src = v_irn;
1327                                 key.tgt = u_irn;
1328
1329                                 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1330
1331                                 /* add the edge to the DVG */
1332                                 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", u_irn, v_irn));
1333                                 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1334                         }
1335                 }
1336 #endif /* if 0 */
1337         }
1338
1339         if (rss->opts->dump_flags & RSS_DUMP_DVG)
1340                 debug_vcg_dump_dvg(rss, dvg);
1341 }
1342
1343 /**
1344  * Updates the dvg structure when a serialization edge from src -> tgt is added.
1345  */
1346 static void update_dvg(rss_t *rss, dvg_t *dvg, rss_irn_t *src, rss_irn_t *tgt) {
1347         int i, j, idx;
1348         rss_edge_t *edge;
1349         rss_edge_t **arr = alloca(pset_count(dvg->edges) * sizeof(arr[0]));
1350
1351         /*
1352                 Add an edge from serialization target to serialization src:
1353                 src cannot be alive with target
1354         */
1355         add_dvg_edge(rss, dvg, tgt->irn, src->irn, 0);
1356
1357         /* Add edges to src's descendants as well, they also getting serialized. */
1358         for (i = ARR_LEN_SAFE(src->descendants) - 1; i >= 0; --i) {
1359                 add_dvg_edge(rss, dvg, tgt->irn, src->descendants[i], 1);
1360         }
1361
1362         /* We also have to add edges from targets predecessors in dvg */
1363         idx = 0;
1364         /* We cannot insert elements into set over which we iterate ... */
1365         foreach_pset(dvg->edges, edge) {
1366                 if (edge->tgt == tgt->irn) {
1367                         arr[idx++] = edge;
1368                 }
1369         }
1370
1371         for (i = 0; i < idx; ++i) {
1372                 edge = arr[i];
1373                 add_dvg_edge(rss, dvg, edge->src, src->irn, 1);
1374                 for (j = ARR_LEN_SAFE(src->descendants) - 1; j >= 0; --j) {
1375                         add_dvg_edge(rss, dvg, edge->src, src->descendants[j], 1);
1376                 }
1377         }
1378 }
1379
1380 #if 0
1381 /**
1382  * Accumulate all descendants for root into list.
1383  */
1384 static void accumulate_dvg_descendant_values(rss_t *rss, rss_irn_t *root, plist_t *list) {
1385         if (plist_count(root->dvg_user_list) > 0) {
1386                 plist_element_t *el;
1387
1388                 foreach_plist(root->dvg_user_list, el) {
1389                         ir_node   *v_irn = plist_element_get_value(el);
1390                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
1391
1392                         /* add v as descendant */
1393                         if (! plist_has_value(list, v_irn)) {
1394                                 plist_insert_back(list, v_irn);
1395                                 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG descendant %+F\n", v_irn));
1396                         }
1397
1398                         /* accumulate v's descendants */
1399                         accumulate_dvg_descendant_values(rss, v, list);
1400                 }
1401         }
1402 }
1403
1404 /**
1405  * Builds the list of potential killers for each node
1406  * in the given DVG.
1407  * Needs the descendant list for all user as sorted array.
1408  */
1409 static void build_dvg_pkiller_list(rss_t *rss, dvg_t *dvg) {
1410         ir_nodeset_iterator_t iter;
1411         ir_node *irn;
1412
1413         foreach_nodeset(&dvg->nodes, irn, iter) {
1414                 rss_irn_t       *node = get_rss_irn(rss, irn);
1415                 plist_element_t *el, *el2;
1416
1417                 DBG((rss->dbg, DEBUG_DVG, "\t\tbuilding pkiller list for %+F\n", irn));
1418
1419                 /* check each user */
1420                 foreach_plist(node->dvg_user_list, el) {
1421                         ir_node *u_irn = plist_element_get_value(el);
1422
1423                         /* is the current user u_irn not a descendant of any other user -> pkiller */
1424                         foreach_plist(node->dvg_user_list, el2) {
1425                                 ir_node   *v_irn = plist_element_get_value(el2);
1426                                 rss_irn_t *v     = get_rss_irn(rss, v_irn);
1427
1428                                 if (el != el2                             &&
1429                                         ! BSEARCH_IRN_ARR(u_irn, v->dvg_desc) &&
1430                                         ! plist_has_value(node->dvg_pkiller_list, u_irn))
1431                                 {
1432                                         plist_insert_back(node->dvg_pkiller_list, u_irn);
1433                                         DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG pkiller %+F\n", u_irn));
1434                                 }
1435                         }
1436                 }
1437
1438                 node->dvg_pkiller = build_sorted_array_from_list(node->dvg_pkiller_list, phase_obst(&rss->ph));
1439         }
1440
1441         DEBUG_ONLY(
1442                 if (firm_dbg_get_mask(rss->dbg) & DEBUG_DVG)
1443                         debug_vcg_dump_dvg_pkiller(rss, dvg);
1444         );
1445 }
1446
1447 #endif /* if 0 */
1448
1449 /**
1450  * Compute the maximal antichain of the current DVG.
1451  * This is a reimplementation of the MAXIMAL_ANTI_CHAIN function
1452  * from the DDG library 1.1 (DAG.cpp).
1453  */
1454 static ir_nodeset_t *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration) {
1455         int         n               = ir_nodeset_size(&dvg->nodes);
1456         int         *assignment     = alloca(n * sizeof(assignment[0]));
1457         int         *assignment_rev = alloca(n * sizeof(assignment_rev[0]));
1458         int         *idx_map        = alloca(n * sizeof(idx_map[0]));
1459         hungarian_problem_t *bp;
1460         ir_nodeset_t *values, *temp;
1461         ir_nodeset_iterator_t iter;
1462         ir_node     *u_irn;
1463         int         i, j, cost, cur_chain, res;
1464         rss_edge_t  *dvg_edge;
1465
1466 #define MAP_IDX(irn) bsearch_for_index(get_irn_idx(irn), idx_map,  n,  1)
1467
1468         if (pset_count(dvg->edges) == 0)
1469                 return NULL;
1470
1471         bp = hungarian_new(n, n, 1, HUNGARIAN_MATCH_NORMAL);
1472
1473         /*
1474                 At first, we build an index map for the nodes in the DVG,
1475                 because we cannot use the irn idx therefore as the resulting
1476                 bipartite data structure would have around 1.2GB.
1477                 So we limit the size to the number of nodes we have in the DVG
1478                 and build a sorted index map for their irn indices.
1479         */
1480         i = 0;
1481         foreach_ir_nodeset(&dvg->nodes, u_irn, iter) {
1482                 idx_map[i++] = get_irn_idx(u_irn);
1483         }
1484         qsort(idx_map, n, sizeof(idx_map[0]), cmp_int);
1485
1486         foreach_pset(dvg->edges, dvg_edge) {
1487                 int idx_u = MAP_IDX(dvg_edge->src);
1488                 int idx_v = MAP_IDX(dvg_edge->tgt);
1489
1490                 /* add the entry to the bipartite data structure */
1491                 hungarian_add(bp, idx_u, idx_v, 1);
1492                 DBG((rss->dbg, LEVEL_3, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1493                         idx_u, dvg_edge->src, idx_v, dvg_edge->tgt));
1494         }
1495 #if 0
1496         /*
1497                 Add a bipartite entry for each pair of nodes (u, v), where exists a
1498                 path in the DVG from u to v, ie. connect all descendants(v) to v.
1499                 desc_dvg(v) = accumulated descendants of all z in dvg_user_list(v)
1500         */
1501         foreach_ir_nodeset(&dvg->nodes, u_irn, iter) {
1502                 rss_irn_t *u        = get_rss_irn(rss, u_irn);
1503                 int       idx_u_irn = MAP_IDX(u_irn);
1504
1505                 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG descendants of %+F:\n", u_irn));
1506
1507                 //plist_clear(u->dvg_desc_list);
1508                 //accumulate_dvg_descendant_values(rss, u, u->dvg_desc_list);
1509
1510                 /*
1511                         FIXME: The array is build on the phase obstack and we cannot free the data.
1512                         So the array get as many times allocated as this function is called.
1513                 */
1514
1515                 /* build the sorted array for faster searches */
1516                 //u->dvg_desc = build_sorted_array_from_list(u->dvg_desc_list, phase_obst(&rss->ph));
1517
1518                 DBG((rss->dbg, DEBUG_MAX_AC, "\t\tadding bipartite entries of %+F:\n", u_irn));
1519
1520                 /* add the bipartite entries for all descendants */
1521                 for (i = ARR_LEN_SAFE(u->dvg_desc) - 1; i >= 0; --i) {
1522                         rss_irn_t *desc    = get_rss_irn(rss, u->dvg_desc[i]);
1523                         int       idx_desc = MAP_IDX(u->dvg_desc[i]);
1524
1525                         /* add the entry to the bipartite data structure */
1526                         hungarian_add(bp, idx_u_irn, idx_desc, 1);
1527                         DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1528                                 idx_u_irn, u_irn, idx_desc, u->dvg_desc[i]));
1529
1530                         need_matching = 1;
1531                 }
1532         }
1533 #endif
1534
1535         /* We want maximum cardinality matching */
1536         hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);
1537
1538 #if 0
1539         DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG pkiller:\n"));
1540         /* beware: the following function needs the dvg_desc array */
1541         build_dvg_pkiller_list(rss, dvg);
1542 #endif
1543
1544         DBG((rss->dbg, LEVEL_2, "\t\tcomputing bipartite matching\n"));
1545         /*
1546                 The maximum cardinality bipartite matching gives us the minimal
1547                 chain partition, which corresponds to the maximum anti chains.
1548         */
1549         res = hungarian_solve(bp, assignment, &cost, 1);
1550         assert(res == 0 && "Bipartite matching failed!");
1551
1552         hungarian_free(bp);
1553         memset(assignment_rev, -1, n * sizeof(assignment_rev[0]));
1554
1555         /* build the reverse assignment, ie. foreach i -> j, add j -> i */
1556         for (i = 0; i < n; ++i) {
1557                 if (assignment[i] >= 0) {
1558                         int j = assignment[i];
1559                         assignment_rev[j] = i;
1560                 }
1561         }
1562
1563         DBG((rss->dbg, LEVEL_2, "\t\tgot assignment with cost %d\n", cost));
1564         DBG((rss->dbg, LEVEL_3, "\t\t\tassignment   ---   reverse assignment\n", cost));
1565         for (i = 0; i < n; ++i) {
1566                 DBG((rss->dbg, LEVEL_3, "\t\t\t%3d -> %3d         %3d -> %3d\n", i, assignment[i], i, assignment_rev[i]));
1567         }
1568
1569         values    = xmalloc(sizeof(values[0]));
1570         ir_nodeset_init_size(values, 10);
1571         cur_chain = 0;
1572         /* Construction of the minimal chain partition */
1573         for (j = 0; j < n; ++j) {
1574                 /* check nodes, which did not occur as target */
1575                 if (assignment_rev[j] == -1) {
1576                         int       xj      = idx_map[j];
1577                         ir_node   *xj_irn = get_idx_irn(rss->irg, xj);
1578                         rss_irn_t *xj_rss = get_rss_irn(rss, xj_irn);
1579                         chain_t   *c      = obstack_alloc(phase_obst(&rss->ph), sizeof(*c));
1580                         int       source;
1581
1582                         /* there was no source for j -> we have a source of a new chain */
1583                         ir_nodeset_insert(values, xj_irn);
1584
1585                         c->elements = plist_obstack_new(phase_obst(&rss->ph));
1586                         c->nr       = cur_chain++;
1587                         plist_insert_back(c->elements, xj_irn);
1588
1589                         xj_rss->chain = c;
1590
1591                         DBG((rss->dbg, LEVEL_2, "\t\tstarting chain %d:\n", c->nr));
1592                         DBG((rss->dbg, LEVEL_2, "\t\t\t%+F (%d)", xj_irn, j));
1593
1594                         /* follow chain, having j as source */
1595                         source = j;
1596                         while (assignment[source] >= 0) {
1597                                 int       target  = assignment[source];
1598                                 int       irn_idx = idx_map[target];
1599                                 ir_node   *irn    = get_idx_irn(rss->irg, irn_idx);
1600                                 rss_irn_t *node   = get_rss_irn(rss, irn);
1601
1602                                 plist_insert_back(c->elements, irn);
1603                                 node->chain = c;
1604
1605                                 DB((rss->dbg, LEVEL_2, " -> %+F (%d)", irn, target));
1606
1607                                 /* new source = last target */
1608                                 source = target;
1609                         }
1610
1611                         DB((rss->dbg, LEVEL_2, "\n"));
1612                 }
1613         }
1614
1615         /*
1616                 Computing the maximal antichain: Select an element from each
1617                 chain such, such it is parallel with the others.
1618         */
1619         DBG((rss->dbg, LEVEL_1, "\t\tcomputing set of saturation values (MAX AC)\n"));
1620         DBG((rss->dbg, LEVEL_3, "\t\tstarting with:\n"));
1621
1622         DEBUG_ONLY(
1623                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_3)
1624                         dump_nodeset(values, "\t\t\t");
1625         )
1626
1627         temp = NULL;
1628         do {
1629                 /*
1630                         We need an explicit array for the values as
1631                         we cannot iterate multiple times over the same
1632                         set at the same time. :-(((((
1633                         TODO Matze: now we can, so rewrite this...
1634                 */
1635                 int     n         = ir_nodeset_size(values);
1636                 int     i         = 0;
1637                 ir_node **val_arr = NEW_ARR_F(ir_node *, n);
1638
1639                 foreach_ir_nodeset(values, u_irn, iter)
1640                         val_arr[i++] = u_irn;
1641
1642                 if (temp) {
1643                         ir_nodeset_destroy(temp);
1644                         free(temp);
1645                 }
1646
1647                 temp = xmalloc(sizeof(temp[0]));
1648                 ir_nodeset_init_size(temp, 10);
1649
1650                 /* Select all nodes from current value set, having another node in the set as descendant. */
1651                 for (i = 0; i < n; ++i) {
1652                         rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1653
1654                         for (j = 0; j < n; ++j) {
1655                                 if (i != j) {
1656                                         rss_edge_t key;
1657
1658                                         key.src = val_arr[i];
1659                                         key.tgt = val_arr[j];
1660
1661                                         if (pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1662                                                 /* v[j] is descendant of u -> remove u and break */
1663                                                 ir_nodeset_insert(temp, u->irn);
1664                                                 ir_nodeset_remove(values, u->irn);
1665
1666                                                 DBG((rss->dbg, LEVEL_3, "\t\t\tremoving %+F from values, adding it to temp\n", u->irn));
1667
1668                                                 break;
1669                                         }
1670                                 }
1671                         }
1672                 }
1673
1674                 /* Try to insert the chain predecessor of all selected u's */
1675                 foreach_ir_nodeset(temp, u_irn, iter) {
1676                         rss_irn_t       *u  = get_rss_irn(rss, u_irn);
1677                         chain_t         *c  = u->chain;
1678                         plist_element_t *el = plist_find_value(c->elements, u_irn);
1679
1680                         assert(el && "Missing element in chain!");
1681
1682                         /* If u has predecessor in chain: insert the predecessor */
1683                         if (el == plist_element_get_prev(el)) {
1684                                 ir_nodeset_insert(values, plist_element_get_value(el));
1685                                 DBG((rss->dbg, LEVEL_3, "\t\t\tadding %+F to values\n", plist_element_get_value(el)));
1686                         }
1687                 }
1688
1689
1690                 DEL_ARR_F(val_arr);
1691         } while (ir_nodeset_size(temp) > 0);
1692
1693         DBG((rss->dbg, LEVEL_2, "\t\tfinal set:\n"));
1694         DEBUG_ONLY(
1695                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2) {
1696                         dump_nodeset(values, "\t\t\t");
1697                 }
1698         );
1699
1700         if (rss->opts->dump_flags & RSS_DUMP_MAXAC)
1701                 debug_vcg_dump_pkg(rss, values, iteration);
1702
1703         if(temp != NULL) {
1704                 ir_nodeset_destroy(temp);
1705                 free(temp);
1706         }
1707
1708         return values;
1709
1710 #undef MAP_IDX
1711 }
1712
1713 /**
1714  * Computes the best serialization between two nodes of sat_vals.
1715  */
1716 static serialization_t *compute_best_admissible_serialization(rss_t *rss, ir_nodeset_t *sat_vals, serialization_t *ser, int num_regs) {
1717         int        n                    = ir_nodeset_size(sat_vals);
1718         int        n_idx                = ARR_LEN_SAFE(rss->idx_map);
1719         int        i                    = 0;
1720         ir_node    **val_arr            = alloca(n * sizeof(val_arr[0]));
1721         bitset_t   *bs_sv               = bitset_alloca(n_idx);
1722         bitset_t   *bs_vdesc            = bitset_alloca(n_idx);
1723         bitset_t   *bs_tmp              = bitset_alloca(n_idx);
1724         bitset_t   *bs_ukilldesc        = bitset_alloca(n_idx);
1725         int        best_benefit         = INT_MAX;
1726         int        best_omega2          = INT_MAX;
1727         int        best_benefit_omega20 = INT_MAX;
1728         int        has_omega1           = 0;
1729         int        j, k;
1730         ir_node    *irn;
1731         ir_nodeset_iterator_t iter;
1732         rss_edge_t min_benefit_edge;
1733         rss_edge_t min_omega20_edge;
1734         rss_irn_t  *ser_u_omega1 = NULL, *ser_v_omega1 = NULL;
1735         rss_irn_t  *ser_u_omega20 = NULL, *ser_v_omega20 = NULL;
1736
1737         DBG((rss->dbg, LEVEL_1, "\tcomputing admissible serializations:\n"));
1738
1739         /*
1740                 We need an explicit array for the values as
1741                 we cannot iterate multiple times over the same
1742                 set at the same time. :-(((((
1743         */
1744
1745         foreach_ir_nodeset(sat_vals, irn, iter) {
1746                 val_arr[i++] = irn;
1747                 bitset_set(bs_sv, BLOCK_IDX_MAP(rss, irn));
1748         }
1749
1750         /*
1751                 We build all admissible serializations and remember the best found so far.
1752                 For u in sat_vals:
1753                  For v in sat_val:
1754                    if v in pkiller(u): add edge from v to all other pkiller(u)
1755                    else: for all vv in pkiller(u): add edge from v to vv if there exists no path from vv to v
1756         */
1757
1758 /*
1759         A node is unserializable if:
1760         - it has only one killer and this one is Sink
1761     - it kills no other values
1762         In this case there is no serialization which could
1763         reduce the registerpressure
1764 */
1765 #define IS_UNSERIALIZABLE_NODE(rss_node)                  \
1766         (                                                     \
1767                 (                                                 \
1768                         (plist_count(rss_node->pkiller_list) == 1) && \
1769                         is_Sink(rss_node->killer)                  && \
1770                         (rss_node->kill_count                == 0)    \
1771                 )                            ||                   \
1772                 be_is_Barrier(rss_node->irn) ||                   \
1773                 be_is_Keep(rss_node->irn)                         \
1774         )
1775
1776         /* for all u in sat_vals */
1777         for (i = 0; i < n; ++i) {
1778                 rss_irn_t       *u = get_rss_irn(rss, val_arr[i]);
1779                 plist_element_t *el;
1780
1781                 /* ignore nodes where serialization does not help */
1782                 if (IS_UNSERIALIZABLE_NODE(u)) {
1783                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", u->irn));
1784                         continue;
1785                 }
1786
1787                 /* accumulate all descendants of all pkiller(u) */
1788                 bitset_clear_all(bs_ukilldesc);
1789                 foreach_plist(u->pkiller_list, el) {
1790                         ir_node   *irn  = plist_element_get_value(el);
1791                         rss_irn_t *node = get_rss_irn(rss, irn);
1792
1793                         if (! is_Sink(irn))
1794                                 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, irn));
1795                         else
1796                                 continue;
1797
1798                         for (k = ARR_LEN_SAFE(node->descendants) - 1; k >= 0; --k) {
1799                                 if (! is_Sink(node->descendants[k]))
1800                                         bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, node->descendants[k]));
1801                         }
1802                 }
1803
1804                 /* for all v in sat_vals */
1805                 for (j = 0; j < n; ++j) {
1806                         ir_node   *v_irn   = val_arr[j];
1807                         rss_irn_t *v       = get_rss_irn(rss, v_irn);
1808                         unsigned  v_height = get_irn_height(rss->h, v_irn);
1809                         int       omega1, omega2, is_pkiller;
1810
1811                         /* v cannot be serialized with itself
1812                          * ignore nodes where serialization does not help */
1813                         if (i == j || IS_UNSERIALIZABLE_NODE(v)) {
1814                                 if (i != j)
1815                                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", v->irn));
1816                                 continue;
1817                         }
1818
1819                         /* get descendants of v */
1820                         bitset_clear_all(bs_vdesc);
1821                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v_irn));
1822                         for (k = ARR_LEN_SAFE(v->descendants) - 1; k >= 0; --k) {
1823                                 if (! is_Sink(v->descendants[k]))
1824                                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v->descendants[k]));
1825                         }
1826
1827                         /* if v is in pkiller(u) */
1828                         is_pkiller = plist_has_value(u->pkiller_list, v_irn);
1829
1830                         /* for all vv in pkiller(u) */
1831                         foreach_plist(u->pkiller_list, el) {
1832                                 ir_node *vv_irn  = plist_element_get_value(el);
1833                                 int     add_edge;
1834
1835                                 if (is_Sink(vv_irn) || is_cfop(vv_irn))
1836                                         continue;
1837
1838                                 if (is_pkiller)
1839                                         add_edge = vv_irn != v_irn && skip_Proj(vv_irn) != skip_Proj(v_irn);
1840                                 else
1841                                         add_edge = ! heights_reachable_in_block(rss->h, skip_Proj(vv_irn), skip_Proj(v_irn));
1842
1843                                 /*
1844                                         As we add an edge from vv -> v, we have to make sure,
1845                                         that there exists no path from v to vv.
1846                                 */
1847
1848                                 if (add_edge) {
1849                                         unsigned vv_height = get_irn_height(rss->h, vv_irn);
1850                                         unsigned critical_path_cost;
1851                                         unsigned mu1, mu2;
1852
1853                                         /*
1854                                                 mu1 = | descendants(v) cut sat_vals |
1855                                                 the number of saturating values which cannot
1856                                                 be simultaneously alive with u
1857                                         */
1858                                         bitset_copy(bs_tmp, bs_vdesc);
1859                                         mu1 = bitset_popcnt(bitset_and(bs_tmp, bs_sv));
1860
1861                                         /*
1862                                                 mu2 = | accum_desc_all_pkiller(u) without descendants(v) |
1863                                         */
1864                                         if (is_pkiller) {
1865                                                 bitset_copy(bs_tmp, bs_ukilldesc);
1866                                                 mu2 = bitset_popcnt(bitset_andnot(bs_tmp, bs_vdesc));
1867                                         }
1868                                         else {
1869                                                 mu2 = 0;
1870                                         }
1871
1872                                         /* omega1 = mu1 - mu2 */
1873                                         omega1 = mu1 - mu2;
1874
1875                                         if (omega1 != 0)
1876                                                 has_omega1 = 1;
1877
1878                                         /* omega2 = increase of critical path */
1879                                         critical_path_cost =
1880                                                 v_height                        /* longest path from v to sink */
1881                                                 + rss->max_height - vv_height   /* longest path from source to vv */
1882                                                 + 1;                            /* edge */
1883
1884                                         /*
1885                                                 If critical_path_cost > max_height -> the new edge
1886                                                 would increase the longest critical path by the difference.
1887                                         */
1888                                         omega2 = critical_path_cost > rss->max_height ? critical_path_cost - rss->max_height : 0;
1889
1890                                         /* this keeps track of the edge with the best benefit */
1891                                         if (omega1 >= num_regs - n && omega1 < best_benefit) {
1892                                                 min_benefit_edge.src = v_irn;
1893                                                 min_benefit_edge.tgt = vv_irn;
1894
1895                                                 ser_u_omega1 = u;
1896                                                 ser_v_omega1 = v;
1897
1898                                                 best_benefit    = omega1;
1899                                                 ser->new_killer = is_pkiller;
1900                                         }
1901
1902                                         /* this keeps track of the edge with the best omega1 costs where omega2 == 0 */
1903                                         if (omega2 == 0 && omega1 >= num_regs - n && omega1 < best_benefit_omega20) {
1904                                                 min_omega20_edge.src = v_irn;
1905                                                 min_omega20_edge.tgt = vv_irn;
1906
1907                                                 ser_u_omega20 = u;
1908                                                 ser_v_omega20 = v;
1909
1910                                                 best_benefit_omega20 = omega1;
1911                                                 ser->new_killer      = is_pkiller;
1912                                         }
1913
1914                                         best_omega2 = MIN(best_omega2, omega2);
1915
1916                                         DBG((rss->dbg, LEVEL_2, "\t\tfound %+F -> %+F (w1 %d, w2 %d)\n",
1917                                                 v_irn, vv_irn, omega1, omega2));
1918                                 } /* if add_edge */
1919                         } /* for all vv in pkiller(u) */
1920                 } /* for all v in sat_vals */
1921         } /* for all u in sat_vals */
1922
1923         if (! has_omega1)
1924                 return NULL;
1925
1926         if (best_omega2 == 0) {
1927                 ser->u         = ser_u_omega20;
1928                 ser->v         = ser_v_omega20;
1929                 ser->edge->src = min_omega20_edge.src;
1930                 ser->edge->tgt = min_omega20_edge.tgt;
1931                 ser->omega1    = best_benefit_omega20;
1932                 ser->omega2    = best_omega2;
1933         }
1934         else {
1935                 ser->u         = ser_u_omega1;
1936                 ser->v         = ser_v_omega1;
1937                 ser->edge->src = min_benefit_edge.src;
1938                 ser->edge->tgt = min_benefit_edge.tgt;
1939                 ser->omega1    = best_benefit;
1940                 ser->omega2    = best_omega2;
1941         }
1942
1943         return ser;
1944
1945 #undef IS_UNSERIALIZABLE_NODE
1946 }
1947
1948 /**
1949  * Perform the value serialization heuristic and add all
1950  * computed serialization edges as dependencies to the irg.
1951  */
1952 static void perform_value_serialization_heuristic(rss_t *rss) {
1953         bitset_t *arch_nonign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1954         bitset_t *abi_ign_bs     = bitset_alloca(arch_register_class_n_regs(rss->cls));
1955         unsigned available_regs, iteration;
1956         dvg_t    dvg;
1957         ir_nodeset_t *sat_vals;
1958         pset *ser_set = new_pset(cmp_rss_edges, 20);
1959
1960         /* available_regs = R = |arch_non_ignore_regs cut ~abi_ignore_regs| */
1961         arch_put_non_ignore_regs(rss->arch_env, rss->cls, arch_nonign_bs);
1962         be_abi_put_ignore_regs(rss->abi, rss->cls, abi_ign_bs);
1963         bitset_andnot(arch_nonign_bs, abi_ign_bs);
1964         available_regs  = bitset_popcnt(arch_nonign_bs);
1965         //num_live = pset_count(rss->live_block);
1966         //available_regs -= num_live < available_regs ? num_live : 0;
1967
1968         DBG((rss->dbg, LEVEL_1, "\n\t#available regs: %d\n\n", available_regs));
1969
1970         /*
1971                 At first we need to compute the disjoint value DAG (DVG = {V, E_dv}).
1972                 V    = set of all nodes we are currently interested in
1973                 E_dv = there is an edge from u to v iff v is a descendant of killer(u), forall u, v in V
1974         */
1975         ir_nodeset_init_size(&dvg.nodes, plist_count(rss->nodes));
1976         dvg.edges = new_pset(cmp_rss_edges, plist_count(rss->nodes) * 5);
1977         compute_dvg(rss, &dvg);
1978
1979         /*
1980                 Then we perform the heuristic serialization algorithm
1981                 on the DVG which gives us all necessary serialization
1982                 edges.
1983         */
1984         DBG((rss->dbg, LEVEL_1, "\tcomputing maximal antichain:\n"));
1985         iteration = 0;
1986         sat_vals  = compute_maximal_antichain(rss, &dvg, iteration++);
1987         while (sat_vals && (ir_nodeset_size(sat_vals) > available_regs)) {
1988                 serialization_t *ser, best_ser;
1989                 rss_edge_t      *edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*edge));
1990                 ir_node         *dep_src, *dep_tgt;
1991
1992                 best_ser.edge = edge;
1993                 ser = compute_best_admissible_serialization(rss, sat_vals, &best_ser, available_regs);
1994
1995                 DBG((rss->dbg, LEVEL_1, "\tcurrent register saturation %d, target %d\n", ir_nodeset_size(sat_vals), available_regs));
1996
1997                 if (! ser) {
1998                         DBG((rss->dbg, LEVEL_1, "\tno RS improving serialization found, breaking at iteration %d\n", iteration));
1999                         break;
2000                 }
2001
2002                 /* Insert the serialization as dependency edge into the irg. */
2003                 DBG((rss->dbg, LEVEL_1, "\tserializing %+F -> %+F with edge %+F -> %+F and cost %d, %d\n",
2004                         ser->u->irn, ser->v->irn, ser->edge->src, ser->edge->tgt, ser->omega1, ser->omega2));
2005
2006                 if (pset_find(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge)))
2007                         ir_printf("WARNING: serialization %+F -> %+F computed twice!\n", ser->edge->src, ser->edge->tgt);
2008
2009
2010                 pset_insert(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge));
2011
2012                 /* update the dvg */
2013                 update_dvg(rss, &dvg, ser->v, ser->u);
2014                 update_dvg(rss, &dvg, get_rss_irn(rss, ser->edge->src), get_rss_irn(rss, ser->edge->tgt));
2015                 if(sat_vals != NULL) {
2016                         ir_nodeset_destroy(sat_vals);
2017                         free(sat_vals);
2018                 }
2019
2020                 dep_src = skip_Proj(ser->edge->src);
2021                 dep_tgt = ser->edge->tgt;
2022                 add_irn_dep(dep_src, dep_tgt);
2023
2024                 /* Update descendants, consumer and pkillers of the target */
2025                 update_node_info(rss, ser->edge->tgt, ser->edge->src);
2026
2027                 /* TODO: try to find a cheaper way for updating height information */
2028                 rss->max_height = heights_recompute_block(rss->h, rss->block);
2029
2030                 /* Recompute the antichain for next serialization */
2031                 DBG((rss->dbg, LEVEL_1, "\tre-computing maximal antichain:\n"));
2032                 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
2033         }
2034
2035         ir_nodeset_destroy(&dvg.nodes);
2036         del_pset(dvg.edges);
2037 }
2038
2039 /**
2040  * Do initial calculations for a block.
2041  */
2042 static void process_block(ir_node *block, void *env) {
2043         rss_t *rss = env;
2044         int   i, n;
2045         const ir_edge_t *edge;
2046
2047         phase_init(&rss->ph, "rss block preprocessor", rss->irg, PHASE_DEFAULT_GROWTH, init_rss_irn, NULL);
2048
2049         DBG((rss->dbg, LEVEL_1, "preprocessing block %+F\n", block));
2050         rss->block = block;
2051
2052         /* build an index map for all nodes in the current block */
2053         i            = 0;
2054         n            = get_irn_n_edges(block);
2055         NEW_ARR_A(int *, rss->idx_map, n);
2056         foreach_out_edge(block, edge) {
2057                 ir_node *irn      = get_edge_src_irn(edge);
2058                 rss->idx_map[i++] = get_irn_idx(irn);
2059         }
2060         qsort(rss->idx_map, n, sizeof(rss->idx_map[0]), cmp_int);
2061         rss->max_height = heights_recompute_block(rss->h, block);
2062
2063         /* loop over all register classes */
2064         for (i = arch_isa_get_n_reg_class(rss->arch_env->isa) - 1; i >= 0; --i) {
2065                 const arch_register_class_t *cls = arch_isa_get_reg_class(rss->arch_env->isa, i);
2066
2067                 rss->cls = cls;
2068                 DBG((rss->dbg, LEVEL_1, "register class %s\n", arch_register_class_name(cls)));
2069
2070                 /* Get all live value at end of Block having current register class */
2071                 ir_nodeset_init(&rss->live_block);
2072                 be_liveness_end_of_block(rss->liveness, rss->arch_env, rss->cls, rss->block, &rss->live_block);
2073
2074                 /* reset the list of interesting nodes */
2075                 plist_clear(rss->nodes);
2076                 plist_insert_back(rss->nodes, _sink);
2077
2078                 /* collect all nodes having a certain register class */
2079                 foreach_out_edge(block, edge) {
2080                         ir_node *irn  = get_edge_src_irn(edge);
2081                         ir_mode *mode = get_irn_mode(irn);
2082
2083                         /*
2084                                 We skip:
2085                                 - mode_T nodes (the projs are asked)
2086                                 - mode_X nodes (control flow nodes are always scheduled last)
2087                                 - Keeps (they are always immediately scheduled)
2088                                 - Phi (same as Keep)
2089                         */
2090                         if (mode == mode_T || mode == mode_X || is_Phi(irn))
2091                                 continue;
2092
2093                         /*
2094                                 In case of a proj, we skip
2095                                 - Barrier (they are a Barrier :)
2096                                 - Start
2097                                 - the Proj itself, as it's scheduled always with it's super node
2098                         */
2099                         if (is_Proj(irn)) {
2100                                 ir_node *pred = get_Proj_pred(irn);
2101                                 if (be_is_Barrier(pred) || is_Start(pred))
2102                                         continue;
2103                         }
2104
2105                         /* calculate the descendants and consumer for each node in the block */
2106                         collect_node_info(rss, skip_Proj(irn));
2107
2108                         if (be_is_Keep(irn))
2109                                 continue;
2110
2111                         if (! arch_irn_is(rss->arch_env, irn, ignore) && arch_get_irn_reg_class(rss->arch_env, irn, -1) == cls) {
2112                                 plist_insert_back(rss->nodes, skip_Proj(irn));
2113                         }
2114                         //}
2115                 }
2116
2117                 /* compute the potential killing set PK(G) */
2118                 compute_pkill_set(rss);
2119
2120                 /* compute the killing function k* */
2121                 compute_killing_function(rss);
2122
2123                 /*
2124                         Compute the heuristic value serialization and
2125                         add the necessary dependencies to the irg.
2126                 */
2127                 perform_value_serialization_heuristic(rss);
2128
2129                 ir_nodeset_destroy(&rss->live_block);
2130         }
2131
2132         phase_free(&rss->ph);
2133 }
2134
2135 /**
2136  * Register the options.
2137  */
2138 void be_init_schedrss(void) {
2139         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
2140         lc_opt_entry_t *sched_grp = lc_opt_get_grp(be_grp, "sched");
2141         lc_opt_entry_t *rss_grp = lc_opt_get_grp(sched_grp, "rss");
2142
2143         lc_opt_add_table(rss_grp, rss_option_table);
2144 }
2145
2146 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_schedrss);
2147
2148 /**
2149  * Preprocess the irg for scheduling.
2150  */
2151 void rss_schedule_preparation(const be_irg_t *birg) {
2152         ir_graph *irg = be_get_birg_irg(birg);
2153         rss_t rss;
2154
2155         FIRM_DBG_REGISTER(rss.dbg, "firm.be.sched.rss");
2156
2157         //firm_dbg_set_mask(rss.dbg, LEVEL_1 | LEVEL_2 | LEVEL_3);
2158
2159         init_rss_special_nodes(irg);
2160
2161         rss.irg      = irg;
2162         rss.arch_env = be_get_birg_arch_env(birg);
2163         rss.abi      = birg->abi;
2164         rss.h        = heights_new(irg);
2165         rss.nodes    = plist_new();
2166         rss.opts     = &rss_options;
2167         rss.liveness = be_liveness(irg);
2168         irg_block_walk_graph(irg, NULL, process_block, &rss);
2169         heights_free(rss.h);
2170         plist_free(rss.nodes);
2171         be_liveness_free(rss.liveness);
2172
2173         if (birg->main_env->options->dump_flags & DUMP_SCHED)
2174                 be_dump(rss.irg, "-rss", dump_ir_block_graph);
2175 }