- refactor emitter next/prev block handling a bit
[libfirm] / ir / be / beschedrss.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Implementation of a register saturating list scheduler.
23  * @author      Christian Wuerdig
24  * @date        29.08.2006
25  * @version     $Id$
26  *
27  * Implementation of a register saturating list scheduler
28  * as described in: Sid-Ahmed-Ali Touati
29  * Register Saturation in Superscalar and VLIW Codes
30  */
31 #ifdef HAVE_CONFIG_H
32 #include "config.h"
33 #endif
34
35 #include <limits.h>
36
37 #include "obst.h"
38 #include "debug.h"
39
40 #include "irgraph_t.h"
41 #include "irnode_t.h"
42 #include "iredges_t.h"
43 #include "ircons_t.h"
44 #include "irphase_t.h"
45 #include "irgwalk.h"
46 #include "irdump.h"
47 #include "irtools.h"
48 #include "irbitset.h"
49 #include "irprintf.h"
50 #include "bipartite.h"
51 #include "hungarian.h"
52 #include "plist.h"
53
54 #include "height.h"
55
56 #include "beabi.h"
57 #include "bemodule.h"
58 #include "benode_t.h"
59 #include "besched_t.h"
60 #include "beirg_t.h"
61
62 #include "lc_opts.h"
63 #include "lc_opts_enum.h"
64
65
66 #define ARR_LEN_SAFE(arr) ((arr) != NULL ? ARR_LEN((arr)) : 0)
67
68 #define HASH_RSS_EDGE(edge) ((get_irn_node_nr((edge)->src) << 16) | (get_irn_node_nr((edge)->tgt) & 0xFFFF))
69 #define BSEARCH_IRN_ARR(val, arr) \
70         bsearch(&(val), (arr), ARR_LEN_SAFE((arr)), sizeof((arr)[0]), cmp_irn_idx)
71
72 #define BLOCK_IDX_MAP(rss, irn) bsearch_for_index(get_irn_idx((irn)), (rss)->idx_map, ARR_LEN_SAFE((rss)->idx_map), 1)
73
74 /* the rss options */
75 typedef struct _rss_opts_t {
76         int dump_flags;
77 } rss_opts_t;
78
79 /* Represents a child with associated costs */
80 typedef struct _child {
81         ir_node *irn;
82         float   cost;
83 } child_t;
84
85 /* We need edges for several purposes. */
86 typedef struct _rss_edge {
87         ir_node *src;
88         ir_node *tgt;
89         void    *next;
90 } rss_edge_t;
91
92 /* Represents a connected bipartite component. */
93 typedef struct _cbc {
94         ir_nodeset_t parents;       /**< = S  a set of value producers */
95         ir_nodeset_t children;      /**< = T  a set of value consumers */
96         pset    *kill_edges;    /**< = E  a set of edges (t in T, s in S) such as each s in S gets killed by at least one t in T */
97         int     nr;             /**< a deterministic index for set insertion (used as hash) */
98 } cbc_t;
99
100 /* Represents a disjoint value DAG. */
101 typedef struct _dvg {
102         ir_nodeset_t nodes;
103         pset    *edges;
104 } dvg_t;
105
106 /* Represents a chain of nodes. */
107 typedef struct _chain {
108         plist_t *elements;   /**< List of chain elements */
109         int     nr;          /**< a deterministic index for set insertion (used as hash) */
110 } chain_t;
111
112 typedef struct _rss_irn {
113         plist_t  *consumer_list;    /**< List of consumers */
114         const ir_node **consumer;   /**< Sorted consumer array (needed for faster access) */
115
116         plist_t  *parent_list;      /**< List of parents */
117         plist_t  *pkiller_list;     /**< List of potential killers */
118
119         plist_t  *descendant_list;  /**< List of descendants */
120         const ir_node **descendants;      /**< Sorted descendant array (needed for faster access) */
121
122         const ir_node  *killer;     /**< The selected unique killer */
123         const ir_node  *irn;        /**< The corresponding firm node to this rss_irn */
124
125         chain_t  *chain;            /**< The chain, this node is associated to */
126
127         unsigned desc_walk;         /**< visited flag for collecting descendants */
128         unsigned kill_count;        /**< number of nodes killed by this one */
129
130         unsigned live_out : 1;      /**< irn has consumers outside of it's block */
131         unsigned visited  : 1;      /**< visited flag for bipartite decomposition */
132         unsigned havecons : 1;      /**< visited flag collect consumer */
133         unsigned handled  : 1;      /**< flag indicating whether or not the list structures have been build */
134         unsigned dumped   : 1;      /**< flag indication whether or not this node was dumped */
135 } rss_irn_t;
136
137 /* Represents a serialization edge with associated costs. */
138 typedef struct _serialization {
139         rss_irn_t  *u;       /* the top node */
140         rss_irn_t  *v;       /* the node about to be serialized after u */
141         rss_edge_t *edge;    /* the edge selected for the serialization */
142         int        omega1;   /* estimated: available regs - RS reduction */
143         int        omega2;   /* increase in critical path length */
144         int        new_killer;
145 } serialization_t;
146
147 typedef struct _rss {
148         ir_phase          ph;              /**< Phase to hold some data */
149         heights_t        *h;              /**< The current height object */
150         ir_graph         *irg;            /**< The irg to preprocess */
151         plist_t          *nodes;          /**< The list of interesting nodes */
152         const arch_env_t *arch_env;       /**< The architecture environment */
153         be_abi_irg_t     *abi;            /**< The abi for this irg */
154         pset             *cbc_set;        /**< A set of connected bipartite components */
155         ir_node          *block;          /**< The current block in progress. */
156         int              *idx_map;        /**< Mapping irn indices to per block indices */
157         unsigned         max_height;      /**< maximum height in the current block */
158         rss_opts_t       *opts;           /**< The options */
159         be_lv_t          *liveness;       /**< The liveness information for this irg */
160         ir_nodeset_t      live_block;     /**< Values alive at end of block */
161         const arch_register_class_t *cls; /**< The current register class */
162         DEBUG_ONLY(firm_dbg_module_t *dbg);
163 } rss_t;
164
165 #define get_rss_irn(rss, irn)  (phase_get_or_set_irn_data(&rss->ph, irn))
166
167 /**
168  * We need some special nodes:
169  * a source and a sink for all live-in and live-out values of a block
170  */
171 enum {
172         iro_rss_Source,
173         iro_rss_Sink,
174         iro_rss_last
175 };
176
177 /** The opcode of the rss_Source node once allocated. */
178 static ir_op *op_rss_Source;
179 /** The opcode of the rss_Sink node once allocated. */
180 static ir_op *op_rss_Sink;
181
182 /** The rss_Source node of the current graph. */
183 static ir_node *_source = NULL;
184 /** The rss_Sink node of the current graph. */
185 static ir_node *_sink   = NULL;
186
187 #define is_Source(irn) ((irn) == _source)
188 #define is_Sink(irn)   ((irn) == _sink)
189
190 enum {
191         RSS_DUMP_NONE  = 0,
192         RSS_DUMP_CBC   = 1 << 0,
193         RSS_DUMP_PKG   = 1 << 1,
194         RSS_DUMP_KILL  = 1 << 2,
195         RSS_DUMP_DVG   = 1 << 3,
196         RSS_DUMP_MAXAC = 1 << 4,
197         RSS_DUMP_ALL   = (RSS_DUMP_MAXAC << 1) - 1,
198 };
199
200 static rss_opts_t rss_options = {
201         RSS_DUMP_NONE,
202 };
203
204 static const lc_opt_enum_int_items_t dump_items[] = {
205         { "none",  RSS_DUMP_NONE  },
206         { "cbc",   RSS_DUMP_CBC   },
207         { "pkg",   RSS_DUMP_PKG   },
208         { "kill",  RSS_DUMP_KILL  },
209         { "dvg",   RSS_DUMP_DVG   },
210         { "maxac", RSS_DUMP_MAXAC },
211         { "all",   RSS_DUMP_ALL   },
212         { NULL, 0 }
213 };
214
215 static lc_opt_enum_int_var_t dump_var = {
216         &rss_options.dump_flags, dump_items
217 };
218
219 static const lc_opt_table_entry_t rss_option_table[] = {
220         LC_OPT_ENT_ENUM_MASK("dump", "dump phases", &dump_var),
221         LC_OPT_LAST
222 };
223
224 /******************************************************************************
225  *  _          _                    __                  _   _
226  * | |        | |                  / _|                | | (_)
227  * | |__   ___| |_ __   ___ _ __  | |_ _   _ _ __   ___| |_ _  ___  _ __  ___
228  * | '_ \ / _ \ | '_ \ / _ \ '__| |  _| | | | '_ \ / __| __| |/ _ \| '_ \/ __|
229  * | | | |  __/ | |_) |  __/ |    | | | |_| | | | | (__| |_| | (_) | | | \__ \
230  * |_| |_|\___|_| .__/ \___|_|    |_|  \__,_|_| |_|\___|\__|_|\___/|_| |_|___/
231  *            | |
232  *            |_|
233  ******************************************************************************/
234
235 /**
236  * Acquire opcodes if needed and create source and sink nodes.
237  */
238 static void init_rss_special_nodes(ir_graph *irg) {
239         ir_node *block;
240
241         if (op_rss_Source == NULL) {
242                 int iro_rss_base = get_next_ir_opcodes(iro_rss_last);
243                 op_rss_Source = new_ir_op(iro_rss_base + iro_rss_Source, "rss_Source", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
244                 op_rss_Sink   = new_ir_op(iro_rss_base + iro_rss_Sink,   "rss_Sink",   op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
245         }
246         block   = get_irg_start_block(irg);
247         _source = new_ir_node(NULL, irg, block, op_rss_Source, mode_ANY, 0, NULL);
248         _sink   = new_ir_node(NULL, irg, block, op_rss_Sink, mode_ANY, 0, NULL);
249 }
250
251 static int cmp_int(const void *a, const void *b) {
252         const int *i1 = a;
253         const int *i2 = b;
254
255         return QSORT_CMP(*i1, *i2);
256 }
257
258 static int cmp_child_costs(const void *a, const void *b) {
259         const child_t *c1 = a;
260         const child_t *c2 = b;
261
262         return QSORT_CMP(c1->cost, c2->cost);
263 }
264
265 static int cmp_irn_idx(const void *a, const void *b) {
266         const ir_node *n1 = *(ir_node **)a;
267         const ir_node *n2 = *(ir_node **)b;
268
269         return QSORT_CMP(get_irn_idx(n1), get_irn_idx(n2));
270 }
271
272 static int cmp_rss_edges(const void *a, const void *b) {
273         const rss_edge_t *e1 = a;
274         const rss_edge_t *e2 = b;
275
276         return (e1->src != e2->src) || (e1->tgt != e2->tgt);
277 }
278
279 static int bsearch_for_index(int key, int *arr, size_t len, int force) {
280         int left = 0;
281         int right = len;
282
283         while (right >= left) {
284                 int idx = (left + right) / 2;
285
286                 if (key < arr[idx])
287                         right = idx - 1;
288                 else if (key > arr[idx])
289                         left = idx + 1;
290                 else
291                         return idx;
292         }
293
294         if (force)
295                 assert(0 && "Something is wrong, key not found.");
296         return -1;
297 }
298
299 static const ir_node **build_sorted_array_from_list(plist_t *irn_list, struct obstack *obst) {
300         plist_element_t *el;
301         int             i   = 0;
302         int             len = plist_count(irn_list);
303         const ir_node   **arr = (const ir_node**)NEW_ARR_D(ir_node*, obst, len);
304
305         /* copy the list into the array */
306         foreach_plist(irn_list, el) {
307                 arr[i++] = plist_element_get_value(el);
308         }
309
310         /* sort the array by node index */
311         /* HACK cast for MSVC */
312         qsort((void*)arr, len, sizeof(arr[0]), cmp_irn_idx);
313
314         return arr;
315 }
316
317 /*****************************************************
318  *      _      _                       _
319  *     | |    | |                     (_)
320  *   __| | ___| |__  _   _  __ _  __ _ _ _ __   __ _
321  *  / _` |/ _ \ '_ \| | | |/ _` |/ _` | | '_ \ / _` |
322  * | (_| |  __/ |_) | |_| | (_| | (_| | | | | | (_| |
323  *  \__,_|\___|_.__/ \__,_|\__, |\__, |_|_| |_|\__, |
324  *                          __/ | __/ |         __/ |
325  *                         |___/ |___/         |___/
326  *
327  *****************************************************/
328
329 #ifdef DEBUG_libfirm
330 static void dump_nodeset(ir_nodeset_t *ns, const char *prefix) {
331         ir_nodeset_iterator_t iter;
332         ir_node *irn;
333
334         ir_nodeset_iterator_init(&iter, ns);
335         while( (irn = ir_nodeset_iterator_next(&iter)) != NULL ) {
336                 ir_fprintf(stderr, "%s%+F\n", prefix, irn);
337         }
338 }
339 #endif
340
341 static void build_file_name(rss_t *rss, const char *suffix, size_t suf_len, char *buf, size_t len) {
342         const char *irg_name;
343
344         memset(buf, 0, len);
345         irg_name = get_entity_name(get_irg_entity(rss->irg));
346         snprintf(buf, len - suf_len, "%s-%s-block-%ld",
347                 irg_name, arch_register_class_name(rss->cls), get_irn_node_nr(rss->block));
348         strcat(buf, suffix);
349 }
350
351 /* Dumps all collected bipartite components of current irg as vcg. */
352 static void debug_vcg_dump_bipartite(rss_t *rss) {
353         cbc_t *cbc;
354         FILE  *f;
355         char  file_name[256];
356         static const char suffix[] = "-RSS-CBC.vcg";
357
358         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
359         f = fopen(file_name, "w");
360
361         ir_fprintf(f, "graph: { title: \"connected bipartite component graph of %+F\"\n", rss->irg);
362         fprintf(f, "display_edge_labels: no\n");
363         fprintf(f, "layoutalgorithm: mindepth\n");
364         fprintf(f, "manhattan_edges: yes\n\n");
365
366         foreach_pset(rss->cbc_set, cbc) {
367                 ir_nodeset_iterator_t iter;
368                 ir_node    *n;
369                 rss_edge_t *ke;
370
371                 fprintf(f, "graph: { titel: \"cbc %d\" label: \"cbc %d\" status:clustered color:yellow\n", cbc->nr, cbc->nr);
372                 foreach_ir_nodeset(&cbc->parents, n, iter) {
373                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
374                 }
375                 foreach_ir_nodeset(&cbc->children, n, iter) {
376                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
377                 }
378                 foreach_pset(cbc->kill_edges, ke) {
379                         ir_fprintf(f, "edge: { sourcename: \"n%d_%d\" targetname: \"n%d_%d\" }\n",
380                                 get_irn_node_nr(ke->src), cbc->nr, get_irn_node_nr(ke->tgt), cbc->nr);
381                 }
382                 fprintf(f, "}\n\n");
383         }
384         fprintf(f, "}\n");
385         fclose(f);
386 }
387
388 /* Dump the computed killing function as vcg. */
389 static void debug_vcg_dump_kill(rss_t *rss) {
390         FILE            *f;
391         char            file_name[256];
392         plist_element_t *el;
393         static const char suffix[] = "-RSS-KILL.vcg";
394
395         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
396         f = fopen(file_name, "w");
397
398         ir_fprintf(f, "graph: { title: \"computed kill graph of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
399         fprintf(f, "display_edge_labels: no\n");
400         fprintf(f, "layoutalgorithm: mindepth\n");
401         fprintf(f, "manhattan_edges: yes\n\n");
402
403         {
404                 /* reset dump_flag */
405                 ir_node *irn;
406                 foreach_phase_irn(&rss->ph, irn) {
407                         rss_irn_t *node = get_rss_irn(rss, irn);
408                         node->dumped = 0;
409                 }
410         }
411
412         /* dump all nodes and their killers */
413         foreach_plist(rss->nodes, el) {
414                 ir_node   *irn     = plist_element_get_value(el);
415                 rss_irn_t *rirn    = get_rss_irn(rss, irn);
416                 rss_irn_t *pk_rirn = get_rss_irn(rss, rirn->killer);
417
418                 if (! rirn->dumped) {
419                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
420                         rirn->dumped = 1;
421                 }
422
423                 if (! pk_rirn->dumped) {
424                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(rirn->killer), rirn->killer);
425                         pk_rirn->dumped = 1;
426                 }
427
428                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
429                         get_irn_node_nr(rirn->killer), get_irn_node_nr(irn));
430         }
431
432         fprintf(f, "}\n");
433         fclose(f);
434 }
435
436 /* Dumps the potential killing DAG (PKG) as vcg. */
437 static void debug_vcg_dump_pkg(rss_t *rss, ir_nodeset_t *max_ac, int iteration) {
438         FILE              *f;
439         char              file_name[256];
440         char              *suffix   = alloca(32);
441         static const char suffix1[] = "-RSS-PKG.vcg";
442         static const char suffix2[] = "-RSS-PKG-MAXAC.vcg";
443         plist_element_t   *el;
444
445         if (! max_ac) {
446                 snprintf(suffix, 32, "%s", suffix1);
447         }
448         else {
449                 snprintf(suffix, 32, "-%02d%s", iteration, suffix2);
450         }
451
452         build_file_name(rss, suffix, strlen(suffix) + 1, file_name, sizeof(file_name));
453         f = fopen(file_name, "w");
454
455         ir_fprintf(f, "graph: { title: \"potential killing DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
456         fprintf(f, "display_edge_labels: no\n");
457         fprintf(f, "layoutalgorithm: mindepth\n");
458         fprintf(f, "manhattan_edges: yes\n\n");
459
460         {
461                 /* reset dump_flag */
462                 ir_node *irn;
463                 foreach_phase_irn(&rss->ph, irn) {
464                         rss_irn_t *node = get_rss_irn(rss, irn);
465                         node->dumped = 0;
466                 }
467         }
468
469         foreach_plist(rss->nodes, el) {
470                 ir_node   *irn  = plist_element_get_value(el);
471                 rss_irn_t *rirn = get_rss_irn(rss, irn);
472                 char      *c1   = "";
473                 plist_element_t *k_el;
474
475                 /* dump selected saturating values in yellow */
476                 if (max_ac && ir_nodeset_contains(max_ac, irn))
477                         c1 = "color:yellow";
478
479                 if (! rirn->dumped) {
480                         if (rirn->chain)
481                                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(irn), irn, rirn->chain->nr, c1);
482                         else
483                                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(irn), irn, c1);
484                         rirn->dumped = 1;
485                 }
486
487                 foreach_plist(rirn->pkiller_list, k_el) {
488                         ir_node   *pkiller = plist_element_get_value(k_el);
489                         rss_irn_t *pk_rirn = get_rss_irn(rss, pkiller);
490                         char      *c2      = "";
491
492                         if (max_ac && ir_nodeset_contains(max_ac, pkiller))
493                                 c2 = "color:yellow";
494
495                         if (! pk_rirn->dumped) {
496                                 if (pk_rirn->chain)
497                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(pkiller), pkiller, pk_rirn->chain->nr, c2);
498                                 else
499                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(pkiller), pkiller, c2);
500                                 pk_rirn->dumped = 1;
501                         }
502                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
503                                 get_irn_node_nr(pkiller), get_irn_node_nr(irn));
504                 }
505         }
506         fprintf(f, "}\n");
507         fclose(f);
508 }
509
510 /* Dumps the disjoint value DAG (DVG) as vcg. */
511 static void debug_vcg_dump_dvg(rss_t *rss, dvg_t *dvg) {
512         static const char suffix[] = "-RSS-DVG.vcg";
513         FILE       *f;
514         char       file_name[256];
515         ir_nodeset_iterator_t iter;
516         ir_node    *irn;
517         rss_edge_t *edge;
518
519         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
520         f = fopen(file_name, "w");
521
522         ir_fprintf(f, "graph: { title: \"disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
523         fprintf(f, "display_edge_labels: no\n");
524         fprintf(f, "layoutalgorithm: mindepth\n");
525         fprintf(f, "manhattan_edges: yes\n\n");
526
527         /* dump all nodes */
528         foreach_ir_nodeset(&dvg->nodes, irn, iter) {
529                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
530         }
531
532         /* dump all edges */
533         foreach_pset(dvg->edges, edge) {
534                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
535                         get_irn_node_nr(edge->src), get_irn_node_nr(edge->tgt));
536         }
537
538         fprintf(f, "}\n");
539         fclose(f);
540 }
541
542 #if 0
543 /* Dumps the PKG(DVG). */
544 static void debug_vcg_dump_dvg_pkiller(rss_t *rss, dvg_t *dvg) {
545         static const char suffix[] = "-RSS-DVG-PKG.vcg";
546         FILE       *f;
547         char       file_name[256];
548         ir_nodeset_iterator_t iter;
549         ir_node    *irn;
550
551         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
552         f = fopen(file_name, "w");
553
554         ir_fprintf(f, "graph: { title: \"PKG of disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
555         fprintf(f, "display_edge_labels: no\n");
556         fprintf(f, "layoutalgorithm: mindepth\n");
557         fprintf(f, "manhattan_edges: yes\n\n");
558
559         /* dump all nodes */
560         foreach_ir_nodeset(&dvg->nodes, irn, iter) {
561                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
562         }
563
564         /* dump all edges */
565         foreach_ir_nodeset(&dvg->nodes, irn, iter) {
566                 rss_irn_t       *node = get_rss_irn(rss, irn);
567                 plist_element_t *el;
568
569                 foreach_plist(node->dvg_pkiller_list, el) {
570                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
571                                 get_irn_node_nr(plist_element_get_value(el)), get_irn_node_nr(irn));
572                 }
573         }
574
575         fprintf(f, "}\n");
576         fclose(f);
577 }
578 #endif /* if 0 */
579
580 /**
581  * In case there is no rss information for irn, initialize it.
582  */
583 static void *init_rss_irn(ir_phase *ph, const ir_node *irn, void *old) {
584         rss_irn_t *res = old ? old : phase_alloc(ph, sizeof(res[0]));
585
586         res->descendant_list = plist_obstack_new(phase_obst(ph));
587         res->descendants     = NULL;
588
589         res->consumer_list   = plist_obstack_new(phase_obst(ph));
590         res->consumer        = NULL;
591
592         res->pkiller_list    = plist_obstack_new(phase_obst(ph));
593
594         res->parent_list     = plist_obstack_new(phase_obst(ph));
595
596         res->killer          = NULL;
597         res->irn             = irn;
598         res->chain           = NULL;
599
600         res->kill_count      = 0;
601         res->desc_walk       = 0;
602         res->live_out        = 0;
603         res->visited         = 0;
604         res->handled         = 0;
605         res->dumped          = 0;
606         res->havecons        = 0;
607
608         return res;
609 }
610
611 /**
612  * Collect all nodes data dependent on current node.
613  */
614 static void collect_descendants(rss_t *rss, rss_irn_t *rirn, ir_node *irn, int *got_sink, unsigned cur_desc_walk) {
615         const ir_edge_t *edge;
616         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
617         ir_node         *block    = rss->block;
618         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
619         int             i;
620
621         if (cur_node->desc_walk >= cur_desc_walk)
622                 return;
623         cur_node->desc_walk = cur_desc_walk;
624
625         /* Stop at Barriers */
626         if (be_is_Barrier(irn))
627                 return;
628
629         /* loop over normal and dependency edges */
630         for (i = 0; i < 2; ++i) {
631                 foreach_out_edge_kind(irn, edge, ekind[i]) {
632                         ir_node *user = get_edge_src_irn(edge);
633
634                         /* skip ignore nodes as they do not really contribute to register pressure */
635                         if (arch_irn_is(rss->arch_env, user, ignore))
636                                 continue;
637
638                         /*
639                                 (a) mode_X means Jump            -> out_edge
640                                 (b) Phi as user of a normal node -> out-edge
641                         */
642                         if (get_irn_mode(user) == mode_X || is_Phi(user)) {
643                                 if (! *got_sink)
644                                         goto force_sink;
645                                 else
646                                         continue;
647                         }
648
649                         if (is_Proj(user)) {
650
651                                 //if (arch_get_irn_reg_class(rss->arch_env, user, -1) == rss->cls)
652                                         collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
653                         }
654                         else {
655                                 /* check if user lives in block and is not a control flow node */
656                                 if (get_nodes_block(user) == block) {
657                                         if (! plist_has_value(rirn->descendant_list, user)) {
658                                                 plist_insert_back(rirn->descendant_list, user);
659                                                 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", user));
660                                         }
661                                         collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
662                                 }
663                                 else if (! *got_sink) {
664 force_sink:
665                                         /* user lives out of block: add sink as descendant if not already done */
666                                         plist_insert_back(rirn->descendant_list, _sink);
667                                         *got_sink = 1;
668                                         DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", _sink));
669                                 }
670                         }
671                 }
672         }
673 }
674
675 /**
676  * Handles a single consumer.
677  */
678 static void collect_single_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *consumer, int *got_sink) {
679         ir_node *block = rss->block;
680
681         assert(! is_Proj(consumer) && "Cannot handle Projs");
682
683         if (! is_Phi(consumer) && ! is_Block(consumer) && get_nodes_block(consumer) == block) {
684                 if (! arch_irn_is(rss->arch_env, consumer, ignore) && ! plist_has_value(rss_irn->consumer_list, consumer)) {
685                         plist_insert_back(rss_irn->consumer_list, consumer);
686                         DBG((rss->dbg, LEVEL_2, "\t\tconsumer %+F\n", consumer));
687                 }
688         }
689         else {
690                 rss_irn->live_out = 1;
691                 DBG((rss->dbg, LEVEL_2, "\t\tlive out %+F", consumer));
692                 if (! *got_sink) {
693                         plist_insert_back(rss_irn->consumer_list, _sink);
694                         *got_sink = 1;
695                         DB((rss->dbg, LEVEL_2, ", %+F added instead", _sink));
696                 }
697                 DB((rss->dbg, LEVEL_2, "\n"));
698         }
699 }
700
701 /**
702  * Collect all nodes consuming the value(s) produced by current node.
703  */
704 static void collect_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *irn, int *got_sink) {
705         const ir_edge_t *edge;
706         int             i;
707         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
708         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
709
710         if (cur_node->havecons)
711                 return;
712         cur_node->havecons = 1;
713
714         for (i = 0; i < 2; ++i) {
715                 foreach_out_edge_kind(irn, edge, ekind[i]) {
716                         ir_node *consumer = get_edge_src_irn(edge);
717
718                         if (is_Proj(consumer)) {
719                                 //if (arch_get_irn_reg_class(rss->arch_env, consumer, -1) == rss->cls)
720                                         collect_consumer(rss, rss_irn, consumer, got_sink);
721                         }
722                         else
723                                 collect_single_consumer(rss, rss_irn, consumer, got_sink);
724                 }
725         }
726 }
727
728 /**
729  * Collects all consumer and descendant of a irn.
730  */
731 static void collect_node_info(rss_t *rss, ir_node *irn) {
732         static unsigned cur_desc_walk = 0;
733         rss_irn_t       *rss_irn      = get_rss_irn(rss, irn);
734         int             got_sink;
735
736         assert(! is_Proj(irn) && "Cannot handle Projs.");
737
738         /* check if node info is already available */
739         if (rss_irn->handled)
740                 return;
741                 //phase_reinit_single_irn_data(&rss->ph, irn);
742
743         DBG((rss->dbg, LEVEL_1, "\tcomputing consumers of %+F:\n", irn));
744
745         /* collect all consumer */
746         got_sink = 0;
747         collect_consumer(rss, rss_irn, irn, &got_sink);
748
749         /* build sorted consumer array */
750         rss_irn->consumer = build_sorted_array_from_list(rss_irn->consumer_list, phase_obst(&rss->ph));
751
752         DBG((rss->dbg, LEVEL_1, "\tcompute descendants of %+F:\n", irn));
753
754         /* collect descendants */
755         got_sink = 0;
756         collect_descendants(rss, rss_irn, irn, &got_sink, ++cur_desc_walk);
757
758         /* build sorted descendant array */
759         rss_irn->descendants = build_sorted_array_from_list(rss_irn->descendant_list, phase_obst(&rss->ph));
760
761         rss_irn->handled = 1;
762 }
763
764 /**
765  * Checks if v is a potential killer of u.
766  * v is in pkill(u) iff descendants(v) cut consumer(u) is v
767  *
768  * @param rss   The rss object
769  * @param v      The node to check for killer
770  * @param u      The potentially killed value
771  * @return 1 if v is in pkill(u), 0 otherwise
772  */
773 static int is_potential_killer(rss_t *rss, rss_irn_t *v, rss_irn_t *u) {
774         plist_t *list;
775         const ir_node **arr;
776         plist_element_t *el;
777         (void) rss;
778
779         assert(is_Sink(v->irn) || ((plist_count(v->descendant_list) > 0 && v->descendants) || 1));
780         assert(is_Sink(u->irn) || ((plist_count(u->consumer_list)   > 0 && u->consumer)    || 1));
781
782         /* as we loop over the list: loop over the shorter one */
783         if (plist_count(v->descendant_list) > plist_count(u->consumer_list)) {
784                 list = u->consumer_list;
785                 arr  = v->descendants;
786         }
787         else {
788                 list = v->descendant_list;
789                 arr  = u->consumer;
790         }
791
792         /* for each list element: try to find element in array */
793         foreach_plist(list, el) {
794                 ir_node *irn   = plist_element_get_value(el);
795                 ir_node *match = BSEARCH_IRN_ARR(irn, arr);
796
797                 if (match && match != irn)
798                         return 0;
799         }
800
801         return 1;
802 }
803
804 /**
805  * Update descendants, consumer and pkiller list for the given irn.
806  */
807 static void update_node_info(rss_t *rss, ir_node *irn, ir_node *pk_irn) {
808         rss_irn_t *node    = get_rss_irn(rss, irn);
809         rss_irn_t *pkiller = get_rss_irn(rss, pk_irn);
810
811         /* add consumer and rebuild the consumer array */
812         if (! plist_has_value(node->consumer_list, pk_irn)) {
813                 plist_insert_back(node->consumer_list, pk_irn);
814                 node->consumer = build_sorted_array_from_list(node->consumer_list, phase_obst(&rss->ph));
815         }
816
817         /* add pk_irn as descendant, add it's descendants to irn's and rebuild array */
818         if (! plist_has_value(node->descendant_list, pk_irn)) {
819                 plist_element_t *el;
820
821                 plist_insert_back(node->descendant_list, pk_irn);
822
823                 foreach_plist(pkiller->descendant_list, el) {
824                         ir_node *desc = plist_element_get_value(el);
825
826                         if (! plist_has_value(node->descendant_list, desc))
827                                 plist_insert_back(node->descendant_list, desc);
828                 }
829
830                 node->descendants = build_sorted_array_from_list(node->descendant_list, phase_obst(&rss->ph));
831         }
832
833 }
834
835 /**
836  * Compute the potential killing set PK.
837  */
838 static void compute_pkill_set(rss_t *rss) {
839         plist_element_t *u_el, *v_el;
840
841         foreach_plist(rss->nodes, u_el) {
842                 ir_node   *u_irn = plist_element_get_value(u_el);
843                 rss_irn_t *u     = get_rss_irn(rss, u_irn);
844
845                 DBG((rss->dbg, LEVEL_1, "\tcomputing potential killers of %+F:\n", u_irn));
846
847                 /* check each consumer if it is a potential killer  */
848                 foreach_plist(u->consumer_list, v_el) {
849                         ir_node   *v_irn = plist_element_get_value(v_el);
850                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
851
852                         /* check, if v is a potential killer of u */
853                         if (is_potential_killer(rss, v, u)) {
854                                 if (! plist_has_value(u->pkiller_list, v_irn))
855                                         plist_insert_back(u->pkiller_list, v_irn);
856                                 v->kill_count++;
857                                 DBG((rss->dbg, LEVEL_2, "\t\tpotential killer %+F\n", v_irn));
858                         }
859                 }
860
861                 u->killer = _sink;
862         }
863
864         if (rss->opts->dump_flags & RSS_DUMP_PKG)
865                 debug_vcg_dump_pkg(rss, NULL, 0);
866 }
867
868 /**
869  * Build set of killing edges (from values to their potential killers)
870  */
871 static void build_kill_edges(rss_t *rss, pset *epk) {
872         plist_element_t *el, *k_el;
873
874         foreach_plist(rss->nodes, el) {
875                 ir_node    *irn  = plist_element_get_value(el);
876                 rss_irn_t *rirn = get_rss_irn(rss, irn);
877
878                 DBG((rss->dbg, LEVEL_3, "\t\tbuilding kill edges for %+F:\n", irn));
879
880                 foreach_plist(rirn->pkiller_list, k_el) {
881                         ir_node    *pkiller = plist_element_get_value(k_el);
882                         rss_edge_t *ke      = obstack_alloc(phase_obst(&rss->ph), sizeof(*ke));
883
884                         ke->src  = irn;
885                         ke->tgt  = pkiller;
886                         ke->next = NULL;
887
888                         DBG((rss->dbg, LEVEL_3, "\t\t\ttarget %+F\n", pkiller));
889
890                         pset_insert(epk, ke, HASH_RSS_EDGE(ke));
891                 }
892         }
893 }
894
895 #ifdef DEBUG_libfirm
896 /* print the given cbc for debugging purpose */
897 static void debug_print_cbc(firm_dbg_module_t *mod, cbc_t *cbc) {
898         ir_nodeset_iterator_t iter;
899         ir_node    *n;
900         rss_edge_t *ke;
901
902         DBG((mod, LEVEL_3, "\t\tS = set of parents:\n"));
903         foreach_ir_nodeset(&cbc->parents, n, iter) {
904                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
905         }
906         DBG((mod, LEVEL_3, "\t\tT = set of children:\n"));
907         foreach_ir_nodeset(&cbc->children, n, iter) {
908                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
909         }
910         DBG((mod, LEVEL_3, "\t\tE = Edges from producers to consumers\n"));
911         foreach_pset(cbc->kill_edges, ke) {
912                 DBG((mod, LEVEL_3, "\t\t\t%+F -> %+F\n", ke->src, ke->tgt));
913         }
914 }
915 #endif
916
917 /**
918  * Construct the bipartite decomposition.
919  * Sid-Ahmed-Ali Touati, Phd Thesis
920  * Register Pressure in Instruction Level Parallelism, p. 71
921  */
922 static void compute_bipartite_decomposition(rss_t *rss) {
923         pset *epk    = new_pset(cmp_rss_edges, 10);
924         int  cur_num = 0;
925
926         plist_element_t *el;
927
928         DBG((rss->dbg, LEVEL_1, "\tcomputing bipartite decomposition:\n"));
929
930         build_kill_edges(rss, epk);
931
932         foreach_plist(rss->nodes, el) {
933                 ir_node   *u_irn   = plist_element_get_value(el);
934                 rss_irn_t *u       = get_rss_irn(rss, u_irn);
935                 int       p_change = 1;
936                 int       c_change = 1;
937                 int       vrfy_ok  = 1;
938
939                 cbc_t           *cbc;
940                 plist_element_t *el2;
941                 rss_edge_t      *k_edge;
942                 rss_edge_t      *kedge_root = NULL;
943                 ir_node         *t_irn, *s_irn;
944                 ir_nodeset_iterator_t iter;
945
946                 if (u->visited || u_irn == _sink)
947                         continue;
948
949                 DBG((rss->dbg, LEVEL_2, "\t\t%+F choosen:\n", u_irn));
950
951                 cbc     = obstack_alloc(phase_obst(&rss->ph), sizeof(*cbc));
952                 cbc->nr = cur_num++;
953
954                 /* initialize S_cb */
955                 ir_nodeset_init_size(&cbc->parents, 5);
956                 ir_nodeset_insert(&cbc->parents, u_irn);
957                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (init)\n", u_irn));
958
959                 /* E_cb = empty */
960                 cbc->kill_edges = new_pset(cmp_rss_edges, 5);
961
962                 /* each parent gets killed by at least one value from children */
963
964                 /* T_cb = PK_successors(u) */
965                 ir_nodeset_init_size(&cbc->children, 5);
966                 foreach_plist(u->pkiller_list, el2) {
967                         ir_nodeset_insert(&cbc->children, plist_element_get_value(el2));
968                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (init)\n", plist_element_get_value(el2)));
969                 }
970
971                 /*
972                         Now: insert the parents of all children into the parent set
973                         and insert the children of all parents into the children set
974                         until the sets don't change any more
975                 */
976                 while (p_change || c_change) {
977                         ir_nodeset_iterator_t iter;
978                         p_change = c_change = 0;
979
980                         /* accumulate parents */
981                         foreach_ir_nodeset(&cbc->children, t_irn, iter) {
982                                 foreach_pset(epk, k_edge) {
983                                         ir_node *val = k_edge->src;
984
985                                         if (k_edge->tgt == t_irn && ! ir_nodeset_contains(&cbc->parents, val)) {
986                                                 ir_nodeset_insert(&cbc->parents, val);
987                                                 p_change = 1;
988                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (killed by %+F)\n", val, t_irn));
989                                         }
990                                 }
991                         }
992
993                         /* accumulate children */
994                         foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
995                                 foreach_pset(epk, k_edge) {
996                                         ir_node *val = k_edge->tgt;
997
998                                         if (k_edge->src == s_irn  && ! ir_nodeset_contains(&cbc->children, val)) {
999                                                 ir_nodeset_insert(&cbc->children, val);
1000                                                 c_change = 1;
1001                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (kills %+F)\n", val, s_irn));
1002                                         }
1003                                 }
1004                         }
1005                 }
1006
1007                 /* mark all parent values as visited */
1008                 foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
1009                         rss_irn_t *s = get_rss_irn(rss, s_irn);
1010                         s->visited = 1;
1011                         /* assure bipartite property */
1012 #if 0
1013                         if (ir_nodeset_contains(&cbc->children, s_irn)) {
1014                                 ir_nodeset_remove(&cbc->children, s_irn);
1015                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F removed from to children\n", s_irn));
1016                         }
1017 #endif
1018                 }
1019
1020                 /* update edges */
1021                 foreach_pset(epk, k_edge) {
1022                         if (ir_nodeset_contains(&cbc->parents, k_edge->src) &&
1023                             ir_nodeset_contains(&cbc->children, k_edge->tgt)) {
1024                                 pset_insert(cbc->kill_edges, k_edge, HASH_RSS_EDGE(k_edge));
1025                                 /*
1026                                         Link all k_edges which are about to be removed together.
1027                                         Beware: pset_remove kills the iterator.
1028                                 */
1029                                 k_edge->next = kedge_root;
1030                                 kedge_root   = k_edge;
1031                         }
1032                 }
1033
1034                 /* remove all linked k_edges */
1035                 for (; kedge_root; kedge_root = kedge_root->next) {
1036                         pset_remove(epk, kedge_root, HASH_RSS_EDGE(kedge_root));
1037                 }
1038
1039                 /* verify the cbc */
1040                 foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
1041                         int is_killed = 0;
1042
1043                         foreach_pset(cbc->kill_edges, k_edge) {
1044                                 if (k_edge->src == s_irn) {
1045                                         is_killed = 1;
1046                                         pset_break(cbc->kill_edges);
1047                                         break;
1048                                 }
1049                         }
1050
1051                         if (! is_killed) {
1052                                 vrfy_ok = 0;
1053                                 ir_fprintf(stderr, "Warning: parent %+F is not killed in current cbc\n", s_irn);
1054                         }
1055                 }
1056                 assert(vrfy_ok && "Verification of CBC failed");
1057
1058                 /* add the connected bipartite component */
1059                 if (ir_nodeset_size(&cbc->parents) > 0 &&
1060                     ir_nodeset_size(&cbc->children) > 0 &&
1061                         pset_count(cbc->kill_edges) > 0)
1062                         pset_insert(rss->cbc_set, cbc, (unsigned)cbc->nr);
1063                 DBG((rss->dbg, LEVEL_2, "\tbipartite component %d inserted:\n", cbc->nr));
1064                 DEBUG_ONLY(
1065                         if (firm_dbg_get_mask(rss->dbg) & LEVEL_2)
1066                                 debug_print_cbc(rss->dbg, cbc);
1067                 );
1068         }
1069
1070         if (rss->opts->dump_flags & RSS_DUMP_CBC)
1071                 debug_vcg_dump_bipartite(rss);
1072
1073         del_pset(epk);
1074 }
1075
1076 /**
1077  * Select the child with the maximum cost.
1078  */
1079 static child_t *select_child_max_cost(rss_t *rss, ir_nodeset_t *x, ir_nodeset_t *y, child_t *t, cbc_t *cbc) {
1080         ir_node *child;
1081         ir_nodeset_iterator_t iter;
1082         float   max_cost = -1.0f;
1083
1084         DBG((rss->dbg, LEVEL_2, "\t\tcomputing children costs:\n"));
1085
1086         foreach_ir_nodeset(&cbc->children, child, iter) {
1087                 rss_irn_t  *r_child             = get_rss_irn(rss, child);
1088                 int         num_unkilled_parents = 0;
1089                 int         num_descendants;
1090                 rss_edge_t *k_edge;
1091                 float       cost;
1092
1093                 /* get the number of unkilled parents */
1094                 foreach_pset(cbc->kill_edges, k_edge) {
1095                         if (k_edge->tgt == child && ir_nodeset_contains(x, k_edge->src))
1096                                 ++num_unkilled_parents;
1097                 }
1098
1099                 cost = (float)num_unkilled_parents;
1100
1101                 num_descendants = plist_count(r_child->descendant_list) + ir_nodeset_size(y);
1102
1103                 if (num_descendants > 0)
1104                         cost /= (float)num_descendants;
1105
1106                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F, #desc %d, cost %.3f\n", child, num_descendants, cost));
1107
1108                 if (cost > max_cost) {
1109                         t->irn   = child;
1110                         t->cost  = cost;
1111                         max_cost = cost;
1112                 }
1113         }
1114
1115         return t;
1116 }
1117
1118 /**
1119  * Remove all parents from x which are killed by t_irn.
1120  */
1121 static void remove_covered_parents(rss_t *rss, ir_nodeset_t *x, ir_node *t_irn, cbc_t *cbc) {
1122         rss_irn_t  *t = get_rss_irn(rss, t_irn);
1123         rss_edge_t *k_edge;
1124
1125         DBG((rss->dbg, LEVEL_2, "\t\tremoving parents covered by %+F:\n", t_irn));
1126
1127         foreach_pset(cbc->kill_edges, k_edge) {
1128                 if (k_edge->tgt == t_irn && ir_nodeset_contains(x, k_edge->src)) {
1129                         ir_nodeset_remove(x, k_edge->src);
1130                         plist_insert_back(t->parent_list, k_edge->src);
1131                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", k_edge->src));
1132                 }
1133         }
1134 }
1135
1136 static void update_cumulated_descendent_values(rss_t *rss, ir_nodeset_t *y, ir_node *t_irn) {
1137         rss_irn_t *t = get_rss_irn(rss, t_irn);
1138         plist_element_t *el;
1139
1140         DBG((rss->dbg, LEVEL_2, "\t\tupdating cumulated descendant value of %+F:\n", t_irn));
1141
1142         foreach_plist(t->descendant_list, el) {
1143                 ir_nodeset_insert(y, plist_element_get_value(el));
1144                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", plist_element_get_value(el)));
1145         }
1146 }
1147
1148 /**
1149  * Greedy-k: a heuristics for the MMA problem
1150  */
1151 static void compute_killing_function(rss_t *rss) {
1152         cbc_t *cbc;
1153         struct obstack obst;
1154
1155         obstack_init(&obst);
1156
1157         rss->cbc_set = pset_new_ptr(5);
1158         compute_bipartite_decomposition(rss);
1159
1160         /* for all bipartite components do: */
1161         foreach_pset(rss->cbc_set, cbc) {
1162                 ir_node *p;
1163                 ir_nodeset_t x;
1164                 ir_nodeset_t y;
1165                 ir_nodeset_iterator_t iter;
1166                 child_t **sks    = NEW_ARR_F(child_t *, 20);
1167                 int     cur_len  = 0;
1168                 int     cur_size = 20;
1169                 int     i;
1170
1171                 ir_nodeset_init_size(&x, 10);
1172                 ir_nodeset_init_size(&y, 10);
1173
1174                 DBG((rss->dbg, LEVEL_1, "\tcomputing SKS for cbc %d:\n", cbc->nr));
1175                 DBG((rss->dbg, LEVEL_2, "\t\tinitializing parents X:\n"));
1176
1177                 /* X = S_cb (all parents are initially uncovered) */
1178                 foreach_ir_nodeset(&cbc->parents, p, iter) {
1179                         ir_nodeset_insert(&x, p);
1180                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", p));
1181                 }
1182
1183                 /* while X not empty */
1184                 while (ir_nodeset_size(&x) > 0) {
1185                         child_t *t = obstack_alloc(&obst, sizeof(*t));
1186                         memset(t, 0, sizeof(*t));
1187
1188                         t = select_child_max_cost(rss, &x, &y, t, cbc);
1189
1190                         if (cur_len >= cur_size) {
1191                                 ARR_EXTO(child_t *, sks, cur_size * 2);
1192                                 cur_size *= 2;
1193                         }
1194
1195                         DBG((rss->dbg, LEVEL_2, "\t\tinsert child %+F (%.3f) into SKS at pos %d\n", t->irn, t->cost, cur_len));
1196
1197                         sks[cur_len++] = t;
1198                         remove_covered_parents(rss, &x, t->irn, cbc);
1199                         update_cumulated_descendent_values(rss, &y, t->irn);
1200                 }
1201
1202                 ARR_SHRINKLEN(sks, cur_len);
1203
1204                 /* sort SKS in increasing cost order */
1205                 qsort(sks, cur_len, sizeof(sks[0]), cmp_child_costs);
1206
1207                 DBG((rss->dbg, LEVEL_2, "\tprocessing SKS for cbc %d:\n", cbc->nr));
1208
1209                 /* build killing function */
1210                 for (i = cur_len - 1; i >= 0; --i) { /* loop over sks in decreasing cost order */
1211                         child_t         *t = sks[i];
1212                         rss_irn_t       *rt = get_rss_irn(rss, t->irn);
1213                         plist_element_t *p_el;
1214
1215                         DBG((rss->dbg, LEVEL_3, "\t\t\tkiller %+F (%.3f):\n", t->irn, t->cost));
1216
1217                         /* kill all unkilled parents of t */
1218                         foreach_plist(rt->parent_list, p_el) {
1219                                 ir_node    *par = plist_element_get_value(p_el);
1220                                 rss_irn_t *rpar = get_rss_irn(rss, par);
1221
1222                                 if (is_Sink(rpar->killer)) {
1223                                         rpar->killer = t->irn;
1224                                         DBG((rss->dbg, LEVEL_2, "\t\tkill %+F\n", rpar->irn));
1225                                 }
1226                                 else {
1227                                         DBG((rss->dbg, LEVEL_3, "\t\t\tkeeping %+F as killer for %+F\n", rpar->killer, rpar->irn));
1228                                 }
1229                         }
1230                 }
1231
1232                 ir_nodeset_destroy(&x);
1233                 ir_nodeset_destroy(&y);
1234                 DEL_ARR_F(sks);
1235         }
1236
1237         if (rss->opts->dump_flags & RSS_DUMP_KILL)
1238                 debug_vcg_dump_kill(rss);
1239
1240         del_pset(rss->cbc_set);
1241         obstack_free(&obst, NULL);
1242 }
1243
1244 /**
1245  * Adds the edge src -> tgt to the dvg. Checks if reverse edge is already there (asserts).
1246  */
1247 static INLINE void add_dvg_edge(rss_t *rss, dvg_t *dvg, const ir_node *src, const ir_node *tgt, int have_source) {
1248         rss_edge_t *dvg_edge;
1249         rss_edge_t key;
1250
1251         if (! have_source)
1252                 ir_nodeset_insert(&dvg->nodes, (ir_node *) src);
1253         else
1254                 assert(ir_nodeset_contains(&dvg->nodes, src) && "Wrong assumption");
1255
1256         ir_nodeset_insert(&dvg->nodes, (ir_node *) tgt);
1257
1258         key.src = (ir_node *) tgt;
1259         key.tgt = (ir_node *) src;
1260         assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1261
1262         key.src = (ir_node *) src;
1263         key.tgt = (ir_node *) tgt;
1264         if (NULL != pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1265                 /* add the edge to the DVG */
1266                 dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1267
1268                 dvg_edge->src  = (ir_node *) src;
1269                 dvg_edge->tgt  = (ir_node *) tgt;
1270                 dvg_edge->next = NULL;
1271
1272                 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", src, tgt));
1273                 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1274         }
1275 }
1276
1277 /**
1278  * Computes the disjoint value DAG (DVG).
1279  * BEWARE: It is not made explicitly clear in the Touati paper,
1280  *         but the DVG is meant to be build from the KILLING DAG
1281  */
1282 static void compute_dvg(rss_t *rss, dvg_t *dvg) {
1283         plist_element_t *el;
1284
1285         DBG((rss->dbg, LEVEL_1, "\tcomputing DVG:\n"));
1286
1287         foreach_plist(rss->nodes, el) {
1288                 ir_node    *u_irn    = plist_element_get_value(el);
1289                 rss_irn_t  *u        = get_rss_irn(rss, u_irn);
1290                 rss_irn_t  *u_killer = get_rss_irn(rss, u->killer);
1291                 int        i;
1292
1293                 /* TODO: omit nodes only having sink as pkiller and killing no one */
1294
1295                 /* add an edge to killer */
1296                 add_dvg_edge(rss, dvg, u_irn, u->killer, 0);
1297
1298                 if (is_Sink(u->killer))
1299                         continue;
1300
1301                 /* We add an edge to every killer, from where we could be reached. */
1302                 for (i = ARR_LEN_SAFE(u_killer->descendants) - 1; i >= 0; --i) {
1303                         add_dvg_edge(rss, dvg, u_irn, u_killer->descendants[i], 1);
1304                 }
1305
1306 #if 0
1307
1308                 foreach_plist(rss->nodes, el2) {
1309                         ir_node *v_irn = plist_element_get_value(el2);
1310
1311                         /*
1312                                 There is an edge (u, v) in the DVG iff v is a descendant of the killer(u).
1313                         */
1314                         if (BSEARCH_IRN_ARR(v_irn, u_kill->descendants)) {
1315                                 rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1316                                 rss_edge_t key;
1317
1318                                 /* insert the user into the DVG and append it to the user list of u */
1319                                 ir_nodeset_insert(&dvg->nodes, v_irn);
1320                                 if (! plist_has_value(u->dvg_user_list, v_irn))
1321                                         plist_insert_back(u->dvg_user_list, v_irn);
1322
1323                                 dvg_edge->src  = u_irn;
1324                                 dvg_edge->tgt  = v_irn;
1325                                 dvg_edge->next = NULL;
1326
1327                                 key.src = v_irn;
1328                                 key.tgt = u_irn;
1329
1330                                 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1331
1332                                 /* add the edge to the DVG */
1333                                 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", u_irn, v_irn));
1334                                 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1335                         }
1336                 }
1337 #endif /* if 0 */
1338         }
1339
1340         if (rss->opts->dump_flags & RSS_DUMP_DVG)
1341                 debug_vcg_dump_dvg(rss, dvg);
1342 }
1343
1344 /**
1345  * Updates the dvg structure when a serialization edge from src -> tgt is added.
1346  */
1347 static void update_dvg(rss_t *rss, dvg_t *dvg, rss_irn_t *src, rss_irn_t *tgt) {
1348         int i, j, idx;
1349         rss_edge_t *edge;
1350         rss_edge_t **arr = alloca(pset_count(dvg->edges) * sizeof(arr[0]));
1351
1352         /*
1353                 Add an edge from serialization target to serialization src:
1354                 src cannot be alive with target
1355         */
1356         add_dvg_edge(rss, dvg, tgt->irn, src->irn, 0);
1357
1358         /* Add edges to src's descendants as well, they also getting serialized. */
1359         for (i = ARR_LEN_SAFE(src->descendants) - 1; i >= 0; --i) {
1360                 add_dvg_edge(rss, dvg, tgt->irn, src->descendants[i], 1);
1361         }
1362
1363         /* We also have to add edges from targets predecessors in dvg */
1364         idx = 0;
1365         /* We cannot insert elements into set over which we iterate ... */
1366         foreach_pset(dvg->edges, edge) {
1367                 if (edge->tgt == tgt->irn) {
1368                         arr[idx++] = edge;
1369                 }
1370         }
1371
1372         for (i = 0; i < idx; ++i) {
1373                 edge = arr[i];
1374                 add_dvg_edge(rss, dvg, edge->src, src->irn, 1);
1375                 for (j = ARR_LEN_SAFE(src->descendants) - 1; j >= 0; --j) {
1376                         add_dvg_edge(rss, dvg, edge->src, src->descendants[j], 1);
1377                 }
1378         }
1379 }
1380
1381 #if 0
1382 /**
1383  * Accumulate all descendants for root into list.
1384  */
1385 static void accumulate_dvg_descendant_values(rss_t *rss, rss_irn_t *root, plist_t *list) {
1386         if (plist_count(root->dvg_user_list) > 0) {
1387                 plist_element_t *el;
1388
1389                 foreach_plist(root->dvg_user_list, el) {
1390                         ir_node   *v_irn = plist_element_get_value(el);
1391                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
1392
1393                         /* add v as descendant */
1394                         if (! plist_has_value(list, v_irn)) {
1395                                 plist_insert_back(list, v_irn);
1396                                 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG descendant %+F\n", v_irn));
1397                         }
1398
1399                         /* accumulate v's descendants */
1400                         accumulate_dvg_descendant_values(rss, v, list);
1401                 }
1402         }
1403 }
1404
1405 /**
1406  * Builds the list of potential killers for each node
1407  * in the given DVG.
1408  * Needs the descendant list for all user as sorted array.
1409  */
1410 static void build_dvg_pkiller_list(rss_t *rss, dvg_t *dvg) {
1411         ir_nodeset_iterator_t iter;
1412         ir_node *irn;
1413
1414         foreach_nodeset(&dvg->nodes, irn, iter) {
1415                 rss_irn_t       *node = get_rss_irn(rss, irn);
1416                 plist_element_t *el, *el2;
1417
1418                 DBG((rss->dbg, DEBUG_DVG, "\t\tbuilding pkiller list for %+F\n", irn));
1419
1420                 /* check each user */
1421                 foreach_plist(node->dvg_user_list, el) {
1422                         ir_node *u_irn = plist_element_get_value(el);
1423
1424                         /* is the current user u_irn not a descendant of any other user -> pkiller */
1425                         foreach_plist(node->dvg_user_list, el2) {
1426                                 ir_node   *v_irn = plist_element_get_value(el2);
1427                                 rss_irn_t *v     = get_rss_irn(rss, v_irn);
1428
1429                                 if (el != el2                             &&
1430                                         ! BSEARCH_IRN_ARR(u_irn, v->dvg_desc) &&
1431                                         ! plist_has_value(node->dvg_pkiller_list, u_irn))
1432                                 {
1433                                         plist_insert_back(node->dvg_pkiller_list, u_irn);
1434                                         DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG pkiller %+F\n", u_irn));
1435                                 }
1436                         }
1437                 }
1438
1439                 node->dvg_pkiller = build_sorted_array_from_list(node->dvg_pkiller_list, phase_obst(&rss->ph));
1440         }
1441
1442         DEBUG_ONLY(
1443                 if (firm_dbg_get_mask(rss->dbg) & DEBUG_DVG)
1444                         debug_vcg_dump_dvg_pkiller(rss, dvg);
1445         );
1446 }
1447
1448 #endif /* if 0 */
1449
1450 /**
1451  * Compute the maximal antichain of the current DVG.
1452  * This is a reimplementation of the MAXIMAL_ANTI_CHAIN function
1453  * from the DDG library 1.1 (DAG.cpp).
1454  */
1455 static ir_nodeset_t *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration) {
1456         int         n               = ir_nodeset_size(&dvg->nodes);
1457         int         *assignment     = alloca(n * sizeof(assignment[0]));
1458         int         *assignment_rev = alloca(n * sizeof(assignment_rev[0]));
1459         int         *idx_map        = alloca(n * sizeof(idx_map[0]));
1460         hungarian_problem_t *bp;
1461         ir_nodeset_t *values, *temp;
1462         ir_nodeset_iterator_t iter;
1463         ir_node     *u_irn;
1464         int         i, j, cost, cur_chain, res;
1465         rss_edge_t  *dvg_edge;
1466
1467 #define MAP_IDX(irn) bsearch_for_index(get_irn_idx(irn), idx_map,  n,  1)
1468
1469         if (pset_count(dvg->edges) == 0)
1470                 return NULL;
1471
1472         bp = hungarian_new(n, n, 1, HUNGARIAN_MATCH_NORMAL);
1473
1474         /*
1475                 At first, we build an index map for the nodes in the DVG,
1476                 because we cannot use the irn idx therefore as the resulting
1477                 bipartite data structure would have around 1.2GB.
1478                 So we limit the size to the number of nodes we have in the DVG
1479                 and build a sorted index map for their irn indices.
1480         */
1481         i = 0;
1482         foreach_ir_nodeset(&dvg->nodes, u_irn, iter) {
1483                 idx_map[i++] = get_irn_idx(u_irn);
1484         }
1485         qsort(idx_map, n, sizeof(idx_map[0]), cmp_int);
1486
1487         foreach_pset(dvg->edges, dvg_edge) {
1488                 int idx_u = MAP_IDX(dvg_edge->src);
1489                 int idx_v = MAP_IDX(dvg_edge->tgt);
1490
1491                 /* add the entry to the bipartite data structure */
1492                 hungarian_add(bp, idx_u, idx_v, 1);
1493                 DBG((rss->dbg, LEVEL_3, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1494                         idx_u, dvg_edge->src, idx_v, dvg_edge->tgt));
1495         }
1496 #if 0
1497         /*
1498                 Add a bipartite entry for each pair of nodes (u, v), where exists a
1499                 path in the DVG from u to v, ie. connect all descendants(v) to v.
1500                 desc_dvg(v) = accumulated descendants of all z in dvg_user_list(v)
1501         */
1502         foreach_ir_nodeset(&dvg->nodes, u_irn, iter) {
1503                 rss_irn_t *u        = get_rss_irn(rss, u_irn);
1504                 int       idx_u_irn = MAP_IDX(u_irn);
1505
1506                 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG descendants of %+F:\n", u_irn));
1507
1508                 //plist_clear(u->dvg_desc_list);
1509                 //accumulate_dvg_descendant_values(rss, u, u->dvg_desc_list);
1510
1511                 /*
1512                         FIXME: The array is build on the phase obstack and we cannot free the data.
1513                         So the array get as many times allocated as this function is called.
1514                 */
1515
1516                 /* build the sorted array for faster searches */
1517                 //u->dvg_desc = build_sorted_array_from_list(u->dvg_desc_list, phase_obst(&rss->ph));
1518
1519                 DBG((rss->dbg, DEBUG_MAX_AC, "\t\tadding bipartite entries of %+F:\n", u_irn));
1520
1521                 /* add the bipartite entries for all descendants */
1522                 for (i = ARR_LEN_SAFE(u->dvg_desc) - 1; i >= 0; --i) {
1523                         rss_irn_t *desc    = get_rss_irn(rss, u->dvg_desc[i]);
1524                         int       idx_desc = MAP_IDX(u->dvg_desc[i]);
1525
1526                         /* add the entry to the bipartite data structure */
1527                         hungarian_add(bp, idx_u_irn, idx_desc, 1);
1528                         DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1529                                 idx_u_irn, u_irn, idx_desc, u->dvg_desc[i]));
1530
1531                         need_matching = 1;
1532                 }
1533         }
1534 #endif
1535
1536         /* We want maximum cardinality matching */
1537         hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);
1538
1539 #if 0
1540         DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG pkiller:\n"));
1541         /* beware: the following function needs the dvg_desc array */
1542         build_dvg_pkiller_list(rss, dvg);
1543 #endif
1544
1545         DBG((rss->dbg, LEVEL_2, "\t\tcomputing bipartite matching\n"));
1546         /*
1547                 The maximum cardinality bipartite matching gives us the minimal
1548                 chain partition, which corresponds to the maximum anti chains.
1549         */
1550         res = hungarian_solve(bp, assignment, &cost, 1);
1551         assert(res == 0 && "Bipartite matching failed!");
1552
1553         hungarian_free(bp);
1554         memset(assignment_rev, -1, n * sizeof(assignment_rev[0]));
1555
1556         /* build the reverse assignment, ie. foreach i -> j, add j -> i */
1557         for (i = 0; i < n; ++i) {
1558                 if (assignment[i] >= 0) {
1559                         int j = assignment[i];
1560                         assignment_rev[j] = i;
1561                 }
1562         }
1563
1564         DBG((rss->dbg, LEVEL_2, "\t\tgot assignment with cost %d\n", cost));
1565         DBG((rss->dbg, LEVEL_3, "\t\t\tassignment   ---   reverse assignment\n", cost));
1566         for (i = 0; i < n; ++i) {
1567                 DBG((rss->dbg, LEVEL_3, "\t\t\t%3d -> %3d         %3d -> %3d\n", i, assignment[i], i, assignment_rev[i]));
1568         }
1569
1570         values    = xmalloc(sizeof(values[0]));
1571         ir_nodeset_init_size(values, 10);
1572         cur_chain = 0;
1573         /* Construction of the minimal chain partition */
1574         for (j = 0; j < n; ++j) {
1575                 /* check nodes, which did not occur as target */
1576                 if (assignment_rev[j] == -1) {
1577                         int       xj      = idx_map[j];
1578                         ir_node   *xj_irn = get_idx_irn(rss->irg, xj);
1579                         rss_irn_t *xj_rss = get_rss_irn(rss, xj_irn);
1580                         chain_t   *c      = obstack_alloc(phase_obst(&rss->ph), sizeof(*c));
1581                         int       source;
1582
1583                         /* there was no source for j -> we have a source of a new chain */
1584                         ir_nodeset_insert(values, xj_irn);
1585
1586                         c->elements = plist_obstack_new(phase_obst(&rss->ph));
1587                         c->nr       = cur_chain++;
1588                         plist_insert_back(c->elements, xj_irn);
1589
1590                         xj_rss->chain = c;
1591
1592                         DBG((rss->dbg, LEVEL_2, "\t\tstarting chain %d:\n", c->nr));
1593                         DBG((rss->dbg, LEVEL_2, "\t\t\t%+F (%d)", xj_irn, j));
1594
1595                         /* follow chain, having j as source */
1596                         source = j;
1597                         while (assignment[source] >= 0) {
1598                                 int       target  = assignment[source];
1599                                 int       irn_idx = idx_map[target];
1600                                 ir_node   *irn    = get_idx_irn(rss->irg, irn_idx);
1601                                 rss_irn_t *node   = get_rss_irn(rss, irn);
1602
1603                                 plist_insert_back(c->elements, irn);
1604                                 node->chain = c;
1605
1606                                 DB((rss->dbg, LEVEL_2, " -> %+F (%d)", irn, target));
1607
1608                                 /* new source = last target */
1609                                 source = target;
1610                         }
1611
1612                         DB((rss->dbg, LEVEL_2, "\n"));
1613                 }
1614         }
1615
1616         /*
1617                 Computing the maximal antichain: Select an element from each
1618                 chain such, such it is parallel with the others.
1619         */
1620         DBG((rss->dbg, LEVEL_1, "\t\tcomputing set of saturation values (MAX AC)\n"));
1621         DBG((rss->dbg, LEVEL_3, "\t\tstarting with:\n"));
1622
1623         DEBUG_ONLY(
1624                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_3)
1625                         dump_nodeset(values, "\t\t\t");
1626         )
1627
1628         temp = NULL;
1629         do {
1630                 /*
1631                         We need an explicit array for the values as
1632                         we cannot iterate multiple times over the same
1633                         set at the same time. :-(((((
1634                         TODO Matze: now we can, so rewrite this...
1635                 */
1636                 int     n         = ir_nodeset_size(values);
1637                 int     i         = 0;
1638                 ir_node **val_arr = NEW_ARR_F(ir_node *, n);
1639
1640                 foreach_ir_nodeset(values, u_irn, iter)
1641                         val_arr[i++] = u_irn;
1642
1643                 if (temp) {
1644                         ir_nodeset_destroy(temp);
1645                         free(temp);
1646                 }
1647
1648                 temp = xmalloc(sizeof(temp[0]));
1649                 ir_nodeset_init_size(temp, 10);
1650
1651                 /* Select all nodes from current value set, having another node in the set as descendant. */
1652                 for (i = 0; i < n; ++i) {
1653                         rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1654
1655                         for (j = 0; j < n; ++j) {
1656                                 if (i != j) {
1657                                         rss_edge_t key;
1658
1659                                         key.src = val_arr[i];
1660                                         key.tgt = val_arr[j];
1661
1662                                         if (pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1663                                                 /* v[j] is descendant of u -> remove u and break */
1664                                                 ir_nodeset_insert(temp, (ir_node *) u->irn);
1665                                                 ir_nodeset_remove(values, u->irn);
1666
1667                                                 DBG((rss->dbg, LEVEL_3, "\t\t\tremoving %+F from values, adding it to temp\n", u->irn));
1668
1669                                                 break;
1670                                         }
1671                                 }
1672                         }
1673                 }
1674
1675                 /* Try to insert the chain predecessor of all selected u's */
1676                 foreach_ir_nodeset(temp, u_irn, iter) {
1677                         rss_irn_t       *u  = get_rss_irn(rss, u_irn);
1678                         chain_t         *c  = u->chain;
1679                         plist_element_t *el = plist_find_value(c->elements, u_irn);
1680
1681                         assert(el && "Missing element in chain!");
1682
1683                         /* If u has predecessor in chain: insert the predecessor */
1684                         if (el == plist_element_get_prev(el)) {
1685                                 ir_nodeset_insert(values, plist_element_get_value(el));
1686                                 DBG((rss->dbg, LEVEL_3, "\t\t\tadding %+F to values\n", plist_element_get_value(el)));
1687                         }
1688                 }
1689
1690
1691                 DEL_ARR_F(val_arr);
1692         } while (ir_nodeset_size(temp) > 0);
1693
1694         DBG((rss->dbg, LEVEL_2, "\t\tfinal set:\n"));
1695         DEBUG_ONLY(
1696                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2) {
1697                         dump_nodeset(values, "\t\t\t");
1698                 }
1699         );
1700
1701         if (rss->opts->dump_flags & RSS_DUMP_MAXAC)
1702                 debug_vcg_dump_pkg(rss, values, iteration);
1703
1704         if(temp != NULL) {
1705                 ir_nodeset_destroy(temp);
1706                 free(temp);
1707         }
1708
1709         return values;
1710
1711 #undef MAP_IDX
1712 }
1713
1714 /**
1715  * Computes the best serialization between two nodes of sat_vals.
1716  */
1717 static serialization_t *compute_best_admissible_serialization(rss_t *rss, ir_nodeset_t *sat_vals, serialization_t *ser, int num_regs) {
1718         int        n                    = ir_nodeset_size(sat_vals);
1719         int        n_idx                = ARR_LEN_SAFE(rss->idx_map);
1720         int        i                    = 0;
1721         ir_node    **val_arr            = alloca(n * sizeof(val_arr[0]));
1722         bitset_t   *bs_sv               = bitset_alloca(n_idx);
1723         bitset_t   *bs_vdesc            = bitset_alloca(n_idx);
1724         bitset_t   *bs_tmp              = bitset_alloca(n_idx);
1725         bitset_t   *bs_ukilldesc        = bitset_alloca(n_idx);
1726         int        best_benefit         = INT_MAX;
1727         int        best_omega2          = INT_MAX;
1728         int        best_benefit_omega20 = INT_MAX;
1729         int        has_omega1           = 0;
1730         int        j, k;
1731         ir_node    *irn;
1732         ir_nodeset_iterator_t iter;
1733         rss_edge_t min_benefit_edge = {NULL, NULL, NULL};
1734         rss_edge_t min_omega20_edge = {NULL, NULL, NULL};
1735         rss_irn_t  *ser_u_omega1 = NULL, *ser_v_omega1 = NULL;
1736         rss_irn_t  *ser_u_omega20 = NULL, *ser_v_omega20 = NULL;
1737
1738         DBG((rss->dbg, LEVEL_1, "\tcomputing admissible serializations:\n"));
1739
1740         /*
1741                 We need an explicit array for the values as
1742                 we cannot iterate multiple times over the same
1743                 set at the same time. :-(((((
1744         */
1745
1746         foreach_ir_nodeset(sat_vals, irn, iter) {
1747                 val_arr[i++] = irn;
1748                 bitset_set(bs_sv, BLOCK_IDX_MAP(rss, irn));
1749         }
1750
1751         /*
1752                 We build all admissible serializations and remember the best found so far.
1753                 For u in sat_vals:
1754                  For v in sat_val:
1755                    if v in pkiller(u): add edge from v to all other pkiller(u)
1756                    else: for all vv in pkiller(u): add edge from v to vv if there exists no path from vv to v
1757         */
1758
1759 /*
1760         A node is unserializable if:
1761         - it has only one killer and this one is Sink
1762     - it kills no other values
1763         In this case there is no serialization which could
1764         reduce the registerpressure
1765 */
1766 #define IS_UNSERIALIZABLE_NODE(rss_node)                  \
1767         (                                                     \
1768                 (                                                 \
1769                         (plist_count(rss_node->pkiller_list) == 1) && \
1770                         is_Sink(rss_node->killer)                  && \
1771                         (rss_node->kill_count                == 0)    \
1772                 )                            ||                   \
1773                 be_is_Barrier(rss_node->irn) ||                   \
1774                 be_is_Keep(rss_node->irn)                         \
1775         )
1776
1777         /* for all u in sat_vals */
1778         for (i = 0; i < n; ++i) {
1779                 rss_irn_t       *u = get_rss_irn(rss, val_arr[i]);
1780                 plist_element_t *el;
1781
1782                 /* ignore nodes where serialization does not help */
1783                 if (IS_UNSERIALIZABLE_NODE(u)) {
1784                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", u->irn));
1785                         continue;
1786                 }
1787
1788                 /* accumulate all descendants of all pkiller(u) */
1789                 bitset_clear_all(bs_ukilldesc);
1790                 foreach_plist(u->pkiller_list, el) {
1791                         ir_node   *irn  = plist_element_get_value(el);
1792                         rss_irn_t *node = get_rss_irn(rss, irn);
1793
1794                         if (! is_Sink(irn))
1795                                 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, irn));
1796                         else
1797                                 continue;
1798
1799                         for (k = ARR_LEN_SAFE(node->descendants) - 1; k >= 0; --k) {
1800                                 if (! is_Sink(node->descendants[k]))
1801                                         bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, node->descendants[k]));
1802                         }
1803                 }
1804
1805                 /* for all v in sat_vals */
1806                 for (j = 0; j < n; ++j) {
1807                         ir_node   *v_irn   = val_arr[j];
1808                         rss_irn_t *v       = get_rss_irn(rss, v_irn);
1809                         unsigned  v_height = get_irn_height(rss->h, v_irn);
1810                         int       omega1, omega2, is_pkiller;
1811
1812                         /* v cannot be serialized with itself
1813                          * ignore nodes where serialization does not help */
1814                         if (i == j || IS_UNSERIALIZABLE_NODE(v)) {
1815 #ifdef DEBUG_libfirm
1816                                 if (i != j)
1817                                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", v->irn));
1818 #endif
1819                                 continue;
1820                         }
1821
1822                         /* get descendants of v */
1823                         bitset_clear_all(bs_vdesc);
1824                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v_irn));
1825                         for (k = ARR_LEN_SAFE(v->descendants) - 1; k >= 0; --k) {
1826                                 if (! is_Sink(v->descendants[k]))
1827                                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v->descendants[k]));
1828                         }
1829
1830                         /* if v is in pkiller(u) */
1831                         is_pkiller = plist_has_value(u->pkiller_list, v_irn);
1832
1833                         /* for all vv in pkiller(u) */
1834                         foreach_plist(u->pkiller_list, el) {
1835                                 ir_node *vv_irn  = plist_element_get_value(el);
1836                                 int     add_edge;
1837
1838                                 if (is_Sink(vv_irn) || is_cfop(vv_irn))
1839                                         continue;
1840
1841                                 if (is_pkiller)
1842                                         add_edge = vv_irn != v_irn && skip_Proj(vv_irn) != skip_Proj(v_irn);
1843                                 else
1844                                         add_edge = ! heights_reachable_in_block(rss->h, skip_Proj(vv_irn), skip_Proj(v_irn));
1845
1846                                 /*
1847                                         As we add an edge from vv -> v, we have to make sure,
1848                                         that there exists no path from v to vv.
1849                                 */
1850
1851                                 if (add_edge) {
1852                                         unsigned vv_height = get_irn_height(rss->h, vv_irn);
1853                                         unsigned critical_path_cost;
1854                                         unsigned mu1, mu2;
1855
1856                                         /*
1857                                                 mu1 = | descendants(v) cut sat_vals |
1858                                                 the number of saturating values which cannot
1859                                                 be simultaneously alive with u
1860                                         */
1861                                         bitset_copy(bs_tmp, bs_vdesc);
1862                                         mu1 = bitset_popcnt(bitset_and(bs_tmp, bs_sv));
1863
1864                                         /*
1865                                                 mu2 = | accum_desc_all_pkiller(u) without descendants(v) |
1866                                         */
1867                                         if (is_pkiller) {
1868                                                 bitset_copy(bs_tmp, bs_ukilldesc);
1869                                                 mu2 = bitset_popcnt(bitset_andnot(bs_tmp, bs_vdesc));
1870                                         }
1871                                         else {
1872                                                 mu2 = 0;
1873                                         }
1874
1875                                         /* omega1 = mu1 - mu2 */
1876                                         omega1 = mu1 - mu2;
1877
1878                                         if (omega1 != 0)
1879                                                 has_omega1 = 1;
1880
1881                                         /* omega2 = increase of critical path */
1882                                         critical_path_cost =
1883                                                 v_height                        /* longest path from v to sink */
1884                                                 + rss->max_height - vv_height   /* longest path from source to vv */
1885                                                 + 1;                            /* edge */
1886
1887                                         /*
1888                                                 If critical_path_cost > max_height -> the new edge
1889                                                 would increase the longest critical path by the difference.
1890                                         */
1891                                         omega2 = critical_path_cost > rss->max_height ? critical_path_cost - rss->max_height : 0;
1892
1893                                         /* this keeps track of the edge with the best benefit */
1894                                         if (omega1 >= num_regs - n && omega1 < best_benefit) {
1895                                                 min_benefit_edge.src = v_irn;
1896                                                 min_benefit_edge.tgt = vv_irn;
1897
1898                                                 ser_u_omega1 = u;
1899                                                 ser_v_omega1 = v;
1900
1901                                                 best_benefit    = omega1;
1902                                                 ser->new_killer = is_pkiller;
1903                                         }
1904
1905                                         /* this keeps track of the edge with the best omega1 costs where omega2 == 0 */
1906                                         if (omega2 == 0 && omega1 >= num_regs - n && omega1 < best_benefit_omega20) {
1907                                                 min_omega20_edge.src = v_irn;
1908                                                 min_omega20_edge.tgt = vv_irn;
1909
1910                                                 ser_u_omega20 = u;
1911                                                 ser_v_omega20 = v;
1912
1913                                                 best_benefit_omega20 = omega1;
1914                                                 ser->new_killer      = is_pkiller;
1915                                         }
1916
1917                                         best_omega2 = MIN(best_omega2, omega2);
1918
1919                                         DBG((rss->dbg, LEVEL_2, "\t\tfound %+F -> %+F (w1 %d, w2 %d)\n",
1920                                                 v_irn, vv_irn, omega1, omega2));
1921                                 } /* if add_edge */
1922                         } /* for all vv in pkiller(u) */
1923                 } /* for all v in sat_vals */
1924         } /* for all u in sat_vals */
1925
1926         if (! has_omega1)
1927                 return NULL;
1928
1929         if (best_omega2 == 0) {
1930                 ser->u         = ser_u_omega20;
1931                 ser->v         = ser_v_omega20;
1932                 ser->edge->src = min_omega20_edge.src;
1933                 ser->edge->tgt = min_omega20_edge.tgt;
1934                 ser->omega1    = best_benefit_omega20;
1935                 ser->omega2    = best_omega2;
1936         }
1937         else {
1938                 ser->u         = ser_u_omega1;
1939                 ser->v         = ser_v_omega1;
1940                 ser->edge->src = min_benefit_edge.src;
1941                 ser->edge->tgt = min_benefit_edge.tgt;
1942                 ser->omega1    = best_benefit;
1943                 ser->omega2    = best_omega2;
1944         }
1945
1946         return ser;
1947
1948 #undef IS_UNSERIALIZABLE_NODE
1949 }
1950
1951 /**
1952  * Perform the value serialization heuristic and add all
1953  * computed serialization edges as dependencies to the irg.
1954  */
1955 static void perform_value_serialization_heuristic(rss_t *rss) {
1956         bitset_t *arch_nonign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1957         bitset_t *abi_ign_bs     = bitset_alloca(arch_register_class_n_regs(rss->cls));
1958         unsigned available_regs, iteration;
1959         dvg_t    dvg;
1960         ir_nodeset_t *sat_vals;
1961         pset *ser_set = new_pset(cmp_rss_edges, 20);
1962
1963         /* available_regs = R = |arch_non_ignore_regs cut ~abi_ignore_regs| */
1964         arch_put_non_ignore_regs(rss->arch_env, rss->cls, arch_nonign_bs);
1965         be_abi_put_ignore_regs(rss->abi, rss->cls, abi_ign_bs);
1966         bitset_andnot(arch_nonign_bs, abi_ign_bs);
1967         available_regs  = bitset_popcnt(arch_nonign_bs);
1968         //num_live = pset_count(rss->live_block);
1969         //available_regs -= num_live < available_regs ? num_live : 0;
1970
1971         DBG((rss->dbg, LEVEL_1, "\n\t#available regs: %d\n\n", available_regs));
1972
1973         /*
1974                 At first we need to compute the disjoint value DAG (DVG = {V, E_dv}).
1975                 V    = set of all nodes we are currently interested in
1976                 E_dv = there is an edge from u to v iff v is a descendant of killer(u), forall u, v in V
1977         */
1978         ir_nodeset_init_size(&dvg.nodes, plist_count(rss->nodes));
1979         dvg.edges = new_pset(cmp_rss_edges, plist_count(rss->nodes) * 5);
1980         compute_dvg(rss, &dvg);
1981
1982         /*
1983                 Then we perform the heuristic serialization algorithm
1984                 on the DVG which gives us all necessary serialization
1985                 edges.
1986         */
1987         DBG((rss->dbg, LEVEL_1, "\tcomputing maximal antichain:\n"));
1988         iteration = 0;
1989         sat_vals  = compute_maximal_antichain(rss, &dvg, iteration++);
1990         while (sat_vals && (ir_nodeset_size(sat_vals) > available_regs)) {
1991                 serialization_t *ser, best_ser;
1992                 rss_edge_t      *edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*edge));
1993                 ir_node         *dep_src, *dep_tgt;
1994
1995                 best_ser.edge = edge;
1996                 ser = compute_best_admissible_serialization(rss, sat_vals, &best_ser, available_regs);
1997
1998                 DBG((rss->dbg, LEVEL_1, "\tcurrent register saturation %d, target %d\n", ir_nodeset_size(sat_vals), available_regs));
1999
2000                 if (! ser) {
2001                         DBG((rss->dbg, LEVEL_1, "\tno RS improving serialization found, breaking at iteration %d\n", iteration));
2002                         break;
2003                 }
2004
2005                 /* Insert the serialization as dependency edge into the irg. */
2006                 DBG((rss->dbg, LEVEL_1, "\tserializing %+F -> %+F with edge %+F -> %+F and cost %d, %d\n",
2007                         ser->u->irn, ser->v->irn, ser->edge->src, ser->edge->tgt, ser->omega1, ser->omega2));
2008
2009                 if (pset_find(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge)))
2010                         ir_printf("WARNING: serialization %+F -> %+F computed twice!\n", ser->edge->src, ser->edge->tgt);
2011
2012
2013                 pset_insert(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge));
2014
2015                 /* update the dvg */
2016                 update_dvg(rss, &dvg, ser->v, ser->u);
2017                 update_dvg(rss, &dvg, get_rss_irn(rss, ser->edge->src), get_rss_irn(rss, ser->edge->tgt));
2018                 if(sat_vals != NULL) {
2019                         ir_nodeset_destroy(sat_vals);
2020                         free(sat_vals);
2021                 }
2022
2023                 dep_src = skip_Proj(ser->edge->src);
2024                 dep_tgt = ser->edge->tgt;
2025                 add_irn_dep(dep_src, dep_tgt);
2026
2027                 /* Update descendants, consumer and pkillers of the target */
2028                 update_node_info(rss, ser->edge->tgt, ser->edge->src);
2029
2030                 /* TODO: try to find a cheaper way for updating height information */
2031                 rss->max_height = heights_recompute_block(rss->h, rss->block);
2032
2033                 /* Recompute the antichain for next serialization */
2034                 DBG((rss->dbg, LEVEL_1, "\tre-computing maximal antichain:\n"));
2035                 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
2036         }
2037
2038         ir_nodeset_destroy(&dvg.nodes);
2039         del_pset(dvg.edges);
2040 }
2041
2042 /**
2043  * Do initial calculations for a block.
2044  */
2045 static void process_block(ir_node *block, void *env) {
2046         rss_t *rss = env;
2047         int   i, n;
2048         const ir_edge_t *edge;
2049
2050         phase_init(&rss->ph, "rss block preprocessor", rss->irg, PHASE_DEFAULT_GROWTH, init_rss_irn, NULL);
2051
2052         DBG((rss->dbg, LEVEL_1, "preprocessing block %+F\n", block));
2053         rss->block = block;
2054
2055         /* build an index map for all nodes in the current block */
2056         i            = 0;
2057         n            = get_irn_n_edges(block);
2058         NEW_ARR_A(int *, rss->idx_map, n);
2059         foreach_out_edge(block, edge) {
2060                 ir_node *irn      = get_edge_src_irn(edge);
2061                 rss->idx_map[i++] = get_irn_idx(irn);
2062         }
2063         qsort(rss->idx_map, n, sizeof(rss->idx_map[0]), cmp_int);
2064         rss->max_height = heights_recompute_block(rss->h, block);
2065
2066         /* loop over all register classes */
2067         for (i = arch_env_get_n_reg_class(rss->arch_env) - 1; i >= 0; --i) {
2068                 const arch_register_class_t *cls = arch_env_get_reg_class(rss->arch_env, i);
2069
2070                 rss->cls = cls;
2071                 DBG((rss->dbg, LEVEL_1, "register class %s\n", arch_register_class_name(cls)));
2072
2073                 /* Get all live value at end of Block having current register class */
2074                 ir_nodeset_init(&rss->live_block);
2075                 be_liveness_end_of_block(rss->liveness, rss->arch_env, rss->cls, rss->block, &rss->live_block);
2076
2077                 /* reset the list of interesting nodes */
2078                 plist_clear(rss->nodes);
2079                 plist_insert_back(rss->nodes, _sink);
2080
2081                 /* collect all nodes having a certain register class */
2082                 foreach_out_edge(block, edge) {
2083                         ir_node *irn  = get_edge_src_irn(edge);
2084                         ir_mode *mode = get_irn_mode(irn);
2085
2086                         /*
2087                                 We skip:
2088                                 - mode_T nodes (the projs are asked)
2089                                 - mode_X nodes (control flow nodes are always scheduled last)
2090                                 - Keeps (they are always immediately scheduled)
2091                                 - Phi (same as Keep)
2092                         */
2093                         if (mode == mode_T || mode == mode_X || is_Phi(irn))
2094                                 continue;
2095
2096                         /*
2097                                 In case of a proj, we skip
2098                                 - Barrier (they are a Barrier :)
2099                                 - Start
2100                                 - the Proj itself, as it's scheduled always with it's super node
2101                         */
2102                         if (is_Proj(irn)) {
2103                                 ir_node *pred = get_Proj_pred(irn);
2104                                 if (be_is_Barrier(pred) || is_Start(pred))
2105                                         continue;
2106                         }
2107
2108                         /* calculate the descendants and consumer for each node in the block */
2109                         collect_node_info(rss, skip_Proj(irn));
2110
2111                         if (be_is_Keep(irn))
2112                                 continue;
2113
2114                         if (! arch_irn_is(rss->arch_env, irn, ignore) && arch_get_irn_reg_class(rss->arch_env, irn, -1) == cls) {
2115                                 plist_insert_back(rss->nodes, skip_Proj(irn));
2116                         }
2117                         //}
2118                 }
2119
2120                 /* compute the potential killing set PK(G) */
2121                 compute_pkill_set(rss);
2122
2123                 /* compute the killing function k* */
2124                 compute_killing_function(rss);
2125
2126                 /*
2127                         Compute the heuristic value serialization and
2128                         add the necessary dependencies to the irg.
2129                 */
2130                 perform_value_serialization_heuristic(rss);
2131
2132                 ir_nodeset_destroy(&rss->live_block);
2133         }
2134
2135         phase_free(&rss->ph);
2136 }
2137
2138 /**
2139  * Register the options.
2140  */
2141 void be_init_schedrss(void) {
2142         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
2143         lc_opt_entry_t *sched_grp = lc_opt_get_grp(be_grp, "sched");
2144         lc_opt_entry_t *rss_grp = lc_opt_get_grp(sched_grp, "rss");
2145
2146         lc_opt_add_table(rss_grp, rss_option_table);
2147 }
2148
2149 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_schedrss);
2150
2151 /**
2152  * Preprocess the irg for scheduling.
2153  */
2154 void rss_schedule_preparation(be_irg_t *birg) {
2155         ir_graph *irg = be_get_birg_irg(birg);
2156         rss_t rss;
2157
2158         FIRM_DBG_REGISTER(rss.dbg, "firm.be.sched.rss");
2159
2160         //firm_dbg_set_mask(rss.dbg, LEVEL_1 | LEVEL_2 | LEVEL_3);
2161
2162         init_rss_special_nodes(irg);
2163
2164         rss.irg      = irg;
2165         rss.arch_env = be_get_birg_arch_env(birg);
2166         rss.abi      = birg->abi;
2167         rss.h        = heights_new(irg);
2168         rss.nodes    = plist_new();
2169         rss.opts     = &rss_options;
2170         rss.liveness = be_liveness(birg);
2171         be_liveness_assure_sets(rss.liveness);
2172         irg_block_walk_graph(irg, NULL, process_block, &rss);
2173         heights_free(rss.h);
2174         plist_free(rss.nodes);
2175         be_liveness_free(rss.liveness);
2176
2177         if (birg->main_env->options->dump_flags & DUMP_SCHED)
2178                 be_dump(rss.irg, "-rss", dump_ir_block_graph);
2179 }