amd64: small changes w.r.t. stack alignment.
[libfirm] / ir / be / beschedrss.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Implementation of a register saturating list scheduler.
23  * @author      Christian Wuerdig
24  * @date        29.08.2006
25  * @version     $Id$
26  *
27  * Implementation of a register saturating list scheduler
28  * as described in: Sid-Ahmed-Ali Touati
29  * Register Saturation in Superscalar and VLIW Codes
30  */
31 #include "config.h"
32
33 #include "beschedrss.h"
34
35 #include <limits.h>
36
37 #include "obst.h"
38 #include "debug.h"
39
40 #include "irgraph_t.h"
41 #include "irnode_t.h"
42 #include "iredges_t.h"
43 #include "ircons_t.h"
44 #include "irphase_t.h"
45 #include "irgwalk.h"
46 #include "irdump.h"
47 #include "irtools.h"
48 #include "irbitset.h"
49 #include "irprintf.h"
50 #include "irnodeset.h"
51 #include "bipartite.h"
52 #include "hungarian.h"
53 #include "plist.h"
54 #include "array_t.h"
55
56 #include "height.h"
57
58 #include "beabi.h"
59 #include "bemodule.h"
60 #include "benode.h"
61 #include "besched.h"
62 #include "beirg.h"
63 #include "belive.h"
64
65 #include "lc_opts.h"
66 #include "lc_opts_enum.h"
67
68
69 #define ARR_LEN_SAFE(arr) ((arr) != NULL ? ARR_LEN((arr)) : 0)
70
71 #define HASH_RSS_EDGE(edge) ((get_irn_node_nr((edge)->src) << 16) | (get_irn_node_nr((edge)->tgt) & 0xFFFF))
72 #define BSEARCH_IRN_ARR(val, arr) \
73         bsearch(&(val), (arr), ARR_LEN_SAFE((arr)), sizeof((arr)[0]), cmp_irn_idx)
74
75 #define BLOCK_IDX_MAP(rss, irn) bsearch_for_index(get_irn_idx((irn)), (rss)->idx_map, ARR_LEN_SAFE((rss)->idx_map), 1)
76
77 /* the rss options */
78 typedef struct _rss_opts_t {
79         int dump_flags;
80 } rss_opts_t;
81
82 /* Represents a child with associated costs */
83 typedef struct _child {
84         ir_node *irn;
85         float   cost;
86 } child_t;
87
88 /* We need edges for several purposes. */
89 typedef struct _rss_edge {
90         ir_node *src;
91         ir_node *tgt;
92         void    *next;
93 } rss_edge_t;
94
95 /* Represents a connected bipartite component. */
96 typedef struct _cbc {
97         ir_nodeset_t parents;       /**< = S  a set of value producers */
98         ir_nodeset_t children;      /**< = T  a set of value consumers */
99         pset    *kill_edges;    /**< = E  a set of edges (t in T, s in S) such as each s in S gets killed by at least one t in T */
100         int     nr;             /**< a deterministic index for set insertion (used as hash) */
101 } cbc_t;
102
103 /* Represents a disjoint value DAG. */
104 typedef struct _dvg {
105         ir_nodeset_t nodes;
106         pset    *edges;
107 } dvg_t;
108
109 /* Represents a chain of nodes. */
110 typedef struct _chain {
111         plist_t *elements;   /**< List of chain elements */
112         int     nr;          /**< a deterministic index for set insertion (used as hash) */
113 } chain_t;
114
115 typedef struct _rss_irn {
116         plist_t  *consumer_list;    /**< List of consumers */
117         const ir_node **consumer;   /**< Sorted consumer array (needed for faster access) */
118
119         plist_t  *parent_list;      /**< List of parents */
120         plist_t  *pkiller_list;     /**< List of potential killers */
121
122         plist_t  *descendant_list;  /**< List of descendants */
123         const ir_node **descendants;      /**< Sorted descendant array (needed for faster access) */
124
125         const ir_node  *killer;     /**< The selected unique killer */
126         const ir_node  *irn;        /**< The corresponding firm node to this rss_irn */
127
128         chain_t  *chain;            /**< The chain, this node is associated to */
129
130         unsigned desc_walk;         /**< visited flag for collecting descendants */
131         unsigned kill_count;        /**< number of nodes killed by this one */
132
133         unsigned live_out : 1;      /**< irn has consumers outside of it's block */
134         unsigned visited  : 1;      /**< visited flag for bipartite decomposition */
135         unsigned havecons : 1;      /**< visited flag collect consumer */
136         unsigned handled  : 1;      /**< flag indicating whether or not the list structures have been build */
137         unsigned dumped   : 1;      /**< flag indication whether or not this node was dumped */
138 } rss_irn_t;
139
140 /* Represents a serialization edge with associated costs. */
141 typedef struct _serialization {
142         rss_irn_t  *u;       /* the top node */
143         rss_irn_t  *v;       /* the node about to be serialized after u */
144         rss_edge_t *edge;    /* the edge selected for the serialization */
145         int        omega1;   /* estimated: available regs - RS reduction */
146         int        omega2;   /* increase in critical path length */
147         int        new_killer;
148 } serialization_t;
149
150 typedef struct _rss {
151         ir_phase          ph;              /**< Phase to hold some data */
152         heights_t        *h;              /**< The current height object */
153         ir_graph         *irg;            /**< The irg to preprocess */
154         plist_t          *nodes;          /**< The list of interesting nodes */
155         const arch_env_t *arch_env;       /**< The architecture environment */
156         be_abi_irg_t     *abi;            /**< The abi for this irg */
157         pset             *cbc_set;        /**< A set of connected bipartite components */
158         ir_node          *block;          /**< The current block in progress. */
159         int              *idx_map;        /**< Mapping irn indices to per block indices */
160         unsigned         max_height;      /**< maximum height in the current block */
161         rss_opts_t       *opts;           /**< The options */
162         be_lv_t          *liveness;       /**< The liveness information for this irg */
163         ir_nodeset_t      live_block;     /**< Values alive at end of block */
164         const arch_register_class_t *cls; /**< The current register class */
165         DEBUG_ONLY(firm_dbg_module_t *dbg);
166 } rss_t;
167
168 #define get_rss_irn(rss, irn)  (phase_get_or_set_irn_data(&rss->ph, irn))
169
170 /**
171  * We need some special nodes:
172  * a source and a sink for all live-in and live-out values of a block
173  */
174 enum {
175         iro_rss_Source,
176         iro_rss_Sink,
177         iro_rss_last
178 };
179
180 /** The opcode of the rss_Source node once allocated. */
181 static ir_op *op_rss_Source;
182 /** The opcode of the rss_Sink node once allocated. */
183 static ir_op *op_rss_Sink;
184
185 /** The rss_Source node of the current graph. */
186 static ir_node *_source = NULL;
187 /** The rss_Sink node of the current graph. */
188 static ir_node *_sink   = NULL;
189
190 #define is_Source(irn) ((irn) == _source)
191 #define is_Sink(irn)   ((irn) == _sink)
192
193 enum {
194         RSS_DUMP_NONE  = 0,
195         RSS_DUMP_CBC   = 1 << 0,
196         RSS_DUMP_PKG   = 1 << 1,
197         RSS_DUMP_KILL  = 1 << 2,
198         RSS_DUMP_DVG   = 1 << 3,
199         RSS_DUMP_MAXAC = 1 << 4,
200         RSS_DUMP_ALL   = (RSS_DUMP_MAXAC << 1) - 1,
201 };
202
203 static rss_opts_t rss_options = {
204         RSS_DUMP_NONE,
205 };
206
207 static const lc_opt_enum_int_items_t dump_items[] = {
208         { "none",  RSS_DUMP_NONE  },
209         { "cbc",   RSS_DUMP_CBC   },
210         { "pkg",   RSS_DUMP_PKG   },
211         { "kill",  RSS_DUMP_KILL  },
212         { "dvg",   RSS_DUMP_DVG   },
213         { "maxac", RSS_DUMP_MAXAC },
214         { "all",   RSS_DUMP_ALL   },
215         { NULL, 0 }
216 };
217
218 static lc_opt_enum_int_var_t dump_var = {
219         &rss_options.dump_flags, dump_items
220 };
221
222 static const lc_opt_table_entry_t rss_option_table[] = {
223         LC_OPT_ENT_ENUM_MASK("dump", "dump phases", &dump_var),
224         LC_OPT_LAST
225 };
226
227 /******************************************************************************
228  *  _          _                    __                  _   _
229  * | |        | |                  / _|                | | (_)
230  * | |__   ___| |_ __   ___ _ __  | |_ _   _ _ __   ___| |_ _  ___  _ __  ___
231  * | '_ \ / _ \ | '_ \ / _ \ '__| |  _| | | | '_ \ / __| __| |/ _ \| '_ \/ __|
232  * | | | |  __/ | |_) |  __/ |    | | | |_| | | | | (__| |_| | (_) | | | \__ \
233  * |_| |_|\___|_| .__/ \___|_|    |_|  \__,_|_| |_|\___|\__|_|\___/|_| |_|___/
234  *            | |
235  *            |_|
236  ******************************************************************************/
237
238 /**
239  * Acquire opcodes if needed and create source and sink nodes.
240  */
241 static void init_rss_special_nodes(ir_graph *irg)
242 {
243         ir_node *block;
244
245         if (op_rss_Source == NULL) {
246                 int iro_rss_base = get_next_ir_opcodes(iro_rss_last);
247                 op_rss_Source = new_ir_op(iro_rss_base + iro_rss_Source, "rss_Source", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
248                 op_rss_Sink   = new_ir_op(iro_rss_base + iro_rss_Sink,   "rss_Sink",   op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
249         }
250         block   = get_irg_start_block(irg);
251         _source = new_ir_node(NULL, irg, block, op_rss_Source, mode_ANY, 0, NULL);
252         _sink   = new_ir_node(NULL, irg, block, op_rss_Sink, mode_ANY, 0, NULL);
253 }
254
255 static int cmp_int(const void *a, const void *b)
256 {
257         const int *i1 = a;
258         const int *i2 = b;
259
260         return QSORT_CMP(*i1, *i2);
261 }
262
263 static int cmp_child_costs(const void *a, const void *b)
264 {
265         const child_t *c1 = a;
266         const child_t *c2 = b;
267
268         return QSORT_CMP(c1->cost, c2->cost);
269 }
270
271 static int cmp_irn_idx(const void *a, const void *b)
272 {
273         const ir_node *n1 = *(ir_node **)a;
274         const ir_node *n2 = *(ir_node **)b;
275
276         return QSORT_CMP(get_irn_idx(n1), get_irn_idx(n2));
277 }
278
279 static int cmp_rss_edges(const void *a, const void *b)
280 {
281         const rss_edge_t *e1 = a;
282         const rss_edge_t *e2 = b;
283
284         return (e1->src != e2->src) || (e1->tgt != e2->tgt);
285 }
286
287 static int bsearch_for_index(int key, int *arr, size_t len, int force)
288 {
289         int left = 0;
290         int right = len;
291
292         while (right >= left) {
293                 int idx = (left + right) / 2;
294
295                 if (key < arr[idx])
296                         right = idx - 1;
297                 else if (key > arr[idx])
298                         left = idx + 1;
299                 else
300                         return idx;
301         }
302
303         if (force)
304                 assert(0 && "Something is wrong, key not found.");
305         return -1;
306 }
307
308 static const ir_node **build_sorted_array_from_list(plist_t *irn_list, struct obstack *obst)
309 {
310         plist_element_t *el;
311         int             i   = 0;
312         int             len = plist_count(irn_list);
313         const ir_node   **arr = (const ir_node**)NEW_ARR_D(ir_node*, obst, len);
314
315         /* copy the list into the array */
316         foreach_plist(irn_list, el) {
317                 arr[i++] = plist_element_get_value(el);
318         }
319
320         /* sort the array by node index */
321         /* HACK cast for MSVC */
322         qsort((void*)arr, len, sizeof(arr[0]), cmp_irn_idx);
323
324         return arr;
325 }
326
327 /*****************************************************
328  *      _      _                       _
329  *     | |    | |                     (_)
330  *   __| | ___| |__  _   _  __ _  __ _ _ _ __   __ _
331  *  / _` |/ _ \ '_ \| | | |/ _` |/ _` | | '_ \ / _` |
332  * | (_| |  __/ |_) | |_| | (_| | (_| | | | | | (_| |
333  *  \__,_|\___|_.__/ \__,_|\__, |\__, |_|_| |_|\__, |
334  *                          __/ | __/ |         __/ |
335  *                         |___/ |___/         |___/
336  *
337  *****************************************************/
338
339 #ifdef DEBUG_libfirm
340 static void dump_nodeset(ir_nodeset_t *ns, const char *prefix)
341 {
342         ir_nodeset_iterator_t iter;
343         ir_node *irn;
344
345         ir_nodeset_iterator_init(&iter, ns);
346         while ( (irn = ir_nodeset_iterator_next(&iter)) != NULL ) {
347                 ir_fprintf(stderr, "%s%+F\n", prefix, irn);
348         }
349 }
350 #endif
351
352 static void build_file_name(rss_t *rss, const char *suffix, size_t suf_len, char *buf, size_t len)
353 {
354         const char *irg_name;
355
356         memset(buf, 0, len);
357         irg_name = get_entity_name(get_irg_entity(rss->irg));
358         snprintf(buf, len - suf_len, "%s-%s-block-%ld",
359                 irg_name, arch_register_class_name(rss->cls), get_irn_node_nr(rss->block));
360         strcat(buf, suffix);
361 }
362
363 /* Dumps all collected bipartite components of current irg as vcg. */
364 static void debug_vcg_dump_bipartite(rss_t *rss)
365 {
366         cbc_t *cbc;
367         FILE  *f;
368         char  file_name[256];
369         static const char suffix[] = "-RSS-CBC.vcg";
370
371         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
372         f = fopen(file_name, "w");
373
374         ir_fprintf(f, "graph: { title: \"connected bipartite component graph of %+F\"\n", rss->irg);
375         fprintf(f, "display_edge_labels: no\n");
376         fprintf(f, "layoutalgorithm: mindepth\n");
377         fprintf(f, "manhattan_edges: yes\n\n");
378
379         foreach_pset(rss->cbc_set, cbc) {
380                 ir_nodeset_iterator_t iter;
381                 ir_node    *n;
382                 rss_edge_t *ke;
383
384                 fprintf(f, "graph: { titel: \"cbc %d\" label: \"cbc %d\" status:clustered color:yellow\n", cbc->nr, cbc->nr);
385                 foreach_ir_nodeset(&cbc->parents, n, iter) {
386                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
387                 }
388                 foreach_ir_nodeset(&cbc->children, n, iter) {
389                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
390                 }
391                 foreach_pset(cbc->kill_edges, ke) {
392                         ir_fprintf(f, "edge: { sourcename: \"n%d_%d\" targetname: \"n%d_%d\" }\n",
393                                 get_irn_node_nr(ke->src), cbc->nr, get_irn_node_nr(ke->tgt), cbc->nr);
394                 }
395                 fprintf(f, "}\n\n");
396         }
397         fprintf(f, "}\n");
398         fclose(f);
399 }
400
401 /* Dump the computed killing function as vcg. */
402 static void debug_vcg_dump_kill(rss_t *rss)
403 {
404         FILE            *f;
405         char            file_name[256];
406         plist_element_t *el;
407         static const char suffix[] = "-RSS-KILL.vcg";
408
409         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
410         f = fopen(file_name, "w");
411
412         ir_fprintf(f, "graph: { title: \"computed kill graph of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
413         fprintf(f, "display_edge_labels: no\n");
414         fprintf(f, "layoutalgorithm: mindepth\n");
415         fprintf(f, "manhattan_edges: yes\n\n");
416
417         {
418                 /* reset dump_flag */
419                 ir_node *irn;
420                 foreach_phase_irn(&rss->ph, irn) {
421                         rss_irn_t *node = get_rss_irn(rss, irn);
422                         node->dumped = 0;
423                 }
424         }
425
426         /* dump all nodes and their killers */
427         foreach_plist(rss->nodes, el) {
428                 ir_node   *irn     = plist_element_get_value(el);
429                 rss_irn_t *rirn    = get_rss_irn(rss, irn);
430                 rss_irn_t *pk_rirn = get_rss_irn(rss, rirn->killer);
431
432                 if (! rirn->dumped) {
433                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
434                         rirn->dumped = 1;
435                 }
436
437                 if (! pk_rirn->dumped) {
438                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(rirn->killer), rirn->killer);
439                         pk_rirn->dumped = 1;
440                 }
441
442                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
443                         get_irn_node_nr(rirn->killer), get_irn_node_nr(irn));
444         }
445
446         fprintf(f, "}\n");
447         fclose(f);
448 }
449
450 /* Dumps the potential killing DAG (PKG) as vcg. */
451 static void debug_vcg_dump_pkg(rss_t *rss, ir_nodeset_t *max_ac, int iteration)
452 {
453         FILE              *f;
454         char              file_name[256];
455         char              suffix[32];
456         static const char suffix1[] = "-RSS-PKG.vcg";
457         static const char suffix2[] = "-RSS-PKG-MAXAC.vcg";
458         plist_element_t   *el;
459
460         if (! max_ac) {
461                 snprintf(suffix, sizeof(suffix), "%s", suffix1);
462         }
463         else {
464                 snprintf(suffix, sizeof(suffix), "-%02d%s", iteration, suffix2);
465         }
466
467         build_file_name(rss, suffix, strlen(suffix) + 1, file_name, sizeof(file_name));
468         f = fopen(file_name, "w");
469
470         ir_fprintf(f, "graph: { title: \"potential killing DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
471         fprintf(f, "display_edge_labels: no\n");
472         fprintf(f, "layoutalgorithm: mindepth\n");
473         fprintf(f, "manhattan_edges: yes\n\n");
474
475         {
476                 /* reset dump_flag */
477                 ir_node *irn;
478                 foreach_phase_irn(&rss->ph, irn) {
479                         rss_irn_t *node = get_rss_irn(rss, irn);
480                         node->dumped = 0;
481                 }
482         }
483
484         foreach_plist(rss->nodes, el) {
485                 ir_node   *irn  = plist_element_get_value(el);
486                 rss_irn_t *rirn = get_rss_irn(rss, irn);
487                 char      *c1   = "";
488                 plist_element_t *k_el;
489
490                 /* dump selected saturating values in yellow */
491                 if (max_ac && ir_nodeset_contains(max_ac, irn))
492                         c1 = "color:yellow";
493
494                 if (! rirn->dumped) {
495                         if (rirn->chain)
496                                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(irn), irn, rirn->chain->nr, c1);
497                         else
498                                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(irn), irn, c1);
499                         rirn->dumped = 1;
500                 }
501
502                 foreach_plist(rirn->pkiller_list, k_el) {
503                         ir_node   *pkiller = plist_element_get_value(k_el);
504                         rss_irn_t *pk_rirn = get_rss_irn(rss, pkiller);
505                         char      *c2      = "";
506
507                         if (max_ac && ir_nodeset_contains(max_ac, pkiller))
508                                 c2 = "color:yellow";
509
510                         if (! pk_rirn->dumped) {
511                                 if (pk_rirn->chain)
512                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(pkiller), pkiller, pk_rirn->chain->nr, c2);
513                                 else
514                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(pkiller), pkiller, c2);
515                                 pk_rirn->dumped = 1;
516                         }
517                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
518                                 get_irn_node_nr(pkiller), get_irn_node_nr(irn));
519                 }
520         }
521         fprintf(f, "}\n");
522         fclose(f);
523 }
524
525 /* Dumps the disjoint value DAG (DVG) as vcg. */
526 static void debug_vcg_dump_dvg(rss_t *rss, dvg_t *dvg)
527 {
528         static const char suffix[] = "-RSS-DVG.vcg";
529         FILE       *f;
530         char       file_name[256];
531         ir_nodeset_iterator_t iter;
532         ir_node    *irn;
533         rss_edge_t *edge;
534
535         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
536         f = fopen(file_name, "w");
537
538         ir_fprintf(f, "graph: { title: \"disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
539         fprintf(f, "display_edge_labels: no\n");
540         fprintf(f, "layoutalgorithm: mindepth\n");
541         fprintf(f, "manhattan_edges: yes\n\n");
542
543         /* dump all nodes */
544         foreach_ir_nodeset(&dvg->nodes, irn, iter) {
545                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
546         }
547
548         /* dump all edges */
549         foreach_pset(dvg->edges, edge) {
550                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
551                         get_irn_node_nr(edge->src), get_irn_node_nr(edge->tgt));
552         }
553
554         fprintf(f, "}\n");
555         fclose(f);
556 }
557
558 #if 0
559 /* Dumps the PKG(DVG). */
560 static void debug_vcg_dump_dvg_pkiller(rss_t *rss, dvg_t *dvg)
561 {
562         static const char suffix[] = "-RSS-DVG-PKG.vcg";
563         FILE       *f;
564         char       file_name[256];
565         ir_nodeset_iterator_t iter;
566         ir_node    *irn;
567
568         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
569         f = fopen(file_name, "w");
570
571         ir_fprintf(f, "graph: { title: \"PKG of disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
572         fprintf(f, "display_edge_labels: no\n");
573         fprintf(f, "layoutalgorithm: mindepth\n");
574         fprintf(f, "manhattan_edges: yes\n\n");
575
576         /* dump all nodes */
577         foreach_ir_nodeset(&dvg->nodes, irn, iter) {
578                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
579         }
580
581         /* dump all edges */
582         foreach_ir_nodeset(&dvg->nodes, irn, iter) {
583                 rss_irn_t       *node = get_rss_irn(rss, irn);
584                 plist_element_t *el;
585
586                 foreach_plist(node->dvg_pkiller_list, el) {
587                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
588                                 get_irn_node_nr(plist_element_get_value(el)), get_irn_node_nr(irn));
589                 }
590         }
591
592         fprintf(f, "}\n");
593         fclose(f);
594 }
595 #endif /* if 0 */
596
597 /**
598  * In case there is no rss information for irn, initialize it.
599  */
600 static void *init_rss_irn(ir_phase *ph, const ir_node *irn, void *old)
601 {
602         rss_irn_t *res = old ? old : phase_alloc(ph, sizeof(res[0]));
603
604         res->descendant_list = plist_obstack_new(phase_obst(ph));
605         res->descendants     = NULL;
606
607         res->consumer_list   = plist_obstack_new(phase_obst(ph));
608         res->consumer        = NULL;
609
610         res->pkiller_list    = plist_obstack_new(phase_obst(ph));
611
612         res->parent_list     = plist_obstack_new(phase_obst(ph));
613
614         res->killer          = NULL;
615         res->irn             = irn;
616         res->chain           = NULL;
617
618         res->kill_count      = 0;
619         res->desc_walk       = 0;
620         res->live_out        = 0;
621         res->visited         = 0;
622         res->handled         = 0;
623         res->dumped          = 0;
624         res->havecons        = 0;
625
626         return res;
627 }
628
629 /**
630  * Collect all nodes data dependent on current node.
631  */
632 static void collect_descendants(rss_t *rss, rss_irn_t *rirn, ir_node *irn, int *got_sink, unsigned cur_desc_walk)
633 {
634         const ir_edge_t *edge;
635         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
636         ir_node         *block    = rss->block;
637         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
638         int             i;
639
640         if (cur_node->desc_walk >= cur_desc_walk)
641                 return;
642         cur_node->desc_walk = cur_desc_walk;
643
644         /* Stop at Barriers */
645         if (be_is_Barrier(irn))
646                 return;
647
648         /* loop over normal and dependency edges */
649         for (i = 0; i < 2; ++i) {
650                 foreach_out_edge_kind(irn, edge, ekind[i]) {
651                         ir_node *user = get_edge_src_irn(edge);
652
653                         /* skip ignore nodes as they do not really contribute to register pressure */
654                         if (arch_irn_is_ignore(user))
655                                 continue;
656
657                         /*
658                                 (a) mode_X means Jump            -> out_edge
659                                 (b) Phi as user of a normal node -> out-edge
660                         */
661                         if (get_irn_mode(user) == mode_X || is_Phi(user)) {
662                                 if (! *got_sink)
663                                         goto force_sink;
664                                 else
665                                         continue;
666                         }
667
668                         if (is_Proj(user)) {
669                                 //if (arch_get_irn_reg_class_out(user) == rss->cls)
670                                         collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
671                         }
672                         else {
673                                 /* check if user lives in block and is not a control flow node */
674                                 if (get_nodes_block(user) == block) {
675                                         if (! plist_has_value(rirn->descendant_list, user)) {
676                                                 plist_insert_back(rirn->descendant_list, user);
677                                                 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", user));
678                                         }
679                                         collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
680                                 }
681                                 else if (! *got_sink) {
682 force_sink:
683                                         /* user lives out of block: add sink as descendant if not already done */
684                                         plist_insert_back(rirn->descendant_list, _sink);
685                                         *got_sink = 1;
686                                         DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", _sink));
687                                 }
688                         }
689                 }
690         }
691 }
692
693 /**
694  * Handles a single consumer.
695  */
696 static void collect_single_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *consumer, int *got_sink)
697 {
698         ir_node *block = rss->block;
699
700         assert(! is_Proj(consumer) && "Cannot handle Projs");
701
702         if (! is_Phi(consumer) && ! is_Block(consumer) && get_nodes_block(consumer) == block) {
703                 if (!arch_irn_is_ignore(consumer) &&
704                                 !plist_has_value(rss_irn->consumer_list, consumer)) {
705                         plist_insert_back(rss_irn->consumer_list, consumer);
706                         DBG((rss->dbg, LEVEL_2, "\t\tconsumer %+F\n", consumer));
707                 }
708         }
709         else {
710                 rss_irn->live_out = 1;
711                 DBG((rss->dbg, LEVEL_2, "\t\tlive out %+F", consumer));
712                 if (! *got_sink) {
713                         plist_insert_back(rss_irn->consumer_list, _sink);
714                         *got_sink = 1;
715                         DB((rss->dbg, LEVEL_2, ", %+F added instead", _sink));
716                 }
717                 DB((rss->dbg, LEVEL_2, "\n"));
718         }
719 }
720
721 /**
722  * Collect all nodes consuming the value(s) produced by current node.
723  */
724 static void collect_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *irn, int *got_sink)
725 {
726         const ir_edge_t *edge;
727         int             i;
728         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
729         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
730
731         if (cur_node->havecons)
732                 return;
733         cur_node->havecons = 1;
734
735         for (i = 0; i < 2; ++i) {
736                 foreach_out_edge_kind(irn, edge, ekind[i]) {
737                         ir_node *consumer = get_edge_src_irn(edge);
738
739                         if (is_Proj(consumer)) {
740                                 //if (arch_get_irn_reg_class_out(consumer) == rss->cls)
741                                         collect_consumer(rss, rss_irn, consumer, got_sink);
742                         }
743                         else
744                                 collect_single_consumer(rss, rss_irn, consumer, got_sink);
745                 }
746         }
747 }
748
749 /**
750  * Collects all consumer and descendant of a irn.
751  */
752 static void collect_node_info(rss_t *rss, ir_node *irn)
753 {
754         static unsigned cur_desc_walk = 0;
755         rss_irn_t       *rss_irn      = get_rss_irn(rss, irn);
756         int             got_sink;
757
758         assert(! is_Proj(irn) && "Cannot handle Projs.");
759
760         /* check if node info is already available */
761         if (rss_irn->handled)
762                 return;
763                 //phase_reinit_single_irn_data(&rss->ph, irn);
764
765         DBG((rss->dbg, LEVEL_1, "\tcomputing consumers of %+F:\n", irn));
766
767         /* collect all consumer */
768         got_sink = 0;
769         collect_consumer(rss, rss_irn, irn, &got_sink);
770
771         /* build sorted consumer array */
772         rss_irn->consumer = build_sorted_array_from_list(rss_irn->consumer_list, phase_obst(&rss->ph));
773
774         DBG((rss->dbg, LEVEL_1, "\tcompute descendants of %+F:\n", irn));
775
776         /* collect descendants */
777         got_sink = 0;
778         collect_descendants(rss, rss_irn, irn, &got_sink, ++cur_desc_walk);
779
780         /* build sorted descendant array */
781         rss_irn->descendants = build_sorted_array_from_list(rss_irn->descendant_list, phase_obst(&rss->ph));
782
783         rss_irn->handled = 1;
784 }
785
786 /**
787  * Checks if v is a potential killer of u.
788  * v is in pkill(u) iff descendants(v) cut consumer(u) is v
789  *
790  * @param rss   The rss object
791  * @param v      The node to check for killer
792  * @param u      The potentially killed value
793  * @return 1 if v is in pkill(u), 0 otherwise
794  */
795 static int is_potential_killer(rss_t *rss, rss_irn_t *v, rss_irn_t *u)
796 {
797         plist_t *list;
798         const ir_node **arr;
799         plist_element_t *el;
800         (void) rss;
801
802         /* as we loop over the list: loop over the shorter one */
803         if (plist_count(v->descendant_list) > plist_count(u->consumer_list)) {
804                 list = u->consumer_list;
805                 arr  = v->descendants;
806         }
807         else {
808                 list = v->descendant_list;
809                 arr  = u->consumer;
810         }
811
812         /* for each list element: try to find element in array */
813         foreach_plist(list, el) {
814                 ir_node *irn   = plist_element_get_value(el);
815                 ir_node *match = BSEARCH_IRN_ARR(irn, arr);
816
817                 if (match && match != irn)
818                         return 0;
819         }
820
821         return 1;
822 }
823
824 /**
825  * Update descendants, consumer and pkiller list for the given irn.
826  */
827 static void update_node_info(rss_t *rss, ir_node *irn, ir_node *pk_irn)
828 {
829         rss_irn_t *node    = get_rss_irn(rss, irn);
830         rss_irn_t *pkiller = get_rss_irn(rss, pk_irn);
831
832         /* add consumer and rebuild the consumer array */
833         if (! plist_has_value(node->consumer_list, pk_irn)) {
834                 plist_insert_back(node->consumer_list, pk_irn);
835                 node->consumer = build_sorted_array_from_list(node->consumer_list, phase_obst(&rss->ph));
836         }
837
838         /* add pk_irn as descendant, add it's descendants to irn's and rebuild array */
839         if (! plist_has_value(node->descendant_list, pk_irn)) {
840                 plist_element_t *el;
841
842                 plist_insert_back(node->descendant_list, pk_irn);
843
844                 foreach_plist(pkiller->descendant_list, el) {
845                         ir_node *desc = plist_element_get_value(el);
846
847                         if (! plist_has_value(node->descendant_list, desc))
848                                 plist_insert_back(node->descendant_list, desc);
849                 }
850
851                 node->descendants = build_sorted_array_from_list(node->descendant_list, phase_obst(&rss->ph));
852         }
853
854 }
855
856 /**
857  * Compute the potential killing set PK.
858  */
859 static void compute_pkill_set(rss_t *rss)
860 {
861         plist_element_t *u_el, *v_el;
862
863         foreach_plist(rss->nodes, u_el) {
864                 ir_node   *u_irn = plist_element_get_value(u_el);
865                 rss_irn_t *u     = get_rss_irn(rss, u_irn);
866
867                 DBG((rss->dbg, LEVEL_1, "\tcomputing potential killers of %+F:\n", u_irn));
868
869                 /* check each consumer if it is a potential killer  */
870                 foreach_plist(u->consumer_list, v_el) {
871                         ir_node   *v_irn = plist_element_get_value(v_el);
872                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
873
874                         /* check, if v is a potential killer of u */
875                         if (is_potential_killer(rss, v, u)) {
876                                 if (! plist_has_value(u->pkiller_list, v_irn))
877                                         plist_insert_back(u->pkiller_list, v_irn);
878                                 v->kill_count++;
879                                 DBG((rss->dbg, LEVEL_2, "\t\tpotential killer %+F\n", v_irn));
880                         }
881                 }
882
883                 u->killer = _sink;
884         }
885
886         if (rss->opts->dump_flags & RSS_DUMP_PKG)
887                 debug_vcg_dump_pkg(rss, NULL, 0);
888 }
889
890 /**
891  * Build set of killing edges (from values to their potential killers)
892  */
893 static void build_kill_edges(rss_t *rss, pset *epk)
894 {
895         plist_element_t *el, *k_el;
896
897         foreach_plist(rss->nodes, el) {
898                 ir_node    *irn  = plist_element_get_value(el);
899                 rss_irn_t *rirn = get_rss_irn(rss, irn);
900
901                 DBG((rss->dbg, LEVEL_3, "\t\tbuilding kill edges for %+F:\n", irn));
902
903                 foreach_plist(rirn->pkiller_list, k_el) {
904                         ir_node    *pkiller = plist_element_get_value(k_el);
905                         rss_edge_t *ke      = OALLOC(phase_obst(&rss->ph), rss_edge_t);
906
907                         ke->src  = irn;
908                         ke->tgt  = pkiller;
909                         ke->next = NULL;
910
911                         DBG((rss->dbg, LEVEL_3, "\t\t\ttarget %+F\n", pkiller));
912
913                         pset_insert(epk, ke, HASH_RSS_EDGE(ke));
914                 }
915         }
916 }
917
918 #ifdef DEBUG_libfirm
919 /* print the given cbc for debugging purpose */
920 static void debug_print_cbc(firm_dbg_module_t *mod, cbc_t *cbc)
921 {
922         ir_nodeset_iterator_t iter;
923         ir_node    *n;
924         rss_edge_t *ke;
925
926         DBG((mod, LEVEL_3, "\t\tS = set of parents:\n"));
927         foreach_ir_nodeset(&cbc->parents, n, iter) {
928                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
929         }
930         DBG((mod, LEVEL_3, "\t\tT = set of children:\n"));
931         foreach_ir_nodeset(&cbc->children, n, iter) {
932                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
933         }
934         DBG((mod, LEVEL_3, "\t\tE = Edges from producers to consumers\n"));
935         foreach_pset(cbc->kill_edges, ke) {
936                 DBG((mod, LEVEL_3, "\t\t\t%+F -> %+F\n", ke->src, ke->tgt));
937         }
938 }
939 #endif
940
941 /**
942  * Construct the bipartite decomposition.
943  * Sid-Ahmed-Ali Touati, Phd Thesis
944  * Register Pressure in Instruction Level Parallelism, p. 71
945  */
946 static void compute_bipartite_decomposition(rss_t *rss)
947 {
948         pset *epk    = new_pset(cmp_rss_edges, 10);
949         int  cur_num = 0;
950
951         plist_element_t *el;
952
953         DBG((rss->dbg, LEVEL_1, "\tcomputing bipartite decomposition:\n"));
954
955         build_kill_edges(rss, epk);
956
957         foreach_plist(rss->nodes, el) {
958                 ir_node   *u_irn   = plist_element_get_value(el);
959                 rss_irn_t *u       = get_rss_irn(rss, u_irn);
960                 int       p_change = 1;
961                 int       c_change = 1;
962                 int       vrfy_ok  = 1;
963
964                 cbc_t           *cbc;
965                 plist_element_t *el2;
966                 rss_edge_t      *k_edge;
967                 rss_edge_t      *kedge_root = NULL;
968                 ir_node         *t_irn, *s_irn;
969                 ir_nodeset_iterator_t iter;
970
971                 if (u->visited || u_irn == _sink)
972                         continue;
973
974                 DBG((rss->dbg, LEVEL_2, "\t\t%+F choosen:\n", u_irn));
975
976                 cbc     = OALLOC(phase_obst(&rss->ph), cbc_t);
977                 cbc->nr = cur_num++;
978
979                 /* initialize S_cb */
980                 ir_nodeset_init_size(&cbc->parents, 5);
981                 ir_nodeset_insert(&cbc->parents, u_irn);
982                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (init)\n", u_irn));
983
984                 /* E_cb = empty */
985                 cbc->kill_edges = new_pset(cmp_rss_edges, 5);
986
987                 /* each parent gets killed by at least one value from children */
988
989                 /* T_cb = PK_successors(u) */
990                 ir_nodeset_init_size(&cbc->children, 5);
991                 foreach_plist(u->pkiller_list, el2) {
992                         ir_nodeset_insert(&cbc->children, plist_element_get_value(el2));
993                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (init)\n", plist_element_get_value(el2)));
994                 }
995
996                 /*
997                         Now: insert the parents of all children into the parent set
998                         and insert the children of all parents into the children set
999                         until the sets don't change any more
1000                 */
1001                 while (p_change || c_change) {
1002                         ir_nodeset_iterator_t iter;
1003                         p_change = c_change = 0;
1004
1005                         /* accumulate parents */
1006                         foreach_ir_nodeset(&cbc->children, t_irn, iter) {
1007                                 foreach_pset(epk, k_edge) {
1008                                         ir_node *val = k_edge->src;
1009
1010                                         if (k_edge->tgt == t_irn && ! ir_nodeset_contains(&cbc->parents, val)) {
1011                                                 ir_nodeset_insert(&cbc->parents, val);
1012                                                 p_change = 1;
1013                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (killed by %+F)\n", val, t_irn));
1014                                         }
1015                                 }
1016                         }
1017
1018                         /* accumulate children */
1019                         foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
1020                                 foreach_pset(epk, k_edge) {
1021                                         ir_node *val = k_edge->tgt;
1022
1023                                         if (k_edge->src == s_irn  && ! ir_nodeset_contains(&cbc->children, val)) {
1024                                                 ir_nodeset_insert(&cbc->children, val);
1025                                                 c_change = 1;
1026                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (kills %+F)\n", val, s_irn));
1027                                         }
1028                                 }
1029                         }
1030                 }
1031
1032                 /* mark all parent values as visited */
1033                 foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
1034                         rss_irn_t *s = get_rss_irn(rss, s_irn);
1035                         s->visited = 1;
1036                         /* assure bipartite property */
1037 #if 0
1038                         if (ir_nodeset_contains(&cbc->children, s_irn)) {
1039                                 ir_nodeset_remove(&cbc->children, s_irn);
1040                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F removed from to children\n", s_irn));
1041                         }
1042 #endif
1043                 }
1044
1045                 /* update edges */
1046                 foreach_pset(epk, k_edge) {
1047                         if (ir_nodeset_contains(&cbc->parents, k_edge->src) &&
1048                             ir_nodeset_contains(&cbc->children, k_edge->tgt)) {
1049                                 pset_insert(cbc->kill_edges, k_edge, HASH_RSS_EDGE(k_edge));
1050                                 /*
1051                                         Link all k_edges which are about to be removed together.
1052                                         Beware: pset_remove kills the iterator.
1053                                 */
1054                                 k_edge->next = kedge_root;
1055                                 kedge_root   = k_edge;
1056                         }
1057                 }
1058
1059                 /* remove all linked k_edges */
1060                 for (; kedge_root; kedge_root = kedge_root->next) {
1061                         pset_remove(epk, kedge_root, HASH_RSS_EDGE(kedge_root));
1062                 }
1063
1064                 /* verify the cbc */
1065                 foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
1066                         int is_killed = 0;
1067
1068                         foreach_pset(cbc->kill_edges, k_edge) {
1069                                 if (k_edge->src == s_irn) {
1070                                         is_killed = 1;
1071                                         pset_break(cbc->kill_edges);
1072                                         break;
1073                                 }
1074                         }
1075
1076                         if (! is_killed) {
1077                                 vrfy_ok = 0;
1078                                 ir_fprintf(stderr, "Warning: parent %+F is not killed in current cbc\n", s_irn);
1079                         }
1080                 }
1081                 assert(vrfy_ok && "Verification of CBC failed");
1082
1083                 /* add the connected bipartite component */
1084                 if (ir_nodeset_size(&cbc->parents) > 0 &&
1085                     ir_nodeset_size(&cbc->children) > 0 &&
1086                         pset_count(cbc->kill_edges) > 0)
1087                         pset_insert(rss->cbc_set, cbc, (unsigned)cbc->nr);
1088                 DBG((rss->dbg, LEVEL_2, "\tbipartite component %d inserted:\n", cbc->nr));
1089                 DEBUG_ONLY(
1090                         if (firm_dbg_get_mask(rss->dbg) & LEVEL_2)
1091                                 debug_print_cbc(rss->dbg, cbc);
1092                 );
1093         }
1094
1095         if (rss->opts->dump_flags & RSS_DUMP_CBC)
1096                 debug_vcg_dump_bipartite(rss);
1097
1098         del_pset(epk);
1099 }
1100
1101 /**
1102  * Select the child with the maximum cost.
1103  */
1104 static child_t *select_child_max_cost(rss_t *rss, ir_nodeset_t *x, ir_nodeset_t *y, child_t *t, cbc_t *cbc)
1105 {
1106         ir_node *child;
1107         ir_nodeset_iterator_t iter;
1108         float   max_cost = -1.0f;
1109
1110         DBG((rss->dbg, LEVEL_2, "\t\tcomputing children costs:\n"));
1111
1112         foreach_ir_nodeset(&cbc->children, child, iter) {
1113                 rss_irn_t  *r_child             = get_rss_irn(rss, child);
1114                 int         num_unkilled_parents = 0;
1115                 int         num_descendants;
1116                 rss_edge_t *k_edge;
1117                 float       cost;
1118
1119                 /* get the number of unkilled parents */
1120                 foreach_pset(cbc->kill_edges, k_edge) {
1121                         if (k_edge->tgt == child && ir_nodeset_contains(x, k_edge->src))
1122                                 ++num_unkilled_parents;
1123                 }
1124
1125                 cost = (float)num_unkilled_parents;
1126
1127                 num_descendants = plist_count(r_child->descendant_list) + ir_nodeset_size(y);
1128
1129                 if (num_descendants > 0)
1130                         cost /= (float)num_descendants;
1131
1132                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F, #desc %d, cost %.3f\n", child, num_descendants, cost));
1133
1134                 if (cost > max_cost) {
1135                         t->irn   = child;
1136                         t->cost  = cost;
1137                         max_cost = cost;
1138                 }
1139         }
1140
1141         return t;
1142 }
1143
1144 /**
1145  * Remove all parents from x which are killed by t_irn.
1146  */
1147 static void remove_covered_parents(rss_t *rss, ir_nodeset_t *x, ir_node *t_irn, cbc_t *cbc)
1148 {
1149         rss_irn_t  *t = get_rss_irn(rss, t_irn);
1150         rss_edge_t *k_edge;
1151
1152         DBG((rss->dbg, LEVEL_2, "\t\tremoving parents covered by %+F:\n", t_irn));
1153
1154         foreach_pset(cbc->kill_edges, k_edge) {
1155                 if (k_edge->tgt == t_irn && ir_nodeset_contains(x, k_edge->src)) {
1156                         ir_nodeset_remove(x, k_edge->src);
1157                         plist_insert_back(t->parent_list, k_edge->src);
1158                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", k_edge->src));
1159                 }
1160         }
1161 }
1162
1163 static void update_cumulated_descendent_values(rss_t *rss, ir_nodeset_t *y, ir_node *t_irn)
1164 {
1165         rss_irn_t *t = get_rss_irn(rss, t_irn);
1166         plist_element_t *el;
1167
1168         DBG((rss->dbg, LEVEL_2, "\t\tupdating cumulated descendant value of %+F:\n", t_irn));
1169
1170         foreach_plist(t->descendant_list, el) {
1171                 ir_nodeset_insert(y, plist_element_get_value(el));
1172                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", plist_element_get_value(el)));
1173         }
1174 }
1175
1176 /**
1177  * Greedy-k: a heuristics for the MMA problem
1178  */
1179 static void compute_killing_function(rss_t *rss)
1180 {
1181         cbc_t *cbc;
1182         struct obstack obst;
1183
1184         obstack_init(&obst);
1185
1186         rss->cbc_set = pset_new_ptr(5);
1187         compute_bipartite_decomposition(rss);
1188
1189         /* for all bipartite components do: */
1190         foreach_pset(rss->cbc_set, cbc) {
1191                 ir_node *p;
1192                 ir_nodeset_t x;
1193                 ir_nodeset_t y;
1194                 ir_nodeset_iterator_t iter;
1195                 child_t **sks    = NEW_ARR_F(child_t *, 20);
1196                 int     cur_len  = 0;
1197                 int     cur_size = 20;
1198                 int     i;
1199
1200                 ir_nodeset_init_size(&x, 10);
1201                 ir_nodeset_init_size(&y, 10);
1202
1203                 DBG((rss->dbg, LEVEL_1, "\tcomputing SKS for cbc %d:\n", cbc->nr));
1204                 DBG((rss->dbg, LEVEL_2, "\t\tinitializing parents X:\n"));
1205
1206                 /* X = S_cb (all parents are initially uncovered) */
1207                 foreach_ir_nodeset(&cbc->parents, p, iter) {
1208                         ir_nodeset_insert(&x, p);
1209                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", p));
1210                 }
1211
1212                 /* while X not empty */
1213                 while (ir_nodeset_size(&x) > 0) {
1214                         child_t *t = OALLOCZ(&obst, child_t);
1215
1216                         t = select_child_max_cost(rss, &x, &y, t, cbc);
1217
1218                         if (cur_len >= cur_size) {
1219                                 ARR_EXTO(child_t *, sks, cur_size * 2);
1220                                 cur_size *= 2;
1221                         }
1222
1223                         DBG((rss->dbg, LEVEL_2, "\t\tinsert child %+F (%.3f) into SKS at pos %d\n", t->irn, t->cost, cur_len));
1224
1225                         sks[cur_len++] = t;
1226                         remove_covered_parents(rss, &x, t->irn, cbc);
1227                         update_cumulated_descendent_values(rss, &y, t->irn);
1228                 }
1229
1230                 ARR_SHRINKLEN(sks, cur_len);
1231
1232                 /* sort SKS in increasing cost order */
1233                 qsort(sks, cur_len, sizeof(sks[0]), cmp_child_costs);
1234
1235                 DBG((rss->dbg, LEVEL_2, "\tprocessing SKS for cbc %d:\n", cbc->nr));
1236
1237                 /* build killing function */
1238                 for (i = cur_len - 1; i >= 0; --i) { /* loop over sks in decreasing cost order */
1239                         child_t         *t = sks[i];
1240                         rss_irn_t       *rt = get_rss_irn(rss, t->irn);
1241                         plist_element_t *p_el;
1242
1243                         DBG((rss->dbg, LEVEL_3, "\t\t\tkiller %+F (%.3f):\n", t->irn, t->cost));
1244
1245                         /* kill all unkilled parents of t */
1246                         foreach_plist(rt->parent_list, p_el) {
1247                                 ir_node    *par = plist_element_get_value(p_el);
1248                                 rss_irn_t *rpar = get_rss_irn(rss, par);
1249
1250                                 if (is_Sink(rpar->killer)) {
1251                                         rpar->killer = t->irn;
1252                                         DBG((rss->dbg, LEVEL_2, "\t\tkill %+F\n", rpar->irn));
1253                                 }
1254                                 else {
1255                                         DBG((rss->dbg, LEVEL_3, "\t\t\tkeeping %+F as killer for %+F\n", rpar->killer, rpar->irn));
1256                                 }
1257                         }
1258                 }
1259
1260                 ir_nodeset_destroy(&x);
1261                 ir_nodeset_destroy(&y);
1262                 DEL_ARR_F(sks);
1263         }
1264
1265         if (rss->opts->dump_flags & RSS_DUMP_KILL)
1266                 debug_vcg_dump_kill(rss);
1267
1268         del_pset(rss->cbc_set);
1269         obstack_free(&obst, NULL);
1270 }
1271
1272 /**
1273  * Adds the edge src -> tgt to the dvg. Checks if reverse edge is already there (asserts).
1274  */
1275 static inline void add_dvg_edge(rss_t *rss, dvg_t *dvg, const ir_node *src, const ir_node *tgt, int have_source)
1276 {
1277         rss_edge_t *dvg_edge;
1278         rss_edge_t key;
1279
1280         if (! have_source)
1281                 ir_nodeset_insert(&dvg->nodes, (ir_node *) src);
1282         else
1283                 assert(ir_nodeset_contains(&dvg->nodes, src) && "Wrong assumption");
1284
1285         ir_nodeset_insert(&dvg->nodes, (ir_node *) tgt);
1286
1287         key.src = (ir_node *) tgt;
1288         key.tgt = (ir_node *) src;
1289         assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1290
1291         key.src = (ir_node *) src;
1292         key.tgt = (ir_node *) tgt;
1293         if (NULL != pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1294                 /* add the edge to the DVG */
1295                 dvg_edge = OALLOC(phase_obst(&rss->ph), rss_edge_t);
1296
1297                 dvg_edge->src  = (ir_node *) src;
1298                 dvg_edge->tgt  = (ir_node *) tgt;
1299                 dvg_edge->next = NULL;
1300
1301                 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", src, tgt));
1302                 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1303         }
1304 }
1305
1306 /**
1307  * Computes the disjoint value DAG (DVG).
1308  * BEWARE: It is not made explicitly clear in the Touati paper,
1309  *         but the DVG is meant to be build from the KILLING DAG
1310  */
1311 static void compute_dvg(rss_t *rss, dvg_t *dvg)
1312 {
1313         plist_element_t *el;
1314
1315         DBG((rss->dbg, LEVEL_1, "\tcomputing DVG:\n"));
1316
1317         foreach_plist(rss->nodes, el) {
1318                 ir_node    *u_irn    = plist_element_get_value(el);
1319                 rss_irn_t  *u        = get_rss_irn(rss, u_irn);
1320                 rss_irn_t  *u_killer = get_rss_irn(rss, u->killer);
1321                 int        i;
1322
1323                 /* TODO: omit nodes only having sink as pkiller and killing no one */
1324
1325                 /* add an edge to killer */
1326                 add_dvg_edge(rss, dvg, u_irn, u->killer, 0);
1327
1328                 if (is_Sink(u->killer))
1329                         continue;
1330
1331                 /* We add an edge to every killer, from where we could be reached. */
1332                 for (i = ARR_LEN_SAFE(u_killer->descendants) - 1; i >= 0; --i) {
1333                         add_dvg_edge(rss, dvg, u_irn, u_killer->descendants[i], 1);
1334                 }
1335
1336 #if 0
1337
1338                 foreach_plist(rss->nodes, el2) {
1339                         ir_node *v_irn = plist_element_get_value(el2);
1340
1341                         /*
1342                                 There is an edge (u, v) in the DVG iff v is a descendant of the killer(u).
1343                         */
1344                         if (BSEARCH_IRN_ARR(v_irn, u_kill->descendants)) {
1345                                 rss_edge_t *dvg_edge = OALLOC(phase_obst(&rss->ph), rss_edge_t);
1346                                 rss_edge_t key;
1347
1348                                 /* insert the user into the DVG and append it to the user list of u */
1349                                 ir_nodeset_insert(&dvg->nodes, v_irn);
1350                                 if (! plist_has_value(u->dvg_user_list, v_irn))
1351                                         plist_insert_back(u->dvg_user_list, v_irn);
1352
1353                                 dvg_edge->src  = u_irn;
1354                                 dvg_edge->tgt  = v_irn;
1355                                 dvg_edge->next = NULL;
1356
1357                                 key.src = v_irn;
1358                                 key.tgt = u_irn;
1359
1360                                 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1361
1362                                 /* add the edge to the DVG */
1363                                 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", u_irn, v_irn));
1364                                 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1365                         }
1366                 }
1367 #endif /* if 0 */
1368         }
1369
1370         if (rss->opts->dump_flags & RSS_DUMP_DVG)
1371                 debug_vcg_dump_dvg(rss, dvg);
1372 }
1373
1374 /**
1375  * Updates the dvg structure when a serialization edge from src -> tgt is added.
1376  */
1377 static void update_dvg(rss_t *rss, dvg_t *dvg, rss_irn_t *src, rss_irn_t *tgt)
1378 {
1379         int i, j, idx;
1380         rss_edge_t *edge;
1381         rss_edge_t **arr = ALLOCAN(rss_edge_t*, pset_count(dvg->edges));
1382
1383         /*
1384                 Add an edge from serialization target to serialization src:
1385                 src cannot be alive with target
1386         */
1387         add_dvg_edge(rss, dvg, tgt->irn, src->irn, 0);
1388
1389         /* Add edges to src's descendants as well, they also getting serialized. */
1390         for (i = ARR_LEN_SAFE(src->descendants) - 1; i >= 0; --i) {
1391                 add_dvg_edge(rss, dvg, tgt->irn, src->descendants[i], 1);
1392         }
1393
1394         /* We also have to add edges from targets predecessors in dvg */
1395         idx = 0;
1396         /* We cannot insert elements into set over which we iterate ... */
1397         foreach_pset(dvg->edges, edge) {
1398                 if (edge->tgt == tgt->irn) {
1399                         arr[idx++] = edge;
1400                 }
1401         }
1402
1403         for (i = 0; i < idx; ++i) {
1404                 edge = arr[i];
1405                 add_dvg_edge(rss, dvg, edge->src, src->irn, 1);
1406                 for (j = ARR_LEN_SAFE(src->descendants) - 1; j >= 0; --j) {
1407                         add_dvg_edge(rss, dvg, edge->src, src->descendants[j], 1);
1408                 }
1409         }
1410 }
1411
1412 #if 0
1413 /**
1414  * Accumulate all descendants for root into list.
1415  */
1416 static void accumulate_dvg_descendant_values(rss_t *rss, rss_irn_t *root, plist_t *list)
1417 {
1418         if (plist_count(root->dvg_user_list) > 0) {
1419                 plist_element_t *el;
1420
1421                 foreach_plist(root->dvg_user_list, el) {
1422                         ir_node   *v_irn = plist_element_get_value(el);
1423                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
1424
1425                         /* add v as descendant */
1426                         if (! plist_has_value(list, v_irn)) {
1427                                 plist_insert_back(list, v_irn);
1428                                 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG descendant %+F\n", v_irn));
1429                         }
1430
1431                         /* accumulate v's descendants */
1432                         accumulate_dvg_descendant_values(rss, v, list);
1433                 }
1434         }
1435 }
1436
1437 /**
1438  * Builds the list of potential killers for each node
1439  * in the given DVG.
1440  * Needs the descendant list for all user as sorted array.
1441  */
1442 static void build_dvg_pkiller_list(rss_t *rss, dvg_t *dvg)
1443 {
1444         ir_nodeset_iterator_t iter;
1445         ir_node *irn;
1446
1447         foreach_nodeset(&dvg->nodes, irn, iter) {
1448                 rss_irn_t       *node = get_rss_irn(rss, irn);
1449                 plist_element_t *el, *el2;
1450
1451                 DBG((rss->dbg, DEBUG_DVG, "\t\tbuilding pkiller list for %+F\n", irn));
1452
1453                 /* check each user */
1454                 foreach_plist(node->dvg_user_list, el) {
1455                         ir_node *u_irn = plist_element_get_value(el);
1456
1457                         /* is the current user u_irn not a descendant of any other user -> pkiller */
1458                         foreach_plist(node->dvg_user_list, el2) {
1459                                 ir_node   *v_irn = plist_element_get_value(el2);
1460                                 rss_irn_t *v     = get_rss_irn(rss, v_irn);
1461
1462                                 if (el != el2                             &&
1463                                         ! BSEARCH_IRN_ARR(u_irn, v->dvg_desc) &&
1464                                         ! plist_has_value(node->dvg_pkiller_list, u_irn))
1465                                 {
1466                                         plist_insert_back(node->dvg_pkiller_list, u_irn);
1467                                         DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG pkiller %+F\n", u_irn));
1468                                 }
1469                         }
1470                 }
1471
1472                 node->dvg_pkiller = build_sorted_array_from_list(node->dvg_pkiller_list, phase_obst(&rss->ph));
1473         }
1474
1475         DEBUG_ONLY(
1476                 if (firm_dbg_get_mask(rss->dbg) & DEBUG_DVG)
1477                         debug_vcg_dump_dvg_pkiller(rss, dvg);
1478         );
1479 }
1480
1481 #endif /* if 0 */
1482
1483 /**
1484  * Compute the maximal antichain of the current DVG.
1485  * This is a reimplementation of the MAXIMAL_ANTI_CHAIN function
1486  * from the DDG library 1.1 (DAG.cpp).
1487  */
1488 static ir_nodeset_t *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration)
1489 {
1490         int         n               = ir_nodeset_size(&dvg->nodes);
1491         int         *assignment     = ALLOCAN(int, n);
1492         int         *assignment_rev = ALLOCAN(int, n);
1493         int         *idx_map        = ALLOCAN(int, n);
1494         hungarian_problem_t *bp;
1495         ir_nodeset_t *values, *temp;
1496         ir_nodeset_iterator_t iter;
1497         ir_node     *u_irn;
1498         int         i, j, cost, cur_chain, res;
1499         rss_edge_t  *dvg_edge;
1500
1501 #define MAP_IDX(irn) bsearch_for_index(get_irn_idx(irn), idx_map,  n,  1)
1502
1503         if (pset_count(dvg->edges) == 0)
1504                 return NULL;
1505
1506         bp = hungarian_new(n, n, HUNGARIAN_MATCH_NORMAL);
1507
1508         /*
1509                 At first, we build an index map for the nodes in the DVG,
1510                 because we cannot use the irn idx therefore as the resulting
1511                 bipartite data structure would have around 1.2GB.
1512                 So we limit the size to the number of nodes we have in the DVG
1513                 and build a sorted index map for their irn indices.
1514         */
1515         i = 0;
1516         foreach_ir_nodeset(&dvg->nodes, u_irn, iter) {
1517                 idx_map[i++] = get_irn_idx(u_irn);
1518         }
1519         qsort(idx_map, n, sizeof(idx_map[0]), cmp_int);
1520
1521         foreach_pset(dvg->edges, dvg_edge) {
1522                 int idx_u = MAP_IDX(dvg_edge->src);
1523                 int idx_v = MAP_IDX(dvg_edge->tgt);
1524
1525                 /* add the entry to the bipartite data structure */
1526                 hungarian_add(bp, idx_u, idx_v, 1);
1527                 DBG((rss->dbg, LEVEL_3, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1528                         idx_u, dvg_edge->src, idx_v, dvg_edge->tgt));
1529         }
1530 #if 0
1531         /*
1532                 Add a bipartite entry for each pair of nodes (u, v), where exists a
1533                 path in the DVG from u to v, ie. connect all descendants(v) to v.
1534                 desc_dvg(v) = accumulated descendants of all z in dvg_user_list(v)
1535         */
1536         foreach_ir_nodeset(&dvg->nodes, u_irn, iter) {
1537                 rss_irn_t *u        = get_rss_irn(rss, u_irn);
1538                 int       idx_u_irn = MAP_IDX(u_irn);
1539
1540                 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG descendants of %+F:\n", u_irn));
1541
1542                 //plist_clear(u->dvg_desc_list);
1543                 //accumulate_dvg_descendant_values(rss, u, u->dvg_desc_list);
1544
1545                 /*
1546                         FIXME: The array is build on the phase obstack and we cannot free the data.
1547                         So the array get as many times allocated as this function is called.
1548                 */
1549
1550                 /* build the sorted array for faster searches */
1551                 //u->dvg_desc = build_sorted_array_from_list(u->dvg_desc_list, phase_obst(&rss->ph));
1552
1553                 DBG((rss->dbg, DEBUG_MAX_AC, "\t\tadding bipartite entries of %+F:\n", u_irn));
1554
1555                 /* add the bipartite entries for all descendants */
1556                 for (i = ARR_LEN_SAFE(u->dvg_desc) - 1; i >= 0; --i) {
1557                         rss_irn_t *desc    = get_rss_irn(rss, u->dvg_desc[i]);
1558                         int       idx_desc = MAP_IDX(u->dvg_desc[i]);
1559
1560                         /* add the entry to the bipartite data structure */
1561                         hungarian_add(bp, idx_u_irn, idx_desc, 1);
1562                         DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1563                                 idx_u_irn, u_irn, idx_desc, u->dvg_desc[i]));
1564
1565                         need_matching = 1;
1566                 }
1567         }
1568 #endif
1569
1570         /* We want maximum cardinality matching */
1571         hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);
1572
1573 #if 0
1574         DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG pkiller:\n"));
1575         /* beware: the following function needs the dvg_desc array */
1576         build_dvg_pkiller_list(rss, dvg);
1577 #endif
1578
1579         DBG((rss->dbg, LEVEL_2, "\t\tcomputing bipartite matching\n"));
1580         /*
1581                 The maximum cardinality bipartite matching gives us the minimal
1582                 chain partition, which corresponds to the maximum anti chains.
1583         */
1584         res = hungarian_solve(bp, assignment, &cost, 1);
1585         assert(res == 0 && "Bipartite matching failed!");
1586
1587         hungarian_free(bp);
1588         memset(assignment_rev, -1, n * sizeof(assignment_rev[0]));
1589
1590         /* build the reverse assignment, ie. foreach i -> j, add j -> i */
1591         for (i = 0; i < n; ++i) {
1592                 if (assignment[i] >= 0) {
1593                         int j = assignment[i];
1594                         assignment_rev[j] = i;
1595                 }
1596         }
1597
1598         DBG((rss->dbg, LEVEL_2, "\t\tgot assignment with cost %d\n", cost));
1599         DBG((rss->dbg, LEVEL_3, "\t\t\tassignment   ---   reverse assignment\n", cost));
1600         for (i = 0; i < n; ++i) {
1601                 DBG((rss->dbg, LEVEL_3, "\t\t\t%3d -> %3d         %3d -> %3d\n", i, assignment[i], i, assignment_rev[i]));
1602         }
1603
1604         values    = XMALLOC(ir_nodeset_t);
1605         ir_nodeset_init_size(values, 10);
1606         cur_chain = 0;
1607         /* Construction of the minimal chain partition */
1608         for (j = 0; j < n; ++j) {
1609                 /* check nodes, which did not occur as target */
1610                 if (assignment_rev[j] == -1) {
1611                         int       xj      = idx_map[j];
1612                         ir_node   *xj_irn = get_idx_irn(rss->irg, xj);
1613                         rss_irn_t *xj_rss = get_rss_irn(rss, xj_irn);
1614                         chain_t   *c      = OALLOC(phase_obst(&rss->ph), chain_t);
1615                         int       source;
1616
1617                         /* there was no source for j -> we have a source of a new chain */
1618                         ir_nodeset_insert(values, xj_irn);
1619
1620                         c->elements = plist_obstack_new(phase_obst(&rss->ph));
1621                         c->nr       = cur_chain++;
1622                         plist_insert_back(c->elements, xj_irn);
1623
1624                         xj_rss->chain = c;
1625
1626                         DBG((rss->dbg, LEVEL_2, "\t\tstarting chain %d:\n", c->nr));
1627                         DBG((rss->dbg, LEVEL_2, "\t\t\t%+F (%d)", xj_irn, j));
1628
1629                         /* follow chain, having j as source */
1630                         source = j;
1631                         while (assignment[source] >= 0) {
1632                                 int       target  = assignment[source];
1633                                 int       irn_idx = idx_map[target];
1634                                 ir_node   *irn    = get_idx_irn(rss->irg, irn_idx);
1635                                 rss_irn_t *node   = get_rss_irn(rss, irn);
1636
1637                                 plist_insert_back(c->elements, irn);
1638                                 node->chain = c;
1639
1640                                 DB((rss->dbg, LEVEL_2, " -> %+F (%d)", irn, target));
1641
1642                                 /* new source = last target */
1643                                 source = target;
1644                         }
1645
1646                         DB((rss->dbg, LEVEL_2, "\n"));
1647                 }
1648         }
1649
1650         /*
1651                 Computing the maximal antichain: Select an element from each
1652                 chain such, such it is parallel with the others.
1653         */
1654         DBG((rss->dbg, LEVEL_1, "\t\tcomputing set of saturation values (MAX AC)\n"));
1655         DBG((rss->dbg, LEVEL_3, "\t\tstarting with:\n"));
1656
1657         DEBUG_ONLY(
1658                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_3)
1659                         dump_nodeset(values, "\t\t\t");
1660         )
1661
1662         temp = NULL;
1663         do {
1664                 /*
1665                         We need an explicit array for the values as
1666                         we cannot iterate multiple times over the same
1667                         set at the same time. :-(((((
1668                         TODO Matze: now we can, so rewrite this...
1669                 */
1670                 int     n         = ir_nodeset_size(values);
1671                 int     i         = 0;
1672                 ir_node **val_arr = NEW_ARR_F(ir_node *, n);
1673
1674                 foreach_ir_nodeset(values, u_irn, iter)
1675                         val_arr[i++] = u_irn;
1676
1677                 if (temp) {
1678                         ir_nodeset_destroy(temp);
1679                         free(temp);
1680                 }
1681
1682                 temp = XMALLOC(ir_nodeset_t);
1683                 ir_nodeset_init_size(temp, 10);
1684
1685                 /* Select all nodes from current value set, having another node in the set as descendant. */
1686                 for (i = 0; i < n; ++i) {
1687                         rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1688
1689                         for (j = 0; j < n; ++j) {
1690                                 if (i != j) {
1691                                         rss_edge_t key;
1692
1693                                         key.src = val_arr[i];
1694                                         key.tgt = val_arr[j];
1695
1696                                         if (pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1697                                                 /* v[j] is descendant of u -> remove u and break */
1698                                                 ir_nodeset_insert(temp, (ir_node *) u->irn);
1699                                                 ir_nodeset_remove(values, u->irn);
1700
1701                                                 DBG((rss->dbg, LEVEL_3, "\t\t\tremoving %+F from values, adding it to temp\n", u->irn));
1702
1703                                                 break;
1704                                         }
1705                                 }
1706                         }
1707                 }
1708
1709                 /* Try to insert the chain predecessor of all selected u's */
1710                 foreach_ir_nodeset(temp, u_irn, iter) {
1711                         rss_irn_t       *u  = get_rss_irn(rss, u_irn);
1712                         chain_t         *c  = u->chain;
1713                         plist_element_t *el = plist_find_value(c->elements, u_irn);
1714
1715                         assert(el && "Missing element in chain!");
1716
1717                         /* If u has predecessor in chain: insert the predecessor */
1718                         if (el == plist_element_get_prev(el)) {
1719                                 ir_nodeset_insert(values, plist_element_get_value(el));
1720                                 DBG((rss->dbg, LEVEL_3, "\t\t\tadding %+F to values\n", plist_element_get_value(el)));
1721                         }
1722                 }
1723
1724
1725                 DEL_ARR_F(val_arr);
1726         } while (ir_nodeset_size(temp) > 0);
1727
1728         DBG((rss->dbg, LEVEL_2, "\t\tfinal set:\n"));
1729         DEBUG_ONLY(
1730                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2) {
1731                         dump_nodeset(values, "\t\t\t");
1732                 }
1733         );
1734
1735         if (rss->opts->dump_flags & RSS_DUMP_MAXAC)
1736                 debug_vcg_dump_pkg(rss, values, iteration);
1737
1738         if (temp != NULL) {
1739                 ir_nodeset_destroy(temp);
1740                 free(temp);
1741         }
1742
1743         return values;
1744
1745 #undef MAP_IDX
1746 }
1747
1748 /**
1749  * Computes the best serialization between two nodes of sat_vals.
1750  */
1751 static serialization_t *compute_best_admissible_serialization(rss_t *rss, ir_nodeset_t *sat_vals, serialization_t *ser, int num_regs)
1752 {
1753         int        n                    = ir_nodeset_size(sat_vals);
1754         int        n_idx                = ARR_LEN_SAFE(rss->idx_map);
1755         int        i                    = 0;
1756         ir_node    **val_arr            = ALLOCAN(ir_node*, n);
1757         bitset_t   *bs_sv               = bitset_alloca(n_idx);
1758         bitset_t   *bs_vdesc            = bitset_alloca(n_idx);
1759         bitset_t   *bs_tmp              = bitset_alloca(n_idx);
1760         bitset_t   *bs_ukilldesc        = bitset_alloca(n_idx);
1761         int        best_benefit         = INT_MAX;
1762         int        best_omega2          = INT_MAX;
1763         int        best_benefit_omega20 = INT_MAX;
1764         int        has_omega1           = 0;
1765         int        j, k;
1766         ir_node    *irn;
1767         ir_nodeset_iterator_t iter;
1768         rss_edge_t min_benefit_edge = {NULL, NULL, NULL};
1769         rss_edge_t min_omega20_edge = {NULL, NULL, NULL};
1770         rss_irn_t  *ser_u_omega1 = NULL, *ser_v_omega1 = NULL;
1771         rss_irn_t  *ser_u_omega20 = NULL, *ser_v_omega20 = NULL;
1772
1773         DBG((rss->dbg, LEVEL_1, "\tcomputing admissible serializations:\n"));
1774
1775         /*
1776                 We need an explicit array for the values as
1777                 we cannot iterate multiple times over the same
1778                 set at the same time. :-(((((
1779         */
1780
1781         foreach_ir_nodeset(sat_vals, irn, iter) {
1782                 val_arr[i++] = irn;
1783                 bitset_set(bs_sv, BLOCK_IDX_MAP(rss, irn));
1784         }
1785
1786         /*
1787                 We build all admissible serializations and remember the best found so far.
1788                 For u in sat_vals:
1789                  For v in sat_val:
1790                    if v in pkiller(u): add edge from v to all other pkiller(u)
1791                    else: for all vv in pkiller(u): add edge from v to vv if there exists no path from vv to v
1792         */
1793
1794 /*
1795         A node is unserializable if:
1796         - it has only one killer and this one is Sink
1797     - it kills no other values
1798         In this case there is no serialization which could
1799         reduce the registerpressure
1800 */
1801 #define IS_UNSERIALIZABLE_NODE(rss_node)                  \
1802         (                                                     \
1803                 (                                                 \
1804                         (plist_count(rss_node->pkiller_list) == 1) && \
1805                         is_Sink(rss_node->killer)                  && \
1806                         (rss_node->kill_count                == 0)    \
1807                 )                            ||                   \
1808                 be_is_Barrier(rss_node->irn) ||                   \
1809                 be_is_Keep(rss_node->irn)                         \
1810         )
1811
1812         /* for all u in sat_vals */
1813         for (i = 0; i < n; ++i) {
1814                 rss_irn_t       *u = get_rss_irn(rss, val_arr[i]);
1815                 plist_element_t *el;
1816
1817                 /* ignore nodes where serialization does not help */
1818                 if (IS_UNSERIALIZABLE_NODE(u)) {
1819                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", u->irn));
1820                         continue;
1821                 }
1822
1823                 /* accumulate all descendants of all pkiller(u) */
1824                 bitset_clear_all(bs_ukilldesc);
1825                 foreach_plist(u->pkiller_list, el) {
1826                         ir_node   *irn  = plist_element_get_value(el);
1827                         rss_irn_t *node = get_rss_irn(rss, irn);
1828
1829                         if (! is_Sink(irn))
1830                                 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, irn));
1831                         else
1832                                 continue;
1833
1834                         for (k = ARR_LEN_SAFE(node->descendants) - 1; k >= 0; --k) {
1835                                 if (! is_Sink(node->descendants[k]))
1836                                         bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, node->descendants[k]));
1837                         }
1838                 }
1839
1840                 /* for all v in sat_vals */
1841                 for (j = 0; j < n; ++j) {
1842                         ir_node   *v_irn   = val_arr[j];
1843                         rss_irn_t *v       = get_rss_irn(rss, v_irn);
1844                         unsigned  v_height = get_irn_height(rss->h, v_irn);
1845                         int       omega1, omega2, is_pkiller;
1846
1847                         /* v cannot be serialized with itself
1848                          * ignore nodes where serialization does not help */
1849                         if (i == j || IS_UNSERIALIZABLE_NODE(v)) {
1850 #ifdef DEBUG_libfirm
1851                                 if (i != j)
1852                                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", v->irn));
1853 #endif
1854                                 continue;
1855                         }
1856
1857                         /* get descendants of v */
1858                         bitset_clear_all(bs_vdesc);
1859                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v_irn));
1860                         for (k = ARR_LEN_SAFE(v->descendants) - 1; k >= 0; --k) {
1861                                 if (! is_Sink(v->descendants[k]))
1862                                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v->descendants[k]));
1863                         }
1864
1865                         /* if v is in pkiller(u) */
1866                         is_pkiller = plist_has_value(u->pkiller_list, v_irn);
1867
1868                         /* for all vv in pkiller(u) */
1869                         foreach_plist(u->pkiller_list, el) {
1870                                 ir_node *vv_irn  = plist_element_get_value(el);
1871                                 int     add_edge;
1872
1873                                 if (is_Sink(vv_irn) || is_cfop(vv_irn))
1874                                         continue;
1875
1876                                 if (is_pkiller)
1877                                         add_edge = vv_irn != v_irn && skip_Proj(vv_irn) != skip_Proj(v_irn);
1878                                 else
1879                                         add_edge = ! heights_reachable_in_block(rss->h, skip_Proj(vv_irn), skip_Proj(v_irn));
1880
1881                                 /*
1882                                         As we add an edge from vv -> v, we have to make sure,
1883                                         that there exists no path from v to vv.
1884                                 */
1885
1886                                 if (add_edge) {
1887                                         unsigned vv_height = get_irn_height(rss->h, vv_irn);
1888                                         unsigned critical_path_cost;
1889                                         unsigned mu1, mu2;
1890
1891                                         /*
1892                                                 mu1 = | descendants(v) cut sat_vals |
1893                                                 the number of saturating values which cannot
1894                                                 be simultaneously alive with u
1895                                         */
1896                                         bitset_copy(bs_tmp, bs_vdesc);
1897                                         bitset_and(bs_tmp, bs_sv);
1898                                         mu1 = bitset_popcount(bs_tmp);
1899
1900                                         /*
1901                                                 mu2 = | accum_desc_all_pkiller(u) without descendants(v) |
1902                                         */
1903                                         if (is_pkiller) {
1904                                                 bitset_copy(bs_tmp, bs_ukilldesc);
1905                                                 bitset_andnot(bs_tmp, bs_vdesc);
1906                                                 mu2 = bitset_popcount(bs_tmp);
1907                                         }
1908                                         else {
1909                                                 mu2 = 0;
1910                                         }
1911
1912                                         /* omega1 = mu1 - mu2 */
1913                                         omega1 = mu1 - mu2;
1914
1915                                         if (omega1 != 0)
1916                                                 has_omega1 = 1;
1917
1918                                         /* omega2 = increase of critical path */
1919                                         critical_path_cost =
1920                                                 v_height                        /* longest path from v to sink */
1921                                                 + rss->max_height - vv_height   /* longest path from source to vv */
1922                                                 + 1;                            /* edge */
1923
1924                                         /*
1925                                                 If critical_path_cost > max_height -> the new edge
1926                                                 would increase the longest critical path by the difference.
1927                                         */
1928                                         omega2 = critical_path_cost > rss->max_height ? critical_path_cost - rss->max_height : 0;
1929
1930                                         /* this keeps track of the edge with the best benefit */
1931                                         if (omega1 >= num_regs - n && omega1 < best_benefit) {
1932                                                 min_benefit_edge.src = v_irn;
1933                                                 min_benefit_edge.tgt = vv_irn;
1934
1935                                                 ser_u_omega1 = u;
1936                                                 ser_v_omega1 = v;
1937
1938                                                 best_benefit    = omega1;
1939                                                 ser->new_killer = is_pkiller;
1940                                         }
1941
1942                                         /* this keeps track of the edge with the best omega1 costs where omega2 == 0 */
1943                                         if (omega2 == 0 && omega1 >= num_regs - n && omega1 < best_benefit_omega20) {
1944                                                 min_omega20_edge.src = v_irn;
1945                                                 min_omega20_edge.tgt = vv_irn;
1946
1947                                                 ser_u_omega20 = u;
1948                                                 ser_v_omega20 = v;
1949
1950                                                 best_benefit_omega20 = omega1;
1951                                                 ser->new_killer      = is_pkiller;
1952                                         }
1953
1954                                         best_omega2 = MIN(best_omega2, omega2);
1955
1956                                         DBG((rss->dbg, LEVEL_2, "\t\tfound %+F -> %+F (w1 %d, w2 %d)\n",
1957                                                 v_irn, vv_irn, omega1, omega2));
1958                                 } /* if add_edge */
1959                         } /* for all vv in pkiller(u) */
1960                 } /* for all v in sat_vals */
1961         } /* for all u in sat_vals */
1962
1963         if (! has_omega1)
1964                 return NULL;
1965
1966         if (best_omega2 == 0) {
1967                 ser->u         = ser_u_omega20;
1968                 ser->v         = ser_v_omega20;
1969                 ser->edge->src = min_omega20_edge.src;
1970                 ser->edge->tgt = min_omega20_edge.tgt;
1971                 ser->omega1    = best_benefit_omega20;
1972                 ser->omega2    = best_omega2;
1973         }
1974         else {
1975                 ser->u         = ser_u_omega1;
1976                 ser->v         = ser_v_omega1;
1977                 ser->edge->src = min_benefit_edge.src;
1978                 ser->edge->tgt = min_benefit_edge.tgt;
1979                 ser->omega1    = best_benefit;
1980                 ser->omega2    = best_omega2;
1981         }
1982
1983         return ser;
1984
1985 #undef IS_UNSERIALIZABLE_NODE
1986 }
1987
1988 /**
1989  * Perform the value serialization heuristic and add all
1990  * computed serialization edges as dependencies to the irg.
1991  */
1992 static void perform_value_serialization_heuristic(rss_t *rss)
1993 {
1994         bitset_t *arch_nonign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1995         bitset_t *abi_ign_bs     = bitset_alloca(arch_register_class_n_regs(rss->cls));
1996         unsigned available_regs, iteration;
1997         dvg_t    dvg;
1998         ir_nodeset_t *sat_vals;
1999         pset *ser_set = new_pset(cmp_rss_edges, 20);
2000
2001         /* available_regs = R = |arch_non_ignore_regs cut ~abi_ignore_regs| */
2002         arch_put_non_ignore_regs(rss->cls, arch_nonign_bs);
2003         be_abi_put_ignore_regs(rss->abi, rss->cls, abi_ign_bs);
2004         bitset_andnot(arch_nonign_bs, abi_ign_bs);
2005         available_regs  = bitset_popcount(arch_nonign_bs);
2006         //num_live = pset_count(rss->live_block);
2007         //available_regs -= num_live < available_regs ? num_live : 0;
2008
2009         DBG((rss->dbg, LEVEL_1, "\n\t#available regs: %d\n\n", available_regs));
2010
2011         /*
2012                 At first we need to compute the disjoint value DAG (DVG = {V, E_dv}).
2013                 V    = set of all nodes we are currently interested in
2014                 E_dv = there is an edge from u to v iff v is a descendant of killer(u), forall u, v in V
2015         */
2016         ir_nodeset_init_size(&dvg.nodes, plist_count(rss->nodes));
2017         dvg.edges = new_pset(cmp_rss_edges, plist_count(rss->nodes) * 5);
2018         compute_dvg(rss, &dvg);
2019
2020         /*
2021                 Then we perform the heuristic serialization algorithm
2022                 on the DVG which gives us all necessary serialization
2023                 edges.
2024         */
2025         DBG((rss->dbg, LEVEL_1, "\tcomputing maximal antichain:\n"));
2026         iteration = 0;
2027         sat_vals  = compute_maximal_antichain(rss, &dvg, iteration++);
2028         while (sat_vals && (ir_nodeset_size(sat_vals) > available_regs)) {
2029                 serialization_t *ser, best_ser;
2030                 rss_edge_t      *edge = OALLOC(phase_obst(&rss->ph), rss_edge_t);
2031                 ir_node         *dep_src, *dep_tgt;
2032
2033                 best_ser.edge = edge;
2034                 ser = compute_best_admissible_serialization(rss, sat_vals, &best_ser, available_regs);
2035
2036                 DBG((rss->dbg, LEVEL_1, "\tcurrent register saturation %d, target %d\n", ir_nodeset_size(sat_vals), available_regs));
2037
2038                 if (! ser) {
2039                         DBG((rss->dbg, LEVEL_1, "\tno RS improving serialization found, breaking at iteration %d\n", iteration));
2040                         break;
2041                 }
2042
2043                 /* Insert the serialization as dependency edge into the irg. */
2044                 DBG((rss->dbg, LEVEL_1, "\tserializing %+F -> %+F with edge %+F -> %+F and cost %d, %d\n",
2045                         ser->u->irn, ser->v->irn, ser->edge->src, ser->edge->tgt, ser->omega1, ser->omega2));
2046
2047                 if (pset_find(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge)))
2048                         ir_printf("WARNING: serialization %+F -> %+F computed twice!\n", ser->edge->src, ser->edge->tgt);
2049
2050
2051                 pset_insert(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge));
2052
2053                 /* update the dvg */
2054                 update_dvg(rss, &dvg, ser->v, ser->u);
2055                 update_dvg(rss, &dvg, get_rss_irn(rss, ser->edge->src), get_rss_irn(rss, ser->edge->tgt));
2056                 if (sat_vals != NULL) {
2057                         ir_nodeset_destroy(sat_vals);
2058                         free(sat_vals);
2059                 }
2060
2061                 dep_src = skip_Proj(ser->edge->src);
2062                 dep_tgt = ser->edge->tgt;
2063                 add_irn_dep(dep_src, dep_tgt);
2064
2065                 /* Update descendants, consumer and pkillers of the target */
2066                 update_node_info(rss, ser->edge->tgt, ser->edge->src);
2067
2068                 /* TODO: try to find a cheaper way for updating height information */
2069                 rss->max_height = heights_recompute_block(rss->h, rss->block);
2070
2071                 /* Recompute the antichain for next serialization */
2072                 DBG((rss->dbg, LEVEL_1, "\tre-computing maximal antichain:\n"));
2073                 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
2074         }
2075
2076         ir_nodeset_destroy(&dvg.nodes);
2077         del_pset(dvg.edges);
2078 }
2079
2080 /**
2081  * Do initial calculations for a block.
2082  */
2083 static void process_block(ir_node *block, void *env)
2084 {
2085         rss_t *rss = env;
2086         int   i, n;
2087         const ir_edge_t *edge;
2088
2089         phase_init(&rss->ph, rss->irg, init_rss_irn);
2090
2091         DBG((rss->dbg, LEVEL_1, "preprocessing block %+F\n", block));
2092         rss->block = block;
2093
2094         /* build an index map for all nodes in the current block */
2095         i            = 0;
2096         n            = get_irn_n_edges(block);
2097         NEW_ARR_A(int *, rss->idx_map, n);
2098         foreach_out_edge(block, edge) {
2099                 ir_node *irn      = get_edge_src_irn(edge);
2100                 rss->idx_map[i++] = get_irn_idx(irn);
2101         }
2102         qsort(rss->idx_map, n, sizeof(rss->idx_map[0]), cmp_int);
2103         rss->max_height = heights_recompute_block(rss->h, block);
2104
2105         /* loop over all register classes */
2106         for (i = arch_env_get_n_reg_class(rss->arch_env) - 1; i >= 0; --i) {
2107                 const arch_register_class_t *cls = arch_env_get_reg_class(rss->arch_env, i);
2108
2109                 rss->cls = cls;
2110                 DBG((rss->dbg, LEVEL_1, "register class %s\n", arch_register_class_name(cls)));
2111
2112                 /* Get all live value at end of Block having current register class */
2113                 ir_nodeset_init(&rss->live_block);
2114                 be_liveness_end_of_block(rss->liveness, rss->cls, rss->block, &rss->live_block);
2115
2116                 /* reset the list of interesting nodes */
2117                 plist_clear(rss->nodes);
2118                 plist_insert_back(rss->nodes, _sink);
2119
2120                 /* collect all nodes having a certain register class */
2121                 foreach_out_edge(block, edge) {
2122                         ir_node *irn  = get_edge_src_irn(edge);
2123                         ir_mode *mode = get_irn_mode(irn);
2124
2125                         /*
2126                                 We skip:
2127                                 - mode_T nodes (the projs are asked)
2128                                 - mode_X nodes (control flow nodes are always scheduled last)
2129                                 - Keeps (they are always immediately scheduled)
2130                                 - Phi (same as Keep)
2131                         */
2132                         if (mode == mode_T || mode == mode_X || is_Phi(irn))
2133                                 continue;
2134
2135                         /*
2136                                 In case of a proj, we skip
2137                                 - Barrier (they are a Barrier :)
2138                                 - Start
2139                                 - the Proj itself, as it's scheduled always with it's super node
2140                         */
2141                         if (is_Proj(irn)) {
2142                                 ir_node *pred = get_Proj_pred(irn);
2143                                 if (be_is_Barrier(pred) || be_is_Start(pred))
2144                                         continue;
2145                         }
2146
2147                         /* calculate the descendants and consumer for each node in the block */
2148                         collect_node_info(rss, skip_Proj(irn));
2149
2150                         if (be_is_Keep(irn))
2151                                 continue;
2152
2153                         if (!arch_irn_is_ignore(irn) &&
2154                                         arch_get_irn_reg_class_out(irn) == cls) {
2155                                 plist_insert_back(rss->nodes, skip_Proj(irn));
2156                         }
2157                         //}
2158                 }
2159
2160                 /* compute the potential killing set PK(G) */
2161                 compute_pkill_set(rss);
2162
2163                 /* compute the killing function k* */
2164                 compute_killing_function(rss);
2165
2166                 /*
2167                         Compute the heuristic value serialization and
2168                         add the necessary dependencies to the irg.
2169                 */
2170                 perform_value_serialization_heuristic(rss);
2171
2172                 ir_nodeset_destroy(&rss->live_block);
2173         }
2174
2175         phase_deinit(&rss->ph);
2176 }
2177
2178 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_schedrss);
2179 void be_init_schedrss(void)
2180 {
2181         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
2182         lc_opt_entry_t *sched_grp = lc_opt_get_grp(be_grp, "sched");
2183         lc_opt_entry_t *rss_grp = lc_opt_get_grp(sched_grp, "rss");
2184
2185         lc_opt_add_table(rss_grp, rss_option_table);
2186 }
2187
2188 /**
2189  * Preprocess the irg for scheduling.
2190  */
2191 void rss_schedule_preparation(be_irg_t *birg)
2192 {
2193         ir_graph *irg = be_get_birg_irg(birg);
2194         rss_t rss;
2195
2196         FIRM_DBG_REGISTER(rss.dbg, "firm.be.sched.rss");
2197
2198         //firm_dbg_set_mask(rss.dbg, LEVEL_1 | LEVEL_2 | LEVEL_3);
2199
2200         init_rss_special_nodes(irg);
2201
2202         rss.irg      = irg;
2203         rss.arch_env = be_get_birg_arch_env(birg);
2204         rss.abi      = birg->abi;
2205         rss.h        = heights_new(irg);
2206         rss.nodes    = plist_new();
2207         rss.opts     = &rss_options;
2208         rss.liveness = be_liveness(irg);
2209         be_liveness_assure_sets(rss.liveness);
2210         irg_block_walk_graph(irg, NULL, process_block, &rss);
2211         heights_free(rss.h);
2212         plist_free(rss.nodes);
2213         be_liveness_free(rss.liveness);
2214
2215         if (birg->main_env->options->dump_flags & DUMP_SCHED)
2216                 dump_ir_graph(rss.irg, "rss");
2217 }