b3934b314578783c4f9f54b044c869aaff0eb471
[libfirm] / ir / be / beschedrss.c
1 /**
2  * Implementation of a register saturating list scheduler
3  * as described in: Sid-Ahmed-Ali Touati
4  * Register Saturation in Superscalar and VLIW Codes
5  *
6  * @license This file protected by GPL -  GNU GENERAL PUBLIC LICENSE.
7  * @author  Christian Wuerdig
8  * @date    29.08.2006
9  * @cvs-id  $Id$
10  */
11 #ifdef HAVE_CONFIG_H
12 #include "config.h"
13 #endif
14
15 #include <limits.h>
16
17 #include "obst.h"
18 #include "debug.h"
19
20 #include "irgraph_t.h"
21 #include "irnode_t.h"
22 #include "iredges_t.h"
23 #include "ircons_t.h"
24 #include "irphase_t.h"
25 #include "irgwalk.h"
26 #include "irtools.h"
27 #include "irbitset.h"
28 #include "irprintf.h"
29 #include "bipartite.h"
30 #include "hungarian.h"
31 #include "plist.h"
32
33 #include "height.h"
34
35 #include "beabi.h"
36 #include "benode_t.h"
37 #include "besched_t.h"
38
39 #define ARR_LEN_SAFE(arr) ((arr) != NULL ? ARR_LEN((arr)) : 0)
40
41 #define HASH_RSS_EDGE(edge) ((get_irn_node_nr((edge)->src) << 16) | (get_irn_node_nr((edge)->tgt) & 0xFFFF))
42 #define BSEARCH_IRN_ARR(val, arr) \
43         bsearch(&(val), (arr), ARR_LEN_SAFE((arr)), sizeof((arr)[0]), cmp_irn_idx)
44
45 #define BLOCK_IDX_MAP(rss, irn) bsearch_for_index(get_irn_idx((irn)), (rss)->idx_map, ARR_LEN_SAFE((rss)->idx_map), 1)
46
47 /* Represents a child with associated costs */
48 typedef struct _child {
49         ir_node *irn;
50         float   cost;
51 } child_t;
52
53 /* We need edges for several purposes. */
54 typedef struct _rss_edge {
55         ir_node *src;
56         ir_node *tgt;
57         void    *next;
58 } rss_edge_t;
59
60 /* Represents a connected bipartite component. */
61 typedef struct _cbc {
62         nodeset *parents;       /**< = S  a set of value producers */
63         nodeset *children;      /**< = T  a set of value consumers */
64         pset    *kill_edges;    /**< = E  a set of edges (t in T, s in S) such as each s in S gets killed by at least one t in T */
65         int     nr;             /**< a deterministic index for set insertion (used as hash) */
66 } cbc_t;
67
68 /* Represents a disjoint value DAG. */
69 typedef struct _dvg {
70         nodeset *nodes;
71         pset    *edges;
72 } dvg_t;
73
74 /* Represents a chain of nodes. */
75 typedef struct _chain {
76         plist_t *elements;   /**< List of chain elements */
77         int     nr;          /**< a deterministic index for set insertion (used as hash) */
78 } chain_t;
79
80 typedef struct _rss_irn {
81         plist_t  *consumer_list;    /**< List of consumers */
82         ir_node **consumer;         /**< Sorted consumer array (needed for faster access) */
83
84         plist_t  *parent_list;      /**< List of parents */
85         ir_node **parents;          /**< Sorted parent array (needed for faster access) */
86
87         plist_t  *descendant_list;  /**< List of descendants */
88         ir_node **descendants;      /**< Sorted descendant array (needed for faster access) */
89
90         plist_t  *pkiller_list;     /**< List of potential killers */
91         ir_node **pkillers;         /**< Sorted pkiller array (needed for faster access) */
92
93 #if 0
94         plist_t  *dvg_desc_list;    /**< List of all descendants in the DVG */
95         ir_node **dvg_desc;         /**< Sorted dvg descendant array (needed for faster access) */
96
97         plist_t  *dvg_pkiller_list; /**< List of potential killers in the DVG */
98         ir_node **dvg_pkiller;      /**< Sorted dvg pkiller array (needed for faster access) */
99
100         plist_t  *dvg_user_list;    /**< List of users in the disjoint value DAG DVG */
101 #endif
102         plist_t  *kill_value_list;  /**< List of values getting potentially killed by this node */
103
104         ir_node  *killer;           /**< The selected unique killer */
105         ir_node  *irn;              /**< The corresponding firm node to this rss_irn */
106
107         chain_t  *chain;            /**< The chain, this node is associated to */
108
109         unsigned live_out : 1;      /**< irn has consumers outside of it's block */
110         unsigned visited  : 1;      /**< visited flag for bipartite decomposition */
111         unsigned handled  : 1;      /**< flag indicating whether or not the list structures have been build */
112         unsigned dumped   : 1;      /**< flag indication whether or not this node was dumped */
113 } rss_irn_t;
114
115 /* Represents a serialization edge with associated costs. */
116 typedef struct _serialization {
117         rss_irn_t  *u;       /* the top node */
118         rss_irn_t  *v;       /* the node about to be serialized after u */
119         rss_edge_t *edge;    /* the edge selected for the serialization */
120         int        omega1;   /* estimated: available regs - RS reduction */
121         int        omega2;   /* increase in critical path length */
122 } serialization_t;
123
124 typedef struct _rss {
125         phase_t          ph;              /**< Phase to hold some data */
126         heights_t        *h;              /**< The current height object */
127         ir_graph         *irg;            /**< The irg to preprocess */
128         plist_t          *nodes;          /**< The list of interesting nodes */
129         const arch_env_t *arch_env;       /**< The architecture environment */
130         be_abi_irg_t     *abi;            /**< The abi for this irg */
131         pset             *cbc_set;        /**< A set of connected bipartite components */
132         ir_node          *block;          /**< The current block in progress. */
133         int              *idx_map;        /**< Mapping irn indices to per block indices */
134         unsigned         max_height;      /**< maximum height in the current block */
135         const arch_register_class_t *cls; /**< The current register class */
136         DEBUG_ONLY(firm_dbg_module_t *dbg);
137 } rss_t;
138
139 #define get_rss_irn(rss, irn)  (phase_get_or_set_irn_data(&rss->ph, irn))
140
141 /**
142  * We need some special nodes:
143  * a source and a sink for all live-in and live-out values of a block
144  */
145
146 static enum {
147         iro_rss_Source,
148         iro_rss_Sink,
149         iro_rss_last
150 };
151
152 static ir_node *_source = NULL;
153 static ir_node *_sink   = NULL;
154
155 #define is_Source(irn) ((irn) == _source)
156 #define is_Sink(irn)   ((irn) == _sink)
157
158 /******************************************************************************
159  *  _          _                    __                  _   _
160  * | |        | |                  / _|                | | (_)
161  * | |__   ___| |_ __   ___ _ __  | |_ _   _ _ __   ___| |_ _  ___  _ __  ___
162  * | '_ \ / _ \ | '_ \ / _ \ '__| |  _| | | | '_ \ / __| __| |/ _ \| '_ \/ __|
163  * | | | |  __/ | |_) |  __/ |    | | | |_| | | | | (__| |_| | (_) | | | \__ \
164  * |_| |_|\___|_| .__/ \___|_|    |_|  \__,_|_| |_|\___|\__|_|\___/|_| |_|___/
165  *            | |
166  *            |_|
167  ******************************************************************************/
168
169 /**
170  * Acquire opcodes and create source and sink nodes.
171  */
172 static void init_rss_special_nodes(ir_graph *irg) {
173         ir_node *block         = get_irg_start_block(irg);
174         int     iro_rss_base   = get_next_ir_opcodes(iro_rss_last);
175         ir_op   *op_rss_Source = new_ir_op(iro_rss_base + iro_rss_Source, "rss_Source", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
176         ir_op   *op_rss_Sink   = new_ir_op(iro_rss_base + iro_rss_Sink,   "rss_Sink",   op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
177         _source                = new_ir_node(NULL, irg, block, op_rss_Source, mode_ANY, 0, NULL);
178         _sink                  = new_ir_node(NULL, irg, block, op_rss_Sink, mode_ANY, 0, NULL);
179 }
180
181 static int cmp_int(const void *a, const void *b) {
182         const int *i1 = a;
183         const int *i2 = b;
184
185         return QSORT_CMP(*i1, *i2);
186 }
187
188 static int cmp_child_costs(const void *a, const void *b) {
189         const child_t *c1 = a;
190         const child_t *c2 = b;
191
192         return QSORT_CMP(c1->cost, c2->cost);
193 }
194
195 static int cmp_irn_idx(const void *a, const void *b) {
196         const ir_node *n1 = *(ir_node **)a;
197         const ir_node *n2 = *(ir_node **)b;
198
199         return QSORT_CMP(get_irn_idx(n1), get_irn_idx(n2));
200 }
201
202 static int cmp_rss_edges(const void *a, const void *b) {
203         const rss_edge_t *e1 = a;
204         const rss_edge_t *e2 = b;
205
206         return (e1->src != e2->src) || (e1->tgt != e2->tgt);
207 }
208
209 static int bsearch_for_index(int key, int *arr, size_t len, int force) {
210         int left = 0;
211         int right = len;
212
213         while (right >= left) {
214                 int idx = (left + right) / 2;
215
216                 if (key < arr[idx])
217                         right = idx - 1;
218                 else if (key > arr[idx])
219                         left = idx + 1;
220                 else
221                         return idx;
222         }
223
224         if (force)
225                 assert(0 && "Something is wrong, key not found.");
226         return -1;
227 }
228
229 static ir_node **build_sorted_array_from_list(plist_t *irn_list, struct obstack *obst) {
230         plist_element_t *el;
231         int     i     = 0;
232         int     len   = plist_count(irn_list);
233         ir_node **arr = NEW_ARR_D(ir_node *, obst, len);
234
235         /* copy the list into the array */
236         foreach_plist(irn_list, el) {
237                 arr[i++] = plist_element_get_value(el);
238         }
239
240         /* sort the array by node index */
241         qsort(arr, len, sizeof(arr[0]), cmp_irn_idx);
242
243         return arr;
244 }
245
246 /*****************************************************
247  *      _      _                       _
248  *     | |    | |                     (_)
249  *   __| | ___| |__  _   _  __ _  __ _ _ _ __   __ _
250  *  / _` |/ _ \ '_ \| | | |/ _` |/ _` | | '_ \ / _` |
251  * | (_| |  __/ |_) | |_| | (_| | (_| | | | | | (_| |
252  *  \__,_|\___|_.__/ \__,_|\__, |\__, |_|_| |_|\__, |
253  *                          __/ | __/ |         __/ |
254  *                         |___/ |___/         |___/
255  *
256  *****************************************************/
257
258 static void dump_nodeset(nodeset *ns, const char *prefix) {
259         ir_node *irn;
260         foreach_nodeset(ns, irn) {
261                 ir_printf("%s%+F\n", prefix, irn);
262         }
263 }
264
265 static void build_file_name(rss_t *rss, const char *suffix, size_t suf_len, char *buf, size_t len) {
266         const char *irg_name;
267
268         memset(buf, 0, len);
269         irg_name = get_entity_name(get_irg_entity(rss->irg));
270         snprintf(buf, len - suf_len, "%s-%s-block-%d",
271                 irg_name, arch_register_class_name(rss->cls), get_irn_node_nr(rss->block));
272         strcat(buf, suffix);
273 }
274
275 /* Dumps all collected bipartite components of current irg as vcg. */
276 static void debug_vcg_dump_bipartite(rss_t *rss) {
277         cbc_t *cbc;
278         FILE  *f;
279         char  file_name[256];
280         static const char suffix[] = "-RSS-CBC.vcg";
281
282         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
283         f = fopen(file_name, "w");
284
285         ir_fprintf(f, "graph: { title: \"connected bipartite component graph of %+F\"\n", rss->irg);
286         fprintf(f, "display_edge_labels: no\n");
287         fprintf(f, "layoutalgorithm: mindepth\n");
288         fprintf(f, "manhattan_edges: yes\n\n");
289
290         foreach_pset(rss->cbc_set, cbc) {
291                 ir_node    *n;
292                 rss_edge_t *ke;
293
294                 fprintf(f, "graph: { titel: \"cbc %d\" label: \"cbc %d\" status:clustered color:yellow\n", cbc->nr, cbc->nr);
295                 foreach_nodeset(cbc->parents, n) {
296                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
297                 }
298                 foreach_nodeset(cbc->children, n) {
299                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
300                 }
301                 foreach_pset(cbc->kill_edges, ke) {
302                         ir_fprintf(f, "edge: { sourcename: \"n%d_%d\" targetname: \"n%d_%d\" }\n",
303                                 get_irn_node_nr(ke->src), cbc->nr, get_irn_node_nr(ke->tgt), cbc->nr);
304                 }
305                 fprintf(f, "}\n\n");
306         }
307         fprintf(f, "}\n");
308         fclose(f);
309 }
310
311 /* Dump the computed killing function as vcg. */
312 static void debug_vcg_dump_kill(rss_t *rss) {
313         FILE            *f;
314         char            file_name[256];
315         plist_element_t *el;
316         static const char suffix[] = "-RSS-KILL.vcg";
317
318         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
319         f = fopen(file_name, "w");
320
321         ir_fprintf(f, "graph: { title: \"computed kill graph of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
322         fprintf(f, "display_edge_labels: no\n");
323         fprintf(f, "layoutalgorithm: mindepth\n");
324         fprintf(f, "manhattan_edges: yes\n\n");
325
326         /* first: reset dumped flag of all nodes */
327         foreach_plist(rss->nodes, el) {
328                 ir_node   *irn  = plist_element_get_value(el);
329                 rss_irn_t *rirn = get_rss_irn(rss, irn);
330                 rirn->dumped = 0;
331         }
332
333         /* dump all nodes and their killers */
334         foreach_plist(rss->nodes, el) {
335                 ir_node   *irn     = plist_element_get_value(el);
336                 rss_irn_t *rirn    = get_rss_irn(rss, irn);
337                 rss_irn_t *pk_rirn = get_rss_irn(rss, rirn->killer);
338
339                 if (! rirn->dumped) {
340                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
341                         rirn->dumped = 1;
342                 }
343
344                 if (! pk_rirn->dumped) {
345                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(rirn->killer), rirn->killer);
346                         pk_rirn->dumped = 1;
347                 }
348
349                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
350                         get_irn_node_nr(rirn->killer), get_irn_node_nr(irn));
351         }
352
353         fprintf(f, "}\n");
354         fclose(f);
355 }
356
357 /* Dumps the potential killing DAG (PKG) as vcg. */
358 static void debug_vcg_dump_pkg(rss_t *rss, nodeset *max_ac, int iteration) {
359         FILE              *f;
360         char              file_name[256];
361         char              *suffix   = alloca(32);
362         static const char suffix1[] = "-RSS-PKG.vcg";
363         static const char suffix2[] = "-RSS-PKG-MAXAC.vcg";
364         plist_element_t   *el;
365
366         if (! max_ac) {
367                 snprintf(suffix, 32, "%s", suffix1);
368         }
369         else {
370                 snprintf(suffix, 32, "-%0.2d%s", iteration, suffix2);
371         }
372
373         build_file_name(rss, suffix, strlen(suffix) + 1, file_name, sizeof(file_name));
374         f = fopen(file_name, "w");
375
376         ir_fprintf(f, "graph: { title: \"potential killing DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
377         fprintf(f, "display_edge_labels: no\n");
378         fprintf(f, "layoutalgorithm: mindepth\n");
379         fprintf(f, "manhattan_edges: yes\n\n");
380
381         foreach_plist(rss->nodes, el) {
382                 ir_node   *irn  = plist_element_get_value(el);
383                 rss_irn_t *rirn = get_rss_irn(rss, irn);
384                 char      *c1   = "";
385                 plist_element_t *k_el;
386
387                 /* dump selected saturating values in yellow */
388                 if (max_ac && nodeset_find(max_ac, irn))
389                         c1 = "color:yellow";
390
391                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(irn), irn, c1);
392                 rirn->dumped = 1;
393
394                 foreach_plist(rirn->pkiller_list, k_el) {
395                         ir_node   *pkiller = plist_element_get_value(k_el);
396                         rss_irn_t *pk_rirn = get_rss_irn(rss, pkiller);
397                         char      *c2      = "";
398
399                         if (max_ac && nodeset_find(max_ac, pkiller))
400                                 c2 = "color:yellow";
401
402                         if (! pk_rirn->dumped) {
403                                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(pkiller), pkiller, c2);
404                                 pk_rirn->dumped = 1;
405                         }
406                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
407                                 get_irn_node_nr(pkiller), get_irn_node_nr(irn));
408                 }
409         }
410         fprintf(f, "}\n");
411         fclose(f);
412 }
413
414 /* Dumps the disjoint value DAG (DVG) as vcg. */
415 static void debug_vcg_dump_dvg(rss_t *rss, dvg_t *dvg) {
416         static const char suffix[] = "-RSS-DVG.vcg";
417         FILE       *f;
418         char       file_name[256];
419         ir_node    *irn;
420         rss_edge_t *edge;
421
422         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
423         f = fopen(file_name, "w");
424
425         ir_fprintf(f, "graph: { title: \"disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
426         fprintf(f, "display_edge_labels: no\n");
427         fprintf(f, "layoutalgorithm: mindepth\n");
428         fprintf(f, "manhattan_edges: yes\n\n");
429
430         /* dump all nodes */
431         foreach_nodeset(dvg->nodes, irn) {
432                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
433         }
434
435         /* dump all edges */
436         foreach_pset(dvg->edges, edge) {
437                 rss_irn_t *src = get_rss_irn(rss, edge->src);
438                 rss_irn_t *tgt = get_rss_irn(rss, edge->tgt);
439
440                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
441                         get_irn_node_nr(edge->src), get_irn_node_nr(edge->tgt));
442         }
443
444         fprintf(f, "}\n");
445         fclose(f);
446 }
447
448 #if 0
449 /* Dumps the PKG(DVG). */
450 static void debug_vcg_dump_dvg_pkiller(rss_t *rss, dvg_t *dvg) {
451         static const char suffix[] = "-RSS-DVG-PKG.vcg";
452         FILE       *f;
453         char       file_name[256];
454         ir_node    *irn;
455
456         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
457         f = fopen(file_name, "w");
458
459         ir_fprintf(f, "graph: { title: \"PKG of disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
460         fprintf(f, "display_edge_labels: no\n");
461         fprintf(f, "layoutalgorithm: mindepth\n");
462         fprintf(f, "manhattan_edges: yes\n\n");
463
464         /* dump all nodes */
465         foreach_nodeset(dvg->nodes, irn) {
466                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
467         }
468
469         /* dump all edges */
470         foreach_nodeset(dvg->nodes, irn) {
471                 rss_irn_t       *node = get_rss_irn(rss, irn);
472                 plist_element_t *el;
473
474                 foreach_plist(node->dvg_pkiller_list, el) {
475                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
476                                 get_irn_node_nr(plist_element_get_value(el)), get_irn_node_nr(irn));
477                 }
478         }
479
480         fprintf(f, "}\n");
481         fclose(f);
482 }
483 #endif /* if 0 */
484
485 /**
486  * In case there is no rss information for irn, initialize it.
487  */
488 static void *init_rss_irn(phase_t *ph, ir_node *irn, void *old) {
489         rss_irn_t *res = old ? old : phase_alloc(ph, sizeof(res[0]));
490
491         res->descendant_list  = plist_obstack_new(phase_obst(ph));
492         res->descendants      = NULL;
493
494         res->consumer_list    = plist_obstack_new(phase_obst(ph));
495         res->consumer         = NULL;
496
497         res->pkiller_list     = plist_obstack_new(phase_obst(ph));
498         res->pkillers         = NULL;
499
500         res->parent_list      = plist_obstack_new(phase_obst(ph));
501         res->parents          = NULL;
502
503         //res->dvg_desc_list    = plist_obstack_new(phase_obst(ph));
504         //res->dvg_desc         = NULL;
505
506         res->kill_value_list  = plist_obstack_new(phase_obst(ph));
507         //res->dvg_user_list    = plist_obstack_new(phase_obst(ph));
508         //res->dvg_pkiller_list = plist_obstack_new(phase_obst(ph));
509
510         res->killer           = NULL;
511         res->irn              = irn;
512         res->chain            = NULL;
513
514         res->live_out         = 0;
515         res->visited          = 0;
516         res->handled          = 0;
517         res->dumped           = 0;
518
519         return res;
520 }
521
522 /**
523  * Collect all nodes data dependent on current node.
524  */
525 static void collect_descendants(rss_t *rss, rss_irn_t *rirn, ir_node *irn, int *got_sink) {
526         const ir_edge_t *edge;
527         ir_node         *block = rss->block;
528         ir_edge_kind_t  ekind[2] = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
529         int i;
530
531         /* loop over normal and dependency edges */
532         for (i = 0; i < 2; ++i) {
533                 foreach_out_edge_kind(irn, edge, ekind[i]) {
534                         ir_node *user = get_edge_src_irn(edge);
535
536                         /* skip ignore nodes as they do not really contribute to register pressure */
537                         if (arch_irn_is(rss->arch_env, user, ignore))
538                                 continue;
539
540                         /* check if user lives in block and is not a control flow node */
541                         if (get_nodes_block(user) == block && get_irn_mode(user) != mode_X) {
542                                 /* skip mode_T nodes */
543                                 if (get_irn_mode(user) != mode_T && ! plist_has_value(rirn->descendant_list, user)) {
544                                         plist_insert_back(rirn->descendant_list, user);
545                                         DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", user));
546                                 }
547                                 collect_descendants(rss, rirn, user, got_sink);
548                         }
549                         else if (! *got_sink) {
550                                 /* user lives out of block: add sink as descendant if not already done */
551                                 plist_insert_back(rirn->descendant_list, _sink);
552                                 *got_sink = 1;
553                                 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", _sink));
554                         }
555                 }
556         }
557 }
558
559 /**
560  * Handles a single consumer.
561  */
562 static int collect_single_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *consumer, int got_sink) {
563         ir_node        *block   = rss->block;
564         ir_edge_kind_t ekind[2] = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
565         int            i;
566
567         if (get_nodes_block(consumer) == block) {
568                 /* the consumer of a mode_T node are it's projs */
569                 if (get_irn_mode(consumer) == mode_T) {
570                         const ir_edge_t *cons_edge;
571
572                         DBG((rss->dbg, LEVEL_2, "\t\tmode_T consumer %+F skipped\n", consumer));
573
574                         for (i = 0; i < 2; ++i) {
575                                 foreach_out_edge_kind(consumer, cons_edge, ekind[i]) {
576                                         ir_node *cons_proj = get_edge_src_irn(cons_edge);
577
578                                         assert(get_nodes_block(cons_proj) == block && "Proj in wrong block!");
579
580                                         /* skip ignore nodes, as they do not really contribute to register pressure */
581                                         if (arch_irn_is(rss->arch_env, cons_proj, ignore))
582                                                 continue;
583
584                                         plist_insert_back(rss_irn->consumer_list, cons_proj);
585                                         DBG((rss->dbg, LEVEL_3, "\t\t\treal consumer %+F\n", cons_proj));
586                                 }
587                         }
588                 }
589                 else if (! arch_irn_is(rss->arch_env, consumer, ignore)) {
590                         plist_insert_back(rss_irn->consumer_list, consumer);
591                         DBG((rss->dbg, LEVEL_2, "\t\tconsumer %+F\n", consumer));
592                 }
593         }
594         else {
595                 rss_irn->live_out = 1;
596                 DBG((rss->dbg, LEVEL_2, "\t\tlive out %+F", consumer));
597                 if (! got_sink) {
598                         plist_insert_back(rss_irn->consumer_list, _sink);
599                         got_sink = 1;
600                         DB((rss->dbg, LEVEL_2, ", %+F added instead", _sink));
601                 }
602                 DB((rss->dbg, LEVEL_2, "\n"));
603         }
604
605         return got_sink;
606 }
607
608 /**
609  * Collect all nodes consuming the value(s) produced by current node.
610  */
611 static void collect_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *irn) {
612         const ir_edge_t *edge;
613         int             got_sink = 0;
614         int             i;
615         ir_edge_kind_t  ekind[2] = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
616
617         for (i = 0; i < 2; ++i) {
618                 foreach_out_edge_kind(irn, edge, ekind[i]) {
619                         ir_node *consumer = get_edge_src_irn(edge);
620                         got_sink = collect_single_consumer(rss, rss_irn, consumer, got_sink);
621                 }
622         }
623 }
624
625 #if 0
626 /**
627  * We need to build the consumer and descendant list for _source.
628  */
629 static void collect_node_info_source(rss_t *rss) {
630         const ir_edge_t *edge;
631         rss_irn_t       *rirn  = get_rss_irn(rss, _source);
632         ir_node         *block = rss->block;
633
634         if (rirn->handled)
635                 return;
636
637         foreach_out_edge(block, edge) {
638                 ir_node *irn = get_edge_src_irn(edge);
639                 int     i;
640
641                 for (i = get_irn_arity(n) - 1; i >= 0; --i) {
642
643                 }
644         }
645 }
646
647 static void reset_node_info(rss_irn_t *rss_irn) {
648         /* Beware: array data resides on phase obstack, so it gets removed automatically */
649
650         plist_clear(rss_irn->consumer_list);
651         rss_irn->consumer = NULL;
652
653         plist_clear(rss_irn->parent_list);
654         rss_irn->parents = NULL;
655
656         plist_clear(rss_irn->descendant_list);
657         rss_irn->descendants = NULL;
658
659         plist_clear(rss_irn->pkiller_list);
660         rss_irn->pkillers = NULL;
661
662         plist_clear(rss_irn->kill_value_list);
663
664         rss_irn->killer   = NULL;
665         rss_irn->live_out = 0;
666         rss_irn->visited  = 0;
667         rss_irn->handled  = 0;
668 }
669 #endif /* if 0 */
670
671 /**
672  * Collects all consumer and descendant of a irn.
673  */
674 static void collect_node_info(rss_t *rss, ir_node *irn) {
675         rss_irn_t *rss_irn = get_rss_irn(rss, irn);
676         int       got_sink;
677
678         assert(get_irn_mode(irn) != mode_T && "Cannot handle mode_T nodes.");
679
680         /* check if node info is already available */
681         if (rss_irn->handled)
682                 phase_reinit_single_irn_data(&rss->ph, irn);
683
684         DBG((rss->dbg, LEVEL_1, "\tcomputing consumers of %+F:\n", irn));
685
686         /* collect all consumer */
687         collect_consumer(rss, rss_irn, irn);
688
689         /* build sorted consumer array */
690         rss_irn->consumer = build_sorted_array_from_list(rss_irn->consumer_list, phase_obst(&rss->ph));
691
692         DBG((rss->dbg, LEVEL_1, "\tcompute descendants of %+F:\n", irn));
693
694         /* collect descendants */
695         got_sink = 0;
696         collect_descendants(rss, rss_irn, irn, &got_sink);
697
698         /* build sorted descendant array */
699         rss_irn->descendants = build_sorted_array_from_list(rss_irn->descendant_list, phase_obst(&rss->ph));
700
701         rss_irn->handled = 1;
702 }
703
704 /**
705  * Checks if v is a potential killer of u.
706  * v is in pkill(u) iff descendants(v) cut consumer(u) is v
707  *
708  * @param rss   The rss object
709  * @param v      The node to check for killer
710  * @param u      The potentially killed value
711  * @return 1 if v is in pkill(u), 0 otherwise
712  */
713 static int is_potential_killer(rss_t *rss, rss_irn_t *v, rss_irn_t *u) {
714         plist_t *list;
715         ir_node **arr;
716         plist_element_t *el;
717
718         assert(is_Sink(v->irn) || ((plist_count(v->descendant_list) > 0 && v->descendants) || 1));
719         assert(is_Sink(u->irn) || ((plist_count(u->consumer_list)   > 0 && u->consumer)    || 1));
720
721         /* as we loop over the list: loop over the shorter one */
722         if (plist_count(v->descendant_list) > plist_count(u->consumer_list)) {
723                 list = u->consumer_list;
724                 arr  = v->descendants;
725         }
726         else {
727                 list = v->descendant_list;
728                 arr  = u->consumer;
729         }
730
731         /* for each list element: try to find element in array */
732         foreach_plist(list, el) {
733                 ir_node *irn   = plist_element_get_value(el);
734                 ir_node *match = BSEARCH_IRN_ARR(irn, arr);
735
736                 if (match && match != irn)
737                         return 0;
738         }
739
740         return 1;
741 }
742
743 /**
744  * Update descendants, consumer and pkiller list for the given irn.
745  */
746 static void update_node_info(rss_t *rss, ir_node *irn, ir_node *pk_irn) {
747         rss_irn_t *node    = get_rss_irn(rss, irn);
748         rss_irn_t *pkiller = get_rss_irn(rss, pk_irn);
749
750         /* add consumer and rebuild the consumer array */
751         if (! plist_has_value(node->consumer_list, pk_irn)) {
752                 plist_insert_back(node->consumer_list, pk_irn);
753                 node->consumer = build_sorted_array_from_list(node->consumer_list, phase_obst(&rss->ph));
754         }
755
756         /* add pk_irn as descendant, add it's descendants to irn's and rebuild array */
757         if (! plist_has_value(node->descendant_list, pk_irn)) {
758                 plist_element_t *el;
759
760                 plist_insert_back(node->descendant_list, pk_irn);
761
762                 foreach_plist(pkiller->descendant_list, el) {
763                         ir_node *desc = plist_element_get_value(el);
764
765                         if (! plist_has_value(node->descendant_list, desc))
766                                 plist_insert_back(node->descendant_list, desc);
767                 }
768
769                 node->descendants = build_sorted_array_from_list(node->descendant_list, phase_obst(&rss->ph));
770         }
771
772 #if 0
773         /* check, if pkiller is a potential killer of irn */
774         if (is_potential_killer(rss, pkiller, node)) {
775                 if (! plist_has_value(node->pkiller_list, pk_irn))
776                         plist_insert_back(node->pkiller_list, pk_irn);
777                 if (! plist_has_value(pkiller->kill_value_list, irn))
778                         plist_insert_back(pkiller->kill_value_list, irn);
779                 DBG((rss->dbg, LEVEL_1, "\tpotential killer %+F added to %+F\n", pk_irn, irn));
780         }
781 #endif
782 }
783
784 /**
785  * Compute the potential killing set PK.
786  */
787 static void compute_pkill_set(rss_t *rss) {
788         plist_element_t *u_el, *v_el;
789
790         foreach_plist(rss->nodes, u_el) {
791                 ir_node   *u_irn = plist_element_get_value(u_el);
792                 rss_irn_t *u     = get_rss_irn(rss, u_irn);
793
794                 DBG((rss->dbg, LEVEL_1, "\tcomputing potential killers of %+F:\n", u_irn));
795
796                 /* check each consumer if it is a potential killer  */
797                 foreach_plist(u->consumer_list, v_el) {
798                         ir_node   *v_irn = plist_element_get_value(v_el);
799                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
800
801                         /* check, if v is a potential killer of u */
802                         if (is_potential_killer(rss, v, u)) {
803                                 if (! plist_has_value(u->pkiller_list, v_irn))
804                                         plist_insert_back(u->pkiller_list, v_irn);
805                                 if (! plist_has_value(v->kill_value_list, u_irn))
806                                         plist_insert_back(v->kill_value_list, u_irn);
807                                 DBG((rss->dbg, LEVEL_2, "\t\tpotential killer %+F\n", v_irn));
808                         }
809                 }
810
811                 u->killer = _sink;
812         }
813
814         DEBUG_ONLY(
815                 if (firm_dbg_get_mask(rss->dbg))
816                         debug_vcg_dump_pkg(rss, NULL, 0);
817         )
818 }
819
820 /**
821  * Build set of killing edges (from values to their potential killers)
822  */
823 static void build_kill_edges(rss_t *rss, pset *epk) {
824         plist_element_t *el, *k_el;
825
826         foreach_plist(rss->nodes, el) {
827                 ir_node    *irn  = plist_element_get_value(el);
828                 rss_irn_t *rirn = get_rss_irn(rss, irn);
829
830                 DBG((rss->dbg, LEVEL_3, "\t\tbuilding kill edges for %+F:\n", irn));
831
832                 foreach_plist(rirn->pkiller_list, k_el) {
833                         ir_node    *pkiller = plist_element_get_value(k_el);
834                         rss_edge_t *ke      = obstack_alloc(phase_obst(&rss->ph), sizeof(*ke));
835
836                         ke->src  = irn;
837                         ke->tgt  = pkiller;
838                         ke->next = NULL;
839
840                         DBG((rss->dbg, LEVEL_3, "\t\t\ttarget %+F\n", pkiller));
841
842                         pset_insert(epk, ke, HASH_RSS_EDGE(ke));
843                 }
844         }
845 }
846
847 /* print the given cbc for debugging purpose */
848 static void debug_print_cbc(firm_dbg_module_t *mod, cbc_t *cbc) {
849         ir_node    *n;
850         rss_edge_t *ke;
851
852         DBG((mod, LEVEL_3, "\t\tS = set of parents:\n"));
853         foreach_nodeset(cbc->parents, n) {
854                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
855         }
856         DBG((mod, LEVEL_3, "\t\tT = set of children:\n"));
857         foreach_nodeset(cbc->children, n) {
858                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
859         }
860         DBG((mod, LEVEL_3, "\t\tE = Edges from producers to consumers\n"));
861         foreach_pset(cbc->kill_edges, ke) {
862                 DBG((mod, LEVEL_3, "\t\t\t%+F -> %+F\n", ke->src, ke->tgt));
863         }
864 }
865
866 /**
867  * Construct the bipartite decomposition.
868  * Sid-Ahmed-Ali Touati, Phd Thesis
869  * Register Pressure in Instruction Level Parallelism, p. 71
870  */
871 static void compute_bipartite_decomposition(rss_t *rss) {
872         pset *epk    = new_pset(cmp_rss_edges, 10);
873         int  cur_num = 0;
874
875         plist_element_t *el;
876
877         DBG((rss->dbg, LEVEL_1, "\tcomputing bipartite decomposition:\n"));
878
879         build_kill_edges(rss, epk);
880
881         foreach_plist(rss->nodes, el) {
882                 ir_node   *u_irn   = plist_element_get_value(el);
883                 rss_irn_t *u       = get_rss_irn(rss, u_irn);
884                 int       p_change = 1;
885                 int       c_change = 1;
886
887                 cbc_t           *cbc;
888                 plist_element_t *el2;
889                 rss_edge_t      *k_edge;
890                 rss_edge_t      *kedge_root = NULL;
891                 ir_node         *t_irn, *s_irn;
892
893                 if (u->visited || u_irn == _sink)
894                         continue;
895
896                 DBG((rss->dbg, LEVEL_2, "\t\t%+F choosen:\n", u_irn));
897
898                 cbc     = obstack_alloc(phase_obst(&rss->ph), sizeof(*cbc));
899                 cbc->nr = cur_num++;
900
901                 /* initialize S_cb */
902                 cbc->parents = new_nodeset(5);
903                 nodeset_insert(cbc->parents, u_irn);
904                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (init)\n", u_irn));
905
906                 /* E_cb = empty */
907                 cbc->kill_edges = new_pset(cmp_rss_edges, 5);
908
909                 /* each parent gets killed by at least one value from children */
910
911                 /* T_cb = PK_successors(u) */
912                 cbc->children = new_nodeset(5);
913                 foreach_plist(u->pkiller_list, el2) {
914                         nodeset_insert(cbc->children, plist_element_get_value(el2));
915                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (init)\n", plist_element_get_value(el2)));
916                 }
917
918                 /*
919                         Now: insert the parents of all children into the parent set
920                         and insert the children of all parents into the children set
921                         until the sets don't change any more
922                 */
923                 while (p_change || c_change) {
924                         p_change = c_change = 0;
925
926                         /* accumulate parents */
927                         foreach_nodeset(cbc->children, t_irn) {
928                                 rss_irn_t *t = get_rss_irn(rss, t_irn);
929                                 plist_element_t *kvl_el;
930
931                                 foreach_pset(epk, k_edge) {
932                                         ir_node *val = k_edge->src;
933
934                                         if (k_edge->src == t_irn && ! nodeset_find(cbc->parents, k_edge->src)) {
935                                                 nodeset_insert(cbc->parents, val);
936                                                 p_change = 1;
937                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents\n", val));
938                                         }
939                                 }
940                         }
941
942                         /* accumulate children */
943                         foreach_nodeset(cbc->parents, s_irn) {
944                                 rss_irn_t *s = get_rss_irn(rss, s_irn);
945                                 plist_element_t *pkl_el;
946
947                                 foreach_plist(s->pkiller_list, pkl_el) {
948                                         ir_node *val = plist_element_get_value(pkl_el);
949
950                                         if (! nodeset_find(cbc->children, val)) {
951                                                 nodeset_insert(cbc->children, val);
952                                                 c_change = 1;
953                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children\n", val));
954                                         }
955                                 }
956                         }
957                 }
958
959                 /* mark all parent values as visited */
960                 foreach_nodeset(cbc->parents, s_irn) {
961                         rss_irn_t *s = get_rss_irn(rss, s_irn);
962                         s->visited = 1;
963                         /* assure bipartite property */
964                         if (nodeset_find(cbc->children, s_irn)) {
965                                 nodeset_remove(cbc->children, s_irn);
966                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F removed from to children\n", s_irn));
967                         }
968                 }
969
970                 /* update edges */
971                 foreach_pset(epk, k_edge) {
972                         if (nodeset_find(cbc->parents, k_edge->src) && nodeset_find(cbc->children, k_edge->tgt)) {
973                                 pset_insert(cbc->kill_edges, k_edge, HASH_RSS_EDGE(k_edge));
974                                 /*
975                                         Link all k_edges which are about to be removed together.
976                                         Beware: pset_remove kills the iterator.
977                                 */
978                                 k_edge->next = kedge_root;
979                                 kedge_root   = k_edge;
980                         }
981                 }
982
983                 /* remove all linked k_edges */
984                 for (; kedge_root; kedge_root = kedge_root->next) {
985                         pset_remove(epk, kedge_root, HASH_RSS_EDGE(kedge_root));
986                 }
987
988                 /* add the connected bipartite component */
989                 pset_insert(rss->cbc_set, cbc, (unsigned)cbc->nr);
990                 DBG((rss->dbg, LEVEL_1, "\tbipartite component %d inserted:\n", cbc->nr));
991                 DEBUG_ONLY(debug_print_cbc(rss->dbg, cbc));
992         }
993
994         if (firm_dbg_get_mask(rss->dbg))
995                 debug_vcg_dump_bipartite(rss);
996
997         del_pset(epk);
998 }
999
1000 /**
1001  * Select the child with the maximum cost.
1002  */
1003 static child_t *select_child_max_cost(rss_t *rss, nodeset *x, nodeset *y, child_t *t, cbc_t *cbc) {
1004         ir_node *child;
1005         float   max_cost = -1.0f;
1006
1007         DBG((rss->dbg, LEVEL_2, "\t\tcomputing children costs:\n"));
1008
1009         foreach_nodeset(cbc->children, child) {
1010                 rss_irn_t  *r_child             = get_rss_irn(rss, child);
1011                 int         num_unkilled_parents = 0;
1012                 int         num_descendants;
1013                 rss_edge_t *k_edge;
1014                 float       cost;
1015
1016                 /* get the number of unkilled parents */
1017                 foreach_pset(cbc->kill_edges, k_edge) {
1018                         if (k_edge->tgt == child && nodeset_find(x, k_edge->src))
1019                                 ++num_unkilled_parents;
1020                 }
1021
1022                 cost = (float)num_unkilled_parents;
1023
1024                 num_descendants = plist_count(r_child->descendant_list) + nodeset_count(y);
1025
1026                 if (num_descendants > 0)
1027                         cost /= (float)num_descendants;
1028
1029                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F, #desc %d, cost %.3f\n", child, num_descendants, cost));
1030
1031                 if (cost > max_cost) {
1032                         t->irn   = child;
1033                         t->cost  = cost;
1034                         max_cost = cost;
1035                 }
1036         }
1037
1038         return t;
1039 }
1040
1041 /**
1042  * Remove all parents from x which are killed by t_irn.
1043  */
1044 static void remove_covered_parents(rss_t *rss, nodeset *x, ir_node *t_irn, cbc_t *cbc) {
1045         rss_irn_t  *t = get_rss_irn(rss, t_irn);
1046         rss_edge_t *k_edge;
1047
1048         DBG((rss->dbg, LEVEL_2, "\t\tremoving parents covered by %+F:\n", t_irn));
1049
1050         foreach_pset(cbc->kill_edges, k_edge) {
1051                 if (k_edge->tgt == t_irn && nodeset_find(x, k_edge->src)) {
1052                         nodeset_remove(x, k_edge->src);
1053                         plist_insert_back(t->parent_list, k_edge->src);
1054                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", k_edge->src));
1055                 }
1056         }
1057 }
1058
1059 static void update_cumulated_descendent_values(rss_t *rss, nodeset *y, ir_node *t_irn) {
1060         rss_irn_t *t = get_rss_irn(rss, t_irn);
1061         plist_element_t *el;
1062
1063         DBG((rss->dbg, LEVEL_2, "\t\tupdating cumulated descendant value of %+F:\n", t_irn));
1064
1065         foreach_plist(t->descendant_list, el) {
1066                 nodeset_insert(y, plist_element_get_value(el));
1067                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", plist_element_get_value(el)));
1068         }
1069 }
1070
1071 /**
1072  * Greedy-k: a heuristics for the MMA problem
1073  */
1074 static void compute_killing_function(rss_t *rss) {
1075         cbc_t *cbc;
1076         struct obstack obst;
1077
1078         obstack_init(&obst);
1079
1080         rss->cbc_set = pset_new_ptr(5);
1081         compute_bipartite_decomposition(rss);
1082
1083         /* for all bipartite components do: */
1084         foreach_pset(rss->cbc_set, cbc) {
1085                 ir_node *p;
1086                 nodeset *x       = new_nodeset(10);
1087                 nodeset *y       = new_nodeset(10);
1088                 child_t **sks    = NEW_ARR_F(child_t *, 20);
1089                 int     cur_len  = 0;
1090                 int     cur_size = 20;
1091                 int     i;
1092
1093                 DBG((rss->dbg, LEVEL_1, "\tcomputing SKS for cbc %d:\n", cbc->nr));
1094                 DBG((rss->dbg, LEVEL_2, "\t\tinitializing parents X:\n"));
1095
1096                 /* X = S_cb (all parents are initially uncovered) */
1097                 foreach_nodeset(cbc->parents, p) {
1098                         nodeset_insert(x, p);
1099                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", p));
1100                 }
1101
1102                 /* while X not empty */
1103                 while (nodeset_count(x) > 0) {
1104                         child_t *t = obstack_alloc(&obst, sizeof(*t));
1105                         memset(t, 0, sizeof(*t));
1106
1107                         t = select_child_max_cost(rss, x, y, t, cbc);
1108
1109                         if (cur_len >= cur_size) {
1110                                 ARR_EXTO(child_t *, sks, cur_size * 2);
1111                                 cur_size *= 2;
1112                         }
1113
1114                         DBG((rss->dbg, LEVEL_2, "\t\tinsert child %+F (%.3f) into SKS at pos %d\n", t->irn, t->cost, cur_len));
1115
1116                         sks[cur_len++] = t;
1117                         remove_covered_parents(rss, x, t->irn, cbc);
1118                         update_cumulated_descendent_values(rss, y, t->irn);
1119                 }
1120
1121                 ARR_SHRINKLEN(sks, cur_len);
1122
1123                 /* sort SKS in increasing cost order */
1124                 qsort(sks, cur_len, sizeof(sks[0]), cmp_child_costs);
1125
1126                 DBG((rss->dbg, LEVEL_2, "\tprocessing SKS for cbc %d:\n", cbc->nr));
1127
1128                 /* build killing function */
1129                 for (i = cur_len - 1; i >= 0; --i) { /* loop over sks in decreasing cost order */
1130                         child_t         *t = sks[i];
1131                         rss_irn_t       *rt = get_rss_irn(rss, t->irn);
1132                         plist_element_t *p_el;
1133
1134                         DBG((rss->dbg, LEVEL_3, "\t\t\tkiller %+F (%.3f):\n", t->irn, t->cost));
1135
1136                         /* kill all unkilled parents of t */
1137                         foreach_plist(rt->parent_list, p_el) {
1138                                 ir_node    *par = plist_element_get_value(p_el);
1139                                 rss_irn_t *rpar = get_rss_irn(rss, par);
1140
1141                                 if (is_Sink(rpar->killer)) {
1142                                         rpar->killer = t->irn;
1143                                         DBG((rss->dbg, LEVEL_2, "\t\tkill %+F\n", rpar->irn));
1144                                 }
1145                                 else {
1146                                         DBG((rss->dbg, LEVEL_3, "\t\t\tkeeping %+F as killer for %+F\n", rpar->killer, rpar->irn));
1147                                 }
1148                         }
1149                 }
1150
1151                 del_nodeset(x);
1152                 del_nodeset(y);
1153                 DEL_ARR_F(sks);
1154         }
1155
1156         DEBUG_ONLY(
1157                 if (firm_dbg_get_mask(rss->dbg))
1158                         debug_vcg_dump_kill(rss);
1159         );
1160
1161         del_pset(rss->cbc_set);
1162         obstack_free(&obst, NULL);
1163 }
1164
1165 /**
1166  * Adds the edge src -> tgt to the dvg. Checks if reverse edge is already there (asserts).
1167  */
1168 static INLINE void add_dvg_edge(rss_t *rss, dvg_t *dvg, ir_node *src, ir_node *tgt, int have_source) {
1169         rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1170         rss_edge_t key;
1171
1172         if (! have_source)
1173                 nodeset_insert(dvg->nodes, src);
1174
1175         nodeset_insert(dvg->nodes, tgt);
1176
1177         /* add an edge to our killer */
1178         dvg_edge->src  = src;
1179         dvg_edge->tgt  = tgt;
1180         dvg_edge->next = NULL;
1181
1182         key.src = tgt;
1183         key.tgt = src;
1184         assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1185
1186         /* add the edge to the DVG */
1187         DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", src, tgt));
1188         pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1189 }
1190
1191 /**
1192  * Computes the disjoint value DAG (DVG).
1193  * BEWARE: It is not made explicitly clear in the Touati paper,
1194  *         but the DVG is meant to be build from the KILLING DAG
1195  */
1196 static void compute_dvg(rss_t *rss, dvg_t *dvg) {
1197         plist_element_t *el;
1198
1199         DBG((rss->dbg, LEVEL_1, "\tcomputing DVG:\n"));
1200
1201         foreach_plist(rss->nodes, el) {
1202                 ir_node    *u_irn    = plist_element_get_value(el);
1203                 rss_irn_t  *u        = get_rss_irn(rss, u_irn);
1204                 rss_irn_t  *u_killer = get_rss_irn(rss, u->killer);
1205                 int        i;
1206
1207                 /* TODO: omit nodes only having sink as pkiller and killing no one */
1208
1209                 /* add an edge to killer */
1210                 add_dvg_edge(rss, dvg, u_irn, u->killer, 0);
1211
1212                 if (is_Sink(u->killer))
1213                         continue;
1214
1215                 /* We add an edge to every killer, from where we could be reached. */
1216                 for (i = ARR_LEN_SAFE(u_killer->descendants) - 1; i >= 0; --i) {
1217                         add_dvg_edge(rss, dvg, u_irn, u_killer->descendants[i], 1);
1218                 }
1219
1220 #if 0
1221
1222                 foreach_plist(rss->nodes, el2) {
1223                         ir_node *v_irn = plist_element_get_value(el2);
1224
1225                         /*
1226                                 There is an edge (u, v) in the DVG iff v is a descendant of the killer(u).
1227                         */
1228                         if (BSEARCH_IRN_ARR(v_irn, u_kill->descendants)) {
1229                                 rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1230                                 rss_edge_t key;
1231
1232                                 /* insert the user into the DVG and append it to the user list of u */
1233                                 nodeset_insert(dvg->nodes, v_irn);
1234                                 if (! plist_has_value(u->dvg_user_list, v_irn))
1235                                         plist_insert_back(u->dvg_user_list, v_irn);
1236
1237                                 dvg_edge->src  = u_irn;
1238                                 dvg_edge->tgt  = v_irn;
1239                                 dvg_edge->next = NULL;
1240
1241                                 key.src = v_irn;
1242                                 key.tgt = u_irn;
1243
1244                                 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1245
1246                                 /* add the edge to the DVG */
1247                                 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", u_irn, v_irn));
1248                                 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1249                         }
1250                 }
1251 #endif /* if 0 */
1252         }
1253
1254         DEBUG_ONLY(
1255                 if (firm_dbg_get_mask(rss->dbg))
1256                         debug_vcg_dump_dvg(rss, dvg);
1257         );
1258 }
1259
1260 /**
1261  * Updates the dvg structure when a serialization edge from src -> tgt is added.
1262  */
1263 static void update_dvg(rss_t *rss, dvg_t *dvg, rss_irn_t *src, rss_irn_t *tgt) {
1264         int i;
1265
1266         add_dvg_edge(rss, dvg, tgt->irn, src->irn, 1);
1267
1268         for (i = ARR_LEN_SAFE(src->descendants) - 1; i >= 0; --i) {
1269                 add_dvg_edge(rss, dvg, tgt->irn, src->descendants[i], 1);
1270         }
1271 }
1272
1273 #if 0
1274 /**
1275  * Accumulate all descendants for root into list.
1276  */
1277 static void accumulate_dvg_descendant_values(rss_t *rss, rss_irn_t *root, plist_t *list) {
1278         if (plist_count(root->dvg_user_list) > 0) {
1279                 plist_element_t *el;
1280
1281                 foreach_plist(root->dvg_user_list, el) {
1282                         ir_node   *v_irn = plist_element_get_value(el);
1283                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
1284
1285                         /* add v as descendant */
1286                         if (! plist_has_value(list, v_irn)) {
1287                                 plist_insert_back(list, v_irn);
1288                                 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG descendant %+F\n", v_irn));
1289                         }
1290
1291                         /* accumulate v's descendants */
1292                         accumulate_dvg_descendant_values(rss, v, list);
1293                 }
1294         }
1295 }
1296
1297 /**
1298  * Builds the list of potential killers for each node
1299  * in the given DVG.
1300  * Needs the descendant list for all user as sorted array.
1301  */
1302 static void build_dvg_pkiller_list(rss_t *rss, dvg_t *dvg) {
1303         ir_node *irn;
1304
1305         foreach_nodeset(dvg->nodes, irn) {
1306                 rss_irn_t       *node = get_rss_irn(rss, irn);
1307                 plist_element_t *el, *el2;
1308
1309                 DBG((rss->dbg, DEBUG_DVG, "\t\tbuilding pkiller list for %+F\n", irn));
1310
1311                 /* check each user */
1312                 foreach_plist(node->dvg_user_list, el) {
1313                         ir_node *u_irn = plist_element_get_value(el);
1314
1315                         /* is the current user u_irn not a descendant of any other user -> pkiller */
1316                         foreach_plist(node->dvg_user_list, el2) {
1317                                 ir_node   *v_irn = plist_element_get_value(el2);
1318                                 rss_irn_t *v     = get_rss_irn(rss, v_irn);
1319
1320                                 if (el != el2                             &&
1321                                         ! BSEARCH_IRN_ARR(u_irn, v->dvg_desc) &&
1322                                         ! plist_has_value(node->dvg_pkiller_list, u_irn))
1323                                 {
1324                                         plist_insert_back(node->dvg_pkiller_list, u_irn);
1325                                         DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG pkiller %+F\n", u_irn));
1326                                 }
1327                         }
1328                 }
1329
1330                 node->dvg_pkiller = build_sorted_array_from_list(node->dvg_pkiller_list, phase_obst(&rss->ph));
1331         }
1332
1333         DEBUG_ONLY(
1334                 if (firm_dbg_get_mask(rss->dbg) & DEBUG_DVG)
1335                         debug_vcg_dump_dvg_pkiller(rss, dvg);
1336         );
1337 }
1338
1339 #endif /* if 0 */
1340
1341 /**
1342  * Compute the maximal antichain of the current DVG.
1343  * This is a reimplementation of the MAXIMAL_ANTI_CHAIN function
1344  * from the DDG library 1.1 (DAG.cpp).
1345  */
1346 static nodeset *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration) {
1347         int         n               = nodeset_count(dvg->nodes);
1348         int         *assignment     = alloca(n * sizeof(assignment[0]));
1349         int         *assignment_rev = alloca(n * sizeof(assignment_rev[0]));
1350         int         *idx_map        = alloca(n * sizeof(idx_map[0]));
1351         hungarian_problem_t *bp;
1352         nodeset     *values, *temp;
1353         ir_node     *u_irn;
1354         int         i, j, cost, cur_chain, res;
1355         rss_edge_t  *dvg_edge;
1356
1357 #define MAP_IDX(irn) bsearch_for_index(get_irn_idx(irn), idx_map,  n,  1)
1358
1359         if (pset_count(dvg->edges) == 0)
1360                 return NULL;
1361
1362         bp = hungarian_new(n, n, 1, HUNGARIAN_MATCH_NORMAL);
1363
1364         /*
1365                 At first, we build an index map for the nodes in the DVG,
1366                 because we cannot use the irn idx therefore as the resulting
1367                 bipartite data structure would have around 1.2GB.
1368                 So we limit the size to the number of nodes we have in the DVG
1369                 and build a sorted index map for their irn indices.
1370         */
1371         i = 0;
1372         foreach_nodeset(dvg->nodes, u_irn) {
1373                 idx_map[i++] = get_irn_idx(u_irn);
1374         }
1375         qsort(idx_map, n, sizeof(idx_map[0]), cmp_int);
1376
1377         foreach_pset(dvg->edges, dvg_edge) {
1378                 int idx_u = MAP_IDX(dvg_edge->src);
1379                 int idx_v = MAP_IDX(dvg_edge->tgt);
1380
1381                 /* add the entry to the bipartite data structure */
1382                 hungarian_add(bp, idx_u, idx_v, 1);
1383                 DBG((rss->dbg, LEVEL_3, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1384                         idx_u, dvg_edge->src, idx_v, dvg_edge->tgt));
1385         }
1386 #if 0
1387         /*
1388                 Add a bipartite entry for each pair of nodes (u, v), where exists a
1389                 path in the DVG from u to v, ie. connect all descendants(v) to v.
1390                 desc_dvg(v) = accumulated descendants of all z in dvg_user_list(v)
1391         */
1392         foreach_nodeset(dvg->nodes, u_irn) {
1393                 rss_irn_t *u        = get_rss_irn(rss, u_irn);
1394                 int       idx_u_irn = MAP_IDX(u_irn);
1395
1396                 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG descendants of %+F:\n", u_irn));
1397
1398                 //plist_clear(u->dvg_desc_list);
1399                 //accumulate_dvg_descendant_values(rss, u, u->dvg_desc_list);
1400
1401                 /*
1402                         FIXME: The array is build on the phase obstack and we cannot free the data.
1403                         So the array get as many times allocated as this function is called.
1404                 */
1405
1406                 /* build the sorted array for faster searches */
1407                 //u->dvg_desc = build_sorted_array_from_list(u->dvg_desc_list, phase_obst(&rss->ph));
1408
1409                 DBG((rss->dbg, DEBUG_MAX_AC, "\t\tadding bipartite entries of %+F:\n", u_irn));
1410
1411                 /* add the bipartite entries for all descendants */
1412                 for (i = ARR_LEN_SAFE(u->dvg_desc) - 1; i >= 0; --i) {
1413                         rss_irn_t *desc    = get_rss_irn(rss, u->dvg_desc[i]);
1414                         int       idx_desc = MAP_IDX(u->dvg_desc[i]);
1415
1416                         /* add the entry to the bipartite data structure */
1417                         hungarian_add(bp, idx_u_irn, idx_desc, 1);
1418                         DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1419                                 idx_u_irn, u_irn, idx_desc, u->dvg_desc[i]));
1420
1421                         need_matching = 1;
1422                 }
1423         }
1424 #endif
1425
1426         /* We want maximum cardinality matching */
1427         hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);
1428
1429 #if 0
1430         DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG pkiller:\n"));
1431         /* beware: the following function needs the dvg_desc array */
1432         build_dvg_pkiller_list(rss, dvg);
1433 #endif
1434
1435         DBG((rss->dbg, LEVEL_2, "\t\tcomputing bipartite matching\n"));
1436         /*
1437                 The maximum cardinality bipartite matching gives us the minimal
1438                 chain partition, which corresponds to the maximum anti chains.
1439         */
1440         res = hungarian_solve(bp, assignment, &cost);
1441         assert(res == 0 && "Bipartite matching failed!");
1442
1443         hungarian_free(bp);
1444         memset(assignment_rev, -1, n * sizeof(assignment_rev[0]));
1445
1446         /* build the reverse assignment, ie. foreach i -> j, add j -> i */
1447         for (i = 0; i < n; ++i) {
1448                 if (assignment[i] >= 0) {
1449                         int j = assignment[i];
1450                         assignment_rev[j] = i;
1451                 }
1452         }
1453
1454         DBG((rss->dbg, LEVEL_2, "\t\tgot assignment with cost %d\n", cost));
1455         DBG((rss->dbg, LEVEL_3, "\t\t\tassignment   ---   reverse assignment\n", cost));
1456         for (i = 0; i < n; ++i) {
1457                 DBG((rss->dbg, LEVEL_3, "\t\t\t%3d -> %3d         %3d -> %3d\n", i, assignment[i], i, assignment_rev[i]));
1458         }
1459
1460         values    = new_nodeset(10);
1461         cur_chain = 0;
1462         /* Construction of the minimal chain partition */
1463         for (j = 0; j < n; ++j) {
1464                 /* check nodes, which did not occur as target */
1465                 if (assignment_rev[j] == -1) {
1466                         int       xj      = idx_map[j];
1467                         ir_node   *xj_irn = get_idx_irn(rss->irg, xj);
1468                         rss_irn_t *xj_rss = get_rss_irn(rss, xj_irn);
1469                         chain_t   *c      = obstack_alloc(phase_obst(&rss->ph), sizeof(*c));
1470                         int       source;
1471
1472                         /* there was no source for j -> we have a source of a new chain */
1473                         nodeset_insert(values, xj_irn);
1474
1475                         c->elements = plist_obstack_new(phase_obst(&rss->ph));
1476                         c->nr       = cur_chain++;
1477                         plist_insert_back(c->elements, xj_irn);
1478
1479                         xj_rss->chain = c;
1480
1481                         DBG((rss->dbg, LEVEL_2, "\t\tstarting chain %d:\n", c->nr));
1482                         DBG((rss->dbg, LEVEL_2, "\t\t\t%+F (%d)", xj_irn, j));
1483
1484                         /* follow chain, having j as source */
1485                         source = j;
1486                         while (assignment[source] >= 0) {
1487                                 int       target  = assignment[source];
1488                                 int       irn_idx = idx_map[target];
1489                                 ir_node   *irn    = get_idx_irn(rss->irg, irn_idx);
1490                                 rss_irn_t *node   = get_rss_irn(rss, irn);
1491
1492                                 plist_insert_back(c->elements, irn);
1493                                 node->chain = c;
1494
1495                                 DB((rss->dbg, LEVEL_2, " -> %+F (%d)", irn, target));
1496
1497                                 /* new source = last target */
1498                                 source = target;
1499                         }
1500
1501                         DB((rss->dbg, LEVEL_2, "\n"));
1502                 }
1503         }
1504
1505         /*
1506                 Computing the maximal antichain: Select an element from each
1507                 chain such, such it is parallel with the others.
1508         */
1509         DBG((rss->dbg, LEVEL_1, "\t\tcomputing set of saturation values (MAX AC)\n"));
1510         DBG((rss->dbg, LEVEL_2, "\t\tstarting with:\n"));
1511         dump_nodeset(values, "\t\t\t");
1512         temp = NULL;
1513         do {
1514                 /*
1515                         We need an explicit array for the values as
1516                         we cannot iterate multiple times over the same
1517                         set at the same time. :-(((((
1518                 */
1519                 int     n         = nodeset_count(values);
1520                 int     i         = 0;
1521                 ir_node **val_arr = NEW_ARR_F(ir_node *, n);
1522
1523                 foreach_nodeset(values, u_irn)
1524                         val_arr[i++] = u_irn;
1525
1526                 if (temp)
1527                         del_nodeset(temp);
1528
1529                 temp = new_nodeset(10);
1530
1531                 /* Select all nodes from current value set, having another node in the set as descendant. */
1532                 for (i = 0; i < n; ++i) {
1533                         rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1534
1535                         for (j = 0; j < n; ++j) {
1536                                 if (i != j) {
1537                                         rss_edge_t key;
1538
1539                                         key.src = val_arr[i];
1540                                         key.tgt = val_arr[j];
1541
1542                                         if (pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1543                                                 /* v[j] is descendant of u -> remove u and break */
1544                                                 nodeset_insert(temp, u->irn);
1545                                                 nodeset_remove(values, u->irn);
1546
1547                                                 DBG((rss->dbg, LEVEL_3, "\t\t\tremoving %+F from values, adding it to temp\n", u->irn));
1548
1549                                                 break;
1550                                         }
1551                                 }
1552                         }
1553                 }
1554
1555                 /* Try to insert the chain predecessor of all selected u's */
1556                 foreach_nodeset(temp, u_irn) {
1557                         rss_irn_t       *u  = get_rss_irn(rss, u_irn);
1558                         chain_t         *c  = u->chain;
1559                         plist_element_t *el = plist_find_value(c->elements, u_irn);
1560
1561                         assert(el && "Missing element in chain!");
1562
1563                         /* If u has predecessor in chain: insert the predecessor */
1564                         if (el = plist_element_get_prev(el)) {
1565                                 nodeset_insert(values, plist_element_get_value(el));
1566                                 DBG((rss->dbg, LEVEL_3, "\t\t\tadding %+F to values\n", plist_element_get_value(el)));
1567                         }
1568                 }
1569
1570
1571                 DEL_ARR_F(val_arr);
1572         } while (nodeset_count(temp) > 0);
1573
1574         DBG((rss->dbg, LEVEL_2, "\t\tfinal set:\n"));
1575         DEBUG_ONLY(
1576                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2) {
1577                         dump_nodeset(values, "\t\t\t");
1578                         debug_vcg_dump_pkg(rss, values, iteration);
1579                 }
1580         );
1581
1582         del_nodeset(temp);
1583
1584         return values;
1585
1586 #undef MAP_IDX
1587 }
1588
1589 /**
1590  * Computes the best serialization between two nodes of sat_vals.
1591  */
1592 static serialization_t *compute_best_admissible_serialization(rss_t *rss, nodeset *sat_vals, serialization_t *ser, int num_regs) {
1593         int        n                    = nodeset_count(sat_vals);
1594         int        n_idx                = ARR_LEN_SAFE(rss->idx_map);
1595         int        i                    = 0;
1596         ir_node    **val_arr            = alloca(n * sizeof(val_arr[0]));
1597         bitset_t   *bs_sv               = bitset_alloca(n_idx);
1598         bitset_t   *bs_vdesc            = bitset_alloca(n_idx);
1599         bitset_t   *bs_tmp              = bitset_alloca(n_idx);
1600         bitset_t   *bs_ukilldesc        = bitset_alloca(n_idx);
1601         unsigned   best_benefit         = UINT_MAX;
1602         unsigned   best_omega2          = UINT_MAX;
1603         unsigned   best_benefit_omega20 = UINT_MAX;
1604         int        has_positive_omega1  = 0;
1605         int        j, k;
1606         ir_node    *irn;
1607         rss_edge_t min_benefit_edge;
1608         rss_edge_t min_omega20_edge;
1609         rss_irn_t  *ser_u_omega1, *ser_v_omega1;
1610         rss_irn_t  *ser_u_omega20, *ser_v_omega20;
1611
1612         DBG((rss->dbg, LEVEL_1, "\tcomputing admissible serializations:\n"));
1613
1614         /*
1615                 We need an explicit array for the values as
1616                 we cannot iterate multiple times over the same
1617                 set at the same time. :-(((((
1618         */
1619
1620         foreach_nodeset(sat_vals, irn) {
1621                 val_arr[i++] = irn;
1622                 bitset_set(bs_sv, BLOCK_IDX_MAP(rss, irn));
1623         }
1624
1625         /*
1626                 We build all admissible serializations and remember the best found so far.
1627                 For u in sat_vals:
1628                  For v in sat_val:
1629                    if v in pkiller(u): add edge from v to all other pkiller(u)
1630                    else: for all vv in pkiller(u): add edge from v to vv if there exists no path from vv to v
1631         */
1632
1633 /*
1634         A node is unserializable if:
1635         - it has only one killer and this one is Sink
1636     - it kills no other values
1637         In this case there is no serialization which could
1638         reduce the registerpressure
1639 */
1640 #define IS_UNSERIALIZABLE_NODE(rss_node)           \
1641         ((plist_count(rss_node->pkiller_list) == 1) && \
1642         is_Sink(rss_node->killer)                   && \
1643         (plist_count(rss_node->kill_value_list) == 0))
1644
1645         /* for all u in sat_vals */
1646         for (i = 0; i < n; ++i) {
1647                 rss_irn_t       *u       = get_rss_irn(rss, val_arr[i]);
1648                 int             u_height = get_irn_height(rss->h, val_arr[i]);
1649                 plist_element_t *el;
1650
1651                 /* ignore nodes where serialization does not help */
1652                 if (IS_UNSERIALIZABLE_NODE(u))
1653                         continue;
1654
1655                 /* accumulate all descendants of all pkiller(u) */
1656                 bitset_clear_all(bs_ukilldesc);
1657                 foreach_plist(u->pkiller_list, el) {
1658                         ir_node   *irn  = plist_element_get_value(el);
1659                         rss_irn_t *node = get_rss_irn(rss, irn);
1660
1661                         if (! is_Sink(irn))
1662                                 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, irn));
1663                         else
1664                                 continue;
1665
1666                         for (k = ARR_LEN_SAFE(node->descendants) - 1; k >= 0; --k) {
1667                                 if (! is_Sink(node->descendants[k]))
1668                                         bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, node->descendants[k]));
1669                         }
1670                 }
1671
1672                 /* for all v in sat_vals */
1673                 for (j = 0; j < n; ++j) {
1674                         ir_node   *v_irn   = val_arr[j];
1675                         rss_irn_t *v       = get_rss_irn(rss, v_irn);
1676                         unsigned  v_height = get_irn_height(rss->h, v_irn);
1677                         unsigned  omega1, omega2, is_pkiller;
1678
1679                         /* v cannot be serialized with itself
1680                          * ignore nodes where serialization does not help */
1681                         if (i == j || IS_UNSERIALIZABLE_NODE(v))
1682                                 continue;
1683
1684                         /* get descendants of v */
1685                         bitset_clear_all(bs_vdesc);
1686                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v_irn));
1687                         for (k = ARR_LEN_SAFE(v->descendants) - 1; k >= 0; --k) {
1688                                 if (! is_Sink(v->descendants[k]))
1689                                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v->descendants[k]));
1690                         }
1691
1692                         /* if v is in pkiller(u) */
1693                         is_pkiller = plist_has_value(u->pkiller_list, val_arr[j]);
1694
1695                         /* for all vv in pkiller(u) */
1696                         foreach_plist(u->pkiller_list, el) {
1697                                 ir_node *vv_irn  = plist_element_get_value(el);
1698                                 int     add_edge;
1699
1700                                 if (is_Sink(vv_irn))
1701                                         continue;
1702
1703                                 if (is_pkiller)
1704                                         add_edge = vv_irn != v_irn && skip_Proj(vv_irn) != skip_Proj(v_irn);
1705                                 else
1706                                         add_edge = ! heights_reachable_in_block(rss->h, skip_Proj(vv_irn), skip_Proj(v_irn));
1707
1708                                 /*
1709                                         As we add an edge from vv -> v, we have to make sure,
1710                                         that there exists no path from v to vv.
1711                                 */
1712
1713                                 if (add_edge) {
1714                                         unsigned vv_height = get_irn_height(rss->h, vv_irn);
1715                                         unsigned mu1, mu2, critical_path_cost;
1716
1717                                         /*
1718                                                 mu1 = | descendants(v) cut sat_vals |
1719                                                 the number of saturating values which cannot
1720                                                 be simultaneously alive with u
1721                                         */
1722                                         bitset_copy(bs_tmp, bs_vdesc);
1723                                         mu1 = bitset_popcnt(bitset_and(bs_tmp, bs_sv));
1724
1725                                         /*
1726                                                 mu2 = | accum_desc_all_pkiller(u) without descendants(v) |
1727                                         */
1728                                         if (is_pkiller) {
1729                                                 bitset_copy(bs_tmp, bs_ukilldesc);
1730                                                 mu2 = bitset_popcnt(bitset_andnot(bs_tmp, bs_vdesc));
1731                                         }
1732                                         else {
1733                                                 mu2 = 0;
1734                                         }
1735
1736                                         //assert(mu1 >= mu2);
1737
1738                                         /* omega1 = mu1 - mu2 */
1739                                         omega1 = mu2 >= mu2 ? mu1 - mu2 : 0;
1740
1741                                         if (omega1 > 0)
1742                                                 has_positive_omega1 = 1;
1743
1744                                         /* omega2 = increase of critical path */
1745                                         critical_path_cost =
1746                                                 v_height                        /* longest path from v to sink */
1747                                                 + rss->max_height - vv_height   /* longest path from source to vv */
1748                                                 + 1;                            /* edge */
1749
1750                                         /*
1751                                                 If critical_path_cost > max_height -> the new edge
1752                                                 would increase the longest critical path by the difference.
1753                                         */
1754                                         omega2 = critical_path_cost > rss->max_height ? critical_path_cost - rss->max_height : 0;
1755
1756                                         /* this keeps track of the edge with the best benefit */
1757                                         if (num_regs - omega1 < best_benefit) {
1758                                                 min_benefit_edge.src = v_irn;
1759                                                 min_benefit_edge.tgt = vv_irn;
1760
1761                                                 ser_u_omega1 = u;
1762                                                 ser_v_omega1 = v;
1763
1764                                                 best_benefit = num_regs - omega1;
1765                                         }
1766
1767                                         /* this keeps track of the edge with the best omega1 costs where omega2 == 0 */
1768                                         if (omega2 == 0 && (num_regs - omega1 < best_benefit_omega20)) {
1769                                                 min_omega20_edge.src = v_irn;
1770                                                 min_omega20_edge.tgt = vv_irn;
1771
1772                                                 ser_u_omega20 = u;
1773                                                 ser_v_omega20 = v;
1774
1775                                                 best_benefit_omega20 = num_regs - omega1;
1776                                         }
1777
1778                                         best_omega2 = MIN(best_omega2, omega2);
1779
1780                                         DBG((rss->dbg, LEVEL_2, "\t\tfound %+F -> %+F (w1 %d, w2 %d)\n",
1781                                                 v_irn, vv_irn, omega1, omega2));
1782                                 } /* if add_edge */
1783                         } /* for all vv in pkiller(u) */
1784                 } /* for all v in sat_vals */
1785         } /* for all u in sat_vals */
1786
1787         if (! has_positive_omega1)
1788                 return NULL;
1789
1790         if (best_omega2 == 0) {
1791                 ser->u         = ser_u_omega20;
1792                 ser->v         = ser_v_omega20;
1793                 ser->edge->src = min_omega20_edge.src;
1794                 ser->edge->tgt = min_omega20_edge.tgt;
1795                 ser->omega1    = best_benefit_omega20;
1796                 ser->omega2    = best_omega2;
1797         }
1798         else {
1799                 ser->u         = ser_u_omega1;
1800                 ser->v         = ser_v_omega1;
1801                 ser->edge->src = min_benefit_edge.src;
1802                 ser->edge->tgt = min_benefit_edge.tgt;
1803                 ser->omega1    = best_benefit;
1804                 ser->omega2    = best_omega2;
1805         }
1806
1807         return ser;
1808
1809 #undef IS_UNSERIALIZABLE_NODE
1810 }
1811
1812 /**
1813  * Perform the value serialization heuristic and add all
1814  * computed serialization edges as dependencies to the irg.
1815  */
1816 static void perform_value_serialization_heuristic(rss_t *rss) {
1817         bitset_t *arch_nonign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1818         bitset_t *abi_ign_bs     = bitset_alloca(arch_register_class_n_regs(rss->cls));
1819         int      available_regs, iteration;
1820         dvg_t    dvg;
1821         nodeset  *sat_vals;
1822         pset *ser_set = new_pset(cmp_rss_edges, 20);
1823
1824         /* available_regs = R = |arch_non_ignore_regs cut ~abi_ignore_regs| */
1825         arch_put_non_ignore_regs(rss->arch_env, rss->cls, arch_nonign_bs);
1826         be_abi_put_ignore_regs(rss->abi, rss->cls, abi_ign_bs);
1827         bitset_andnot(arch_nonign_bs, abi_ign_bs);
1828         available_regs = bitset_popcnt(arch_nonign_bs);
1829
1830         DBG((rss->dbg, LEVEL_1, "\n\t#available regs: %d\n\n", available_regs));
1831
1832         /*
1833                 At first we need to compute the disjoint value DAG (DVG = {V, E_dv}).
1834                 V    = set of all nodes we are currently interested in
1835                 E_dv = there is an edge from u to v iff v is a descendant of killer(u), forall u, v in V
1836         */
1837         dvg.nodes = new_nodeset(plist_count(rss->nodes));
1838         dvg.edges = new_pset(cmp_rss_edges, plist_count(rss->nodes) * 5);
1839         compute_dvg(rss, &dvg);
1840
1841         /*
1842                 Then we perform the heuristic serialization algorithm
1843                 on the DVG which gives us all necessary serialization
1844                 edges.
1845         */
1846         DBG((rss->dbg, LEVEL_1, "\tcomputing maximal antichain:\n"));
1847         iteration = 0;
1848         sat_vals  = compute_maximal_antichain(rss, &dvg, iteration++);
1849         while (sat_vals && (nodeset_count(sat_vals) > available_regs)) {
1850                 serialization_t *ser, best_ser;
1851                 rss_edge_t      *edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*edge));
1852                 rss_irn_t       *tgt, *src;
1853                 ir_node         *dep_src, *dep_tgt;
1854
1855                 best_ser.edge = edge;
1856                 ser = compute_best_admissible_serialization(rss, sat_vals, &best_ser, available_regs);
1857
1858                 DBG((rss->dbg, LEVEL_1, "\tcurrent register saturation %d, target %d\n", nodeset_count(sat_vals), available_regs));
1859
1860                 if (! ser) {
1861                         DBG((rss->dbg, LEVEL_1, "\tno RS improving serialization found, breaking at iteration %d\n", iteration));
1862                         break;
1863                 }
1864
1865                 /* Insert the serialization as dependency edge into the irg. */
1866                 DBG((rss->dbg, LEVEL_1, "\tserializing %+F -> %+F with edge %+F -> %+F and cost %d, %d\n",
1867                         ser->u->irn, ser->v->irn, ser->edge->src, ser->edge->tgt, ser->omega1, ser->omega2));
1868
1869                 if (pset_find(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge)))
1870                         ir_printf("WARNING: serialization %+F -> %+F computed twice!\n", ser->edge->src, ser->edge->tgt);
1871
1872
1873                 pset_insert(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge));
1874
1875                 /* update the dvg */
1876                 update_dvg(rss, &dvg, ser->v, ser->u);
1877                 update_dvg(rss, &dvg, get_rss_irn(rss, ser->edge->src), get_rss_irn(rss, ser->edge->tgt));
1878                 del_nodeset(sat_vals);
1879
1880                 dep_src = skip_Proj(ser->edge->src);
1881                 dep_tgt = ser->edge->tgt;
1882                 add_irn_dep(dep_src, dep_tgt);
1883
1884                 /* Update descendants, consumer and pkillers of the target */
1885                 update_node_info(rss, ser->edge->tgt, ser->edge->src);
1886
1887                 /* TODO: try to find a cheaper way for updating height information */
1888                 rss->max_height = heights_recompute_block(rss->h, rss->block);
1889
1890 #if 0
1891                 src = get_rss_irn(rss, ser->edge->src);
1892                 tgt = get_rss_irn(rss, ser->edge->tgt);
1893                 src->killer = tgt->irn;
1894                 del_nodeset(dvg.nodes);
1895                 del_pset(dvg.edges);
1896                 dvg.nodes = new_nodeset(plist_count(rss->nodes));
1897                 dvg.edges = new_pset(cmp_rss_edges, plist_count(rss->nodes) * 5);
1898                 compute_dvg(rss, &dvg);
1899 #endif /* if 0 */
1900
1901                 /* Recompute the antichain for next serialization */
1902                 DBG((rss->dbg, LEVEL_1, "\tre-computing maximal antichain:\n"));
1903                 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
1904         }
1905
1906         del_nodeset(dvg.nodes);
1907         del_pset(dvg.edges);
1908 }
1909
1910 /**
1911  * Do initial calculations for a block.
1912  */
1913 static void process_block(ir_node *block, void *env) {
1914         rss_t *rss = env;
1915         int   i, n;
1916         const ir_edge_t *edge;
1917
1918         phase_init(&rss->ph, "rss block preprocessor", rss->irg, PHASE_DEFAULT_GROWTH, init_rss_irn);
1919
1920         DBG((rss->dbg, LEVEL_1, "preprocessing block %+F\n", block));
1921         rss->block = block;
1922
1923         /* build an index map for all nodes in the current block */
1924         i            = 0;
1925         n            = get_irn_n_edges(block);
1926         NEW_ARR_A(int *, rss->idx_map, n);
1927         foreach_out_edge(block, edge) {
1928                 ir_node *irn      = get_edge_src_irn(edge);
1929                 rss->idx_map[i++] = get_irn_idx(irn);
1930         }
1931         qsort(rss->idx_map, n, sizeof(rss->idx_map[0]), cmp_int);
1932         rss->max_height = heights_recompute_block(rss->h, block);
1933
1934         /* loop over all register classes */
1935         for (i = arch_isa_get_n_reg_class(rss->arch_env->isa) - 1; i >= 0; --i) {
1936                 const arch_register_class_t *cls = arch_isa_get_reg_class(rss->arch_env->isa, i);
1937
1938                 rss->cls = cls;
1939                 DBG((rss->dbg, LEVEL_1, "register class %s\n", arch_register_class_name(cls)));
1940
1941                 /* reset the list of interesting nodes */
1942                 plist_clear(rss->nodes);
1943                 plist_insert_back(rss->nodes, _sink);
1944
1945                 /* collect all nodes having a certain register class */
1946                 foreach_out_edge(block, edge) {
1947                         ir_node *irn = get_edge_src_irn(edge);
1948
1949                         if (get_irn_mode(irn) == mode_T || be_is_Keep(irn) || be_is_Barrier(skip_Proj(irn)))
1950                                 continue;
1951
1952                         if (! arch_irn_is(rss->arch_env, irn, ignore) && arch_get_irn_reg_class(rss->arch_env, irn, -1) == cls) {
1953                                 plist_insert_back(rss->nodes, irn);
1954
1955                                 /* calculate the descendants and consumer for each node in the block */
1956                                 collect_node_info(rss, irn);
1957                         }
1958                 }
1959
1960                 /* compute the potential killing set PK(G) */
1961                 compute_pkill_set(rss);
1962
1963                 /* compute the killing function k* */
1964                 compute_killing_function(rss);
1965
1966                 /*
1967                         Compute the heuristic value serialization and
1968                         add the necessary dependencies to the irg.
1969                 */
1970                 perform_value_serialization_heuristic(rss);
1971         }
1972
1973         phase_free(&rss->ph);
1974 }
1975
1976 /**
1977  * Preprocess the irg for scheduling.
1978  */
1979 void rss_schedule_preparation(const be_irg_t *birg) {
1980         rss_t rss;
1981
1982         FIRM_DBG_REGISTER(rss.dbg, "firm.be.sched.rss");
1983
1984         //firm_dbg_set_mask(rss.dbg, LEVEL_1 | LEVEL_2 | LEVEL_3);
1985
1986         init_rss_special_nodes(birg->irg);
1987
1988         rss.irg      = birg->irg;
1989         rss.arch_env = birg->main_env->arch_env;
1990         rss.abi      = birg->abi;
1991         rss.h        = heights_new(birg->irg);
1992         rss.nodes    = plist_new();
1993         irg_block_walk_graph(birg->irg, NULL, process_block, &rss);
1994         heights_free(rss.h);
1995         plist_free(rss.nodes);
1996 }