removed C99 style
[libfirm] / ir / be / beschedrss.c
1 /**
2  * Implementation of a register saturating list scheduler
3  * as described in: Sid-Ahmed-Ali Touati
4  * Register Saturation in Superscalar and VLIW Codes
5  *
6  * @license This file protected by GPL -  GNU GENERAL PUBLIC LICENSE.
7  * @author  Christian Wuerdig
8  * @date    29.08.2006
9  * @cvs-id  $Id$
10  */
11 #ifdef HAVE_CONFIG_H
12 #include "config.h"
13 #endif
14
15 #include <limits.h>
16
17 #include "obst.h"
18 #include "debug.h"
19
20 #include "irgraph_t.h"
21 #include "irnode_t.h"
22 #include "iredges_t.h"
23 #include "ircons_t.h"
24 #include "irphase_t.h"
25 #include "irgwalk.h"
26 #include "irdump.h"
27 #include "irtools.h"
28 #include "irbitset.h"
29 #include "irprintf.h"
30 #include "bipartite.h"
31 #include "hungarian.h"
32 #include "plist.h"
33
34 #include "height.h"
35
36 #include "beabi.h"
37 #include "bemodule.h"
38 #include "benode_t.h"
39 #include "besched_t.h"
40
41 #ifdef WITH_LIBCORE
42 #include <libcore/lc_opts.h>
43 #include <libcore/lc_opts_enum.h>
44 #endif /* WITH_LIBCORE */
45
46
47 #define ARR_LEN_SAFE(arr) ((arr) != NULL ? ARR_LEN((arr)) : 0)
48
49 #define HASH_RSS_EDGE(edge) ((get_irn_node_nr((edge)->src) << 16) | (get_irn_node_nr((edge)->tgt) & 0xFFFF))
50 #define BSEARCH_IRN_ARR(val, arr) \
51         bsearch(&(val), (arr), ARR_LEN_SAFE((arr)), sizeof((arr)[0]), cmp_irn_idx)
52
53 #define BLOCK_IDX_MAP(rss, irn) bsearch_for_index(get_irn_idx((irn)), (rss)->idx_map, ARR_LEN_SAFE((rss)->idx_map), 1)
54
55 /* the rss options */
56 typedef struct _rss_opts_t {
57         int dump_flags;
58 } rss_opts_t;
59
60 /* Represents a child with associated costs */
61 typedef struct _child {
62         ir_node *irn;
63         float   cost;
64 } child_t;
65
66 /* We need edges for several purposes. */
67 typedef struct _rss_edge {
68         ir_node *src;
69         ir_node *tgt;
70         void    *next;
71 } rss_edge_t;
72
73 /* Represents a connected bipartite component. */
74 typedef struct _cbc {
75         nodeset *parents;       /**< = S  a set of value producers */
76         nodeset *children;      /**< = T  a set of value consumers */
77         pset    *kill_edges;    /**< = E  a set of edges (t in T, s in S) such as each s in S gets killed by at least one t in T */
78         int     nr;             /**< a deterministic index for set insertion (used as hash) */
79 } cbc_t;
80
81 /* Represents a disjoint value DAG. */
82 typedef struct _dvg {
83         nodeset *nodes;
84         pset    *edges;
85 } dvg_t;
86
87 /* Represents a chain of nodes. */
88 typedef struct _chain {
89         plist_t *elements;   /**< List of chain elements */
90         int     nr;          /**< a deterministic index for set insertion (used as hash) */
91 } chain_t;
92
93 typedef struct _rss_irn {
94         plist_t  *consumer_list;    /**< List of consumers */
95         ir_node **consumer;         /**< Sorted consumer array (needed for faster access) */
96
97         plist_t  *parent_list;      /**< List of parents */
98         plist_t  *pkiller_list;     /**< List of potential killers */
99
100         plist_t  *descendant_list;  /**< List of descendants */
101         ir_node **descendants;      /**< Sorted descendant array (needed for faster access) */
102
103         ir_node  *killer;           /**< The selected unique killer */
104         ir_node  *irn;              /**< The corresponding firm node to this rss_irn */
105
106         chain_t  *chain;            /**< The chain, this node is associated to */
107
108         unsigned desc_walk;         /**< visited flag for collecting descendants */
109         unsigned kill_count;        /**< number of nodes killed by this one */
110
111         unsigned live_out : 1;      /**< irn has consumers outside of it's block */
112         unsigned visited  : 1;      /**< visited flag for bipartite decomposition */
113         unsigned havecons : 1;      /**< visited flag collect consumer */
114         unsigned handled  : 1;      /**< flag indicating whether or not the list structures have been build */
115         unsigned dumped   : 1;      /**< flag indication whether or not this node was dumped */
116 } rss_irn_t;
117
118 /* Represents a serialization edge with associated costs. */
119 typedef struct _serialization {
120         rss_irn_t  *u;       /* the top node */
121         rss_irn_t  *v;       /* the node about to be serialized after u */
122         rss_edge_t *edge;    /* the edge selected for the serialization */
123         int        omega1;   /* estimated: available regs - RS reduction */
124         int        omega2;   /* increase in critical path length */
125         int        new_killer;
126 } serialization_t;
127
128 typedef struct _rss {
129         phase_t          ph;              /**< Phase to hold some data */
130         heights_t        *h;              /**< The current height object */
131         ir_graph         *irg;            /**< The irg to preprocess */
132         plist_t          *nodes;          /**< The list of interesting nodes */
133         const arch_env_t *arch_env;       /**< The architecture environment */
134         be_abi_irg_t     *abi;            /**< The abi for this irg */
135         pset             *cbc_set;        /**< A set of connected bipartite components */
136         ir_node          *block;          /**< The current block in progress. */
137         int              *idx_map;        /**< Mapping irn indices to per block indices */
138         unsigned         max_height;      /**< maximum height in the current block */
139         rss_opts_t       *opts;           /**< The options */
140         be_lv_t          *liveness;       /**< The liveness information for this irg */
141         pset             *live_block;     /**< Values alive at end of block */
142         const arch_register_class_t *cls; /**< The current register class */
143         DEBUG_ONLY(firm_dbg_module_t *dbg);
144 } rss_t;
145
146 #define get_rss_irn(rss, irn)  (phase_get_or_set_irn_data(&rss->ph, irn))
147
148 /**
149  * We need some special nodes:
150  * a source and a sink for all live-in and live-out values of a block
151  */
152 enum {
153         iro_rss_Source,
154         iro_rss_Sink,
155         iro_rss_last
156 };
157
158 /** The opcode of the rss_Source node once allocated. */
159 static ir_op *op_rss_Source;
160 /** The opcode of the rss_Sink node once allocated. */
161 static ir_op *op_rss_Sink;
162
163 /** The rss_Source node of the current graph. */
164 static ir_node *_source = NULL;
165 /** The rss_Sink node of the current graph. */
166 static ir_node *_sink   = NULL;
167
168 #define is_Source(irn) ((irn) == _source)
169 #define is_Sink(irn)   ((irn) == _sink)
170
171 enum {
172         RSS_DUMP_NONE  = 0,
173         RSS_DUMP_CBC   = 1 << 0,
174         RSS_DUMP_PKG   = 1 << 1,
175         RSS_DUMP_KILL  = 1 << 2,
176         RSS_DUMP_DVG   = 1 << 3,
177         RSS_DUMP_MAXAC = 1 << 4,
178         RSS_DUMP_ALL   = (RSS_DUMP_MAXAC << 1) - 1,
179 };
180
181 static rss_opts_t rss_options = {
182         RSS_DUMP_NONE,
183 };
184
185 #ifdef WITH_LIBCORE
186 static const lc_opt_enum_int_items_t dump_items[] = {
187         { "none",  RSS_DUMP_NONE  },
188         { "cbc",   RSS_DUMP_CBC   },
189         { "pkg",   RSS_DUMP_PKG   },
190         { "kill",  RSS_DUMP_KILL  },
191         { "dvg",   RSS_DUMP_DVG   },
192         { "maxac", RSS_DUMP_MAXAC },
193         { "all",   RSS_DUMP_ALL   },
194         { NULL, 0 }
195 };
196
197 static lc_opt_enum_int_var_t dump_var = {
198         &rss_options.dump_flags, dump_items
199 };
200
201 static const lc_opt_table_entry_t rss_option_table[] = {
202         LC_OPT_ENT_ENUM_MASK("dump", "dump phases", &dump_var),
203         { NULL }
204 };
205 #endif /* WITH_LIBCORE */
206
207 /******************************************************************************
208  *  _          _                    __                  _   _
209  * | |        | |                  / _|                | | (_)
210  * | |__   ___| |_ __   ___ _ __  | |_ _   _ _ __   ___| |_ _  ___  _ __  ___
211  * | '_ \ / _ \ | '_ \ / _ \ '__| |  _| | | | '_ \ / __| __| |/ _ \| '_ \/ __|
212  * | | | |  __/ | |_) |  __/ |    | | | |_| | | | | (__| |_| | (_) | | | \__ \
213  * |_| |_|\___|_| .__/ \___|_|    |_|  \__,_|_| |_|\___|\__|_|\___/|_| |_|___/
214  *            | |
215  *            |_|
216  ******************************************************************************/
217
218 /**
219  * Acquire opcodes if needed and create source and sink nodes.
220  */
221 static void init_rss_special_nodes(ir_graph *irg) {
222         ir_node *block;
223
224         if (op_rss_Source == NULL) {
225                 int iro_rss_base = get_next_ir_opcodes(iro_rss_last);
226                 op_rss_Source = new_ir_op(iro_rss_base + iro_rss_Source, "rss_Source", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
227                 op_rss_Sink   = new_ir_op(iro_rss_base + iro_rss_Sink,   "rss_Sink",   op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
228         }
229         block   = get_irg_start_block(irg);
230         _source = new_ir_node(NULL, irg, block, op_rss_Source, mode_ANY, 0, NULL);
231         _sink   = new_ir_node(NULL, irg, block, op_rss_Sink, mode_ANY, 0, NULL);
232 }
233
234 static int cmp_int(const void *a, const void *b) {
235         const int *i1 = a;
236         const int *i2 = b;
237
238         return QSORT_CMP(*i1, *i2);
239 }
240
241 static int cmp_child_costs(const void *a, const void *b) {
242         const child_t *c1 = a;
243         const child_t *c2 = b;
244
245         return QSORT_CMP(c1->cost, c2->cost);
246 }
247
248 static int cmp_irn_idx(const void *a, const void *b) {
249         const ir_node *n1 = *(ir_node **)a;
250         const ir_node *n2 = *(ir_node **)b;
251
252         return QSORT_CMP(get_irn_idx(n1), get_irn_idx(n2));
253 }
254
255 static int cmp_rss_edges(const void *a, const void *b) {
256         const rss_edge_t *e1 = a;
257         const rss_edge_t *e2 = b;
258
259         return (e1->src != e2->src) || (e1->tgt != e2->tgt);
260 }
261
262 static int bsearch_for_index(int key, int *arr, size_t len, int force) {
263         int left = 0;
264         int right = len;
265
266         while (right >= left) {
267                 int idx = (left + right) / 2;
268
269                 if (key < arr[idx])
270                         right = idx - 1;
271                 else if (key > arr[idx])
272                         left = idx + 1;
273                 else
274                         return idx;
275         }
276
277         if (force)
278                 assert(0 && "Something is wrong, key not found.");
279         return -1;
280 }
281
282 static ir_node **build_sorted_array_from_list(plist_t *irn_list, struct obstack *obst) {
283         plist_element_t *el;
284         int     i     = 0;
285         int     len   = plist_count(irn_list);
286         ir_node **arr = NEW_ARR_D(ir_node *, obst, len);
287
288         /* copy the list into the array */
289         foreach_plist(irn_list, el) {
290                 arr[i++] = plist_element_get_value(el);
291         }
292
293         /* sort the array by node index */
294         qsort(arr, len, sizeof(arr[0]), cmp_irn_idx);
295
296         return arr;
297 }
298
299 /*****************************************************
300  *      _      _                       _
301  *     | |    | |                     (_)
302  *   __| | ___| |__  _   _  __ _  __ _ _ _ __   __ _
303  *  / _` |/ _ \ '_ \| | | |/ _` |/ _` | | '_ \ / _` |
304  * | (_| |  __/ |_) | |_| | (_| | (_| | | | | | (_| |
305  *  \__,_|\___|_.__/ \__,_|\__, |\__, |_|_| |_|\__, |
306  *                          __/ | __/ |         __/ |
307  *                         |___/ |___/         |___/
308  *
309  *****************************************************/
310
311 #ifdef DEBUG_libfirm
312 static void dump_nodeset(nodeset *ns, const char *prefix) {
313         ir_node *irn;
314         foreach_nodeset(ns, irn) {
315                 ir_fprintf(stderr, "%s%+F\n", prefix, irn);
316         }
317 }
318 #endif
319
320 static void build_file_name(rss_t *rss, const char *suffix, size_t suf_len, char *buf, size_t len) {
321         const char *irg_name;
322
323         memset(buf, 0, len);
324         irg_name = get_entity_name(get_irg_entity(rss->irg));
325         snprintf(buf, len - suf_len, "%s-%s-block-%ld",
326                 irg_name, arch_register_class_name(rss->cls), get_irn_node_nr(rss->block));
327         strcat(buf, suffix);
328 }
329
330 /* Dumps all collected bipartite components of current irg as vcg. */
331 static void debug_vcg_dump_bipartite(rss_t *rss) {
332         cbc_t *cbc;
333         FILE  *f;
334         char  file_name[256];
335         static const char suffix[] = "-RSS-CBC.vcg";
336
337         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
338         f = fopen(file_name, "w");
339
340         ir_fprintf(f, "graph: { title: \"connected bipartite component graph of %+F\"\n", rss->irg);
341         fprintf(f, "display_edge_labels: no\n");
342         fprintf(f, "layoutalgorithm: mindepth\n");
343         fprintf(f, "manhattan_edges: yes\n\n");
344
345         foreach_pset(rss->cbc_set, cbc) {
346                 ir_node    *n;
347                 rss_edge_t *ke;
348
349                 fprintf(f, "graph: { titel: \"cbc %d\" label: \"cbc %d\" status:clustered color:yellow\n", cbc->nr, cbc->nr);
350                 foreach_nodeset(cbc->parents, n) {
351                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
352                 }
353                 foreach_nodeset(cbc->children, n) {
354                         ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
355                 }
356                 foreach_pset(cbc->kill_edges, ke) {
357                         ir_fprintf(f, "edge: { sourcename: \"n%d_%d\" targetname: \"n%d_%d\" }\n",
358                                 get_irn_node_nr(ke->src), cbc->nr, get_irn_node_nr(ke->tgt), cbc->nr);
359                 }
360                 fprintf(f, "}\n\n");
361         }
362         fprintf(f, "}\n");
363         fclose(f);
364 }
365
366 /* Dump the computed killing function as vcg. */
367 static void debug_vcg_dump_kill(rss_t *rss) {
368         FILE            *f;
369         char            file_name[256];
370         plist_element_t *el;
371         static const char suffix[] = "-RSS-KILL.vcg";
372
373         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
374         f = fopen(file_name, "w");
375
376         ir_fprintf(f, "graph: { title: \"computed kill graph of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
377         fprintf(f, "display_edge_labels: no\n");
378         fprintf(f, "layoutalgorithm: mindepth\n");
379         fprintf(f, "manhattan_edges: yes\n\n");
380
381         {
382                 /* reset dump_flag */
383                 ir_node *irn;
384                 foreach_phase_irn(&rss->ph, irn) {
385                         rss_irn_t *node = get_rss_irn(rss, irn);
386                         node->dumped = 0;
387                 }
388         }
389
390         /* dump all nodes and their killers */
391         foreach_plist(rss->nodes, el) {
392                 ir_node   *irn     = plist_element_get_value(el);
393                 rss_irn_t *rirn    = get_rss_irn(rss, irn);
394                 rss_irn_t *pk_rirn = get_rss_irn(rss, rirn->killer);
395
396                 if (! rirn->dumped) {
397                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
398                         rirn->dumped = 1;
399                 }
400
401                 if (! pk_rirn->dumped) {
402                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(rirn->killer), rirn->killer);
403                         pk_rirn->dumped = 1;
404                 }
405
406                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
407                         get_irn_node_nr(rirn->killer), get_irn_node_nr(irn));
408         }
409
410         fprintf(f, "}\n");
411         fclose(f);
412 }
413
414 /* Dumps the potential killing DAG (PKG) as vcg. */
415 static void debug_vcg_dump_pkg(rss_t *rss, nodeset *max_ac, int iteration) {
416         FILE              *f;
417         char              file_name[256];
418         char              *suffix   = alloca(32);
419         static const char suffix1[] = "-RSS-PKG.vcg";
420         static const char suffix2[] = "-RSS-PKG-MAXAC.vcg";
421         plist_element_t   *el;
422
423         if (! max_ac) {
424                 snprintf(suffix, 32, "%s", suffix1);
425         }
426         else {
427                 snprintf(suffix, 32, "-%02d%s", iteration, suffix2);
428         }
429
430         build_file_name(rss, suffix, strlen(suffix) + 1, file_name, sizeof(file_name));
431         f = fopen(file_name, "w");
432
433         ir_fprintf(f, "graph: { title: \"potential killing DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
434         fprintf(f, "display_edge_labels: no\n");
435         fprintf(f, "layoutalgorithm: mindepth\n");
436         fprintf(f, "manhattan_edges: yes\n\n");
437
438         {
439                 /* reset dump_flag */
440                 ir_node *irn;
441                 foreach_phase_irn(&rss->ph, irn) {
442                         rss_irn_t *node = get_rss_irn(rss, irn);
443                         node->dumped = 0;
444                 }
445         }
446
447         foreach_plist(rss->nodes, el) {
448                 ir_node   *irn  = plist_element_get_value(el);
449                 rss_irn_t *rirn = get_rss_irn(rss, irn);
450                 char      *c1   = "";
451                 plist_element_t *k_el;
452
453                 /* dump selected saturating values in yellow */
454                 if (max_ac && nodeset_find(max_ac, irn))
455                         c1 = "color:yellow";
456
457                 if (! rirn->dumped) {
458                         if (rirn->chain)
459                                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(irn), irn, rirn->chain->nr, c1);
460                         else
461                                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(irn), irn, c1);
462                         rirn->dumped = 1;
463                 }
464
465                 foreach_plist(rirn->pkiller_list, k_el) {
466                         ir_node   *pkiller = plist_element_get_value(k_el);
467                         rss_irn_t *pk_rirn = get_rss_irn(rss, pkiller);
468                         char      *c2      = "";
469
470                         if (max_ac && nodeset_find(max_ac, pkiller))
471                                 c2 = "color:yellow";
472
473                         if (! pk_rirn->dumped) {
474                                 if (pk_rirn->chain)
475                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(pkiller), pkiller, pk_rirn->chain->nr, c2);
476                                 else
477                                         ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(pkiller), pkiller, c2);
478                                 pk_rirn->dumped = 1;
479                         }
480                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
481                                 get_irn_node_nr(pkiller), get_irn_node_nr(irn));
482                 }
483         }
484         fprintf(f, "}\n");
485         fclose(f);
486 }
487
488 /* Dumps the disjoint value DAG (DVG) as vcg. */
489 static void debug_vcg_dump_dvg(rss_t *rss, dvg_t *dvg) {
490         static const char suffix[] = "-RSS-DVG.vcg";
491         FILE       *f;
492         char       file_name[256];
493         ir_node    *irn;
494         rss_edge_t *edge;
495
496         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
497         f = fopen(file_name, "w");
498
499         ir_fprintf(f, "graph: { title: \"disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
500         fprintf(f, "display_edge_labels: no\n");
501         fprintf(f, "layoutalgorithm: mindepth\n");
502         fprintf(f, "manhattan_edges: yes\n\n");
503
504         /* dump all nodes */
505         foreach_nodeset(dvg->nodes, irn) {
506                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
507         }
508
509         /* dump all edges */
510         foreach_pset(dvg->edges, edge) {
511                 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
512                         get_irn_node_nr(edge->src), get_irn_node_nr(edge->tgt));
513         }
514
515         fprintf(f, "}\n");
516         fclose(f);
517 }
518
519 #if 0
520 /* Dumps the PKG(DVG). */
521 static void debug_vcg_dump_dvg_pkiller(rss_t *rss, dvg_t *dvg) {
522         static const char suffix[] = "-RSS-DVG-PKG.vcg";
523         FILE       *f;
524         char       file_name[256];
525         ir_node    *irn;
526
527         build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
528         f = fopen(file_name, "w");
529
530         ir_fprintf(f, "graph: { title: \"PKG of disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
531         fprintf(f, "display_edge_labels: no\n");
532         fprintf(f, "layoutalgorithm: mindepth\n");
533         fprintf(f, "manhattan_edges: yes\n\n");
534
535         /* dump all nodes */
536         foreach_nodeset(dvg->nodes, irn) {
537                 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
538         }
539
540         /* dump all edges */
541         foreach_nodeset(dvg->nodes, irn) {
542                 rss_irn_t       *node = get_rss_irn(rss, irn);
543                 plist_element_t *el;
544
545                 foreach_plist(node->dvg_pkiller_list, el) {
546                         ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
547                                 get_irn_node_nr(plist_element_get_value(el)), get_irn_node_nr(irn));
548                 }
549         }
550
551         fprintf(f, "}\n");
552         fclose(f);
553 }
554 #endif /* if 0 */
555
556 /**
557  * In case there is no rss information for irn, initialize it.
558  */
559 static void *init_rss_irn(phase_t *ph, ir_node *irn, void *old) {
560         rss_irn_t *res = old ? old : phase_alloc(ph, sizeof(res[0]));
561
562         res->descendant_list = plist_obstack_new(phase_obst(ph));
563         res->descendants     = NULL;
564
565         res->consumer_list   = plist_obstack_new(phase_obst(ph));
566         res->consumer        = NULL;
567
568         res->pkiller_list    = plist_obstack_new(phase_obst(ph));
569
570         res->parent_list     = plist_obstack_new(phase_obst(ph));
571
572         res->killer          = NULL;
573         res->irn             = irn;
574         res->chain           = NULL;
575
576         res->kill_count      = 0;
577         res->desc_walk       = 0;
578         res->live_out        = 0;
579         res->visited         = 0;
580         res->handled         = 0;
581         res->dumped          = 0;
582         res->havecons        = 0;
583
584         return res;
585 }
586
587 /**
588  * Collect all nodes data dependent on current node.
589  */
590 static void collect_descendants(rss_t *rss, rss_irn_t *rirn, ir_node *irn, int *got_sink, unsigned cur_desc_walk) {
591         const ir_edge_t *edge;
592         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
593         ir_node         *block    = rss->block;
594         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
595         int             i;
596
597         if (cur_node->desc_walk >= cur_desc_walk)
598                 return;
599         cur_node->desc_walk = cur_desc_walk;
600
601         /* Stop at Barriers */
602         if (be_is_Barrier(irn))
603                 return;
604
605         /* loop over normal and dependency edges */
606         for (i = 0; i < 2; ++i) {
607                 foreach_out_edge_kind(irn, edge, ekind[i]) {
608                         ir_node *user = get_edge_src_irn(edge);
609
610                         /* skip ignore nodes as they do not really contribute to register pressure */
611                         if (arch_irn_is(rss->arch_env, user, ignore))
612                                 continue;
613
614                         /*
615                                 (a) mode_X means Jump            -> out_edge
616                                 (b) Phi as user of a normal node -> out-edge
617                         */
618                         if (get_irn_mode(user) == mode_X || is_Phi(user)) {
619                                 if (! *got_sink)
620                                         goto force_sink;
621                                 else
622                                         continue;
623                         }
624
625                         if (is_Proj(user)) {
626
627                                 //if (arch_get_irn_reg_class(rss->arch_env, user, -1) == rss->cls)
628                                         collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
629                         }
630                         else {
631                                 /* check if user lives in block and is not a control flow node */
632                                 if (get_nodes_block(user) == block) {
633                                         if (! plist_has_value(rirn->descendant_list, user)) {
634                                                 plist_insert_back(rirn->descendant_list, user);
635                                                 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", user));
636                                         }
637                                         collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
638                                 }
639                                 else if (! *got_sink) {
640 force_sink:
641                                         /* user lives out of block: add sink as descendant if not already done */
642                                         plist_insert_back(rirn->descendant_list, _sink);
643                                         *got_sink = 1;
644                                         DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", _sink));
645                                 }
646                         }
647                 }
648         }
649 }
650
651 /**
652  * Handles a single consumer.
653  */
654 static void collect_single_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *consumer, int *got_sink) {
655         ir_node *block = rss->block;
656
657         assert(! is_Proj(consumer) && "Cannot handle Projs");
658
659         if (! is_Phi(consumer) && ! is_Block(consumer) && get_nodes_block(consumer) == block) {
660                 if (! arch_irn_is(rss->arch_env, consumer, ignore) && ! plist_has_value(rss_irn->consumer_list, consumer)) {
661                         plist_insert_back(rss_irn->consumer_list, consumer);
662                         DBG((rss->dbg, LEVEL_2, "\t\tconsumer %+F\n", consumer));
663                 }
664         }
665         else {
666                 rss_irn->live_out = 1;
667                 DBG((rss->dbg, LEVEL_2, "\t\tlive out %+F", consumer));
668                 if (! *got_sink) {
669                         plist_insert_back(rss_irn->consumer_list, _sink);
670                         *got_sink = 1;
671                         DB((rss->dbg, LEVEL_2, ", %+F added instead", _sink));
672                 }
673                 DB((rss->dbg, LEVEL_2, "\n"));
674         }
675 }
676
677 /**
678  * Collect all nodes consuming the value(s) produced by current node.
679  */
680 static void collect_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *irn, int *got_sink) {
681         const ir_edge_t *edge;
682         int             i;
683         ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
684         rss_irn_t       *cur_node = get_rss_irn(rss, irn);
685
686         if (cur_node->havecons)
687                 return;
688         cur_node->havecons = 1;
689
690         for (i = 0; i < 2; ++i) {
691                 foreach_out_edge_kind(irn, edge, ekind[i]) {
692                         ir_node *consumer = get_edge_src_irn(edge);
693
694                         if (is_Proj(consumer)) {
695                                 //if (arch_get_irn_reg_class(rss->arch_env, consumer, -1) == rss->cls)
696                                         collect_consumer(rss, rss_irn, consumer, got_sink);
697                         }
698                         else
699                                 collect_single_consumer(rss, rss_irn, consumer, got_sink);
700                 }
701         }
702 }
703
704 /**
705  * Collects all consumer and descendant of a irn.
706  */
707 static void collect_node_info(rss_t *rss, ir_node *irn) {
708         static unsigned cur_desc_walk = 0;
709         rss_irn_t       *rss_irn      = get_rss_irn(rss, irn);
710         int             got_sink;
711
712         assert(! is_Proj(irn) && "Cannot handle Projs.");
713
714         /* check if node info is already available */
715         if (rss_irn->handled)
716                 return;
717                 //phase_reinit_single_irn_data(&rss->ph, irn);
718
719         DBG((rss->dbg, LEVEL_1, "\tcomputing consumers of %+F:\n", irn));
720
721         /* collect all consumer */
722         got_sink = 0;
723         collect_consumer(rss, rss_irn, irn, &got_sink);
724
725         /* build sorted consumer array */
726         rss_irn->consumer = build_sorted_array_from_list(rss_irn->consumer_list, phase_obst(&rss->ph));
727
728         DBG((rss->dbg, LEVEL_1, "\tcompute descendants of %+F:\n", irn));
729
730         /* collect descendants */
731         got_sink = 0;
732         collect_descendants(rss, rss_irn, irn, &got_sink, ++cur_desc_walk);
733
734         /* build sorted descendant array */
735         rss_irn->descendants = build_sorted_array_from_list(rss_irn->descendant_list, phase_obst(&rss->ph));
736
737         rss_irn->handled = 1;
738 }
739
740 /**
741  * Checks if v is a potential killer of u.
742  * v is in pkill(u) iff descendants(v) cut consumer(u) is v
743  *
744  * @param rss   The rss object
745  * @param v      The node to check for killer
746  * @param u      The potentially killed value
747  * @return 1 if v is in pkill(u), 0 otherwise
748  */
749 static int is_potential_killer(rss_t *rss, rss_irn_t *v, rss_irn_t *u) {
750         plist_t *list;
751         ir_node **arr;
752         plist_element_t *el;
753
754         assert(is_Sink(v->irn) || ((plist_count(v->descendant_list) > 0 && v->descendants) || 1));
755         assert(is_Sink(u->irn) || ((plist_count(u->consumer_list)   > 0 && u->consumer)    || 1));
756
757         /* as we loop over the list: loop over the shorter one */
758         if (plist_count(v->descendant_list) > plist_count(u->consumer_list)) {
759                 list = u->consumer_list;
760                 arr  = v->descendants;
761         }
762         else {
763                 list = v->descendant_list;
764                 arr  = u->consumer;
765         }
766
767         /* for each list element: try to find element in array */
768         foreach_plist(list, el) {
769                 ir_node *irn   = plist_element_get_value(el);
770                 ir_node *match = BSEARCH_IRN_ARR(irn, arr);
771
772                 if (match && match != irn)
773                         return 0;
774         }
775
776         return 1;
777 }
778
779 /**
780  * Update descendants, consumer and pkiller list for the given irn.
781  */
782 static void update_node_info(rss_t *rss, ir_node *irn, ir_node *pk_irn) {
783         rss_irn_t *node    = get_rss_irn(rss, irn);
784         rss_irn_t *pkiller = get_rss_irn(rss, pk_irn);
785
786         /* add consumer and rebuild the consumer array */
787         if (! plist_has_value(node->consumer_list, pk_irn)) {
788                 plist_insert_back(node->consumer_list, pk_irn);
789                 node->consumer = build_sorted_array_from_list(node->consumer_list, phase_obst(&rss->ph));
790         }
791
792         /* add pk_irn as descendant, add it's descendants to irn's and rebuild array */
793         if (! plist_has_value(node->descendant_list, pk_irn)) {
794                 plist_element_t *el;
795
796                 plist_insert_back(node->descendant_list, pk_irn);
797
798                 foreach_plist(pkiller->descendant_list, el) {
799                         ir_node *desc = plist_element_get_value(el);
800
801                         if (! plist_has_value(node->descendant_list, desc))
802                                 plist_insert_back(node->descendant_list, desc);
803                 }
804
805                 node->descendants = build_sorted_array_from_list(node->descendant_list, phase_obst(&rss->ph));
806         }
807
808 }
809
810 /**
811  * Compute the potential killing set PK.
812  */
813 static void compute_pkill_set(rss_t *rss) {
814         plist_element_t *u_el, *v_el;
815
816         foreach_plist(rss->nodes, u_el) {
817                 ir_node   *u_irn = plist_element_get_value(u_el);
818                 rss_irn_t *u     = get_rss_irn(rss, u_irn);
819
820                 DBG((rss->dbg, LEVEL_1, "\tcomputing potential killers of %+F:\n", u_irn));
821
822                 /* check each consumer if it is a potential killer  */
823                 foreach_plist(u->consumer_list, v_el) {
824                         ir_node   *v_irn = plist_element_get_value(v_el);
825                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
826
827                         /* check, if v is a potential killer of u */
828                         if (is_potential_killer(rss, v, u)) {
829                                 if (! plist_has_value(u->pkiller_list, v_irn))
830                                         plist_insert_back(u->pkiller_list, v_irn);
831                                 v->kill_count++;
832                                 DBG((rss->dbg, LEVEL_2, "\t\tpotential killer %+F\n", v_irn));
833                         }
834                 }
835
836                 u->killer = _sink;
837         }
838
839         if (rss->opts->dump_flags & RSS_DUMP_PKG)
840                 debug_vcg_dump_pkg(rss, NULL, 0);
841 }
842
843 /**
844  * Build set of killing edges (from values to their potential killers)
845  */
846 static void build_kill_edges(rss_t *rss, pset *epk) {
847         plist_element_t *el, *k_el;
848
849         foreach_plist(rss->nodes, el) {
850                 ir_node    *irn  = plist_element_get_value(el);
851                 rss_irn_t *rirn = get_rss_irn(rss, irn);
852
853                 DBG((rss->dbg, LEVEL_3, "\t\tbuilding kill edges for %+F:\n", irn));
854
855                 foreach_plist(rirn->pkiller_list, k_el) {
856                         ir_node    *pkiller = plist_element_get_value(k_el);
857                         rss_edge_t *ke      = obstack_alloc(phase_obst(&rss->ph), sizeof(*ke));
858
859                         ke->src  = irn;
860                         ke->tgt  = pkiller;
861                         ke->next = NULL;
862
863                         DBG((rss->dbg, LEVEL_3, "\t\t\ttarget %+F\n", pkiller));
864
865                         pset_insert(epk, ke, HASH_RSS_EDGE(ke));
866                 }
867         }
868 }
869
870 #ifdef DEBUG_libfirm
871 /* print the given cbc for debugging purpose */
872 static void debug_print_cbc(firm_dbg_module_t *mod, cbc_t *cbc) {
873         ir_node    *n;
874         rss_edge_t *ke;
875
876         DBG((mod, LEVEL_3, "\t\tS = set of parents:\n"));
877         foreach_nodeset(cbc->parents, n) {
878                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
879         }
880         DBG((mod, LEVEL_3, "\t\tT = set of children:\n"));
881         foreach_nodeset(cbc->children, n) {
882                 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
883         }
884         DBG((mod, LEVEL_3, "\t\tE = Edges from producers to consumers\n"));
885         foreach_pset(cbc->kill_edges, ke) {
886                 DBG((mod, LEVEL_3, "\t\t\t%+F -> %+F\n", ke->src, ke->tgt));
887         }
888 }
889 #endif
890
891 /**
892  * Construct the bipartite decomposition.
893  * Sid-Ahmed-Ali Touati, Phd Thesis
894  * Register Pressure in Instruction Level Parallelism, p. 71
895  */
896 static void compute_bipartite_decomposition(rss_t *rss) {
897         pset *epk    = new_pset(cmp_rss_edges, 10);
898         int  cur_num = 0;
899
900         plist_element_t *el;
901
902         DBG((rss->dbg, LEVEL_1, "\tcomputing bipartite decomposition:\n"));
903
904         build_kill_edges(rss, epk);
905
906         foreach_plist(rss->nodes, el) {
907                 ir_node   *u_irn   = plist_element_get_value(el);
908                 rss_irn_t *u       = get_rss_irn(rss, u_irn);
909                 int       p_change = 1;
910                 int       c_change = 1;
911                 int       vrfy_ok  = 1;
912
913                 cbc_t           *cbc;
914                 plist_element_t *el2;
915                 rss_edge_t      *k_edge;
916                 rss_edge_t      *kedge_root = NULL;
917                 ir_node         *t_irn, *s_irn;
918
919                 if (u->visited || u_irn == _sink)
920                         continue;
921
922                 DBG((rss->dbg, LEVEL_2, "\t\t%+F choosen:\n", u_irn));
923
924                 cbc     = obstack_alloc(phase_obst(&rss->ph), sizeof(*cbc));
925                 cbc->nr = cur_num++;
926
927                 /* initialize S_cb */
928                 cbc->parents = new_nodeset(5);
929                 nodeset_insert(cbc->parents, u_irn);
930                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (init)\n", u_irn));
931
932                 /* E_cb = empty */
933                 cbc->kill_edges = new_pset(cmp_rss_edges, 5);
934
935                 /* each parent gets killed by at least one value from children */
936
937                 /* T_cb = PK_successors(u) */
938                 cbc->children = new_nodeset(5);
939                 foreach_plist(u->pkiller_list, el2) {
940                         nodeset_insert(cbc->children, plist_element_get_value(el2));
941                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (init)\n", plist_element_get_value(el2)));
942                 }
943
944                 /*
945                         Now: insert the parents of all children into the parent set
946                         and insert the children of all parents into the children set
947                         until the sets don't change any more
948                 */
949                 while (p_change || c_change) {
950                         p_change = c_change = 0;
951
952                         /* accumulate parents */
953                         foreach_nodeset(cbc->children, t_irn) {
954                                 foreach_pset(epk, k_edge) {
955                                         ir_node *val = k_edge->src;
956
957                                         if (k_edge->tgt == t_irn && ! nodeset_find(cbc->parents, val)) {
958                                                 nodeset_insert(cbc->parents, val);
959                                                 p_change = 1;
960                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (killed by %+F)\n", val, t_irn));
961                                         }
962                                 }
963                         }
964
965                         /* accumulate children */
966                         foreach_nodeset(cbc->parents, s_irn) {
967                                 foreach_pset(epk, k_edge) {
968                                         ir_node *val = k_edge->tgt;
969
970                                         if (k_edge->src == s_irn  && ! nodeset_find(cbc->children, val)) {
971                                                 nodeset_insert(cbc->children, val);
972                                                 c_change = 1;
973                                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (kills %+F)\n", val, s_irn));
974                                         }
975                                 }
976                         }
977                 }
978
979                 /* mark all parent values as visited */
980                 foreach_nodeset(cbc->parents, s_irn) {
981                         rss_irn_t *s = get_rss_irn(rss, s_irn);
982                         s->visited = 1;
983                         /* assure bipartite property */
984 #if 0
985                         if (nodeset_find(cbc->children, s_irn)) {
986                                 nodeset_remove(cbc->children, s_irn);
987                                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F removed from to children\n", s_irn));
988                         }
989 #endif
990                 }
991
992                 /* update edges */
993                 foreach_pset(epk, k_edge) {
994                         if (nodeset_find(cbc->parents, k_edge->src) && nodeset_find(cbc->children, k_edge->tgt)) {
995                                 pset_insert(cbc->kill_edges, k_edge, HASH_RSS_EDGE(k_edge));
996                                 /*
997                                         Link all k_edges which are about to be removed together.
998                                         Beware: pset_remove kills the iterator.
999                                 */
1000                                 k_edge->next = kedge_root;
1001                                 kedge_root   = k_edge;
1002                         }
1003                 }
1004
1005                 /* remove all linked k_edges */
1006                 for (; kedge_root; kedge_root = kedge_root->next) {
1007                         pset_remove(epk, kedge_root, HASH_RSS_EDGE(kedge_root));
1008                 }
1009
1010                 /* verify the cbc */
1011                 foreach_nodeset(cbc->parents, s_irn) {
1012                         int is_killed = 0;
1013
1014                         foreach_pset(cbc->kill_edges, k_edge) {
1015                                 if (k_edge->src == s_irn) {
1016                                         is_killed = 1;
1017                                         pset_break(cbc->kill_edges);
1018                                         break;
1019                                 }
1020                         }
1021
1022                         if (! is_killed) {
1023                                 vrfy_ok = 0;
1024                                 ir_fprintf(stderr, "Warning: parent %+F is not killed in current cbc\n", s_irn);
1025                         }
1026                 }
1027                 assert(vrfy_ok && "Verification of CBC failed");
1028
1029                 /* add the connected bipartite component */
1030                 if (nodeset_count(cbc->parents) > 0 && nodeset_count(cbc->children) > 0 && pset_count(cbc->kill_edges) > 0)
1031                         pset_insert(rss->cbc_set, cbc, (unsigned)cbc->nr);
1032                 DBG((rss->dbg, LEVEL_2, "\tbipartite component %d inserted:\n", cbc->nr));
1033                 DEBUG_ONLY(
1034                         if (firm_dbg_get_mask(rss->dbg) & LEVEL_2)
1035                                 debug_print_cbc(rss->dbg, cbc);
1036                 );
1037         }
1038
1039         if (rss->opts->dump_flags & RSS_DUMP_CBC)
1040                 debug_vcg_dump_bipartite(rss);
1041
1042         del_pset(epk);
1043 }
1044
1045 /**
1046  * Select the child with the maximum cost.
1047  */
1048 static child_t *select_child_max_cost(rss_t *rss, nodeset *x, nodeset *y, child_t *t, cbc_t *cbc) {
1049         ir_node *child;
1050         float   max_cost = -1.0f;
1051
1052         DBG((rss->dbg, LEVEL_2, "\t\tcomputing children costs:\n"));
1053
1054         foreach_nodeset(cbc->children, child) {
1055                 rss_irn_t  *r_child             = get_rss_irn(rss, child);
1056                 int         num_unkilled_parents = 0;
1057                 int         num_descendants;
1058                 rss_edge_t *k_edge;
1059                 float       cost;
1060
1061                 /* get the number of unkilled parents */
1062                 foreach_pset(cbc->kill_edges, k_edge) {
1063                         if (k_edge->tgt == child && nodeset_find(x, k_edge->src))
1064                                 ++num_unkilled_parents;
1065                 }
1066
1067                 cost = (float)num_unkilled_parents;
1068
1069                 num_descendants = plist_count(r_child->descendant_list) + nodeset_count(y);
1070
1071                 if (num_descendants > 0)
1072                         cost /= (float)num_descendants;
1073
1074                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F, #desc %d, cost %.3f\n", child, num_descendants, cost));
1075
1076                 if (cost > max_cost) {
1077                         t->irn   = child;
1078                         t->cost  = cost;
1079                         max_cost = cost;
1080                 }
1081         }
1082
1083         return t;
1084 }
1085
1086 /**
1087  * Remove all parents from x which are killed by t_irn.
1088  */
1089 static void remove_covered_parents(rss_t *rss, nodeset *x, ir_node *t_irn, cbc_t *cbc) {
1090         rss_irn_t  *t = get_rss_irn(rss, t_irn);
1091         rss_edge_t *k_edge;
1092
1093         DBG((rss->dbg, LEVEL_2, "\t\tremoving parents covered by %+F:\n", t_irn));
1094
1095         foreach_pset(cbc->kill_edges, k_edge) {
1096                 if (k_edge->tgt == t_irn && nodeset_find(x, k_edge->src)) {
1097                         nodeset_remove(x, k_edge->src);
1098                         plist_insert_back(t->parent_list, k_edge->src);
1099                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", k_edge->src));
1100                 }
1101         }
1102 }
1103
1104 static void update_cumulated_descendent_values(rss_t *rss, nodeset *y, ir_node *t_irn) {
1105         rss_irn_t *t = get_rss_irn(rss, t_irn);
1106         plist_element_t *el;
1107
1108         DBG((rss->dbg, LEVEL_2, "\t\tupdating cumulated descendant value of %+F:\n", t_irn));
1109
1110         foreach_plist(t->descendant_list, el) {
1111                 nodeset_insert(y, plist_element_get_value(el));
1112                 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", plist_element_get_value(el)));
1113         }
1114 }
1115
1116 /**
1117  * Greedy-k: a heuristics for the MMA problem
1118  */
1119 static void compute_killing_function(rss_t *rss) {
1120         cbc_t *cbc;
1121         struct obstack obst;
1122
1123         obstack_init(&obst);
1124
1125         rss->cbc_set = pset_new_ptr(5);
1126         compute_bipartite_decomposition(rss);
1127
1128         /* for all bipartite components do: */
1129         foreach_pset(rss->cbc_set, cbc) {
1130                 ir_node *p;
1131                 nodeset *x       = new_nodeset(10);
1132                 nodeset *y       = new_nodeset(10);
1133                 child_t **sks    = NEW_ARR_F(child_t *, 20);
1134                 int     cur_len  = 0;
1135                 int     cur_size = 20;
1136                 int     i;
1137
1138                 DBG((rss->dbg, LEVEL_1, "\tcomputing SKS for cbc %d:\n", cbc->nr));
1139                 DBG((rss->dbg, LEVEL_2, "\t\tinitializing parents X:\n"));
1140
1141                 /* X = S_cb (all parents are initially uncovered) */
1142                 foreach_nodeset(cbc->parents, p) {
1143                         nodeset_insert(x, p);
1144                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", p));
1145                 }
1146
1147                 /* while X not empty */
1148                 while (nodeset_count(x) > 0) {
1149                         child_t *t = obstack_alloc(&obst, sizeof(*t));
1150                         memset(t, 0, sizeof(*t));
1151
1152                         t = select_child_max_cost(rss, x, y, t, cbc);
1153
1154                         if (cur_len >= cur_size) {
1155                                 ARR_EXTO(child_t *, sks, cur_size * 2);
1156                                 cur_size *= 2;
1157                         }
1158
1159                         DBG((rss->dbg, LEVEL_2, "\t\tinsert child %+F (%.3f) into SKS at pos %d\n", t->irn, t->cost, cur_len));
1160
1161                         sks[cur_len++] = t;
1162                         remove_covered_parents(rss, x, t->irn, cbc);
1163                         update_cumulated_descendent_values(rss, y, t->irn);
1164                 }
1165
1166                 ARR_SHRINKLEN(sks, cur_len);
1167
1168                 /* sort SKS in increasing cost order */
1169                 qsort(sks, cur_len, sizeof(sks[0]), cmp_child_costs);
1170
1171                 DBG((rss->dbg, LEVEL_2, "\tprocessing SKS for cbc %d:\n", cbc->nr));
1172
1173                 /* build killing function */
1174                 for (i = cur_len - 1; i >= 0; --i) { /* loop over sks in decreasing cost order */
1175                         child_t         *t = sks[i];
1176                         rss_irn_t       *rt = get_rss_irn(rss, t->irn);
1177                         plist_element_t *p_el;
1178
1179                         DBG((rss->dbg, LEVEL_3, "\t\t\tkiller %+F (%.3f):\n", t->irn, t->cost));
1180
1181                         /* kill all unkilled parents of t */
1182                         foreach_plist(rt->parent_list, p_el) {
1183                                 ir_node    *par = plist_element_get_value(p_el);
1184                                 rss_irn_t *rpar = get_rss_irn(rss, par);
1185
1186                                 if (is_Sink(rpar->killer)) {
1187                                         rpar->killer = t->irn;
1188                                         DBG((rss->dbg, LEVEL_2, "\t\tkill %+F\n", rpar->irn));
1189                                 }
1190                                 else {
1191                                         DBG((rss->dbg, LEVEL_3, "\t\t\tkeeping %+F as killer for %+F\n", rpar->killer, rpar->irn));
1192                                 }
1193                         }
1194                 }
1195
1196                 del_nodeset(x);
1197                 del_nodeset(y);
1198                 DEL_ARR_F(sks);
1199         }
1200
1201         if (rss->opts->dump_flags & RSS_DUMP_KILL)
1202                 debug_vcg_dump_kill(rss);
1203
1204         del_pset(rss->cbc_set);
1205         obstack_free(&obst, NULL);
1206 }
1207
1208 /**
1209  * Adds the edge src -> tgt to the dvg. Checks if reverse edge is already there (asserts).
1210  */
1211 static INLINE void add_dvg_edge(rss_t *rss, dvg_t *dvg, ir_node *src, ir_node *tgt, int have_source) {
1212         rss_edge_t *dvg_edge;
1213         rss_edge_t key;
1214
1215         if (! have_source)
1216                 nodeset_insert(dvg->nodes, src);
1217         else
1218                 assert(nodeset_find(dvg->nodes, src) != NULL && "Wrong assumption");
1219
1220         nodeset_insert(dvg->nodes, tgt);
1221
1222         key.src = tgt;
1223         key.tgt = src;
1224         assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1225
1226         key.src = src;
1227         key.tgt = tgt;
1228         if (NULL != pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1229                 /* add the edge to the DVG */
1230                 dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1231
1232                 dvg_edge->src  = src;
1233                 dvg_edge->tgt  = tgt;
1234                 dvg_edge->next = NULL;
1235
1236                 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", src, tgt));
1237                 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1238         }
1239 }
1240
1241 /**
1242  * Computes the disjoint value DAG (DVG).
1243  * BEWARE: It is not made explicitly clear in the Touati paper,
1244  *         but the DVG is meant to be build from the KILLING DAG
1245  */
1246 static void compute_dvg(rss_t *rss, dvg_t *dvg) {
1247         plist_element_t *el;
1248
1249         DBG((rss->dbg, LEVEL_1, "\tcomputing DVG:\n"));
1250
1251         foreach_plist(rss->nodes, el) {
1252                 ir_node    *u_irn    = plist_element_get_value(el);
1253                 rss_irn_t  *u        = get_rss_irn(rss, u_irn);
1254                 rss_irn_t  *u_killer = get_rss_irn(rss, u->killer);
1255                 int        i;
1256
1257                 /* TODO: omit nodes only having sink as pkiller and killing no one */
1258
1259                 /* add an edge to killer */
1260                 add_dvg_edge(rss, dvg, u_irn, u->killer, 0);
1261
1262                 if (is_Sink(u->killer))
1263                         continue;
1264
1265                 /* We add an edge to every killer, from where we could be reached. */
1266                 for (i = ARR_LEN_SAFE(u_killer->descendants) - 1; i >= 0; --i) {
1267                         add_dvg_edge(rss, dvg, u_irn, u_killer->descendants[i], 1);
1268                 }
1269
1270 #if 0
1271
1272                 foreach_plist(rss->nodes, el2) {
1273                         ir_node *v_irn = plist_element_get_value(el2);
1274
1275                         /*
1276                                 There is an edge (u, v) in the DVG iff v is a descendant of the killer(u).
1277                         */
1278                         if (BSEARCH_IRN_ARR(v_irn, u_kill->descendants)) {
1279                                 rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1280                                 rss_edge_t key;
1281
1282                                 /* insert the user into the DVG and append it to the user list of u */
1283                                 nodeset_insert(dvg->nodes, v_irn);
1284                                 if (! plist_has_value(u->dvg_user_list, v_irn))
1285                                         plist_insert_back(u->dvg_user_list, v_irn);
1286
1287                                 dvg_edge->src  = u_irn;
1288                                 dvg_edge->tgt  = v_irn;
1289                                 dvg_edge->next = NULL;
1290
1291                                 key.src = v_irn;
1292                                 key.tgt = u_irn;
1293
1294                                 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1295
1296                                 /* add the edge to the DVG */
1297                                 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", u_irn, v_irn));
1298                                 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1299                         }
1300                 }
1301 #endif /* if 0 */
1302         }
1303
1304         if (rss->opts->dump_flags & RSS_DUMP_DVG)
1305                 debug_vcg_dump_dvg(rss, dvg);
1306 }
1307
1308 /**
1309  * Updates the dvg structure when a serialization edge from src -> tgt is added.
1310  */
1311 static void update_dvg(rss_t *rss, dvg_t *dvg, rss_irn_t *src, rss_irn_t *tgt) {
1312         int i, j, idx;
1313         rss_edge_t *edge;
1314         rss_edge_t **arr = alloca(pset_count(dvg->edges) * sizeof(arr[0]));
1315
1316         /*
1317                 Add an edge from serialization target to serialization src:
1318                 src cannot be alive with target
1319         */
1320         add_dvg_edge(rss, dvg, tgt->irn, src->irn, 0);
1321
1322         /* Add edges to src's descendants as well, they also getting serialized. */
1323         for (i = ARR_LEN_SAFE(src->descendants) - 1; i >= 0; --i) {
1324                 add_dvg_edge(rss, dvg, tgt->irn, src->descendants[i], 1);
1325         }
1326
1327         /* We also have to add edges from targets predecessors in dvg */
1328         idx = 0;
1329         /* We cannot insert elements into set over which we iterate ... */
1330         foreach_pset(dvg->edges, edge) {
1331                 if (edge->tgt == tgt->irn) {
1332                         arr[idx++] = edge;
1333                 }
1334         }
1335
1336         for (i = 0; i < idx; ++i) {
1337                 edge = arr[i];
1338                 add_dvg_edge(rss, dvg, edge->src, src->irn, 1);
1339                 for (j = ARR_LEN_SAFE(src->descendants) - 1; j >= 0; --j) {
1340                         add_dvg_edge(rss, dvg, edge->src, src->descendants[j], 1);
1341                 }
1342         }
1343 }
1344
1345 #if 0
1346 /**
1347  * Accumulate all descendants for root into list.
1348  */
1349 static void accumulate_dvg_descendant_values(rss_t *rss, rss_irn_t *root, plist_t *list) {
1350         if (plist_count(root->dvg_user_list) > 0) {
1351                 plist_element_t *el;
1352
1353                 foreach_plist(root->dvg_user_list, el) {
1354                         ir_node   *v_irn = plist_element_get_value(el);
1355                         rss_irn_t *v     = get_rss_irn(rss, v_irn);
1356
1357                         /* add v as descendant */
1358                         if (! plist_has_value(list, v_irn)) {
1359                                 plist_insert_back(list, v_irn);
1360                                 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG descendant %+F\n", v_irn));
1361                         }
1362
1363                         /* accumulate v's descendants */
1364                         accumulate_dvg_descendant_values(rss, v, list);
1365                 }
1366         }
1367 }
1368
1369 /**
1370  * Builds the list of potential killers for each node
1371  * in the given DVG.
1372  * Needs the descendant list for all user as sorted array.
1373  */
1374 static void build_dvg_pkiller_list(rss_t *rss, dvg_t *dvg) {
1375         ir_node *irn;
1376
1377         foreach_nodeset(dvg->nodes, irn) {
1378                 rss_irn_t       *node = get_rss_irn(rss, irn);
1379                 plist_element_t *el, *el2;
1380
1381                 DBG((rss->dbg, DEBUG_DVG, "\t\tbuilding pkiller list for %+F\n", irn));
1382
1383                 /* check each user */
1384                 foreach_plist(node->dvg_user_list, el) {
1385                         ir_node *u_irn = plist_element_get_value(el);
1386
1387                         /* is the current user u_irn not a descendant of any other user -> pkiller */
1388                         foreach_plist(node->dvg_user_list, el2) {
1389                                 ir_node   *v_irn = plist_element_get_value(el2);
1390                                 rss_irn_t *v     = get_rss_irn(rss, v_irn);
1391
1392                                 if (el != el2                             &&
1393                                         ! BSEARCH_IRN_ARR(u_irn, v->dvg_desc) &&
1394                                         ! plist_has_value(node->dvg_pkiller_list, u_irn))
1395                                 {
1396                                         plist_insert_back(node->dvg_pkiller_list, u_irn);
1397                                         DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG pkiller %+F\n", u_irn));
1398                                 }
1399                         }
1400                 }
1401
1402                 node->dvg_pkiller = build_sorted_array_from_list(node->dvg_pkiller_list, phase_obst(&rss->ph));
1403         }
1404
1405         DEBUG_ONLY(
1406                 if (firm_dbg_get_mask(rss->dbg) & DEBUG_DVG)
1407                         debug_vcg_dump_dvg_pkiller(rss, dvg);
1408         );
1409 }
1410
1411 #endif /* if 0 */
1412
1413 /**
1414  * Compute the maximal antichain of the current DVG.
1415  * This is a reimplementation of the MAXIMAL_ANTI_CHAIN function
1416  * from the DDG library 1.1 (DAG.cpp).
1417  */
1418 static nodeset *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration) {
1419         int         n               = nodeset_count(dvg->nodes);
1420         int         *assignment     = alloca(n * sizeof(assignment[0]));
1421         int         *assignment_rev = alloca(n * sizeof(assignment_rev[0]));
1422         int         *idx_map        = alloca(n * sizeof(idx_map[0]));
1423         hungarian_problem_t *bp;
1424         nodeset     *values, *temp;
1425         ir_node     *u_irn;
1426         int         i, j, cost, cur_chain, res;
1427         rss_edge_t  *dvg_edge;
1428
1429 #define MAP_IDX(irn) bsearch_for_index(get_irn_idx(irn), idx_map,  n,  1)
1430
1431         if (pset_count(dvg->edges) == 0)
1432                 return NULL;
1433
1434         bp = hungarian_new(n, n, 1, HUNGARIAN_MATCH_NORMAL);
1435
1436         /*
1437                 At first, we build an index map for the nodes in the DVG,
1438                 because we cannot use the irn idx therefore as the resulting
1439                 bipartite data structure would have around 1.2GB.
1440                 So we limit the size to the number of nodes we have in the DVG
1441                 and build a sorted index map for their irn indices.
1442         */
1443         i = 0;
1444         foreach_nodeset(dvg->nodes, u_irn) {
1445                 idx_map[i++] = get_irn_idx(u_irn);
1446         }
1447         qsort(idx_map, n, sizeof(idx_map[0]), cmp_int);
1448
1449         foreach_pset(dvg->edges, dvg_edge) {
1450                 int idx_u = MAP_IDX(dvg_edge->src);
1451                 int idx_v = MAP_IDX(dvg_edge->tgt);
1452
1453                 /* add the entry to the bipartite data structure */
1454                 hungarian_add(bp, idx_u, idx_v, 1);
1455                 DBG((rss->dbg, LEVEL_3, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1456                         idx_u, dvg_edge->src, idx_v, dvg_edge->tgt));
1457         }
1458 #if 0
1459         /*
1460                 Add a bipartite entry for each pair of nodes (u, v), where exists a
1461                 path in the DVG from u to v, ie. connect all descendants(v) to v.
1462                 desc_dvg(v) = accumulated descendants of all z in dvg_user_list(v)
1463         */
1464         foreach_nodeset(dvg->nodes, u_irn) {
1465                 rss_irn_t *u        = get_rss_irn(rss, u_irn);
1466                 int       idx_u_irn = MAP_IDX(u_irn);
1467
1468                 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG descendants of %+F:\n", u_irn));
1469
1470                 //plist_clear(u->dvg_desc_list);
1471                 //accumulate_dvg_descendant_values(rss, u, u->dvg_desc_list);
1472
1473                 /*
1474                         FIXME: The array is build on the phase obstack and we cannot free the data.
1475                         So the array get as many times allocated as this function is called.
1476                 */
1477
1478                 /* build the sorted array for faster searches */
1479                 //u->dvg_desc = build_sorted_array_from_list(u->dvg_desc_list, phase_obst(&rss->ph));
1480
1481                 DBG((rss->dbg, DEBUG_MAX_AC, "\t\tadding bipartite entries of %+F:\n", u_irn));
1482
1483                 /* add the bipartite entries for all descendants */
1484                 for (i = ARR_LEN_SAFE(u->dvg_desc) - 1; i >= 0; --i) {
1485                         rss_irn_t *desc    = get_rss_irn(rss, u->dvg_desc[i]);
1486                         int       idx_desc = MAP_IDX(u->dvg_desc[i]);
1487
1488                         /* add the entry to the bipartite data structure */
1489                         hungarian_add(bp, idx_u_irn, idx_desc, 1);
1490                         DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1491                                 idx_u_irn, u_irn, idx_desc, u->dvg_desc[i]));
1492
1493                         need_matching = 1;
1494                 }
1495         }
1496 #endif
1497
1498         /* We want maximum cardinality matching */
1499         hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);
1500
1501 #if 0
1502         DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG pkiller:\n"));
1503         /* beware: the following function needs the dvg_desc array */
1504         build_dvg_pkiller_list(rss, dvg);
1505 #endif
1506
1507         DBG((rss->dbg, LEVEL_2, "\t\tcomputing bipartite matching\n"));
1508         /*
1509                 The maximum cardinality bipartite matching gives us the minimal
1510                 chain partition, which corresponds to the maximum anti chains.
1511         */
1512         res = hungarian_solve(bp, assignment, &cost, 1);
1513         assert(res == 0 && "Bipartite matching failed!");
1514
1515         hungarian_free(bp);
1516         memset(assignment_rev, -1, n * sizeof(assignment_rev[0]));
1517
1518         /* build the reverse assignment, ie. foreach i -> j, add j -> i */
1519         for (i = 0; i < n; ++i) {
1520                 if (assignment[i] >= 0) {
1521                         int j = assignment[i];
1522                         assignment_rev[j] = i;
1523                 }
1524         }
1525
1526         DBG((rss->dbg, LEVEL_2, "\t\tgot assignment with cost %d\n", cost));
1527         DBG((rss->dbg, LEVEL_3, "\t\t\tassignment   ---   reverse assignment\n", cost));
1528         for (i = 0; i < n; ++i) {
1529                 DBG((rss->dbg, LEVEL_3, "\t\t\t%3d -> %3d         %3d -> %3d\n", i, assignment[i], i, assignment_rev[i]));
1530         }
1531
1532         values    = new_nodeset(10);
1533         cur_chain = 0;
1534         /* Construction of the minimal chain partition */
1535         for (j = 0; j < n; ++j) {
1536                 /* check nodes, which did not occur as target */
1537                 if (assignment_rev[j] == -1) {
1538                         int       xj      = idx_map[j];
1539                         ir_node   *xj_irn = get_idx_irn(rss->irg, xj);
1540                         rss_irn_t *xj_rss = get_rss_irn(rss, xj_irn);
1541                         chain_t   *c      = obstack_alloc(phase_obst(&rss->ph), sizeof(*c));
1542                         int       source;
1543
1544                         /* there was no source for j -> we have a source of a new chain */
1545                         nodeset_insert(values, xj_irn);
1546
1547                         c->elements = plist_obstack_new(phase_obst(&rss->ph));
1548                         c->nr       = cur_chain++;
1549                         plist_insert_back(c->elements, xj_irn);
1550
1551                         xj_rss->chain = c;
1552
1553                         DBG((rss->dbg, LEVEL_2, "\t\tstarting chain %d:\n", c->nr));
1554                         DBG((rss->dbg, LEVEL_2, "\t\t\t%+F (%d)", xj_irn, j));
1555
1556                         /* follow chain, having j as source */
1557                         source = j;
1558                         while (assignment[source] >= 0) {
1559                                 int       target  = assignment[source];
1560                                 int       irn_idx = idx_map[target];
1561                                 ir_node   *irn    = get_idx_irn(rss->irg, irn_idx);
1562                                 rss_irn_t *node   = get_rss_irn(rss, irn);
1563
1564                                 plist_insert_back(c->elements, irn);
1565                                 node->chain = c;
1566
1567                                 DB((rss->dbg, LEVEL_2, " -> %+F (%d)", irn, target));
1568
1569                                 /* new source = last target */
1570                                 source = target;
1571                         }
1572
1573                         DB((rss->dbg, LEVEL_2, "\n"));
1574                 }
1575         }
1576
1577         /*
1578                 Computing the maximal antichain: Select an element from each
1579                 chain such, such it is parallel with the others.
1580         */
1581         DBG((rss->dbg, LEVEL_1, "\t\tcomputing set of saturation values (MAX AC)\n"));
1582         DBG((rss->dbg, LEVEL_3, "\t\tstarting with:\n"));
1583
1584         DEBUG_ONLY(
1585                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_3)
1586                         dump_nodeset(values, "\t\t\t");
1587         )
1588
1589         temp = NULL;
1590         do {
1591                 /*
1592                         We need an explicit array for the values as
1593                         we cannot iterate multiple times over the same
1594                         set at the same time. :-(((((
1595                 */
1596                 int     n         = nodeset_count(values);
1597                 int     i         = 0;
1598                 ir_node **val_arr = NEW_ARR_F(ir_node *, n);
1599
1600                 foreach_nodeset(values, u_irn)
1601                         val_arr[i++] = u_irn;
1602
1603                 if (temp)
1604                         del_nodeset(temp);
1605
1606                 temp = new_nodeset(10);
1607
1608                 /* Select all nodes from current value set, having another node in the set as descendant. */
1609                 for (i = 0; i < n; ++i) {
1610                         rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1611
1612                         for (j = 0; j < n; ++j) {
1613                                 if (i != j) {
1614                                         rss_edge_t key;
1615
1616                                         key.src = val_arr[i];
1617                                         key.tgt = val_arr[j];
1618
1619                                         if (pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1620                                                 /* v[j] is descendant of u -> remove u and break */
1621                                                 nodeset_insert(temp, u->irn);
1622                                                 nodeset_remove(values, u->irn);
1623
1624                                                 DBG((rss->dbg, LEVEL_3, "\t\t\tremoving %+F from values, adding it to temp\n", u->irn));
1625
1626                                                 break;
1627                                         }
1628                                 }
1629                         }
1630                 }
1631
1632                 /* Try to insert the chain predecessor of all selected u's */
1633                 foreach_nodeset(temp, u_irn) {
1634                         rss_irn_t       *u  = get_rss_irn(rss, u_irn);
1635                         chain_t         *c  = u->chain;
1636                         plist_element_t *el = plist_find_value(c->elements, u_irn);
1637
1638                         assert(el && "Missing element in chain!");
1639
1640                         /* If u has predecessor in chain: insert the predecessor */
1641                         if (el == plist_element_get_prev(el)) {
1642                                 nodeset_insert(values, plist_element_get_value(el));
1643                                 DBG((rss->dbg, LEVEL_3, "\t\t\tadding %+F to values\n", plist_element_get_value(el)));
1644                         }
1645                 }
1646
1647
1648                 DEL_ARR_F(val_arr);
1649         } while (nodeset_count(temp) > 0);
1650
1651         DBG((rss->dbg, LEVEL_2, "\t\tfinal set:\n"));
1652         DEBUG_ONLY(
1653                 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2) {
1654                         dump_nodeset(values, "\t\t\t");
1655                 }
1656         );
1657
1658         if (rss->opts->dump_flags & RSS_DUMP_MAXAC)
1659                 debug_vcg_dump_pkg(rss, values, iteration);
1660
1661         del_nodeset(temp);
1662
1663         return values;
1664
1665 #undef MAP_IDX
1666 }
1667
1668 /**
1669  * Computes the best serialization between two nodes of sat_vals.
1670  */
1671 static serialization_t *compute_best_admissible_serialization(rss_t *rss, nodeset *sat_vals, serialization_t *ser, int num_regs) {
1672         int        n                    = nodeset_count(sat_vals);
1673         int        n_idx                = ARR_LEN_SAFE(rss->idx_map);
1674         int        i                    = 0;
1675         ir_node    **val_arr            = alloca(n * sizeof(val_arr[0]));
1676         bitset_t   *bs_sv               = bitset_alloca(n_idx);
1677         bitset_t   *bs_vdesc            = bitset_alloca(n_idx);
1678         bitset_t   *bs_tmp              = bitset_alloca(n_idx);
1679         bitset_t   *bs_ukilldesc        = bitset_alloca(n_idx);
1680         int        best_benefit         = INT_MAX;
1681         int        best_omega2          = INT_MAX;
1682         int        best_benefit_omega20 = INT_MAX;
1683         int        has_omega1           = 0;
1684         int        j, k;
1685         ir_node    *irn;
1686         rss_edge_t min_benefit_edge;
1687         rss_edge_t min_omega20_edge;
1688         rss_irn_t  *ser_u_omega1 = NULL, *ser_v_omega1 = NULL;
1689         rss_irn_t  *ser_u_omega20 = NULL, *ser_v_omega20 = NULL;
1690
1691         DBG((rss->dbg, LEVEL_1, "\tcomputing admissible serializations:\n"));
1692
1693         /*
1694                 We need an explicit array for the values as
1695                 we cannot iterate multiple times over the same
1696                 set at the same time. :-(((((
1697         */
1698
1699         foreach_nodeset(sat_vals, irn) {
1700                 val_arr[i++] = irn;
1701                 bitset_set(bs_sv, BLOCK_IDX_MAP(rss, irn));
1702         }
1703
1704         /*
1705                 We build all admissible serializations and remember the best found so far.
1706                 For u in sat_vals:
1707                  For v in sat_val:
1708                    if v in pkiller(u): add edge from v to all other pkiller(u)
1709                    else: for all vv in pkiller(u): add edge from v to vv if there exists no path from vv to v
1710         */
1711
1712 /*
1713         A node is unserializable if:
1714         - it has only one killer and this one is Sink
1715     - it kills no other values
1716         In this case there is no serialization which could
1717         reduce the registerpressure
1718 */
1719 #define IS_UNSERIALIZABLE_NODE(rss_node)                  \
1720         (                                                     \
1721                 (                                                 \
1722                         (plist_count(rss_node->pkiller_list) == 1) && \
1723                         is_Sink(rss_node->killer)                  && \
1724                         (rss_node->kill_count                == 0)    \
1725                 )                            ||                   \
1726                 be_is_Barrier(rss_node->irn) ||                   \
1727                 be_is_Keep(rss_node->irn)                         \
1728         )
1729
1730         /* for all u in sat_vals */
1731         for (i = 0; i < n; ++i) {
1732                 rss_irn_t       *u = get_rss_irn(rss, val_arr[i]);
1733                 plist_element_t *el;
1734
1735                 /* ignore nodes where serialization does not help */
1736                 if (IS_UNSERIALIZABLE_NODE(u)) {
1737                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", u->irn));
1738                         continue;
1739                 }
1740
1741                 /* accumulate all descendants of all pkiller(u) */
1742                 bitset_clear_all(bs_ukilldesc);
1743                 foreach_plist(u->pkiller_list, el) {
1744                         ir_node   *irn  = plist_element_get_value(el);
1745                         rss_irn_t *node = get_rss_irn(rss, irn);
1746
1747                         if (! is_Sink(irn))
1748                                 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, irn));
1749                         else
1750                                 continue;
1751
1752                         for (k = ARR_LEN_SAFE(node->descendants) - 1; k >= 0; --k) {
1753                                 if (! is_Sink(node->descendants[k]))
1754                                         bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, node->descendants[k]));
1755                         }
1756                 }
1757
1758                 /* for all v in sat_vals */
1759                 for (j = 0; j < n; ++j) {
1760                         ir_node   *v_irn   = val_arr[j];
1761                         rss_irn_t *v       = get_rss_irn(rss, v_irn);
1762                         unsigned  v_height = get_irn_height(rss->h, v_irn);
1763                         int       omega1, omega2, is_pkiller;
1764
1765                         /* v cannot be serialized with itself
1766                          * ignore nodes where serialization does not help */
1767                         if (i == j || IS_UNSERIALIZABLE_NODE(v)) {
1768                                 if (i != j)
1769                                         DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", v->irn));
1770                                 continue;
1771                         }
1772
1773                         /* get descendants of v */
1774                         bitset_clear_all(bs_vdesc);
1775                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v_irn));
1776                         for (k = ARR_LEN_SAFE(v->descendants) - 1; k >= 0; --k) {
1777                                 if (! is_Sink(v->descendants[k]))
1778                                         bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v->descendants[k]));
1779                         }
1780
1781                         /* if v is in pkiller(u) */
1782                         is_pkiller = plist_has_value(u->pkiller_list, v_irn);
1783
1784                         /* for all vv in pkiller(u) */
1785                         foreach_plist(u->pkiller_list, el) {
1786                                 ir_node *vv_irn  = plist_element_get_value(el);
1787                                 int     add_edge;
1788
1789                                 if (is_Sink(vv_irn) || is_cfop(vv_irn))
1790                                         continue;
1791
1792                                 if (is_pkiller)
1793                                         add_edge = vv_irn != v_irn && skip_Proj(vv_irn) != skip_Proj(v_irn);
1794                                 else
1795                                         add_edge = ! heights_reachable_in_block(rss->h, skip_Proj(vv_irn), skip_Proj(v_irn));
1796
1797                                 /*
1798                                         As we add an edge from vv -> v, we have to make sure,
1799                                         that there exists no path from v to vv.
1800                                 */
1801
1802                                 if (add_edge) {
1803                                         int vv_height = get_irn_height(rss->h, vv_irn);
1804                                         int mu1, mu2, critical_path_cost;
1805
1806                                         /*
1807                                                 mu1 = | descendants(v) cut sat_vals |
1808                                                 the number of saturating values which cannot
1809                                                 be simultaneously alive with u
1810                                         */
1811                                         bitset_copy(bs_tmp, bs_vdesc);
1812                                         mu1 = bitset_popcnt(bitset_and(bs_tmp, bs_sv));
1813
1814                                         /*
1815                                                 mu2 = | accum_desc_all_pkiller(u) without descendants(v) |
1816                                         */
1817                                         if (is_pkiller) {
1818                                                 bitset_copy(bs_tmp, bs_ukilldesc);
1819                                                 mu2 = bitset_popcnt(bitset_andnot(bs_tmp, bs_vdesc));
1820                                         }
1821                                         else {
1822                                                 mu2 = 0;
1823                                         }
1824
1825                                         /* omega1 = mu1 - mu2 */
1826                                         omega1 = mu1 - mu2;
1827
1828                                         if (omega1 != 0)
1829                                                 has_omega1 = 1;
1830
1831                                         /* omega2 = increase of critical path */
1832                                         critical_path_cost =
1833                                                 v_height                        /* longest path from v to sink */
1834                                                 + rss->max_height - vv_height   /* longest path from source to vv */
1835                                                 + 1;                            /* edge */
1836
1837                                         /*
1838                                                 If critical_path_cost > max_height -> the new edge
1839                                                 would increase the longest critical path by the difference.
1840                                         */
1841                                         omega2 = critical_path_cost > rss->max_height ? critical_path_cost - rss->max_height : 0;
1842
1843                                         /* this keeps track of the edge with the best benefit */
1844                                         if (omega1 >= num_regs - n && omega1 < best_benefit) {
1845                                                 min_benefit_edge.src = v_irn;
1846                                                 min_benefit_edge.tgt = vv_irn;
1847
1848                                                 ser_u_omega1 = u;
1849                                                 ser_v_omega1 = v;
1850
1851                                                 best_benefit    = omega1;
1852                                                 ser->new_killer = is_pkiller;
1853                                         }
1854
1855                                         /* this keeps track of the edge with the best omega1 costs where omega2 == 0 */
1856                                         if (omega2 == 0 && omega1 >= num_regs - n && omega1 < best_benefit_omega20) {
1857                                                 min_omega20_edge.src = v_irn;
1858                                                 min_omega20_edge.tgt = vv_irn;
1859
1860                                                 ser_u_omega20 = u;
1861                                                 ser_v_omega20 = v;
1862
1863                                                 best_benefit_omega20 = omega1;
1864                                                 ser->new_killer      = is_pkiller;
1865                                         }
1866
1867                                         best_omega2 = MIN(best_omega2, omega2);
1868
1869                                         DBG((rss->dbg, LEVEL_2, "\t\tfound %+F -> %+F (w1 %d, w2 %d)\n",
1870                                                 v_irn, vv_irn, omega1, omega2));
1871                                 } /* if add_edge */
1872                         } /* for all vv in pkiller(u) */
1873                 } /* for all v in sat_vals */
1874         } /* for all u in sat_vals */
1875
1876         if (! has_omega1)
1877                 return NULL;
1878
1879         if (best_omega2 == 0) {
1880                 ser->u         = ser_u_omega20;
1881                 ser->v         = ser_v_omega20;
1882                 ser->edge->src = min_omega20_edge.src;
1883                 ser->edge->tgt = min_omega20_edge.tgt;
1884                 ser->omega1    = best_benefit_omega20;
1885                 ser->omega2    = best_omega2;
1886         }
1887         else {
1888                 ser->u         = ser_u_omega1;
1889                 ser->v         = ser_v_omega1;
1890                 ser->edge->src = min_benefit_edge.src;
1891                 ser->edge->tgt = min_benefit_edge.tgt;
1892                 ser->omega1    = best_benefit;
1893                 ser->omega2    = best_omega2;
1894         }
1895
1896         return ser;
1897
1898 #undef IS_UNSERIALIZABLE_NODE
1899 }
1900
1901 /**
1902  * Perform the value serialization heuristic and add all
1903  * computed serialization edges as dependencies to the irg.
1904  */
1905 static void perform_value_serialization_heuristic(rss_t *rss) {
1906         bitset_t *arch_nonign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1907         bitset_t *abi_ign_bs     = bitset_alloca(arch_register_class_n_regs(rss->cls));
1908         int      available_regs, iteration;
1909         dvg_t    dvg;
1910         nodeset  *sat_vals;
1911         pset *ser_set = new_pset(cmp_rss_edges, 20);
1912
1913         /* available_regs = R = |arch_non_ignore_regs cut ~abi_ignore_regs| */
1914         arch_put_non_ignore_regs(rss->arch_env, rss->cls, arch_nonign_bs);
1915         be_abi_put_ignore_regs(rss->abi, rss->cls, abi_ign_bs);
1916         bitset_andnot(arch_nonign_bs, abi_ign_bs);
1917         available_regs  = bitset_popcnt(arch_nonign_bs);
1918         //num_live = pset_count(rss->live_block);
1919         //available_regs -= num_live < available_regs ? num_live : 0;
1920
1921         DBG((rss->dbg, LEVEL_1, "\n\t#available regs: %d\n\n", available_regs));
1922
1923         /*
1924                 At first we need to compute the disjoint value DAG (DVG = {V, E_dv}).
1925                 V    = set of all nodes we are currently interested in
1926                 E_dv = there is an edge from u to v iff v is a descendant of killer(u), forall u, v in V
1927         */
1928         dvg.nodes = new_nodeset(plist_count(rss->nodes));
1929         dvg.edges = new_pset(cmp_rss_edges, plist_count(rss->nodes) * 5);
1930         compute_dvg(rss, &dvg);
1931
1932         /*
1933                 Then we perform the heuristic serialization algorithm
1934                 on the DVG which gives us all necessary serialization
1935                 edges.
1936         */
1937         DBG((rss->dbg, LEVEL_1, "\tcomputing maximal antichain:\n"));
1938         iteration = 0;
1939         sat_vals  = compute_maximal_antichain(rss, &dvg, iteration++);
1940         while (sat_vals && (nodeset_count(sat_vals) > available_regs)) {
1941                 serialization_t *ser, best_ser;
1942                 rss_edge_t      *edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*edge));
1943                 ir_node         *dep_src, *dep_tgt;
1944
1945                 best_ser.edge = edge;
1946                 ser = compute_best_admissible_serialization(rss, sat_vals, &best_ser, available_regs);
1947
1948                 DBG((rss->dbg, LEVEL_1, "\tcurrent register saturation %d, target %d\n", nodeset_count(sat_vals), available_regs));
1949
1950                 if (! ser) {
1951                         DBG((rss->dbg, LEVEL_1, "\tno RS improving serialization found, breaking at iteration %d\n", iteration));
1952                         break;
1953                 }
1954
1955                 /* Insert the serialization as dependency edge into the irg. */
1956                 DBG((rss->dbg, LEVEL_1, "\tserializing %+F -> %+F with edge %+F -> %+F and cost %d, %d\n",
1957                         ser->u->irn, ser->v->irn, ser->edge->src, ser->edge->tgt, ser->omega1, ser->omega2));
1958
1959                 if (pset_find(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge)))
1960                         ir_printf("WARNING: serialization %+F -> %+F computed twice!\n", ser->edge->src, ser->edge->tgt);
1961
1962
1963                 pset_insert(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge));
1964
1965                 /* update the dvg */
1966                 update_dvg(rss, &dvg, ser->v, ser->u);
1967                 update_dvg(rss, &dvg, get_rss_irn(rss, ser->edge->src), get_rss_irn(rss, ser->edge->tgt));
1968                 del_nodeset(sat_vals);
1969
1970                 dep_src = skip_Proj(ser->edge->src);
1971                 dep_tgt = ser->edge->tgt;
1972                 add_irn_dep(dep_src, dep_tgt);
1973
1974                 /* Update descendants, consumer and pkillers of the target */
1975                 update_node_info(rss, ser->edge->tgt, ser->edge->src);
1976
1977                 /* TODO: try to find a cheaper way for updating height information */
1978                 rss->max_height = heights_recompute_block(rss->h, rss->block);
1979
1980                 /* Recompute the antichain for next serialization */
1981                 DBG((rss->dbg, LEVEL_1, "\tre-computing maximal antichain:\n"));
1982                 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
1983         }
1984
1985         del_nodeset(dvg.nodes);
1986         del_pset(dvg.edges);
1987 }
1988
1989 /**
1990  * Do initial calculations for a block.
1991  */
1992 static void process_block(ir_node *block, void *env) {
1993         rss_t *rss = env;
1994         int   i, n;
1995         const ir_edge_t *edge;
1996
1997         phase_init(&rss->ph, "rss block preprocessor", rss->irg, PHASE_DEFAULT_GROWTH, init_rss_irn);
1998
1999         DBG((rss->dbg, LEVEL_1, "preprocessing block %+F\n", block));
2000         rss->block = block;
2001
2002         /* build an index map for all nodes in the current block */
2003         i            = 0;
2004         n            = get_irn_n_edges(block);
2005         NEW_ARR_A(int *, rss->idx_map, n);
2006         foreach_out_edge(block, edge) {
2007                 ir_node *irn      = get_edge_src_irn(edge);
2008                 rss->idx_map[i++] = get_irn_idx(irn);
2009         }
2010         qsort(rss->idx_map, n, sizeof(rss->idx_map[0]), cmp_int);
2011         rss->max_height = heights_recompute_block(rss->h, block);
2012
2013         /* loop over all register classes */
2014         for (i = arch_isa_get_n_reg_class(rss->arch_env->isa) - 1; i >= 0; --i) {
2015                 const arch_register_class_t *cls = arch_isa_get_reg_class(rss->arch_env->isa, i);
2016
2017                 rss->cls = cls;
2018                 DBG((rss->dbg, LEVEL_1, "register class %s\n", arch_register_class_name(cls)));
2019
2020                 /* Get all live value at end of Block having current register class */
2021                 rss->live_block = pset_new_ptr(10);
2022                 be_liveness_end_of_block(rss->liveness, rss->arch_env, rss->cls, rss->block, rss->live_block);
2023
2024                 /* reset the list of interesting nodes */
2025                 plist_clear(rss->nodes);
2026                 plist_insert_back(rss->nodes, _sink);
2027
2028                 /* collect all nodes having a certain register class */
2029                 foreach_out_edge(block, edge) {
2030                         ir_node *irn  = get_edge_src_irn(edge);
2031                         ir_mode *mode = get_irn_mode(irn);
2032
2033                         /*
2034                                 We skip:
2035                                 - mode_T nodes (the projs are asked)
2036                                 - mode_X nodes (control flow nodes are always scheduled last)
2037                                 - Keeps (they are always immediately scheduled)
2038                                 - Phi (same as Keep)
2039                         */
2040                         if (mode == mode_T || mode == mode_X || is_Phi(irn))
2041                                 continue;
2042
2043                         /*
2044                                 In case of a proj, we skip
2045                                 - Barrier (they are a Barrier :)
2046                                 - Start
2047                                 - the Proj itself, as it's scheduled always with it's super node
2048                         */
2049                         if (is_Proj(irn)) {
2050                                 ir_node *pred = get_Proj_pred(irn);
2051                                 if (be_is_Barrier(pred) || is_Start(pred))
2052                                         continue;
2053                         }
2054
2055                         /* calculate the descendants and consumer for each node in the block */
2056                         collect_node_info(rss, skip_Proj(irn));
2057
2058                         if (be_is_Keep(irn))
2059                                 continue;
2060
2061                         if (! arch_irn_is(rss->arch_env, irn, ignore) && arch_get_irn_reg_class(rss->arch_env, irn, -1) == cls) {
2062                                 plist_insert_back(rss->nodes, skip_Proj(irn));
2063                         }
2064                         //}
2065                 }
2066
2067                 /* compute the potential killing set PK(G) */
2068                 compute_pkill_set(rss);
2069
2070                 /* compute the killing function k* */
2071                 compute_killing_function(rss);
2072
2073                 /*
2074                         Compute the heuristic value serialization and
2075                         add the necessary dependencies to the irg.
2076                 */
2077                 perform_value_serialization_heuristic(rss);
2078
2079                 del_pset(rss->live_block);
2080         }
2081
2082         phase_free(&rss->ph);
2083 }
2084
2085 #ifdef WITH_LIBCORE
2086 /**
2087  * Register the options.
2088  */
2089 void be_init_schedrss(void) {
2090         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
2091         lc_opt_entry_t *sched_grp = lc_opt_get_grp(be_grp, "sched");
2092         lc_opt_entry_t *rss_grp = lc_opt_get_grp(sched_grp, "rss");
2093
2094         lc_opt_add_table(rss_grp, rss_option_table);
2095 }
2096
2097 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_schedrss);
2098 #endif /* WITH_LIBCORE */
2099
2100 /**
2101  * Preprocess the irg for scheduling.
2102  */
2103 void rss_schedule_preparation(const be_irg_t *birg) {
2104         rss_t rss;
2105
2106         FIRM_DBG_REGISTER(rss.dbg, "firm.be.sched.rss");
2107
2108         //firm_dbg_set_mask(rss.dbg, LEVEL_1 | LEVEL_2 | LEVEL_3);
2109
2110         init_rss_special_nodes(birg->irg);
2111
2112         rss.irg      = birg->irg;
2113         rss.arch_env = birg->main_env->arch_env;
2114         rss.abi      = birg->abi;
2115         rss.h        = heights_new(birg->irg);
2116         rss.nodes    = plist_new();
2117         rss.opts     = &rss_options;
2118         rss.liveness = be_liveness(birg->irg);
2119         irg_block_walk_graph(birg->irg, NULL, process_block, &rss);
2120         heights_free(rss.h);
2121         plist_free(rss.nodes);
2122         be_liveness_free(rss.liveness);
2123
2124         if (birg->main_env->options->dump_flags & DUMP_SCHED)
2125                 be_dump(rss.irg, "-rss", dump_ir_block_graph);
2126 }