2 * Implementation of a register saturating list scheduler
3 * as described in: Sid-Ahmed-Ali Touati
4 * Register Saturation in Superscalar and VLIW Codes
6 * @license This file protected by GPL - GNU GENERAL PUBLIC LICENSE.
7 * @author Christian Wuerdig
20 #include "irgraph_t.h"
22 #include "iredges_t.h"
24 #include "irphase_t.h"
30 #include "bipartite.h"
31 #include "hungarian.h"
38 #include "besched_t.h"
41 #include <libcore/lc_opts.h>
42 #include <libcore/lc_opts_enum.h>
43 #endif /* WITH_LIBCORE */
46 #define ARR_LEN_SAFE(arr) ((arr) != NULL ? ARR_LEN((arr)) : 0)
48 #define HASH_RSS_EDGE(edge) ((get_irn_node_nr((edge)->src) << 16) | (get_irn_node_nr((edge)->tgt) & 0xFFFF))
49 #define BSEARCH_IRN_ARR(val, arr) \
50 bsearch(&(val), (arr), ARR_LEN_SAFE((arr)), sizeof((arr)[0]), cmp_irn_idx)
52 #define BLOCK_IDX_MAP(rss, irn) bsearch_for_index(get_irn_idx((irn)), (rss)->idx_map, ARR_LEN_SAFE((rss)->idx_map), 1)
55 typedef struct _rss_opts_t {
59 /* Represents a child with associated costs */
60 typedef struct _child {
65 /* We need edges for several purposes. */
66 typedef struct _rss_edge {
72 /* Represents a connected bipartite component. */
74 nodeset *parents; /**< = S a set of value producers */
75 nodeset *children; /**< = T a set of value consumers */
76 pset *kill_edges; /**< = E a set of edges (t in T, s in S) such as each s in S gets killed by at least one t in T */
77 int nr; /**< a deterministic index for set insertion (used as hash) */
80 /* Represents a disjoint value DAG. */
86 /* Represents a chain of nodes. */
87 typedef struct _chain {
88 plist_t *elements; /**< List of chain elements */
89 int nr; /**< a deterministic index for set insertion (used as hash) */
92 typedef struct _rss_irn {
93 plist_t *consumer_list; /**< List of consumers */
94 ir_node **consumer; /**< Sorted consumer array (needed for faster access) */
96 plist_t *parent_list; /**< List of parents */
97 ir_node **parents; /**< Sorted parent array (needed for faster access) */
99 plist_t *descendant_list; /**< List of descendants */
100 ir_node **descendants; /**< Sorted descendant array (needed for faster access) */
102 plist_t *pkiller_list; /**< List of potential killers */
103 ir_node **pkillers; /**< Sorted pkiller array (needed for faster access) */
106 plist_t *dvg_desc_list; /**< List of all descendants in the DVG */
107 ir_node **dvg_desc; /**< Sorted dvg descendant array (needed for faster access) */
109 plist_t *dvg_pkiller_list; /**< List of potential killers in the DVG */
110 ir_node **dvg_pkiller; /**< Sorted dvg pkiller array (needed for faster access) */
112 plist_t *dvg_user_list; /**< List of users in the disjoint value DAG DVG */
114 plist_t *kill_value_list; /**< List of values getting potentially killed by this node */
116 ir_node *killer; /**< The selected unique killer */
117 ir_node *irn; /**< The corresponding firm node to this rss_irn */
119 chain_t *chain; /**< The chain, this node is associated to */
121 unsigned live_out : 1; /**< irn has consumers outside of it's block */
122 unsigned visited : 1; /**< visited flag for bipartite decomposition */
123 unsigned havedesc : 1; /**< visited flag collect descendants */
124 unsigned havecons : 1; /**< visited flag collect consumer */
125 unsigned handled : 1; /**< flag indicating whether or not the list structures have been build */
126 unsigned dumped : 1; /**< flag indication whether or not this node was dumped */
129 /* Represents a serialization edge with associated costs. */
130 typedef struct _serialization {
131 rss_irn_t *u; /* the top node */
132 rss_irn_t *v; /* the node about to be serialized after u */
133 rss_edge_t *edge; /* the edge selected for the serialization */
134 int omega1; /* estimated: available regs - RS reduction */
135 int omega2; /* increase in critical path length */
139 typedef struct _rss {
140 phase_t ph; /**< Phase to hold some data */
141 heights_t *h; /**< The current height object */
142 ir_graph *irg; /**< The irg to preprocess */
143 plist_t *nodes; /**< The list of interesting nodes */
144 const arch_env_t *arch_env; /**< The architecture environment */
145 be_abi_irg_t *abi; /**< The abi for this irg */
146 pset *cbc_set; /**< A set of connected bipartite components */
147 ir_node *block; /**< The current block in progress. */
148 int *idx_map; /**< Mapping irn indices to per block indices */
149 unsigned max_height; /**< maximum height in the current block */
150 rss_opts_t *opts; /**< The options */
151 const arch_register_class_t *cls; /**< The current register class */
152 DEBUG_ONLY(firm_dbg_module_t *dbg);
155 #define get_rss_irn(rss, irn) (phase_get_or_set_irn_data(&rss->ph, irn))
158 * We need some special nodes:
159 * a source and a sink for all live-in and live-out values of a block
168 static ir_node *_source = NULL;
169 static ir_node *_sink = NULL;
171 #define is_Source(irn) ((irn) == _source)
172 #define is_Sink(irn) ((irn) == _sink)
176 RSS_DUMP_CBC = 1 << 0,
177 RSS_DUMP_PKG = 1 << 1,
178 RSS_DUMP_KILL = 1 << 2,
179 RSS_DUMP_DVG = 1 << 3,
180 RSS_DUMP_MAXAC = 1 << 4,
181 RSS_DUMP_ALL = (RSS_DUMP_MAXAC << 1) - 1,
184 static rss_opts_t rss_options = {
189 static const lc_opt_enum_int_items_t dump_items[] = {
190 { "none", RSS_DUMP_NONE },
191 { "cbc", RSS_DUMP_CBC },
192 { "pkg", RSS_DUMP_PKG },
193 { "kill", RSS_DUMP_KILL },
194 { "dvg", RSS_DUMP_DVG },
195 { "maxac", RSS_DUMP_MAXAC },
196 { "all", RSS_DUMP_ALL },
200 static lc_opt_enum_int_var_t dump_var = {
201 &rss_options.dump_flags, dump_items
204 static const lc_opt_table_entry_t rss_option_table[] = {
205 LC_OPT_ENT_ENUM_MASK("dump", "dump phases (none, cbc, pkg, kill, dvg, maxac, all)", &dump_var),
208 #endif /* WITH_LIBCORE */
210 /******************************************************************************
212 * | | | | / _| | | (_)
213 * | |__ ___| |_ __ ___ _ __ | |_ _ _ _ __ ___| |_ _ ___ _ __ ___
214 * | '_ \ / _ \ | '_ \ / _ \ '__| | _| | | | '_ \ / __| __| |/ _ \| '_ \/ __|
215 * | | | | __/ | |_) | __/ | | | | |_| | | | | (__| |_| | (_) | | | \__ \
216 * |_| |_|\___|_| .__/ \___|_| |_| \__,_|_| |_|\___|\__|_|\___/|_| |_|___/
219 ******************************************************************************/
222 * Acquire opcodes and create source and sink nodes.
224 static void init_rss_special_nodes(ir_graph *irg) {
225 ir_node *block = get_irg_start_block(irg);
226 int iro_rss_base = get_next_ir_opcodes(iro_rss_last);
227 ir_op *op_rss_Source = new_ir_op(iro_rss_base + iro_rss_Source, "rss_Source", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
228 ir_op *op_rss_Sink = new_ir_op(iro_rss_base + iro_rss_Sink, "rss_Sink", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
229 _source = new_ir_node(NULL, irg, block, op_rss_Source, mode_ANY, 0, NULL);
230 _sink = new_ir_node(NULL, irg, block, op_rss_Sink, mode_ANY, 0, NULL);
233 static int cmp_int(const void *a, const void *b) {
237 return QSORT_CMP(*i1, *i2);
240 static int cmp_child_costs(const void *a, const void *b) {
241 const child_t *c1 = a;
242 const child_t *c2 = b;
244 return QSORT_CMP(c1->cost, c2->cost);
247 static int cmp_irn_idx(const void *a, const void *b) {
248 const ir_node *n1 = *(ir_node **)a;
249 const ir_node *n2 = *(ir_node **)b;
251 return QSORT_CMP(get_irn_idx(n1), get_irn_idx(n2));
254 static int cmp_rss_edges(const void *a, const void *b) {
255 const rss_edge_t *e1 = a;
256 const rss_edge_t *e2 = b;
258 return (e1->src != e2->src) || (e1->tgt != e2->tgt);
261 static int bsearch_for_index(int key, int *arr, size_t len, int force) {
265 while (right >= left) {
266 int idx = (left + right) / 2;
270 else if (key > arr[idx])
277 assert(0 && "Something is wrong, key not found.");
281 static ir_node **build_sorted_array_from_list(plist_t *irn_list, struct obstack *obst) {
284 int len = plist_count(irn_list);
285 ir_node **arr = NEW_ARR_D(ir_node *, obst, len);
287 /* copy the list into the array */
288 foreach_plist(irn_list, el) {
289 arr[i++] = plist_element_get_value(el);
292 /* sort the array by node index */
293 qsort(arr, len, sizeof(arr[0]), cmp_irn_idx);
298 /*****************************************************
301 * __| | ___| |__ _ _ __ _ __ _ _ _ __ __ _
302 * / _` |/ _ \ '_ \| | | |/ _` |/ _` | | '_ \ / _` |
303 * | (_| | __/ |_) | |_| | (_| | (_| | | | | | (_| |
304 * \__,_|\___|_.__/ \__,_|\__, |\__, |_|_| |_|\__, |
308 *****************************************************/
310 static void dump_nodeset(nodeset *ns, const char *prefix) {
312 foreach_nodeset(ns, irn) {
313 ir_fprintf(stderr, "%s%+F\n", prefix, irn);
317 static void build_file_name(rss_t *rss, const char *suffix, size_t suf_len, char *buf, size_t len) {
318 const char *irg_name;
321 irg_name = get_entity_name(get_irg_entity(rss->irg));
322 snprintf(buf, len - suf_len, "%s-%s-block-%ld",
323 irg_name, arch_register_class_name(rss->cls), get_irn_node_nr(rss->block));
327 /* Dumps all collected bipartite components of current irg as vcg. */
328 static void debug_vcg_dump_bipartite(rss_t *rss) {
332 static const char suffix[] = "-RSS-CBC.vcg";
334 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
335 f = fopen(file_name, "w");
337 ir_fprintf(f, "graph: { title: \"connected bipartite component graph of %+F\"\n", rss->irg);
338 fprintf(f, "display_edge_labels: no\n");
339 fprintf(f, "layoutalgorithm: mindepth\n");
340 fprintf(f, "manhattan_edges: yes\n\n");
342 foreach_pset(rss->cbc_set, cbc) {
346 fprintf(f, "graph: { titel: \"cbc %d\" label: \"cbc %d\" status:clustered color:yellow\n", cbc->nr, cbc->nr);
347 foreach_nodeset(cbc->parents, n) {
348 ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
350 foreach_nodeset(cbc->children, n) {
351 ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
353 foreach_pset(cbc->kill_edges, ke) {
354 ir_fprintf(f, "edge: { sourcename: \"n%d_%d\" targetname: \"n%d_%d\" }\n",
355 get_irn_node_nr(ke->src), cbc->nr, get_irn_node_nr(ke->tgt), cbc->nr);
363 /* Dump the computed killing function as vcg. */
364 static void debug_vcg_dump_kill(rss_t *rss) {
368 static const char suffix[] = "-RSS-KILL.vcg";
370 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
371 f = fopen(file_name, "w");
373 ir_fprintf(f, "graph: { title: \"computed kill graph of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
374 fprintf(f, "display_edge_labels: no\n");
375 fprintf(f, "layoutalgorithm: mindepth\n");
376 fprintf(f, "manhattan_edges: yes\n\n");
378 /* first: reset dumped flag of all nodes */
379 foreach_plist(rss->nodes, el) {
380 ir_node *irn = plist_element_get_value(el);
381 rss_irn_t *rirn = get_rss_irn(rss, irn);
385 /* dump all nodes and their killers */
386 foreach_plist(rss->nodes, el) {
387 ir_node *irn = plist_element_get_value(el);
388 rss_irn_t *rirn = get_rss_irn(rss, irn);
389 rss_irn_t *pk_rirn = get_rss_irn(rss, rirn->killer);
391 if (! rirn->dumped) {
392 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
396 if (! pk_rirn->dumped) {
397 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(rirn->killer), rirn->killer);
401 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
402 get_irn_node_nr(rirn->killer), get_irn_node_nr(irn));
409 /* Dumps the potential killing DAG (PKG) as vcg. */
410 static void debug_vcg_dump_pkg(rss_t *rss, nodeset *max_ac, int iteration) {
413 char *suffix = alloca(32);
414 static const char suffix1[] = "-RSS-PKG.vcg";
415 static const char suffix2[] = "-RSS-PKG-MAXAC.vcg";
419 snprintf(suffix, 32, "%s", suffix1);
422 snprintf(suffix, 32, "-%02d%s", iteration, suffix2);
425 build_file_name(rss, suffix, strlen(suffix) + 1, file_name, sizeof(file_name));
426 f = fopen(file_name, "w");
428 ir_fprintf(f, "graph: { title: \"potential killing DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
429 fprintf(f, "display_edge_labels: no\n");
430 fprintf(f, "layoutalgorithm: mindepth\n");
431 fprintf(f, "manhattan_edges: yes\n\n");
433 foreach_plist(rss->nodes, el) {
434 ir_node *irn = plist_element_get_value(el);
435 rss_irn_t *rirn = get_rss_irn(rss, irn);
437 plist_element_t *k_el;
439 /* dump selected saturating values in yellow */
440 if (max_ac && nodeset_find(max_ac, irn))
444 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F c%d\" %s }\n", get_irn_node_nr(irn), irn, rirn->chain->nr, c1);
446 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(irn), irn, c1);
449 foreach_plist(rirn->pkiller_list, k_el) {
450 ir_node *pkiller = plist_element_get_value(k_el);
451 rss_irn_t *pk_rirn = get_rss_irn(rss, pkiller);
454 if (max_ac && nodeset_find(max_ac, pkiller))
457 if (! pk_rirn->dumped) {
459 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F c%d\" %s }\n", get_irn_node_nr(pkiller), pkiller, pk_rirn->chain->nr, c2);
461 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(pkiller), pkiller, c2);
464 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
465 get_irn_node_nr(pkiller), get_irn_node_nr(irn));
472 /* Dumps the disjoint value DAG (DVG) as vcg. */
473 static void debug_vcg_dump_dvg(rss_t *rss, dvg_t *dvg) {
474 static const char suffix[] = "-RSS-DVG.vcg";
480 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
481 f = fopen(file_name, "w");
483 ir_fprintf(f, "graph: { title: \"disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
484 fprintf(f, "display_edge_labels: no\n");
485 fprintf(f, "layoutalgorithm: mindepth\n");
486 fprintf(f, "manhattan_edges: yes\n\n");
489 foreach_nodeset(dvg->nodes, irn) {
490 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
494 foreach_pset(dvg->edges, edge) {
495 rss_irn_t *src = get_rss_irn(rss, edge->src);
496 rss_irn_t *tgt = get_rss_irn(rss, edge->tgt);
498 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
499 get_irn_node_nr(edge->src), get_irn_node_nr(edge->tgt));
507 /* Dumps the PKG(DVG). */
508 static void debug_vcg_dump_dvg_pkiller(rss_t *rss, dvg_t *dvg) {
509 static const char suffix[] = "-RSS-DVG-PKG.vcg";
514 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
515 f = fopen(file_name, "w");
517 ir_fprintf(f, "graph: { title: \"PKG of disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
518 fprintf(f, "display_edge_labels: no\n");
519 fprintf(f, "layoutalgorithm: mindepth\n");
520 fprintf(f, "manhattan_edges: yes\n\n");
523 foreach_nodeset(dvg->nodes, irn) {
524 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
528 foreach_nodeset(dvg->nodes, irn) {
529 rss_irn_t *node = get_rss_irn(rss, irn);
532 foreach_plist(node->dvg_pkiller_list, el) {
533 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
534 get_irn_node_nr(plist_element_get_value(el)), get_irn_node_nr(irn));
544 * In case there is no rss information for irn, initialize it.
546 static void *init_rss_irn(phase_t *ph, ir_node *irn, void *old) {
547 rss_irn_t *res = old ? old : phase_alloc(ph, sizeof(res[0]));
549 res->descendant_list = plist_obstack_new(phase_obst(ph));
550 res->descendants = NULL;
552 res->consumer_list = plist_obstack_new(phase_obst(ph));
553 res->consumer = NULL;
555 res->pkiller_list = plist_obstack_new(phase_obst(ph));
556 res->pkillers = NULL;
558 res->parent_list = plist_obstack_new(phase_obst(ph));
561 //res->dvg_desc_list = plist_obstack_new(phase_obst(ph));
562 //res->dvg_desc = NULL;
564 res->kill_value_list = plist_obstack_new(phase_obst(ph));
565 //res->dvg_user_list = plist_obstack_new(phase_obst(ph));
566 //res->dvg_pkiller_list = plist_obstack_new(phase_obst(ph));
583 * Collect all nodes data dependent on current node.
585 static void collect_descendants(rss_t *rss, rss_irn_t *rirn, ir_node *irn, int *got_sink) {
586 const ir_edge_t *edge;
587 rss_irn_t *cur_node = get_rss_irn(rss, irn);
588 ir_node *block = rss->block;
589 ir_edge_kind_t ekind[2] = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
592 // if (cur_node->havedesc)
594 // cur_node->havedesc = 1;
596 /* Stop at Barriers and Keeps */
597 if (be_is_Barrier(irn) || be_is_Keep(irn))
600 /* loop over normal and dependency edges */
601 for (i = 0; i < 2; ++i) {
602 foreach_out_edge_kind(irn, edge, ekind[i]) {
603 ir_node *user = get_edge_src_irn(edge);
605 /* skip ignore nodes as they do not really contribute to register pressure */
606 if (arch_irn_is(rss->arch_env, user, ignore))
610 if (get_irn_mode(user) != mode_X && arch_get_irn_reg_class(rss->arch_env, user, -1) == rss->cls)
611 collect_descendants(rss, rirn, user, got_sink);
614 /* check if user lives in block and is not a control flow node */
615 if (get_nodes_block(user) == block) {
616 if (! plist_has_value(rirn->descendant_list, user)) {
617 plist_insert_back(rirn->descendant_list, user);
618 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", user));
620 collect_descendants(rss, rirn, user, got_sink);
622 else if (! *got_sink) {
623 /* user lives out of block: add sink as descendant if not already done */
624 plist_insert_back(rirn->descendant_list, _sink);
626 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", _sink));
634 * Handles a single consumer.
636 static void collect_single_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *consumer, int *got_sink) {
637 ir_node *block = rss->block;
638 ir_edge_kind_t ekind[2] = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
640 assert(! is_Proj(consumer) && "Cannot handle Projs");
642 if (get_nodes_block(consumer) == block) {
643 if (! arch_irn_is(rss->arch_env, consumer, ignore)) {
644 plist_insert_back(rss_irn->consumer_list, consumer);
645 DBG((rss->dbg, LEVEL_2, "\t\tconsumer %+F\n", consumer));
649 rss_irn->live_out = 1;
650 DBG((rss->dbg, LEVEL_2, "\t\tlive out %+F", consumer));
652 plist_insert_back(rss_irn->consumer_list, _sink);
654 DB((rss->dbg, LEVEL_2, ", %+F added instead", _sink));
656 DB((rss->dbg, LEVEL_2, "\n"));
661 * Collect all nodes consuming the value(s) produced by current node.
663 static void collect_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *irn, int *got_sink) {
664 const ir_edge_t *edge;
666 ir_edge_kind_t ekind[2] = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
667 rss_irn_t *cur_node = get_rss_irn(rss, irn);
669 if (cur_node->havecons)
671 cur_node->havecons = 1;
673 for (i = 0; i < 2; ++i) {
674 foreach_out_edge_kind(irn, edge, ekind[i]) {
675 ir_node *consumer = get_edge_src_irn(edge);
677 if (is_Proj(consumer)) {
678 if (arch_get_irn_reg_class(rss->arch_env, consumer, -1) == rss->cls)
679 collect_consumer(rss, rss_irn, consumer, got_sink);
682 collect_single_consumer(rss, rss_irn, consumer, got_sink);
689 * We need to build the consumer and descendant list for _source.
691 static void collect_node_info_source(rss_t *rss) {
692 const ir_edge_t *edge;
693 rss_irn_t *rirn = get_rss_irn(rss, _source);
694 ir_node *block = rss->block;
699 foreach_out_edge(block, edge) {
700 ir_node *irn = get_edge_src_irn(edge);
703 for (i = get_irn_arity(n) - 1; i >= 0; --i) {
709 static void reset_node_info(rss_irn_t *rss_irn) {
710 /* Beware: array data resides on phase obstack, so it gets removed automatically */
712 plist_clear(rss_irn->consumer_list);
713 rss_irn->consumer = NULL;
715 plist_clear(rss_irn->parent_list);
716 rss_irn->parents = NULL;
718 plist_clear(rss_irn->descendant_list);
719 rss_irn->descendants = NULL;
721 plist_clear(rss_irn->pkiller_list);
722 rss_irn->pkillers = NULL;
724 plist_clear(rss_irn->kill_value_list);
726 rss_irn->killer = NULL;
727 rss_irn->live_out = 0;
728 rss_irn->visited = 0;
729 rss_irn->handled = 0;
734 * Collects all consumer and descendant of a irn.
736 static void collect_node_info(rss_t *rss, ir_node *irn) {
737 rss_irn_t *rss_irn = get_rss_irn(rss, irn);
740 assert(! is_Proj(irn) && "Cannot handle Projs.");
742 /* check if node info is already available */
743 if (rss_irn->handled)
744 phase_reinit_single_irn_data(&rss->ph, irn);
746 DBG((rss->dbg, LEVEL_1, "\tcomputing consumers of %+F:\n", irn));
748 /* collect all consumer */
750 collect_consumer(rss, rss_irn, irn, &got_sink);
752 /* build sorted consumer array */
753 rss_irn->consumer = build_sorted_array_from_list(rss_irn->consumer_list, phase_obst(&rss->ph));
755 DBG((rss->dbg, LEVEL_1, "\tcompute descendants of %+F:\n", irn));
757 /* collect descendants */
759 collect_descendants(rss, rss_irn, irn, &got_sink);
761 /* build sorted descendant array */
762 rss_irn->descendants = build_sorted_array_from_list(rss_irn->descendant_list, phase_obst(&rss->ph));
764 rss_irn->handled = 1;
768 * Checks if v is a potential killer of u.
769 * v is in pkill(u) iff descendants(v) cut consumer(u) is v
771 * @param rss The rss object
772 * @param v The node to check for killer
773 * @param u The potentially killed value
774 * @return 1 if v is in pkill(u), 0 otherwise
776 static int is_potential_killer(rss_t *rss, rss_irn_t *v, rss_irn_t *u) {
781 assert(is_Sink(v->irn) || ((plist_count(v->descendant_list) > 0 && v->descendants) || 1));
782 assert(is_Sink(u->irn) || ((plist_count(u->consumer_list) > 0 && u->consumer) || 1));
784 /* as we loop over the list: loop over the shorter one */
785 if (plist_count(v->descendant_list) > plist_count(u->consumer_list)) {
786 list = u->consumer_list;
787 arr = v->descendants;
790 list = v->descendant_list;
794 /* for each list element: try to find element in array */
795 foreach_plist(list, el) {
796 ir_node *irn = plist_element_get_value(el);
797 ir_node *match = BSEARCH_IRN_ARR(irn, arr);
799 if (match && match != irn)
807 * Update descendants, consumer and pkiller list for the given irn.
809 static void update_node_info(rss_t *rss, ir_node *irn, ir_node *pk_irn) {
810 rss_irn_t *node = get_rss_irn(rss, irn);
811 rss_irn_t *pkiller = get_rss_irn(rss, pk_irn);
813 /* add consumer and rebuild the consumer array */
814 if (! plist_has_value(node->consumer_list, pk_irn)) {
815 plist_insert_back(node->consumer_list, pk_irn);
816 node->consumer = build_sorted_array_from_list(node->consumer_list, phase_obst(&rss->ph));
819 /* add pk_irn as descendant, add it's descendants to irn's and rebuild array */
820 if (! plist_has_value(node->descendant_list, pk_irn)) {
823 plist_insert_back(node->descendant_list, pk_irn);
825 foreach_plist(pkiller->descendant_list, el) {
826 ir_node *desc = plist_element_get_value(el);
828 if (! plist_has_value(node->descendant_list, desc))
829 plist_insert_back(node->descendant_list, desc);
832 node->descendants = build_sorted_array_from_list(node->descendant_list, phase_obst(&rss->ph));
836 /* check, if pkiller is a potential killer of irn */
837 if (is_potential_killer(rss, pkiller, node)) {
838 if (! plist_has_value(node->pkiller_list, pk_irn))
839 plist_insert_back(node->pkiller_list, pk_irn);
840 if (! plist_has_value(pkiller->kill_value_list, irn))
841 plist_insert_back(pkiller->kill_value_list, irn);
842 DBG((rss->dbg, LEVEL_1, "\tpotential killer %+F added to %+F\n", pk_irn, irn));
848 * Compute the potential killing set PK.
850 static void compute_pkill_set(rss_t *rss) {
851 plist_element_t *u_el, *v_el;
853 foreach_plist(rss->nodes, u_el) {
854 ir_node *u_irn = plist_element_get_value(u_el);
855 rss_irn_t *u = get_rss_irn(rss, u_irn);
857 DBG((rss->dbg, LEVEL_1, "\tcomputing potential killers of %+F:\n", u_irn));
859 /* check each consumer if it is a potential killer */
860 foreach_plist(u->consumer_list, v_el) {
861 ir_node *v_irn = plist_element_get_value(v_el);
862 rss_irn_t *v = get_rss_irn(rss, v_irn);
864 /* check, if v is a potential killer of u */
865 if (is_potential_killer(rss, v, u)) {
866 if (! plist_has_value(u->pkiller_list, v_irn))
867 plist_insert_back(u->pkiller_list, v_irn);
868 if (! plist_has_value(v->kill_value_list, u_irn))
869 plist_insert_back(v->kill_value_list, u_irn);
870 DBG((rss->dbg, LEVEL_2, "\t\tpotential killer %+F\n", v_irn));
877 if (rss->opts->dump_flags & RSS_DUMP_PKG)
878 debug_vcg_dump_pkg(rss, NULL, 0);
882 * Build set of killing edges (from values to their potential killers)
884 static void build_kill_edges(rss_t *rss, pset *epk) {
885 plist_element_t *el, *k_el;
887 foreach_plist(rss->nodes, el) {
888 ir_node *irn = plist_element_get_value(el);
889 rss_irn_t *rirn = get_rss_irn(rss, irn);
891 DBG((rss->dbg, LEVEL_3, "\t\tbuilding kill edges for %+F:\n", irn));
893 foreach_plist(rirn->pkiller_list, k_el) {
894 ir_node *pkiller = plist_element_get_value(k_el);
895 rss_edge_t *ke = obstack_alloc(phase_obst(&rss->ph), sizeof(*ke));
901 DBG((rss->dbg, LEVEL_3, "\t\t\ttarget %+F\n", pkiller));
903 pset_insert(epk, ke, HASH_RSS_EDGE(ke));
908 /* print the given cbc for debugging purpose */
909 static void debug_print_cbc(firm_dbg_module_t *mod, cbc_t *cbc) {
913 DBG((mod, LEVEL_3, "\t\tS = set of parents:\n"));
914 foreach_nodeset(cbc->parents, n) {
915 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
917 DBG((mod, LEVEL_3, "\t\tT = set of children:\n"));
918 foreach_nodeset(cbc->children, n) {
919 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
921 DBG((mod, LEVEL_3, "\t\tE = Edges from producers to consumers\n"));
922 foreach_pset(cbc->kill_edges, ke) {
923 DBG((mod, LEVEL_3, "\t\t\t%+F -> %+F\n", ke->src, ke->tgt));
928 * Construct the bipartite decomposition.
929 * Sid-Ahmed-Ali Touati, Phd Thesis
930 * Register Pressure in Instruction Level Parallelism, p. 71
932 static void compute_bipartite_decomposition(rss_t *rss) {
933 pset *epk = new_pset(cmp_rss_edges, 10);
938 DBG((rss->dbg, LEVEL_1, "\tcomputing bipartite decomposition:\n"));
940 build_kill_edges(rss, epk);
942 foreach_plist(rss->nodes, el) {
943 ir_node *u_irn = plist_element_get_value(el);
944 rss_irn_t *u = get_rss_irn(rss, u_irn);
949 plist_element_t *el2;
951 rss_edge_t *kedge_root = NULL;
952 ir_node *t_irn, *s_irn;
954 if (u->visited || u_irn == _sink)
957 DBG((rss->dbg, LEVEL_2, "\t\t%+F choosen:\n", u_irn));
959 cbc = obstack_alloc(phase_obst(&rss->ph), sizeof(*cbc));
962 /* initialize S_cb */
963 cbc->parents = new_nodeset(5);
964 nodeset_insert(cbc->parents, u_irn);
965 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (init)\n", u_irn));
968 cbc->kill_edges = new_pset(cmp_rss_edges, 5);
970 /* each parent gets killed by at least one value from children */
972 /* T_cb = PK_successors(u) */
973 cbc->children = new_nodeset(5);
974 foreach_plist(u->pkiller_list, el2) {
975 nodeset_insert(cbc->children, plist_element_get_value(el2));
976 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (init)\n", plist_element_get_value(el2)));
980 Now: insert the parents of all children into the parent set
981 and insert the children of all parents into the children set
982 until the sets don't change any more
984 while (p_change || c_change) {
985 p_change = c_change = 0;
987 /* accumulate parents */
988 foreach_nodeset(cbc->children, t_irn) {
989 rss_irn_t *t = get_rss_irn(rss, t_irn);
990 plist_element_t *kvl_el;
992 foreach_pset(epk, k_edge) {
993 ir_node *val = k_edge->src;
995 if (k_edge->src == t_irn && ! nodeset_find(cbc->parents, k_edge->src)) {
996 nodeset_insert(cbc->parents, val);
998 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents\n", val));
1003 /* accumulate children */
1004 foreach_nodeset(cbc->parents, s_irn) {
1005 rss_irn_t *s = get_rss_irn(rss, s_irn);
1006 plist_element_t *pkl_el;
1008 foreach_plist(s->pkiller_list, pkl_el) {
1009 ir_node *val = plist_element_get_value(pkl_el);
1011 if (! nodeset_find(cbc->children, val)) {
1012 nodeset_insert(cbc->children, val);
1014 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children\n", val));
1020 /* mark all parent values as visited */
1021 foreach_nodeset(cbc->parents, s_irn) {
1022 rss_irn_t *s = get_rss_irn(rss, s_irn);
1024 /* assure bipartite property */
1025 if (nodeset_find(cbc->children, s_irn)) {
1026 nodeset_remove(cbc->children, s_irn);
1027 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F removed from to children\n", s_irn));
1032 foreach_pset(epk, k_edge) {
1033 if (nodeset_find(cbc->parents, k_edge->src) && nodeset_find(cbc->children, k_edge->tgt)) {
1034 pset_insert(cbc->kill_edges, k_edge, HASH_RSS_EDGE(k_edge));
1036 Link all k_edges which are about to be removed together.
1037 Beware: pset_remove kills the iterator.
1039 k_edge->next = kedge_root;
1040 kedge_root = k_edge;
1044 /* remove all linked k_edges */
1045 for (; kedge_root; kedge_root = kedge_root->next) {
1046 pset_remove(epk, kedge_root, HASH_RSS_EDGE(kedge_root));
1049 /* add the connected bipartite component */
1050 if (nodeset_count(cbc->parents) > 0 && nodeset_count(cbc->children) > 0 && pset_count(cbc->kill_edges) > 0)
1051 pset_insert(rss->cbc_set, cbc, (unsigned)cbc->nr);
1052 DBG((rss->dbg, LEVEL_2, "\tbipartite component %d inserted:\n", cbc->nr));
1054 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2)
1055 debug_print_cbc(rss->dbg, cbc);
1059 if (rss->opts->dump_flags & RSS_DUMP_CBC)
1060 debug_vcg_dump_bipartite(rss);
1066 * Select the child with the maximum cost.
1068 static child_t *select_child_max_cost(rss_t *rss, nodeset *x, nodeset *y, child_t *t, cbc_t *cbc) {
1070 float max_cost = -1.0f;
1072 DBG((rss->dbg, LEVEL_2, "\t\tcomputing children costs:\n"));
1074 foreach_nodeset(cbc->children, child) {
1075 rss_irn_t *r_child = get_rss_irn(rss, child);
1076 int num_unkilled_parents = 0;
1077 int num_descendants;
1081 /* get the number of unkilled parents */
1082 foreach_pset(cbc->kill_edges, k_edge) {
1083 if (k_edge->tgt == child && nodeset_find(x, k_edge->src))
1084 ++num_unkilled_parents;
1087 cost = (float)num_unkilled_parents;
1089 num_descendants = plist_count(r_child->descendant_list) + nodeset_count(y);
1091 if (num_descendants > 0)
1092 cost /= (float)num_descendants;
1094 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F, #desc %d, cost %.3f\n", child, num_descendants, cost));
1096 if (cost > max_cost) {
1107 * Remove all parents from x which are killed by t_irn.
1109 static void remove_covered_parents(rss_t *rss, nodeset *x, ir_node *t_irn, cbc_t *cbc) {
1110 rss_irn_t *t = get_rss_irn(rss, t_irn);
1113 DBG((rss->dbg, LEVEL_2, "\t\tremoving parents covered by %+F:\n", t_irn));
1115 foreach_pset(cbc->kill_edges, k_edge) {
1116 if (k_edge->tgt == t_irn && nodeset_find(x, k_edge->src)) {
1117 nodeset_remove(x, k_edge->src);
1118 plist_insert_back(t->parent_list, k_edge->src);
1119 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", k_edge->src));
1124 static void update_cumulated_descendent_values(rss_t *rss, nodeset *y, ir_node *t_irn) {
1125 rss_irn_t *t = get_rss_irn(rss, t_irn);
1126 plist_element_t *el;
1128 DBG((rss->dbg, LEVEL_2, "\t\tupdating cumulated descendant value of %+F:\n", t_irn));
1130 foreach_plist(t->descendant_list, el) {
1131 nodeset_insert(y, plist_element_get_value(el));
1132 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", plist_element_get_value(el)));
1137 * Greedy-k: a heuristics for the MMA problem
1139 static void compute_killing_function(rss_t *rss) {
1141 struct obstack obst;
1143 obstack_init(&obst);
1145 rss->cbc_set = pset_new_ptr(5);
1146 compute_bipartite_decomposition(rss);
1148 /* for all bipartite components do: */
1149 foreach_pset(rss->cbc_set, cbc) {
1151 nodeset *x = new_nodeset(10);
1152 nodeset *y = new_nodeset(10);
1153 child_t **sks = NEW_ARR_F(child_t *, 20);
1158 DBG((rss->dbg, LEVEL_1, "\tcomputing SKS for cbc %d:\n", cbc->nr));
1159 DBG((rss->dbg, LEVEL_2, "\t\tinitializing parents X:\n"));
1161 /* X = S_cb (all parents are initially uncovered) */
1162 foreach_nodeset(cbc->parents, p) {
1163 nodeset_insert(x, p);
1164 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", p));
1167 /* while X not empty */
1168 while (nodeset_count(x) > 0) {
1169 child_t *t = obstack_alloc(&obst, sizeof(*t));
1170 memset(t, 0, sizeof(*t));
1172 t = select_child_max_cost(rss, x, y, t, cbc);
1174 if (cur_len >= cur_size) {
1175 ARR_EXTO(child_t *, sks, cur_size * 2);
1179 DBG((rss->dbg, LEVEL_2, "\t\tinsert child %+F (%.3f) into SKS at pos %d\n", t->irn, t->cost, cur_len));
1182 remove_covered_parents(rss, x, t->irn, cbc);
1183 update_cumulated_descendent_values(rss, y, t->irn);
1186 ARR_SHRINKLEN(sks, cur_len);
1188 /* sort SKS in increasing cost order */
1189 qsort(sks, cur_len, sizeof(sks[0]), cmp_child_costs);
1191 DBG((rss->dbg, LEVEL_2, "\tprocessing SKS for cbc %d:\n", cbc->nr));
1193 /* build killing function */
1194 for (i = cur_len - 1; i >= 0; --i) { /* loop over sks in decreasing cost order */
1195 child_t *t = sks[i];
1196 rss_irn_t *rt = get_rss_irn(rss, t->irn);
1197 plist_element_t *p_el;
1199 DBG((rss->dbg, LEVEL_3, "\t\t\tkiller %+F (%.3f):\n", t->irn, t->cost));
1201 /* kill all unkilled parents of t */
1202 foreach_plist(rt->parent_list, p_el) {
1203 ir_node *par = plist_element_get_value(p_el);
1204 rss_irn_t *rpar = get_rss_irn(rss, par);
1206 if (is_Sink(rpar->killer)) {
1207 rpar->killer = t->irn;
1208 DBG((rss->dbg, LEVEL_2, "\t\tkill %+F\n", rpar->irn));
1211 DBG((rss->dbg, LEVEL_3, "\t\t\tkeeping %+F as killer for %+F\n", rpar->killer, rpar->irn));
1221 if (rss->opts->dump_flags & RSS_DUMP_KILL)
1222 debug_vcg_dump_kill(rss);
1224 del_pset(rss->cbc_set);
1225 obstack_free(&obst, NULL);
1229 * Adds the edge src -> tgt to the dvg. Checks if reverse edge is already there (asserts).
1231 static INLINE void add_dvg_edge(rss_t *rss, dvg_t *dvg, ir_node *src, ir_node *tgt, int have_source) {
1232 rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1236 nodeset_insert(dvg->nodes, src);
1238 nodeset_insert(dvg->nodes, tgt);
1240 /* add an edge to our killer */
1241 dvg_edge->src = src;
1242 dvg_edge->tgt = tgt;
1243 dvg_edge->next = NULL;
1247 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1249 /* add the edge to the DVG */
1250 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", src, tgt));
1251 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1255 * Computes the disjoint value DAG (DVG).
1256 * BEWARE: It is not made explicitly clear in the Touati paper,
1257 * but the DVG is meant to be build from the KILLING DAG
1259 static void compute_dvg(rss_t *rss, dvg_t *dvg) {
1260 plist_element_t *el;
1262 DBG((rss->dbg, LEVEL_1, "\tcomputing DVG:\n"));
1264 foreach_plist(rss->nodes, el) {
1265 ir_node *u_irn = plist_element_get_value(el);
1266 rss_irn_t *u = get_rss_irn(rss, u_irn);
1267 rss_irn_t *u_killer = get_rss_irn(rss, u->killer);
1270 /* TODO: omit nodes only having sink as pkiller and killing no one */
1272 /* add an edge to killer */
1273 add_dvg_edge(rss, dvg, u_irn, u->killer, 0);
1275 if (is_Sink(u->killer))
1278 /* We add an edge to every killer, from where we could be reached. */
1279 for (i = ARR_LEN_SAFE(u_killer->descendants) - 1; i >= 0; --i) {
1280 add_dvg_edge(rss, dvg, u_irn, u_killer->descendants[i], 1);
1285 foreach_plist(rss->nodes, el2) {
1286 ir_node *v_irn = plist_element_get_value(el2);
1289 There is an edge (u, v) in the DVG iff v is a descendant of the killer(u).
1291 if (BSEARCH_IRN_ARR(v_irn, u_kill->descendants)) {
1292 rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1295 /* insert the user into the DVG and append it to the user list of u */
1296 nodeset_insert(dvg->nodes, v_irn);
1297 if (! plist_has_value(u->dvg_user_list, v_irn))
1298 plist_insert_back(u->dvg_user_list, v_irn);
1300 dvg_edge->src = u_irn;
1301 dvg_edge->tgt = v_irn;
1302 dvg_edge->next = NULL;
1307 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1309 /* add the edge to the DVG */
1310 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", u_irn, v_irn));
1311 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1317 if (rss->opts->dump_flags & RSS_DUMP_DVG)
1318 debug_vcg_dump_dvg(rss, dvg);
1322 * Updates the dvg structure when a serialization edge from src -> tgt is added.
1324 static void update_dvg(rss_t *rss, dvg_t *dvg, rss_irn_t *src, rss_irn_t *tgt) {
1327 add_dvg_edge(rss, dvg, tgt->irn, src->irn, 1);
1329 for (i = ARR_LEN_SAFE(src->descendants) - 1; i >= 0; --i) {
1330 add_dvg_edge(rss, dvg, tgt->irn, src->descendants[i], 1);
1336 * Accumulate all descendants for root into list.
1338 static void accumulate_dvg_descendant_values(rss_t *rss, rss_irn_t *root, plist_t *list) {
1339 if (plist_count(root->dvg_user_list) > 0) {
1340 plist_element_t *el;
1342 foreach_plist(root->dvg_user_list, el) {
1343 ir_node *v_irn = plist_element_get_value(el);
1344 rss_irn_t *v = get_rss_irn(rss, v_irn);
1346 /* add v as descendant */
1347 if (! plist_has_value(list, v_irn)) {
1348 plist_insert_back(list, v_irn);
1349 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG descendant %+F\n", v_irn));
1352 /* accumulate v's descendants */
1353 accumulate_dvg_descendant_values(rss, v, list);
1359 * Builds the list of potential killers for each node
1361 * Needs the descendant list for all user as sorted array.
1363 static void build_dvg_pkiller_list(rss_t *rss, dvg_t *dvg) {
1366 foreach_nodeset(dvg->nodes, irn) {
1367 rss_irn_t *node = get_rss_irn(rss, irn);
1368 plist_element_t *el, *el2;
1370 DBG((rss->dbg, DEBUG_DVG, "\t\tbuilding pkiller list for %+F\n", irn));
1372 /* check each user */
1373 foreach_plist(node->dvg_user_list, el) {
1374 ir_node *u_irn = plist_element_get_value(el);
1376 /* is the current user u_irn not a descendant of any other user -> pkiller */
1377 foreach_plist(node->dvg_user_list, el2) {
1378 ir_node *v_irn = plist_element_get_value(el2);
1379 rss_irn_t *v = get_rss_irn(rss, v_irn);
1382 ! BSEARCH_IRN_ARR(u_irn, v->dvg_desc) &&
1383 ! plist_has_value(node->dvg_pkiller_list, u_irn))
1385 plist_insert_back(node->dvg_pkiller_list, u_irn);
1386 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG pkiller %+F\n", u_irn));
1391 node->dvg_pkiller = build_sorted_array_from_list(node->dvg_pkiller_list, phase_obst(&rss->ph));
1395 if (firm_dbg_get_mask(rss->dbg) & DEBUG_DVG)
1396 debug_vcg_dump_dvg_pkiller(rss, dvg);
1403 * Compute the maximal antichain of the current DVG.
1404 * This is a reimplementation of the MAXIMAL_ANTI_CHAIN function
1405 * from the DDG library 1.1 (DAG.cpp).
1407 static nodeset *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration) {
1408 int n = nodeset_count(dvg->nodes);
1409 int *assignment = alloca(n * sizeof(assignment[0]));
1410 int *assignment_rev = alloca(n * sizeof(assignment_rev[0]));
1411 int *idx_map = alloca(n * sizeof(idx_map[0]));
1412 hungarian_problem_t *bp;
1413 nodeset *values, *temp;
1415 int i, j, cost, cur_chain, res;
1416 rss_edge_t *dvg_edge;
1418 #define MAP_IDX(irn) bsearch_for_index(get_irn_idx(irn), idx_map, n, 1)
1420 if (pset_count(dvg->edges) == 0)
1423 bp = hungarian_new(n, n, 1, HUNGARIAN_MATCH_NORMAL);
1426 At first, we build an index map for the nodes in the DVG,
1427 because we cannot use the irn idx therefore as the resulting
1428 bipartite data structure would have around 1.2GB.
1429 So we limit the size to the number of nodes we have in the DVG
1430 and build a sorted index map for their irn indices.
1433 foreach_nodeset(dvg->nodes, u_irn) {
1434 idx_map[i++] = get_irn_idx(u_irn);
1436 qsort(idx_map, n, sizeof(idx_map[0]), cmp_int);
1438 foreach_pset(dvg->edges, dvg_edge) {
1439 int idx_u = MAP_IDX(dvg_edge->src);
1440 int idx_v = MAP_IDX(dvg_edge->tgt);
1442 /* add the entry to the bipartite data structure */
1443 hungarian_add(bp, idx_u, idx_v, 1);
1444 DBG((rss->dbg, LEVEL_3, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1445 idx_u, dvg_edge->src, idx_v, dvg_edge->tgt));
1449 Add a bipartite entry for each pair of nodes (u, v), where exists a
1450 path in the DVG from u to v, ie. connect all descendants(v) to v.
1451 desc_dvg(v) = accumulated descendants of all z in dvg_user_list(v)
1453 foreach_nodeset(dvg->nodes, u_irn) {
1454 rss_irn_t *u = get_rss_irn(rss, u_irn);
1455 int idx_u_irn = MAP_IDX(u_irn);
1457 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG descendants of %+F:\n", u_irn));
1459 //plist_clear(u->dvg_desc_list);
1460 //accumulate_dvg_descendant_values(rss, u, u->dvg_desc_list);
1463 FIXME: The array is build on the phase obstack and we cannot free the data.
1464 So the array get as many times allocated as this function is called.
1467 /* build the sorted array for faster searches */
1468 //u->dvg_desc = build_sorted_array_from_list(u->dvg_desc_list, phase_obst(&rss->ph));
1470 DBG((rss->dbg, DEBUG_MAX_AC, "\t\tadding bipartite entries of %+F:\n", u_irn));
1472 /* add the bipartite entries for all descendants */
1473 for (i = ARR_LEN_SAFE(u->dvg_desc) - 1; i >= 0; --i) {
1474 rss_irn_t *desc = get_rss_irn(rss, u->dvg_desc[i]);
1475 int idx_desc = MAP_IDX(u->dvg_desc[i]);
1477 /* add the entry to the bipartite data structure */
1478 hungarian_add(bp, idx_u_irn, idx_desc, 1);
1479 DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1480 idx_u_irn, u_irn, idx_desc, u->dvg_desc[i]));
1487 /* We want maximum cardinality matching */
1488 hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);
1491 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG pkiller:\n"));
1492 /* beware: the following function needs the dvg_desc array */
1493 build_dvg_pkiller_list(rss, dvg);
1496 DBG((rss->dbg, LEVEL_2, "\t\tcomputing bipartite matching\n"));
1498 The maximum cardinality bipartite matching gives us the minimal
1499 chain partition, which corresponds to the maximum anti chains.
1501 res = hungarian_solve(bp, assignment, &cost, 1);
1502 assert(res == 0 && "Bipartite matching failed!");
1505 memset(assignment_rev, -1, n * sizeof(assignment_rev[0]));
1507 /* build the reverse assignment, ie. foreach i -> j, add j -> i */
1508 for (i = 0; i < n; ++i) {
1509 if (assignment[i] >= 0) {
1510 int j = assignment[i];
1511 assignment_rev[j] = i;
1515 DBG((rss->dbg, LEVEL_2, "\t\tgot assignment with cost %d\n", cost));
1516 DBG((rss->dbg, LEVEL_3, "\t\t\tassignment --- reverse assignment\n", cost));
1517 for (i = 0; i < n; ++i) {
1518 DBG((rss->dbg, LEVEL_3, "\t\t\t%3d -> %3d %3d -> %3d\n", i, assignment[i], i, assignment_rev[i]));
1521 values = new_nodeset(10);
1523 /* Construction of the minimal chain partition */
1524 for (j = 0; j < n; ++j) {
1525 /* check nodes, which did not occur as target */
1526 if (assignment_rev[j] == -1) {
1527 int xj = idx_map[j];
1528 ir_node *xj_irn = get_idx_irn(rss->irg, xj);
1529 rss_irn_t *xj_rss = get_rss_irn(rss, xj_irn);
1530 chain_t *c = obstack_alloc(phase_obst(&rss->ph), sizeof(*c));
1533 /* there was no source for j -> we have a source of a new chain */
1534 nodeset_insert(values, xj_irn);
1536 c->elements = plist_obstack_new(phase_obst(&rss->ph));
1537 c->nr = cur_chain++;
1538 plist_insert_back(c->elements, xj_irn);
1542 DBG((rss->dbg, LEVEL_2, "\t\tstarting chain %d:\n", c->nr));
1543 DBG((rss->dbg, LEVEL_2, "\t\t\t%+F (%d)", xj_irn, j));
1545 /* follow chain, having j as source */
1547 while (assignment[source] >= 0) {
1548 int target = assignment[source];
1549 int irn_idx = idx_map[target];
1550 ir_node *irn = get_idx_irn(rss->irg, irn_idx);
1551 rss_irn_t *node = get_rss_irn(rss, irn);
1553 plist_insert_back(c->elements, irn);
1556 DB((rss->dbg, LEVEL_2, " -> %+F (%d)", irn, target));
1558 /* new source = last target */
1562 DB((rss->dbg, LEVEL_2, "\n"));
1567 Computing the maximal antichain: Select an element from each
1568 chain such, such it is parallel with the others.
1570 DBG((rss->dbg, LEVEL_1, "\t\tcomputing set of saturation values (MAX AC)\n"));
1571 DBG((rss->dbg, LEVEL_3, "\t\tstarting with:\n"));
1574 if (firm_dbg_get_mask(rss->dbg) & LEVEL_3)
1575 dump_nodeset(values, "\t\t\t");
1581 We need an explicit array for the values as
1582 we cannot iterate multiple times over the same
1583 set at the same time. :-(((((
1585 int n = nodeset_count(values);
1587 ir_node **val_arr = NEW_ARR_F(ir_node *, n);
1589 foreach_nodeset(values, u_irn)
1590 val_arr[i++] = u_irn;
1595 temp = new_nodeset(10);
1597 /* Select all nodes from current value set, having another node in the set as descendant. */
1598 for (i = 0; i < n; ++i) {
1599 rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1601 for (j = 0; j < n; ++j) {
1605 key.src = val_arr[i];
1606 key.tgt = val_arr[j];
1608 if (pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1609 /* v[j] is descendant of u -> remove u and break */
1610 nodeset_insert(temp, u->irn);
1611 nodeset_remove(values, u->irn);
1613 DBG((rss->dbg, LEVEL_3, "\t\t\tremoving %+F from values, adding it to temp\n", u->irn));
1621 /* Try to insert the chain predecessor of all selected u's */
1622 foreach_nodeset(temp, u_irn) {
1623 rss_irn_t *u = get_rss_irn(rss, u_irn);
1624 chain_t *c = u->chain;
1625 plist_element_t *el = plist_find_value(c->elements, u_irn);
1627 assert(el && "Missing element in chain!");
1629 /* If u has predecessor in chain: insert the predecessor */
1630 if (el = plist_element_get_prev(el)) {
1631 nodeset_insert(values, plist_element_get_value(el));
1632 DBG((rss->dbg, LEVEL_3, "\t\t\tadding %+F to values\n", plist_element_get_value(el)));
1638 } while (nodeset_count(temp) > 0);
1640 DBG((rss->dbg, LEVEL_2, "\t\tfinal set:\n"));
1642 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2) {
1643 dump_nodeset(values, "\t\t\t");
1647 if (rss->opts->dump_flags & RSS_DUMP_MAXAC)
1648 debug_vcg_dump_pkg(rss, values, iteration);
1658 * Computes the best serialization between two nodes of sat_vals.
1660 static serialization_t *compute_best_admissible_serialization(rss_t *rss, nodeset *sat_vals, serialization_t *ser, int num_regs) {
1661 int n = nodeset_count(sat_vals);
1662 int n_idx = ARR_LEN_SAFE(rss->idx_map);
1664 ir_node **val_arr = alloca(n * sizeof(val_arr[0]));
1665 bitset_t *bs_sv = bitset_alloca(n_idx);
1666 bitset_t *bs_vdesc = bitset_alloca(n_idx);
1667 bitset_t *bs_tmp = bitset_alloca(n_idx);
1668 bitset_t *bs_ukilldesc = bitset_alloca(n_idx);
1669 int best_benefit = INT_MAX;
1670 int best_omega2 = INT_MAX;
1671 int best_benefit_omega20 = INT_MAX;
1675 rss_edge_t min_benefit_edge;
1676 rss_edge_t min_omega20_edge;
1677 rss_irn_t *ser_u_omega1, *ser_v_omega1;
1678 rss_irn_t *ser_u_omega20, *ser_v_omega20;
1680 DBG((rss->dbg, LEVEL_1, "\tcomputing admissible serializations:\n"));
1683 We need an explicit array for the values as
1684 we cannot iterate multiple times over the same
1685 set at the same time. :-(((((
1688 foreach_nodeset(sat_vals, irn) {
1690 bitset_set(bs_sv, BLOCK_IDX_MAP(rss, irn));
1694 We build all admissible serializations and remember the best found so far.
1697 if v in pkiller(u): add edge from v to all other pkiller(u)
1698 else: for all vv in pkiller(u): add edge from v to vv if there exists no path from vv to v
1702 A node is unserializable if:
1703 - it has only one killer and this one is Sink
1704 - it kills no other values
1705 In this case there is no serialization which could
1706 reduce the registerpressure
1708 #define IS_UNSERIALIZABLE_NODE(rss_node) \
1709 ((plist_count(rss_node->pkiller_list) == 1) && \
1710 is_Sink(rss_node->killer) && \
1711 (plist_count(rss_node->kill_value_list) == 0))
1713 /* for all u in sat_vals */
1714 for (i = 0; i < n; ++i) {
1715 rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1716 int u_height = get_irn_height(rss->h, val_arr[i]);
1717 plist_element_t *el;
1719 /* ignore nodes where serialization does not help */
1720 if (IS_UNSERIALIZABLE_NODE(u)) {
1721 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", u->irn));
1725 /* accumulate all descendants of all pkiller(u) */
1726 bitset_clear_all(bs_ukilldesc);
1727 foreach_plist(u->pkiller_list, el) {
1728 ir_node *irn = plist_element_get_value(el);
1729 rss_irn_t *node = get_rss_irn(rss, irn);
1732 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, irn));
1736 for (k = ARR_LEN_SAFE(node->descendants) - 1; k >= 0; --k) {
1737 if (! is_Sink(node->descendants[k]))
1738 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, node->descendants[k]));
1742 /* for all v in sat_vals */
1743 for (j = 0; j < n; ++j) {
1744 ir_node *v_irn = val_arr[j];
1745 rss_irn_t *v = get_rss_irn(rss, v_irn);
1746 unsigned v_height = get_irn_height(rss->h, v_irn);
1747 int omega1, omega2, is_pkiller;
1749 /* v cannot be serialized with itself
1750 * ignore nodes where serialization does not help */
1751 if (i == j || IS_UNSERIALIZABLE_NODE(v)) {
1753 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", v->irn));
1757 /* get descendants of v */
1758 bitset_clear_all(bs_vdesc);
1759 bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v_irn));
1760 for (k = ARR_LEN_SAFE(v->descendants) - 1; k >= 0; --k) {
1761 if (! is_Sink(v->descendants[k]))
1762 bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v->descendants[k]));
1765 /* if v is in pkiller(u) */
1766 is_pkiller = plist_has_value(u->pkiller_list, v_irn);
1768 /* for all vv in pkiller(u) */
1769 foreach_plist(u->pkiller_list, el) {
1770 ir_node *vv_irn = plist_element_get_value(el);
1773 if (is_Sink(vv_irn))
1777 add_edge = vv_irn != v_irn && skip_Proj(vv_irn) != skip_Proj(v_irn);
1779 add_edge = ! heights_reachable_in_block(rss->h, skip_Proj(vv_irn), skip_Proj(v_irn));
1782 As we add an edge from vv -> v, we have to make sure,
1783 that there exists no path from v to vv.
1787 unsigned vv_height = get_irn_height(rss->h, vv_irn);
1788 int mu1, mu2, critical_path_cost;
1791 mu1 = | descendants(v) cut sat_vals |
1792 the number of saturating values which cannot
1793 be simultaneously alive with u
1795 bitset_copy(bs_tmp, bs_vdesc);
1796 mu1 = bitset_popcnt(bitset_and(bs_tmp, bs_sv));
1799 mu2 = | accum_desc_all_pkiller(u) without descendants(v) |
1802 bitset_copy(bs_tmp, bs_ukilldesc);
1803 mu2 = bitset_popcnt(bitset_andnot(bs_tmp, bs_vdesc));
1809 /* omega1 = mu1 - mu2 */
1815 /* omega2 = increase of critical path */
1816 critical_path_cost =
1817 v_height /* longest path from v to sink */
1818 + rss->max_height - vv_height /* longest path from source to vv */
1822 If critical_path_cost > max_height -> the new edge
1823 would increase the longest critical path by the difference.
1825 omega2 = critical_path_cost > rss->max_height ? critical_path_cost - rss->max_height : 0;
1827 /* this keeps track of the edge with the best benefit */
1828 if (omega1 >= num_regs - n && omega1 < best_benefit) {
1829 min_benefit_edge.src = v_irn;
1830 min_benefit_edge.tgt = vv_irn;
1835 best_benefit = omega1;
1836 ser->new_killer = is_pkiller;
1839 /* this keeps track of the edge with the best omega1 costs where omega2 == 0 */
1840 if (omega2 == 0 && omega1 >= num_regs - n && omega1 < best_benefit_omega20) {
1841 min_omega20_edge.src = v_irn;
1842 min_omega20_edge.tgt = vv_irn;
1847 best_benefit_omega20 = omega1;
1848 ser->new_killer = is_pkiller;
1851 best_omega2 = MIN(best_omega2, omega2);
1853 DBG((rss->dbg, LEVEL_2, "\t\tfound %+F -> %+F (w1 %d, w2 %d)\n",
1854 v_irn, vv_irn, omega1, omega2));
1856 } /* for all vv in pkiller(u) */
1857 } /* for all v in sat_vals */
1858 } /* for all u in sat_vals */
1863 if (best_omega2 == 0) {
1864 ser->u = ser_u_omega20;
1865 ser->v = ser_v_omega20;
1866 ser->edge->src = min_omega20_edge.src;
1867 ser->edge->tgt = min_omega20_edge.tgt;
1868 ser->omega1 = best_benefit_omega20;
1869 ser->omega2 = best_omega2;
1872 ser->u = ser_u_omega1;
1873 ser->v = ser_v_omega1;
1874 ser->edge->src = min_benefit_edge.src;
1875 ser->edge->tgt = min_benefit_edge.tgt;
1876 ser->omega1 = best_benefit;
1877 ser->omega2 = best_omega2;
1882 #undef IS_UNSERIALIZABLE_NODE
1886 * Perform the value serialization heuristic and add all
1887 * computed serialization edges as dependencies to the irg.
1889 static void perform_value_serialization_heuristic(rss_t *rss) {
1890 bitset_t *arch_nonign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1891 bitset_t *abi_ign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1892 int available_regs, iteration;
1895 pset *ser_set = new_pset(cmp_rss_edges, 20);
1897 /* available_regs = R = |arch_non_ignore_regs cut ~abi_ignore_regs| */
1898 arch_put_non_ignore_regs(rss->arch_env, rss->cls, arch_nonign_bs);
1899 be_abi_put_ignore_regs(rss->abi, rss->cls, abi_ign_bs);
1900 bitset_andnot(arch_nonign_bs, abi_ign_bs);
1901 available_regs = bitset_popcnt(arch_nonign_bs);
1903 DBG((rss->dbg, LEVEL_1, "\n\t#available regs: %d\n\n", available_regs));
1906 At first we need to compute the disjoint value DAG (DVG = {V, E_dv}).
1907 V = set of all nodes we are currently interested in
1908 E_dv = there is an edge from u to v iff v is a descendant of killer(u), forall u, v in V
1910 dvg.nodes = new_nodeset(plist_count(rss->nodes));
1911 dvg.edges = new_pset(cmp_rss_edges, plist_count(rss->nodes) * 5);
1912 compute_dvg(rss, &dvg);
1915 Then we perform the heuristic serialization algorithm
1916 on the DVG which gives us all necessary serialization
1919 DBG((rss->dbg, LEVEL_1, "\tcomputing maximal antichain:\n"));
1921 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
1922 while (sat_vals && (nodeset_count(sat_vals) > available_regs)) {
1923 serialization_t *ser, best_ser;
1924 rss_edge_t *edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*edge));
1925 rss_irn_t *tgt, *src;
1926 ir_node *dep_src, *dep_tgt;
1928 best_ser.edge = edge;
1929 ser = compute_best_admissible_serialization(rss, sat_vals, &best_ser, available_regs);
1931 DBG((rss->dbg, LEVEL_1, "\tcurrent register saturation %d, target %d\n", nodeset_count(sat_vals), available_regs));
1934 DBG((rss->dbg, LEVEL_1, "\tno RS improving serialization found, breaking at iteration %d\n", iteration));
1938 /* Insert the serialization as dependency edge into the irg. */
1939 DBG((rss->dbg, LEVEL_1, "\tserializing %+F -> %+F with edge %+F -> %+F and cost %d, %d\n",
1940 ser->u->irn, ser->v->irn, ser->edge->src, ser->edge->tgt, ser->omega1, ser->omega2));
1942 if (pset_find(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge)))
1943 ir_printf("WARNING: serialization %+F -> %+F computed twice!\n", ser->edge->src, ser->edge->tgt);
1946 pset_insert(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge));
1948 /* update the dvg */
1949 update_dvg(rss, &dvg, ser->v, ser->u);
1950 update_dvg(rss, &dvg, get_rss_irn(rss, ser->edge->src), get_rss_irn(rss, ser->edge->tgt));
1951 del_nodeset(sat_vals);
1953 dep_src = skip_Proj(ser->edge->src);
1954 dep_tgt = ser->edge->tgt;
1955 add_irn_dep(dep_src, dep_tgt);
1957 /* Update descendants, consumer and pkillers of the target */
1958 update_node_info(rss, ser->edge->tgt, ser->edge->src);
1960 /* TODO: try to find a cheaper way for updating height information */
1961 rss->max_height = heights_recompute_block(rss->h, rss->block);
1963 /* Recompute the antichain for next serialization */
1964 DBG((rss->dbg, LEVEL_1, "\tre-computing maximal antichain:\n"));
1965 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
1968 del_nodeset(dvg.nodes);
1969 del_pset(dvg.edges);
1973 * Do initial calculations for a block.
1975 static void process_block(ir_node *block, void *env) {
1978 const ir_edge_t *edge;
1980 phase_init(&rss->ph, "rss block preprocessor", rss->irg, PHASE_DEFAULT_GROWTH, init_rss_irn);
1982 DBG((rss->dbg, LEVEL_1, "preprocessing block %+F\n", block));
1985 /* build an index map for all nodes in the current block */
1987 n = get_irn_n_edges(block);
1988 NEW_ARR_A(int *, rss->idx_map, n);
1989 foreach_out_edge(block, edge) {
1990 ir_node *irn = get_edge_src_irn(edge);
1991 rss->idx_map[i++] = get_irn_idx(irn);
1993 qsort(rss->idx_map, n, sizeof(rss->idx_map[0]), cmp_int);
1994 rss->max_height = heights_recompute_block(rss->h, block);
1996 /* loop over all register classes */
1997 for (i = arch_isa_get_n_reg_class(rss->arch_env->isa) - 1; i >= 0; --i) {
1998 const arch_register_class_t *cls = arch_isa_get_reg_class(rss->arch_env->isa, i);
2001 DBG((rss->dbg, LEVEL_1, "register class %s\n", arch_register_class_name(cls)));
2003 /* reset the list of interesting nodes */
2004 plist_clear(rss->nodes);
2005 plist_insert_back(rss->nodes, _sink);
2007 /* collect all nodes having a certain register class */
2008 foreach_out_edge(block, edge) {
2009 ir_node *irn = get_edge_src_irn(edge);
2013 - mode_T nodes (the projs are asked)
2014 - mode_X nodes (control flow nodes are always scheduled last)
2015 - Keeps (they are always immediately scheduled)
2016 - Phi (same as Keep)
2018 if (get_irn_mode(irn) == mode_T || be_is_Keep(irn) || is_Phi(irn))
2022 In case of a proj, we skip
2023 - Barrier (they are a Barrier :)
2025 - the Proj itself, as it's scheduled always with it's super node
2028 ir_node *pred = get_Proj_pred(irn);
2029 if (be_is_Barrier(pred) || is_Start(pred))
2033 if (! arch_irn_is(rss->arch_env, irn, ignore) && arch_get_irn_reg_class(rss->arch_env, irn, -1) == cls) {
2034 plist_insert_back(rss->nodes, skip_Proj(irn));
2036 /* calculate the descendants and consumer for each node in the block */
2037 collect_node_info(rss, skip_Proj(irn));
2041 /* compute the potential killing set PK(G) */
2042 compute_pkill_set(rss);
2044 /* compute the killing function k* */
2045 compute_killing_function(rss);
2048 Compute the heuristic value serialization and
2049 add the necessary dependencies to the irg.
2051 perform_value_serialization_heuristic(rss);
2054 phase_free(&rss->ph);
2059 * Register the options.
2061 void rss_register_options(lc_opt_entry_t *grp) {
2062 static int run_once = 0;
2063 lc_opt_entry_t *rss_grp;
2067 rss_grp = lc_opt_get_grp(grp, "rss");
2069 lc_opt_add_table(rss_grp, rss_option_table);
2072 #endif /* WITH_LIBCORE */
2075 * Preprocess the irg for scheduling.
2077 void rss_schedule_preparation(const be_irg_t *birg) {
2080 FIRM_DBG_REGISTER(rss.dbg, "firm.be.sched.rss");
2082 //firm_dbg_set_mask(rss.dbg, LEVEL_1 | LEVEL_2 | LEVEL_3);
2084 init_rss_special_nodes(birg->irg);
2086 rss.irg = birg->irg;
2087 rss.arch_env = birg->main_env->arch_env;
2088 rss.abi = birg->abi;
2089 rss.h = heights_new(birg->irg);
2090 rss.nodes = plist_new();
2091 rss.opts = &rss_options;
2092 irg_block_walk_graph(birg->irg, NULL, process_block, &rss);
2093 heights_free(rss.h);
2094 plist_free(rss.nodes);
2096 if (birg->main_env->options->dump_flags & DUMP_SCHED)
2097 be_dump(rss.irg, "-rss", dump_ir_block_graph);