2 * Implementation of a register saturating list scheduler
3 * as described in: Sid-Ahmed-Ali Touati
4 * Register Saturation in Superscalar and VLIW Codes
6 * @license This file protected by GPL - GNU GENERAL PUBLIC LICENSE.
7 * @author Christian Wuerdig
20 #include "irgraph_t.h"
22 #include "iredges_t.h"
24 #include "irphase_t.h"
30 #include "bipartite.h"
31 #include "hungarian.h"
38 #include "besched_t.h"
41 #include <libcore/lc_opts.h>
42 #include <libcore/lc_opts_enum.h>
43 #endif /* WITH_LIBCORE */
46 #define ARR_LEN_SAFE(arr) ((arr) != NULL ? ARR_LEN((arr)) : 0)
48 #define HASH_RSS_EDGE(edge) ((get_irn_node_nr((edge)->src) << 16) | (get_irn_node_nr((edge)->tgt) & 0xFFFF))
49 #define BSEARCH_IRN_ARR(val, arr) \
50 bsearch(&(val), (arr), ARR_LEN_SAFE((arr)), sizeof((arr)[0]), cmp_irn_idx)
52 #define BLOCK_IDX_MAP(rss, irn) bsearch_for_index(get_irn_idx((irn)), (rss)->idx_map, ARR_LEN_SAFE((rss)->idx_map), 1)
55 typedef struct _rss_opts_t {
59 /* Represents a child with associated costs */
60 typedef struct _child {
65 /* We need edges for several purposes. */
66 typedef struct _rss_edge {
72 /* Represents a connected bipartite component. */
74 nodeset *parents; /**< = S a set of value producers */
75 nodeset *children; /**< = T a set of value consumers */
76 pset *kill_edges; /**< = E a set of edges (t in T, s in S) such as each s in S gets killed by at least one t in T */
77 int nr; /**< a deterministic index for set insertion (used as hash) */
80 /* Represents a disjoint value DAG. */
86 /* Represents a chain of nodes. */
87 typedef struct _chain {
88 plist_t *elements; /**< List of chain elements */
89 int nr; /**< a deterministic index for set insertion (used as hash) */
92 typedef struct _rss_irn {
93 plist_t *consumer_list; /**< List of consumers */
94 ir_node **consumer; /**< Sorted consumer array (needed for faster access) */
96 plist_t *parent_list; /**< List of parents */
97 ir_node **parents; /**< Sorted parent array (needed for faster access) */
99 plist_t *descendant_list; /**< List of descendants */
100 ir_node **descendants; /**< Sorted descendant array (needed for faster access) */
102 plist_t *pkiller_list; /**< List of potential killers */
103 ir_node **pkillers; /**< Sorted pkiller array (needed for faster access) */
106 plist_t *dvg_desc_list; /**< List of all descendants in the DVG */
107 ir_node **dvg_desc; /**< Sorted dvg descendant array (needed for faster access) */
109 plist_t *dvg_pkiller_list; /**< List of potential killers in the DVG */
110 ir_node **dvg_pkiller; /**< Sorted dvg pkiller array (needed for faster access) */
112 plist_t *dvg_user_list; /**< List of users in the disjoint value DAG DVG */
114 plist_t *kill_value_list; /**< List of values getting potentially killed by this node */
116 ir_node *killer; /**< The selected unique killer */
117 ir_node *irn; /**< The corresponding firm node to this rss_irn */
119 chain_t *chain; /**< The chain, this node is associated to */
121 unsigned desc_walk; /**< visited flag for collecting descendants */
123 unsigned live_out : 1; /**< irn has consumers outside of it's block */
124 unsigned visited : 1; /**< visited flag for bipartite decomposition */
125 unsigned havecons : 1; /**< visited flag collect consumer */
126 unsigned handled : 1; /**< flag indicating whether or not the list structures have been build */
127 unsigned dumped : 1; /**< flag indication whether or not this node was dumped */
130 /* Represents a serialization edge with associated costs. */
131 typedef struct _serialization {
132 rss_irn_t *u; /* the top node */
133 rss_irn_t *v; /* the node about to be serialized after u */
134 rss_edge_t *edge; /* the edge selected for the serialization */
135 int omega1; /* estimated: available regs - RS reduction */
136 int omega2; /* increase in critical path length */
140 typedef struct _rss {
141 phase_t ph; /**< Phase to hold some data */
142 heights_t *h; /**< The current height object */
143 ir_graph *irg; /**< The irg to preprocess */
144 plist_t *nodes; /**< The list of interesting nodes */
145 const arch_env_t *arch_env; /**< The architecture environment */
146 be_abi_irg_t *abi; /**< The abi for this irg */
147 pset *cbc_set; /**< A set of connected bipartite components */
148 ir_node *block; /**< The current block in progress. */
149 int *idx_map; /**< Mapping irn indices to per block indices */
150 unsigned max_height; /**< maximum height in the current block */
151 rss_opts_t *opts; /**< The options */
152 const arch_register_class_t *cls; /**< The current register class */
153 DEBUG_ONLY(firm_dbg_module_t *dbg);
156 #define get_rss_irn(rss, irn) (phase_get_or_set_irn_data(&rss->ph, irn))
159 * We need some special nodes:
160 * a source and a sink for all live-in and live-out values of a block
169 static ir_node *_source = NULL;
170 static ir_node *_sink = NULL;
172 #define is_Source(irn) ((irn) == _source)
173 #define is_Sink(irn) ((irn) == _sink)
177 RSS_DUMP_CBC = 1 << 0,
178 RSS_DUMP_PKG = 1 << 1,
179 RSS_DUMP_KILL = 1 << 2,
180 RSS_DUMP_DVG = 1 << 3,
181 RSS_DUMP_MAXAC = 1 << 4,
182 RSS_DUMP_ALL = (RSS_DUMP_MAXAC << 1) - 1,
185 static rss_opts_t rss_options = {
190 static const lc_opt_enum_int_items_t dump_items[] = {
191 { "none", RSS_DUMP_NONE },
192 { "cbc", RSS_DUMP_CBC },
193 { "pkg", RSS_DUMP_PKG },
194 { "kill", RSS_DUMP_KILL },
195 { "dvg", RSS_DUMP_DVG },
196 { "maxac", RSS_DUMP_MAXAC },
197 { "all", RSS_DUMP_ALL },
201 static lc_opt_enum_int_var_t dump_var = {
202 &rss_options.dump_flags, dump_items
205 static const lc_opt_table_entry_t rss_option_table[] = {
206 LC_OPT_ENT_ENUM_MASK("dump", "dump phases (none, cbc, pkg, kill, dvg, maxac, all)", &dump_var),
209 #endif /* WITH_LIBCORE */
211 /******************************************************************************
213 * | | | | / _| | | (_)
214 * | |__ ___| |_ __ ___ _ __ | |_ _ _ _ __ ___| |_ _ ___ _ __ ___
215 * | '_ \ / _ \ | '_ \ / _ \ '__| | _| | | | '_ \ / __| __| |/ _ \| '_ \/ __|
216 * | | | | __/ | |_) | __/ | | | | |_| | | | | (__| |_| | (_) | | | \__ \
217 * |_| |_|\___|_| .__/ \___|_| |_| \__,_|_| |_|\___|\__|_|\___/|_| |_|___/
220 ******************************************************************************/
223 * Acquire opcodes and create source and sink nodes.
225 static void init_rss_special_nodes(ir_graph *irg) {
226 ir_node *block = get_irg_start_block(irg);
227 int iro_rss_base = get_next_ir_opcodes(iro_rss_last);
228 ir_op *op_rss_Source = new_ir_op(iro_rss_base + iro_rss_Source, "rss_Source", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
229 ir_op *op_rss_Sink = new_ir_op(iro_rss_base + iro_rss_Sink, "rss_Sink", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
230 _source = new_ir_node(NULL, irg, block, op_rss_Source, mode_ANY, 0, NULL);
231 _sink = new_ir_node(NULL, irg, block, op_rss_Sink, mode_ANY, 0, NULL);
234 static int cmp_int(const void *a, const void *b) {
238 return QSORT_CMP(*i1, *i2);
241 static int cmp_child_costs(const void *a, const void *b) {
242 const child_t *c1 = a;
243 const child_t *c2 = b;
245 return QSORT_CMP(c1->cost, c2->cost);
248 static int cmp_irn_idx(const void *a, const void *b) {
249 const ir_node *n1 = *(ir_node **)a;
250 const ir_node *n2 = *(ir_node **)b;
252 return QSORT_CMP(get_irn_idx(n1), get_irn_idx(n2));
255 static int cmp_rss_edges(const void *a, const void *b) {
256 const rss_edge_t *e1 = a;
257 const rss_edge_t *e2 = b;
259 return (e1->src != e2->src) || (e1->tgt != e2->tgt);
262 static int bsearch_for_index(int key, int *arr, size_t len, int force) {
266 while (right >= left) {
267 int idx = (left + right) / 2;
271 else if (key > arr[idx])
278 assert(0 && "Something is wrong, key not found.");
282 static ir_node **build_sorted_array_from_list(plist_t *irn_list, struct obstack *obst) {
285 int len = plist_count(irn_list);
286 ir_node **arr = NEW_ARR_D(ir_node *, obst, len);
288 /* copy the list into the array */
289 foreach_plist(irn_list, el) {
290 arr[i++] = plist_element_get_value(el);
293 /* sort the array by node index */
294 qsort(arr, len, sizeof(arr[0]), cmp_irn_idx);
299 /*****************************************************
302 * __| | ___| |__ _ _ __ _ __ _ _ _ __ __ _
303 * / _` |/ _ \ '_ \| | | |/ _` |/ _` | | '_ \ / _` |
304 * | (_| | __/ |_) | |_| | (_| | (_| | | | | | (_| |
305 * \__,_|\___|_.__/ \__,_|\__, |\__, |_|_| |_|\__, |
309 *****************************************************/
311 static void dump_nodeset(nodeset *ns, const char *prefix) {
313 foreach_nodeset(ns, irn) {
314 ir_fprintf(stderr, "%s%+F\n", prefix, irn);
318 static void build_file_name(rss_t *rss, const char *suffix, size_t suf_len, char *buf, size_t len) {
319 const char *irg_name;
322 irg_name = get_entity_name(get_irg_entity(rss->irg));
323 snprintf(buf, len - suf_len, "%s-%s-block-%ld",
324 irg_name, arch_register_class_name(rss->cls), get_irn_node_nr(rss->block));
328 /* Dumps all collected bipartite components of current irg as vcg. */
329 static void debug_vcg_dump_bipartite(rss_t *rss) {
333 static const char suffix[] = "-RSS-CBC.vcg";
335 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
336 f = fopen(file_name, "w");
338 ir_fprintf(f, "graph: { title: \"connected bipartite component graph of %+F\"\n", rss->irg);
339 fprintf(f, "display_edge_labels: no\n");
340 fprintf(f, "layoutalgorithm: mindepth\n");
341 fprintf(f, "manhattan_edges: yes\n\n");
343 foreach_pset(rss->cbc_set, cbc) {
347 fprintf(f, "graph: { titel: \"cbc %d\" label: \"cbc %d\" status:clustered color:yellow\n", cbc->nr, cbc->nr);
348 foreach_nodeset(cbc->parents, n) {
349 ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
351 foreach_nodeset(cbc->children, n) {
352 ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
354 foreach_pset(cbc->kill_edges, ke) {
355 ir_fprintf(f, "edge: { sourcename: \"n%d_%d\" targetname: \"n%d_%d\" }\n",
356 get_irn_node_nr(ke->src), cbc->nr, get_irn_node_nr(ke->tgt), cbc->nr);
364 /* Dump the computed killing function as vcg. */
365 static void debug_vcg_dump_kill(rss_t *rss) {
369 static const char suffix[] = "-RSS-KILL.vcg";
371 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
372 f = fopen(file_name, "w");
374 ir_fprintf(f, "graph: { title: \"computed kill graph of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
375 fprintf(f, "display_edge_labels: no\n");
376 fprintf(f, "layoutalgorithm: mindepth\n");
377 fprintf(f, "manhattan_edges: yes\n\n");
379 /* first: reset dumped flag of all nodes */
380 foreach_plist(rss->nodes, el) {
381 ir_node *irn = plist_element_get_value(el);
382 rss_irn_t *rirn = get_rss_irn(rss, irn);
386 /* dump all nodes and their killers */
387 foreach_plist(rss->nodes, el) {
388 ir_node *irn = plist_element_get_value(el);
389 rss_irn_t *rirn = get_rss_irn(rss, irn);
390 rss_irn_t *pk_rirn = get_rss_irn(rss, rirn->killer);
392 if (! rirn->dumped) {
393 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
397 if (! pk_rirn->dumped) {
398 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(rirn->killer), rirn->killer);
402 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
403 get_irn_node_nr(rirn->killer), get_irn_node_nr(irn));
410 /* Dumps the potential killing DAG (PKG) as vcg. */
411 static void debug_vcg_dump_pkg(rss_t *rss, nodeset *max_ac, int iteration) {
414 char *suffix = alloca(32);
415 static const char suffix1[] = "-RSS-PKG.vcg";
416 static const char suffix2[] = "-RSS-PKG-MAXAC.vcg";
420 snprintf(suffix, 32, "%s", suffix1);
423 snprintf(suffix, 32, "-%02d%s", iteration, suffix2);
426 build_file_name(rss, suffix, strlen(suffix) + 1, file_name, sizeof(file_name));
427 f = fopen(file_name, "w");
429 ir_fprintf(f, "graph: { title: \"potential killing DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
430 fprintf(f, "display_edge_labels: no\n");
431 fprintf(f, "layoutalgorithm: mindepth\n");
432 fprintf(f, "manhattan_edges: yes\n\n");
434 foreach_plist(rss->nodes, el) {
435 ir_node *irn = plist_element_get_value(el);
436 rss_irn_t *rirn = get_rss_irn(rss, irn);
438 plist_element_t *k_el;
440 /* dump selected saturating values in yellow */
441 if (max_ac && nodeset_find(max_ac, irn))
445 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F c%d\" %s }\n", get_irn_node_nr(irn), irn, rirn->chain->nr, c1);
447 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(irn), irn, c1);
450 foreach_plist(rirn->pkiller_list, k_el) {
451 ir_node *pkiller = plist_element_get_value(k_el);
452 rss_irn_t *pk_rirn = get_rss_irn(rss, pkiller);
455 if (max_ac && nodeset_find(max_ac, pkiller))
458 if (! pk_rirn->dumped) {
460 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F c%d\" %s }\n", get_irn_node_nr(pkiller), pkiller, pk_rirn->chain->nr, c2);
462 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(pkiller), pkiller, c2);
465 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
466 get_irn_node_nr(pkiller), get_irn_node_nr(irn));
473 /* Dumps the disjoint value DAG (DVG) as vcg. */
474 static void debug_vcg_dump_dvg(rss_t *rss, dvg_t *dvg) {
475 static const char suffix[] = "-RSS-DVG.vcg";
481 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
482 f = fopen(file_name, "w");
484 ir_fprintf(f, "graph: { title: \"disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
485 fprintf(f, "display_edge_labels: no\n");
486 fprintf(f, "layoutalgorithm: mindepth\n");
487 fprintf(f, "manhattan_edges: yes\n\n");
490 foreach_nodeset(dvg->nodes, irn) {
491 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
495 foreach_pset(dvg->edges, edge) {
496 rss_irn_t *src = get_rss_irn(rss, edge->src);
497 rss_irn_t *tgt = get_rss_irn(rss, edge->tgt);
499 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
500 get_irn_node_nr(edge->src), get_irn_node_nr(edge->tgt));
508 /* Dumps the PKG(DVG). */
509 static void debug_vcg_dump_dvg_pkiller(rss_t *rss, dvg_t *dvg) {
510 static const char suffix[] = "-RSS-DVG-PKG.vcg";
515 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
516 f = fopen(file_name, "w");
518 ir_fprintf(f, "graph: { title: \"PKG of disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
519 fprintf(f, "display_edge_labels: no\n");
520 fprintf(f, "layoutalgorithm: mindepth\n");
521 fprintf(f, "manhattan_edges: yes\n\n");
524 foreach_nodeset(dvg->nodes, irn) {
525 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
529 foreach_nodeset(dvg->nodes, irn) {
530 rss_irn_t *node = get_rss_irn(rss, irn);
533 foreach_plist(node->dvg_pkiller_list, el) {
534 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
535 get_irn_node_nr(plist_element_get_value(el)), get_irn_node_nr(irn));
545 * In case there is no rss information for irn, initialize it.
547 static void *init_rss_irn(phase_t *ph, ir_node *irn, void *old) {
548 rss_irn_t *res = old ? old : phase_alloc(ph, sizeof(res[0]));
550 res->descendant_list = plist_obstack_new(phase_obst(ph));
551 res->descendants = NULL;
553 res->consumer_list = plist_obstack_new(phase_obst(ph));
554 res->consumer = NULL;
556 res->pkiller_list = plist_obstack_new(phase_obst(ph));
557 res->pkillers = NULL;
559 res->parent_list = plist_obstack_new(phase_obst(ph));
562 //res->dvg_desc_list = plist_obstack_new(phase_obst(ph));
563 //res->dvg_desc = NULL;
565 res->kill_value_list = plist_obstack_new(phase_obst(ph));
566 //res->dvg_user_list = plist_obstack_new(phase_obst(ph));
567 //res->dvg_pkiller_list = plist_obstack_new(phase_obst(ph));
584 * Collect all nodes data dependent on current node.
586 static void collect_descendants(rss_t *rss, rss_irn_t *rirn, ir_node *irn, int *got_sink, unsigned cur_desc_walk) {
587 const ir_edge_t *edge;
588 rss_irn_t *cur_node = get_rss_irn(rss, irn);
589 ir_node *block = rss->block;
590 ir_edge_kind_t ekind[2] = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
593 if (cur_node->desc_walk >= cur_desc_walk)
595 cur_node->desc_walk = cur_desc_walk;
597 /* Stop at Barriers and Keeps */
598 if (be_is_Barrier(irn) || be_is_Keep(irn))
601 /* loop over normal and dependency edges */
602 for (i = 0; i < 2; ++i) {
603 foreach_out_edge_kind(irn, edge, ekind[i]) {
604 ir_node *user = get_edge_src_irn(edge);
606 /* skip ignore nodes as they do not really contribute to register pressure */
607 if (arch_irn_is(rss->arch_env, user, ignore))
611 if (get_irn_mode(user) != mode_X && arch_get_irn_reg_class(rss->arch_env, user, -1) == rss->cls)
612 collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
615 /* check if user lives in block and is not a control flow node */
616 if (get_nodes_block(user) == block) {
617 if (! plist_has_value(rirn->descendant_list, user)) {
618 plist_insert_back(rirn->descendant_list, user);
619 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", user));
621 collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
623 else if (! *got_sink) {
624 /* user lives out of block: add sink as descendant if not already done */
625 plist_insert_back(rirn->descendant_list, _sink);
627 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", _sink));
635 * Handles a single consumer.
637 static void collect_single_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *consumer, int *got_sink) {
638 ir_node *block = rss->block;
639 ir_edge_kind_t ekind[2] = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
641 assert(! is_Proj(consumer) && "Cannot handle Projs");
643 if (get_nodes_block(consumer) == block) {
644 if (! arch_irn_is(rss->arch_env, consumer, ignore)) {
645 plist_insert_back(rss_irn->consumer_list, consumer);
646 DBG((rss->dbg, LEVEL_2, "\t\tconsumer %+F\n", consumer));
650 rss_irn->live_out = 1;
651 DBG((rss->dbg, LEVEL_2, "\t\tlive out %+F", consumer));
653 plist_insert_back(rss_irn->consumer_list, _sink);
655 DB((rss->dbg, LEVEL_2, ", %+F added instead", _sink));
657 DB((rss->dbg, LEVEL_2, "\n"));
662 * Collect all nodes consuming the value(s) produced by current node.
664 static void collect_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *irn, int *got_sink) {
665 const ir_edge_t *edge;
667 ir_edge_kind_t ekind[2] = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
668 rss_irn_t *cur_node = get_rss_irn(rss, irn);
670 if (cur_node->havecons)
672 cur_node->havecons = 1;
674 for (i = 0; i < 2; ++i) {
675 foreach_out_edge_kind(irn, edge, ekind[i]) {
676 ir_node *consumer = get_edge_src_irn(edge);
678 if (is_Proj(consumer)) {
679 if (arch_get_irn_reg_class(rss->arch_env, consumer, -1) == rss->cls)
680 collect_consumer(rss, rss_irn, consumer, got_sink);
683 collect_single_consumer(rss, rss_irn, consumer, got_sink);
690 * We need to build the consumer and descendant list for _source.
692 static void collect_node_info_source(rss_t *rss) {
693 const ir_edge_t *edge;
694 rss_irn_t *rirn = get_rss_irn(rss, _source);
695 ir_node *block = rss->block;
700 foreach_out_edge(block, edge) {
701 ir_node *irn = get_edge_src_irn(edge);
704 for (i = get_irn_arity(n) - 1; i >= 0; --i) {
710 static void reset_node_info(rss_irn_t *rss_irn) {
711 /* Beware: array data resides on phase obstack, so it gets removed automatically */
713 plist_clear(rss_irn->consumer_list);
714 rss_irn->consumer = NULL;
716 plist_clear(rss_irn->parent_list);
717 rss_irn->parents = NULL;
719 plist_clear(rss_irn->descendant_list);
720 rss_irn->descendants = NULL;
722 plist_clear(rss_irn->pkiller_list);
723 rss_irn->pkillers = NULL;
725 plist_clear(rss_irn->kill_value_list);
727 rss_irn->killer = NULL;
728 rss_irn->live_out = 0;
729 rss_irn->visited = 0;
730 rss_irn->handled = 0;
735 * Collects all consumer and descendant of a irn.
737 static void collect_node_info(rss_t *rss, ir_node *irn) {
738 static unsigned cur_desc_walk = 0;
739 rss_irn_t *rss_irn = get_rss_irn(rss, irn);
742 assert(! is_Proj(irn) && "Cannot handle Projs.");
744 /* check if node info is already available */
745 if (rss_irn->handled)
746 phase_reinit_single_irn_data(&rss->ph, irn);
748 DBG((rss->dbg, LEVEL_1, "\tcomputing consumers of %+F:\n", irn));
750 /* collect all consumer */
752 collect_consumer(rss, rss_irn, irn, &got_sink);
754 /* build sorted consumer array */
755 rss_irn->consumer = build_sorted_array_from_list(rss_irn->consumer_list, phase_obst(&rss->ph));
757 DBG((rss->dbg, LEVEL_1, "\tcompute descendants of %+F:\n", irn));
759 /* collect descendants */
761 collect_descendants(rss, rss_irn, irn, &got_sink, ++cur_desc_walk);
763 /* build sorted descendant array */
764 rss_irn->descendants = build_sorted_array_from_list(rss_irn->descendant_list, phase_obst(&rss->ph));
766 rss_irn->handled = 1;
770 * Checks if v is a potential killer of u.
771 * v is in pkill(u) iff descendants(v) cut consumer(u) is v
773 * @param rss The rss object
774 * @param v The node to check for killer
775 * @param u The potentially killed value
776 * @return 1 if v is in pkill(u), 0 otherwise
778 static int is_potential_killer(rss_t *rss, rss_irn_t *v, rss_irn_t *u) {
783 assert(is_Sink(v->irn) || ((plist_count(v->descendant_list) > 0 && v->descendants) || 1));
784 assert(is_Sink(u->irn) || ((plist_count(u->consumer_list) > 0 && u->consumer) || 1));
786 /* as we loop over the list: loop over the shorter one */
787 if (plist_count(v->descendant_list) > plist_count(u->consumer_list)) {
788 list = u->consumer_list;
789 arr = v->descendants;
792 list = v->descendant_list;
796 /* for each list element: try to find element in array */
797 foreach_plist(list, el) {
798 ir_node *irn = plist_element_get_value(el);
799 ir_node *match = BSEARCH_IRN_ARR(irn, arr);
801 if (match && match != irn)
809 * Update descendants, consumer and pkiller list for the given irn.
811 static void update_node_info(rss_t *rss, ir_node *irn, ir_node *pk_irn) {
812 rss_irn_t *node = get_rss_irn(rss, irn);
813 rss_irn_t *pkiller = get_rss_irn(rss, pk_irn);
815 /* add consumer and rebuild the consumer array */
816 if (! plist_has_value(node->consumer_list, pk_irn)) {
817 plist_insert_back(node->consumer_list, pk_irn);
818 node->consumer = build_sorted_array_from_list(node->consumer_list, phase_obst(&rss->ph));
821 /* add pk_irn as descendant, add it's descendants to irn's and rebuild array */
822 if (! plist_has_value(node->descendant_list, pk_irn)) {
825 plist_insert_back(node->descendant_list, pk_irn);
827 foreach_plist(pkiller->descendant_list, el) {
828 ir_node *desc = plist_element_get_value(el);
830 if (! plist_has_value(node->descendant_list, desc))
831 plist_insert_back(node->descendant_list, desc);
834 node->descendants = build_sorted_array_from_list(node->descendant_list, phase_obst(&rss->ph));
838 /* check, if pkiller is a potential killer of irn */
839 if (is_potential_killer(rss, pkiller, node)) {
840 if (! plist_has_value(node->pkiller_list, pk_irn))
841 plist_insert_back(node->pkiller_list, pk_irn);
842 if (! plist_has_value(pkiller->kill_value_list, irn))
843 plist_insert_back(pkiller->kill_value_list, irn);
844 DBG((rss->dbg, LEVEL_1, "\tpotential killer %+F added to %+F\n", pk_irn, irn));
850 * Compute the potential killing set PK.
852 static void compute_pkill_set(rss_t *rss) {
853 plist_element_t *u_el, *v_el;
855 foreach_plist(rss->nodes, u_el) {
856 ir_node *u_irn = plist_element_get_value(u_el);
857 rss_irn_t *u = get_rss_irn(rss, u_irn);
859 DBG((rss->dbg, LEVEL_1, "\tcomputing potential killers of %+F:\n", u_irn));
861 /* check each consumer if it is a potential killer */
862 foreach_plist(u->consumer_list, v_el) {
863 ir_node *v_irn = plist_element_get_value(v_el);
864 rss_irn_t *v = get_rss_irn(rss, v_irn);
866 /* check, if v is a potential killer of u */
867 if (is_potential_killer(rss, v, u)) {
868 if (! plist_has_value(u->pkiller_list, v_irn))
869 plist_insert_back(u->pkiller_list, v_irn);
870 if (! plist_has_value(v->kill_value_list, u_irn))
871 plist_insert_back(v->kill_value_list, u_irn);
872 DBG((rss->dbg, LEVEL_2, "\t\tpotential killer %+F\n", v_irn));
879 if (rss->opts->dump_flags & RSS_DUMP_PKG)
880 debug_vcg_dump_pkg(rss, NULL, 0);
884 * Build set of killing edges (from values to their potential killers)
886 static void build_kill_edges(rss_t *rss, pset *epk) {
887 plist_element_t *el, *k_el;
889 foreach_plist(rss->nodes, el) {
890 ir_node *irn = plist_element_get_value(el);
891 rss_irn_t *rirn = get_rss_irn(rss, irn);
893 DBG((rss->dbg, LEVEL_3, "\t\tbuilding kill edges for %+F:\n", irn));
895 foreach_plist(rirn->pkiller_list, k_el) {
896 ir_node *pkiller = plist_element_get_value(k_el);
897 rss_edge_t *ke = obstack_alloc(phase_obst(&rss->ph), sizeof(*ke));
903 DBG((rss->dbg, LEVEL_3, "\t\t\ttarget %+F\n", pkiller));
905 pset_insert(epk, ke, HASH_RSS_EDGE(ke));
910 /* print the given cbc for debugging purpose */
911 static void debug_print_cbc(firm_dbg_module_t *mod, cbc_t *cbc) {
915 DBG((mod, LEVEL_3, "\t\tS = set of parents:\n"));
916 foreach_nodeset(cbc->parents, n) {
917 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
919 DBG((mod, LEVEL_3, "\t\tT = set of children:\n"));
920 foreach_nodeset(cbc->children, n) {
921 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
923 DBG((mod, LEVEL_3, "\t\tE = Edges from producers to consumers\n"));
924 foreach_pset(cbc->kill_edges, ke) {
925 DBG((mod, LEVEL_3, "\t\t\t%+F -> %+F\n", ke->src, ke->tgt));
930 * Construct the bipartite decomposition.
931 * Sid-Ahmed-Ali Touati, Phd Thesis
932 * Register Pressure in Instruction Level Parallelism, p. 71
934 static void compute_bipartite_decomposition(rss_t *rss) {
935 pset *epk = new_pset(cmp_rss_edges, 10);
940 DBG((rss->dbg, LEVEL_1, "\tcomputing bipartite decomposition:\n"));
942 build_kill_edges(rss, epk);
944 foreach_plist(rss->nodes, el) {
945 ir_node *u_irn = plist_element_get_value(el);
946 rss_irn_t *u = get_rss_irn(rss, u_irn);
951 plist_element_t *el2;
953 rss_edge_t *kedge_root = NULL;
954 ir_node *t_irn, *s_irn;
956 if (u->visited || u_irn == _sink)
959 DBG((rss->dbg, LEVEL_2, "\t\t%+F choosen:\n", u_irn));
961 cbc = obstack_alloc(phase_obst(&rss->ph), sizeof(*cbc));
964 /* initialize S_cb */
965 cbc->parents = new_nodeset(5);
966 nodeset_insert(cbc->parents, u_irn);
967 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (init)\n", u_irn));
970 cbc->kill_edges = new_pset(cmp_rss_edges, 5);
972 /* each parent gets killed by at least one value from children */
974 /* T_cb = PK_successors(u) */
975 cbc->children = new_nodeset(5);
976 foreach_plist(u->pkiller_list, el2) {
977 nodeset_insert(cbc->children, plist_element_get_value(el2));
978 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (init)\n", plist_element_get_value(el2)));
982 Now: insert the parents of all children into the parent set
983 and insert the children of all parents into the children set
984 until the sets don't change any more
986 while (p_change || c_change) {
987 p_change = c_change = 0;
989 /* accumulate parents */
990 foreach_nodeset(cbc->children, t_irn) {
991 rss_irn_t *t = get_rss_irn(rss, t_irn);
992 plist_element_t *kvl_el;
994 foreach_pset(epk, k_edge) {
995 ir_node *val = k_edge->src;
997 if (k_edge->tgt == t_irn && ! nodeset_find(cbc->parents, k_edge->src)) {
998 nodeset_insert(cbc->parents, val);
1000 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents\n", val));
1005 /* accumulate children */
1006 foreach_nodeset(cbc->parents, s_irn) {
1007 rss_irn_t *s = get_rss_irn(rss, s_irn);
1008 plist_element_t *pkl_el;
1010 foreach_plist(s->pkiller_list, pkl_el) {
1011 ir_node *val = plist_element_get_value(pkl_el);
1013 if (! nodeset_find(cbc->children, val)) {
1014 nodeset_insert(cbc->children, val);
1016 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children\n", val));
1022 /* mark all parent values as visited */
1023 foreach_nodeset(cbc->parents, s_irn) {
1024 rss_irn_t *s = get_rss_irn(rss, s_irn);
1026 /* assure bipartite property */
1027 if (nodeset_find(cbc->children, s_irn)) {
1028 nodeset_remove(cbc->children, s_irn);
1029 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F removed from to children\n", s_irn));
1034 foreach_pset(epk, k_edge) {
1035 if (nodeset_find(cbc->parents, k_edge->src) && nodeset_find(cbc->children, k_edge->tgt)) {
1036 pset_insert(cbc->kill_edges, k_edge, HASH_RSS_EDGE(k_edge));
1038 Link all k_edges which are about to be removed together.
1039 Beware: pset_remove kills the iterator.
1041 k_edge->next = kedge_root;
1042 kedge_root = k_edge;
1046 /* remove all linked k_edges */
1047 for (; kedge_root; kedge_root = kedge_root->next) {
1048 pset_remove(epk, kedge_root, HASH_RSS_EDGE(kedge_root));
1051 /* add the connected bipartite component */
1052 if (nodeset_count(cbc->parents) > 0 && nodeset_count(cbc->children) > 0 && pset_count(cbc->kill_edges) > 0)
1053 pset_insert(rss->cbc_set, cbc, (unsigned)cbc->nr);
1054 DBG((rss->dbg, LEVEL_2, "\tbipartite component %d inserted:\n", cbc->nr));
1056 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2)
1057 debug_print_cbc(rss->dbg, cbc);
1061 if (rss->opts->dump_flags & RSS_DUMP_CBC)
1062 debug_vcg_dump_bipartite(rss);
1068 * Select the child with the maximum cost.
1070 static child_t *select_child_max_cost(rss_t *rss, nodeset *x, nodeset *y, child_t *t, cbc_t *cbc) {
1072 float max_cost = -1.0f;
1074 DBG((rss->dbg, LEVEL_2, "\t\tcomputing children costs:\n"));
1076 foreach_nodeset(cbc->children, child) {
1077 rss_irn_t *r_child = get_rss_irn(rss, child);
1078 int num_unkilled_parents = 0;
1079 int num_descendants;
1083 /* get the number of unkilled parents */
1084 foreach_pset(cbc->kill_edges, k_edge) {
1085 if (k_edge->tgt == child && nodeset_find(x, k_edge->src))
1086 ++num_unkilled_parents;
1089 cost = (float)num_unkilled_parents;
1091 num_descendants = plist_count(r_child->descendant_list) + nodeset_count(y);
1093 if (num_descendants > 0)
1094 cost /= (float)num_descendants;
1096 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F, #desc %d, cost %.3f\n", child, num_descendants, cost));
1098 if (cost > max_cost) {
1109 * Remove all parents from x which are killed by t_irn.
1111 static void remove_covered_parents(rss_t *rss, nodeset *x, ir_node *t_irn, cbc_t *cbc) {
1112 rss_irn_t *t = get_rss_irn(rss, t_irn);
1115 DBG((rss->dbg, LEVEL_2, "\t\tremoving parents covered by %+F:\n", t_irn));
1117 foreach_pset(cbc->kill_edges, k_edge) {
1118 if (k_edge->tgt == t_irn && nodeset_find(x, k_edge->src)) {
1119 nodeset_remove(x, k_edge->src);
1120 plist_insert_back(t->parent_list, k_edge->src);
1121 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", k_edge->src));
1126 static void update_cumulated_descendent_values(rss_t *rss, nodeset *y, ir_node *t_irn) {
1127 rss_irn_t *t = get_rss_irn(rss, t_irn);
1128 plist_element_t *el;
1130 DBG((rss->dbg, LEVEL_2, "\t\tupdating cumulated descendant value of %+F:\n", t_irn));
1132 foreach_plist(t->descendant_list, el) {
1133 nodeset_insert(y, plist_element_get_value(el));
1134 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", plist_element_get_value(el)));
1139 * Greedy-k: a heuristics for the MMA problem
1141 static void compute_killing_function(rss_t *rss) {
1143 struct obstack obst;
1145 obstack_init(&obst);
1147 rss->cbc_set = pset_new_ptr(5);
1148 compute_bipartite_decomposition(rss);
1150 /* for all bipartite components do: */
1151 foreach_pset(rss->cbc_set, cbc) {
1153 nodeset *x = new_nodeset(10);
1154 nodeset *y = new_nodeset(10);
1155 child_t **sks = NEW_ARR_F(child_t *, 20);
1160 DBG((rss->dbg, LEVEL_1, "\tcomputing SKS for cbc %d:\n", cbc->nr));
1161 DBG((rss->dbg, LEVEL_2, "\t\tinitializing parents X:\n"));
1163 /* X = S_cb (all parents are initially uncovered) */
1164 foreach_nodeset(cbc->parents, p) {
1165 nodeset_insert(x, p);
1166 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", p));
1169 /* while X not empty */
1170 while (nodeset_count(x) > 0) {
1171 child_t *t = obstack_alloc(&obst, sizeof(*t));
1172 memset(t, 0, sizeof(*t));
1174 t = select_child_max_cost(rss, x, y, t, cbc);
1176 if (cur_len >= cur_size) {
1177 ARR_EXTO(child_t *, sks, cur_size * 2);
1181 DBG((rss->dbg, LEVEL_2, "\t\tinsert child %+F (%.3f) into SKS at pos %d\n", t->irn, t->cost, cur_len));
1184 remove_covered_parents(rss, x, t->irn, cbc);
1185 update_cumulated_descendent_values(rss, y, t->irn);
1188 ARR_SHRINKLEN(sks, cur_len);
1190 /* sort SKS in increasing cost order */
1191 qsort(sks, cur_len, sizeof(sks[0]), cmp_child_costs);
1193 DBG((rss->dbg, LEVEL_2, "\tprocessing SKS for cbc %d:\n", cbc->nr));
1195 /* build killing function */
1196 for (i = cur_len - 1; i >= 0; --i) { /* loop over sks in decreasing cost order */
1197 child_t *t = sks[i];
1198 rss_irn_t *rt = get_rss_irn(rss, t->irn);
1199 plist_element_t *p_el;
1201 DBG((rss->dbg, LEVEL_3, "\t\t\tkiller %+F (%.3f):\n", t->irn, t->cost));
1203 /* kill all unkilled parents of t */
1204 foreach_plist(rt->parent_list, p_el) {
1205 ir_node *par = plist_element_get_value(p_el);
1206 rss_irn_t *rpar = get_rss_irn(rss, par);
1208 if (is_Sink(rpar->killer)) {
1209 rpar->killer = t->irn;
1210 DBG((rss->dbg, LEVEL_2, "\t\tkill %+F\n", rpar->irn));
1213 DBG((rss->dbg, LEVEL_3, "\t\t\tkeeping %+F as killer for %+F\n", rpar->killer, rpar->irn));
1223 if (rss->opts->dump_flags & RSS_DUMP_KILL)
1224 debug_vcg_dump_kill(rss);
1226 del_pset(rss->cbc_set);
1227 obstack_free(&obst, NULL);
1231 * Adds the edge src -> tgt to the dvg. Checks if reverse edge is already there (asserts).
1233 static INLINE void add_dvg_edge(rss_t *rss, dvg_t *dvg, ir_node *src, ir_node *tgt, int have_source) {
1234 rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1238 nodeset_insert(dvg->nodes, src);
1240 nodeset_insert(dvg->nodes, tgt);
1242 /* add an edge to our killer */
1243 dvg_edge->src = src;
1244 dvg_edge->tgt = tgt;
1245 dvg_edge->next = NULL;
1249 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1251 /* add the edge to the DVG */
1252 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", src, tgt));
1253 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1257 * Computes the disjoint value DAG (DVG).
1258 * BEWARE: It is not made explicitly clear in the Touati paper,
1259 * but the DVG is meant to be build from the KILLING DAG
1261 static void compute_dvg(rss_t *rss, dvg_t *dvg) {
1262 plist_element_t *el;
1264 DBG((rss->dbg, LEVEL_1, "\tcomputing DVG:\n"));
1266 foreach_plist(rss->nodes, el) {
1267 ir_node *u_irn = plist_element_get_value(el);
1268 rss_irn_t *u = get_rss_irn(rss, u_irn);
1269 rss_irn_t *u_killer = get_rss_irn(rss, u->killer);
1272 /* TODO: omit nodes only having sink as pkiller and killing no one */
1274 /* add an edge to killer */
1275 add_dvg_edge(rss, dvg, u_irn, u->killer, 0);
1277 if (is_Sink(u->killer))
1280 /* We add an edge to every killer, from where we could be reached. */
1281 for (i = ARR_LEN_SAFE(u_killer->descendants) - 1; i >= 0; --i) {
1282 add_dvg_edge(rss, dvg, u_irn, u_killer->descendants[i], 1);
1287 foreach_plist(rss->nodes, el2) {
1288 ir_node *v_irn = plist_element_get_value(el2);
1291 There is an edge (u, v) in the DVG iff v is a descendant of the killer(u).
1293 if (BSEARCH_IRN_ARR(v_irn, u_kill->descendants)) {
1294 rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1297 /* insert the user into the DVG and append it to the user list of u */
1298 nodeset_insert(dvg->nodes, v_irn);
1299 if (! plist_has_value(u->dvg_user_list, v_irn))
1300 plist_insert_back(u->dvg_user_list, v_irn);
1302 dvg_edge->src = u_irn;
1303 dvg_edge->tgt = v_irn;
1304 dvg_edge->next = NULL;
1309 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1311 /* add the edge to the DVG */
1312 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", u_irn, v_irn));
1313 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1319 if (rss->opts->dump_flags & RSS_DUMP_DVG)
1320 debug_vcg_dump_dvg(rss, dvg);
1324 * Updates the dvg structure when a serialization edge from src -> tgt is added.
1326 static void update_dvg(rss_t *rss, dvg_t *dvg, rss_irn_t *src, rss_irn_t *tgt) {
1329 add_dvg_edge(rss, dvg, tgt->irn, src->irn, 1);
1331 for (i = ARR_LEN_SAFE(src->descendants) - 1; i >= 0; --i) {
1332 add_dvg_edge(rss, dvg, tgt->irn, src->descendants[i], 1);
1338 * Accumulate all descendants for root into list.
1340 static void accumulate_dvg_descendant_values(rss_t *rss, rss_irn_t *root, plist_t *list) {
1341 if (plist_count(root->dvg_user_list) > 0) {
1342 plist_element_t *el;
1344 foreach_plist(root->dvg_user_list, el) {
1345 ir_node *v_irn = plist_element_get_value(el);
1346 rss_irn_t *v = get_rss_irn(rss, v_irn);
1348 /* add v as descendant */
1349 if (! plist_has_value(list, v_irn)) {
1350 plist_insert_back(list, v_irn);
1351 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG descendant %+F\n", v_irn));
1354 /* accumulate v's descendants */
1355 accumulate_dvg_descendant_values(rss, v, list);
1361 * Builds the list of potential killers for each node
1363 * Needs the descendant list for all user as sorted array.
1365 static void build_dvg_pkiller_list(rss_t *rss, dvg_t *dvg) {
1368 foreach_nodeset(dvg->nodes, irn) {
1369 rss_irn_t *node = get_rss_irn(rss, irn);
1370 plist_element_t *el, *el2;
1372 DBG((rss->dbg, DEBUG_DVG, "\t\tbuilding pkiller list for %+F\n", irn));
1374 /* check each user */
1375 foreach_plist(node->dvg_user_list, el) {
1376 ir_node *u_irn = plist_element_get_value(el);
1378 /* is the current user u_irn not a descendant of any other user -> pkiller */
1379 foreach_plist(node->dvg_user_list, el2) {
1380 ir_node *v_irn = plist_element_get_value(el2);
1381 rss_irn_t *v = get_rss_irn(rss, v_irn);
1384 ! BSEARCH_IRN_ARR(u_irn, v->dvg_desc) &&
1385 ! plist_has_value(node->dvg_pkiller_list, u_irn))
1387 plist_insert_back(node->dvg_pkiller_list, u_irn);
1388 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG pkiller %+F\n", u_irn));
1393 node->dvg_pkiller = build_sorted_array_from_list(node->dvg_pkiller_list, phase_obst(&rss->ph));
1397 if (firm_dbg_get_mask(rss->dbg) & DEBUG_DVG)
1398 debug_vcg_dump_dvg_pkiller(rss, dvg);
1405 * Compute the maximal antichain of the current DVG.
1406 * This is a reimplementation of the MAXIMAL_ANTI_CHAIN function
1407 * from the DDG library 1.1 (DAG.cpp).
1409 static nodeset *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration) {
1410 int n = nodeset_count(dvg->nodes);
1411 int *assignment = alloca(n * sizeof(assignment[0]));
1412 int *assignment_rev = alloca(n * sizeof(assignment_rev[0]));
1413 int *idx_map = alloca(n * sizeof(idx_map[0]));
1414 hungarian_problem_t *bp;
1415 nodeset *values, *temp;
1417 int i, j, cost, cur_chain, res;
1418 rss_edge_t *dvg_edge;
1420 #define MAP_IDX(irn) bsearch_for_index(get_irn_idx(irn), idx_map, n, 1)
1422 if (pset_count(dvg->edges) == 0)
1425 bp = hungarian_new(n, n, 1, HUNGARIAN_MATCH_NORMAL);
1428 At first, we build an index map for the nodes in the DVG,
1429 because we cannot use the irn idx therefore as the resulting
1430 bipartite data structure would have around 1.2GB.
1431 So we limit the size to the number of nodes we have in the DVG
1432 and build a sorted index map for their irn indices.
1435 foreach_nodeset(dvg->nodes, u_irn) {
1436 idx_map[i++] = get_irn_idx(u_irn);
1438 qsort(idx_map, n, sizeof(idx_map[0]), cmp_int);
1440 foreach_pset(dvg->edges, dvg_edge) {
1441 int idx_u = MAP_IDX(dvg_edge->src);
1442 int idx_v = MAP_IDX(dvg_edge->tgt);
1444 /* add the entry to the bipartite data structure */
1445 hungarian_add(bp, idx_u, idx_v, 1);
1446 DBG((rss->dbg, LEVEL_3, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1447 idx_u, dvg_edge->src, idx_v, dvg_edge->tgt));
1451 Add a bipartite entry for each pair of nodes (u, v), where exists a
1452 path in the DVG from u to v, ie. connect all descendants(v) to v.
1453 desc_dvg(v) = accumulated descendants of all z in dvg_user_list(v)
1455 foreach_nodeset(dvg->nodes, u_irn) {
1456 rss_irn_t *u = get_rss_irn(rss, u_irn);
1457 int idx_u_irn = MAP_IDX(u_irn);
1459 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG descendants of %+F:\n", u_irn));
1461 //plist_clear(u->dvg_desc_list);
1462 //accumulate_dvg_descendant_values(rss, u, u->dvg_desc_list);
1465 FIXME: The array is build on the phase obstack and we cannot free the data.
1466 So the array get as many times allocated as this function is called.
1469 /* build the sorted array for faster searches */
1470 //u->dvg_desc = build_sorted_array_from_list(u->dvg_desc_list, phase_obst(&rss->ph));
1472 DBG((rss->dbg, DEBUG_MAX_AC, "\t\tadding bipartite entries of %+F:\n", u_irn));
1474 /* add the bipartite entries for all descendants */
1475 for (i = ARR_LEN_SAFE(u->dvg_desc) - 1; i >= 0; --i) {
1476 rss_irn_t *desc = get_rss_irn(rss, u->dvg_desc[i]);
1477 int idx_desc = MAP_IDX(u->dvg_desc[i]);
1479 /* add the entry to the bipartite data structure */
1480 hungarian_add(bp, idx_u_irn, idx_desc, 1);
1481 DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1482 idx_u_irn, u_irn, idx_desc, u->dvg_desc[i]));
1489 /* We want maximum cardinality matching */
1490 hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);
1493 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG pkiller:\n"));
1494 /* beware: the following function needs the dvg_desc array */
1495 build_dvg_pkiller_list(rss, dvg);
1498 DBG((rss->dbg, LEVEL_2, "\t\tcomputing bipartite matching\n"));
1500 The maximum cardinality bipartite matching gives us the minimal
1501 chain partition, which corresponds to the maximum anti chains.
1503 res = hungarian_solve(bp, assignment, &cost, 1);
1504 assert(res == 0 && "Bipartite matching failed!");
1507 memset(assignment_rev, -1, n * sizeof(assignment_rev[0]));
1509 /* build the reverse assignment, ie. foreach i -> j, add j -> i */
1510 for (i = 0; i < n; ++i) {
1511 if (assignment[i] >= 0) {
1512 int j = assignment[i];
1513 assignment_rev[j] = i;
1517 DBG((rss->dbg, LEVEL_2, "\t\tgot assignment with cost %d\n", cost));
1518 DBG((rss->dbg, LEVEL_3, "\t\t\tassignment --- reverse assignment\n", cost));
1519 for (i = 0; i < n; ++i) {
1520 DBG((rss->dbg, LEVEL_3, "\t\t\t%3d -> %3d %3d -> %3d\n", i, assignment[i], i, assignment_rev[i]));
1523 values = new_nodeset(10);
1525 /* Construction of the minimal chain partition */
1526 for (j = 0; j < n; ++j) {
1527 /* check nodes, which did not occur as target */
1528 if (assignment_rev[j] == -1) {
1529 int xj = idx_map[j];
1530 ir_node *xj_irn = get_idx_irn(rss->irg, xj);
1531 rss_irn_t *xj_rss = get_rss_irn(rss, xj_irn);
1532 chain_t *c = obstack_alloc(phase_obst(&rss->ph), sizeof(*c));
1535 /* there was no source for j -> we have a source of a new chain */
1536 nodeset_insert(values, xj_irn);
1538 c->elements = plist_obstack_new(phase_obst(&rss->ph));
1539 c->nr = cur_chain++;
1540 plist_insert_back(c->elements, xj_irn);
1544 DBG((rss->dbg, LEVEL_2, "\t\tstarting chain %d:\n", c->nr));
1545 DBG((rss->dbg, LEVEL_2, "\t\t\t%+F (%d)", xj_irn, j));
1547 /* follow chain, having j as source */
1549 while (assignment[source] >= 0) {
1550 int target = assignment[source];
1551 int irn_idx = idx_map[target];
1552 ir_node *irn = get_idx_irn(rss->irg, irn_idx);
1553 rss_irn_t *node = get_rss_irn(rss, irn);
1555 plist_insert_back(c->elements, irn);
1558 DB((rss->dbg, LEVEL_2, " -> %+F (%d)", irn, target));
1560 /* new source = last target */
1564 DB((rss->dbg, LEVEL_2, "\n"));
1569 Computing the maximal antichain: Select an element from each
1570 chain such, such it is parallel with the others.
1572 DBG((rss->dbg, LEVEL_1, "\t\tcomputing set of saturation values (MAX AC)\n"));
1573 DBG((rss->dbg, LEVEL_3, "\t\tstarting with:\n"));
1576 if (firm_dbg_get_mask(rss->dbg) & LEVEL_3)
1577 dump_nodeset(values, "\t\t\t");
1583 We need an explicit array for the values as
1584 we cannot iterate multiple times over the same
1585 set at the same time. :-(((((
1587 int n = nodeset_count(values);
1589 ir_node **val_arr = NEW_ARR_F(ir_node *, n);
1591 foreach_nodeset(values, u_irn)
1592 val_arr[i++] = u_irn;
1597 temp = new_nodeset(10);
1599 /* Select all nodes from current value set, having another node in the set as descendant. */
1600 for (i = 0; i < n; ++i) {
1601 rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1603 for (j = 0; j < n; ++j) {
1607 key.src = val_arr[i];
1608 key.tgt = val_arr[j];
1610 if (pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1611 /* v[j] is descendant of u -> remove u and break */
1612 nodeset_insert(temp, u->irn);
1613 nodeset_remove(values, u->irn);
1615 DBG((rss->dbg, LEVEL_3, "\t\t\tremoving %+F from values, adding it to temp\n", u->irn));
1623 /* Try to insert the chain predecessor of all selected u's */
1624 foreach_nodeset(temp, u_irn) {
1625 rss_irn_t *u = get_rss_irn(rss, u_irn);
1626 chain_t *c = u->chain;
1627 plist_element_t *el = plist_find_value(c->elements, u_irn);
1629 assert(el && "Missing element in chain!");
1631 /* If u has predecessor in chain: insert the predecessor */
1632 if (el = plist_element_get_prev(el)) {
1633 nodeset_insert(values, plist_element_get_value(el));
1634 DBG((rss->dbg, LEVEL_3, "\t\t\tadding %+F to values\n", plist_element_get_value(el)));
1640 } while (nodeset_count(temp) > 0);
1642 DBG((rss->dbg, LEVEL_2, "\t\tfinal set:\n"));
1644 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2) {
1645 dump_nodeset(values, "\t\t\t");
1649 if (rss->opts->dump_flags & RSS_DUMP_MAXAC)
1650 debug_vcg_dump_pkg(rss, values, iteration);
1660 * Computes the best serialization between two nodes of sat_vals.
1662 static serialization_t *compute_best_admissible_serialization(rss_t *rss, nodeset *sat_vals, serialization_t *ser, int num_regs) {
1663 int n = nodeset_count(sat_vals);
1664 int n_idx = ARR_LEN_SAFE(rss->idx_map);
1666 ir_node **val_arr = alloca(n * sizeof(val_arr[0]));
1667 bitset_t *bs_sv = bitset_alloca(n_idx);
1668 bitset_t *bs_vdesc = bitset_alloca(n_idx);
1669 bitset_t *bs_tmp = bitset_alloca(n_idx);
1670 bitset_t *bs_ukilldesc = bitset_alloca(n_idx);
1671 int best_benefit = INT_MAX;
1672 int best_omega2 = INT_MAX;
1673 int best_benefit_omega20 = INT_MAX;
1677 rss_edge_t min_benefit_edge;
1678 rss_edge_t min_omega20_edge;
1679 rss_irn_t *ser_u_omega1, *ser_v_omega1;
1680 rss_irn_t *ser_u_omega20, *ser_v_omega20;
1682 DBG((rss->dbg, LEVEL_1, "\tcomputing admissible serializations:\n"));
1685 We need an explicit array for the values as
1686 we cannot iterate multiple times over the same
1687 set at the same time. :-(((((
1690 foreach_nodeset(sat_vals, irn) {
1692 bitset_set(bs_sv, BLOCK_IDX_MAP(rss, irn));
1696 We build all admissible serializations and remember the best found so far.
1699 if v in pkiller(u): add edge from v to all other pkiller(u)
1700 else: for all vv in pkiller(u): add edge from v to vv if there exists no path from vv to v
1704 A node is unserializable if:
1705 - it has only one killer and this one is Sink
1706 - it kills no other values
1707 In this case there is no serialization which could
1708 reduce the registerpressure
1710 #define IS_UNSERIALIZABLE_NODE(rss_node) \
1711 ((plist_count(rss_node->pkiller_list) == 1) && \
1712 is_Sink(rss_node->killer) && \
1713 (plist_count(rss_node->kill_value_list) == 0))
1715 /* for all u in sat_vals */
1716 for (i = 0; i < n; ++i) {
1717 rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1718 int u_height = get_irn_height(rss->h, val_arr[i]);
1719 plist_element_t *el;
1721 /* ignore nodes where serialization does not help */
1722 if (IS_UNSERIALIZABLE_NODE(u)) {
1723 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", u->irn));
1727 /* accumulate all descendants of all pkiller(u) */
1728 bitset_clear_all(bs_ukilldesc);
1729 foreach_plist(u->pkiller_list, el) {
1730 ir_node *irn = plist_element_get_value(el);
1731 rss_irn_t *node = get_rss_irn(rss, irn);
1734 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, irn));
1738 for (k = ARR_LEN_SAFE(node->descendants) - 1; k >= 0; --k) {
1739 if (! is_Sink(node->descendants[k]))
1740 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, node->descendants[k]));
1744 /* for all v in sat_vals */
1745 for (j = 0; j < n; ++j) {
1746 ir_node *v_irn = val_arr[j];
1747 rss_irn_t *v = get_rss_irn(rss, v_irn);
1748 unsigned v_height = get_irn_height(rss->h, v_irn);
1749 int omega1, omega2, is_pkiller;
1751 /* v cannot be serialized with itself
1752 * ignore nodes where serialization does not help */
1753 if (i == j || IS_UNSERIALIZABLE_NODE(v)) {
1755 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", v->irn));
1759 /* get descendants of v */
1760 bitset_clear_all(bs_vdesc);
1761 bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v_irn));
1762 for (k = ARR_LEN_SAFE(v->descendants) - 1; k >= 0; --k) {
1763 if (! is_Sink(v->descendants[k]))
1764 bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v->descendants[k]));
1767 /* if v is in pkiller(u) */
1768 is_pkiller = plist_has_value(u->pkiller_list, v_irn);
1770 /* for all vv in pkiller(u) */
1771 foreach_plist(u->pkiller_list, el) {
1772 ir_node *vv_irn = plist_element_get_value(el);
1775 if (is_Sink(vv_irn))
1779 add_edge = vv_irn != v_irn && skip_Proj(vv_irn) != skip_Proj(v_irn);
1781 add_edge = ! heights_reachable_in_block(rss->h, skip_Proj(vv_irn), skip_Proj(v_irn));
1784 As we add an edge from vv -> v, we have to make sure,
1785 that there exists no path from v to vv.
1789 int vv_height = get_irn_height(rss->h, vv_irn);
1790 int mu1, mu2, critical_path_cost;
1793 mu1 = | descendants(v) cut sat_vals |
1794 the number of saturating values which cannot
1795 be simultaneously alive with u
1797 bitset_copy(bs_tmp, bs_vdesc);
1798 mu1 = bitset_popcnt(bitset_and(bs_tmp, bs_sv));
1801 mu2 = | accum_desc_all_pkiller(u) without descendants(v) |
1804 bitset_copy(bs_tmp, bs_ukilldesc);
1805 mu2 = bitset_popcnt(bitset_andnot(bs_tmp, bs_vdesc));
1811 /* omega1 = mu1 - mu2 */
1817 /* omega2 = increase of critical path */
1818 critical_path_cost =
1819 v_height /* longest path from v to sink */
1820 + rss->max_height - vv_height /* longest path from source to vv */
1824 If critical_path_cost > max_height -> the new edge
1825 would increase the longest critical path by the difference.
1827 omega2 = critical_path_cost > rss->max_height ? critical_path_cost - rss->max_height : 0;
1829 /* this keeps track of the edge with the best benefit */
1830 if (omega1 >= num_regs - n && omega1 < best_benefit) {
1831 min_benefit_edge.src = v_irn;
1832 min_benefit_edge.tgt = vv_irn;
1837 best_benefit = omega1;
1838 ser->new_killer = is_pkiller;
1841 /* this keeps track of the edge with the best omega1 costs where omega2 == 0 */
1842 if (omega2 == 0 && omega1 >= num_regs - n && omega1 < best_benefit_omega20) {
1843 min_omega20_edge.src = v_irn;
1844 min_omega20_edge.tgt = vv_irn;
1849 best_benefit_omega20 = omega1;
1850 ser->new_killer = is_pkiller;
1853 best_omega2 = MIN(best_omega2, omega2);
1855 DBG((rss->dbg, LEVEL_2, "\t\tfound %+F -> %+F (w1 %d, w2 %d)\n",
1856 v_irn, vv_irn, omega1, omega2));
1858 } /* for all vv in pkiller(u) */
1859 } /* for all v in sat_vals */
1860 } /* for all u in sat_vals */
1865 if (best_omega2 == 0) {
1866 ser->u = ser_u_omega20;
1867 ser->v = ser_v_omega20;
1868 ser->edge->src = min_omega20_edge.src;
1869 ser->edge->tgt = min_omega20_edge.tgt;
1870 ser->omega1 = best_benefit_omega20;
1871 ser->omega2 = best_omega2;
1874 ser->u = ser_u_omega1;
1875 ser->v = ser_v_omega1;
1876 ser->edge->src = min_benefit_edge.src;
1877 ser->edge->tgt = min_benefit_edge.tgt;
1878 ser->omega1 = best_benefit;
1879 ser->omega2 = best_omega2;
1884 #undef IS_UNSERIALIZABLE_NODE
1888 * Perform the value serialization heuristic and add all
1889 * computed serialization edges as dependencies to the irg.
1891 static void perform_value_serialization_heuristic(rss_t *rss) {
1892 bitset_t *arch_nonign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1893 bitset_t *abi_ign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1894 int available_regs, iteration;
1897 pset *ser_set = new_pset(cmp_rss_edges, 20);
1899 /* available_regs = R = |arch_non_ignore_regs cut ~abi_ignore_regs| */
1900 arch_put_non_ignore_regs(rss->arch_env, rss->cls, arch_nonign_bs);
1901 be_abi_put_ignore_regs(rss->abi, rss->cls, abi_ign_bs);
1902 bitset_andnot(arch_nonign_bs, abi_ign_bs);
1903 available_regs = bitset_popcnt(arch_nonign_bs);
1905 DBG((rss->dbg, LEVEL_1, "\n\t#available regs: %d\n\n", available_regs));
1908 At first we need to compute the disjoint value DAG (DVG = {V, E_dv}).
1909 V = set of all nodes we are currently interested in
1910 E_dv = there is an edge from u to v iff v is a descendant of killer(u), forall u, v in V
1912 dvg.nodes = new_nodeset(plist_count(rss->nodes));
1913 dvg.edges = new_pset(cmp_rss_edges, plist_count(rss->nodes) * 5);
1914 compute_dvg(rss, &dvg);
1917 Then we perform the heuristic serialization algorithm
1918 on the DVG which gives us all necessary serialization
1921 DBG((rss->dbg, LEVEL_1, "\tcomputing maximal antichain:\n"));
1923 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
1924 while (sat_vals && (nodeset_count(sat_vals) > available_regs)) {
1925 serialization_t *ser, best_ser;
1926 rss_edge_t *edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*edge));
1927 rss_irn_t *tgt, *src;
1928 ir_node *dep_src, *dep_tgt;
1930 best_ser.edge = edge;
1931 ser = compute_best_admissible_serialization(rss, sat_vals, &best_ser, available_regs);
1933 DBG((rss->dbg, LEVEL_1, "\tcurrent register saturation %d, target %d\n", nodeset_count(sat_vals), available_regs));
1936 DBG((rss->dbg, LEVEL_1, "\tno RS improving serialization found, breaking at iteration %d\n", iteration));
1940 /* Insert the serialization as dependency edge into the irg. */
1941 DBG((rss->dbg, LEVEL_1, "\tserializing %+F -> %+F with edge %+F -> %+F and cost %d, %d\n",
1942 ser->u->irn, ser->v->irn, ser->edge->src, ser->edge->tgt, ser->omega1, ser->omega2));
1944 if (pset_find(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge)))
1945 ir_printf("WARNING: serialization %+F -> %+F computed twice!\n", ser->edge->src, ser->edge->tgt);
1948 pset_insert(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge));
1950 /* update the dvg */
1951 update_dvg(rss, &dvg, ser->v, ser->u);
1952 update_dvg(rss, &dvg, get_rss_irn(rss, ser->edge->src), get_rss_irn(rss, ser->edge->tgt));
1953 del_nodeset(sat_vals);
1955 dep_src = skip_Proj(ser->edge->src);
1956 dep_tgt = ser->edge->tgt;
1957 add_irn_dep(dep_src, dep_tgt);
1959 /* Update descendants, consumer and pkillers of the target */
1960 update_node_info(rss, ser->edge->tgt, ser->edge->src);
1962 /* TODO: try to find a cheaper way for updating height information */
1963 rss->max_height = heights_recompute_block(rss->h, rss->block);
1965 /* Recompute the antichain for next serialization */
1966 DBG((rss->dbg, LEVEL_1, "\tre-computing maximal antichain:\n"));
1967 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
1970 del_nodeset(dvg.nodes);
1971 del_pset(dvg.edges);
1975 * Do initial calculations for a block.
1977 static void process_block(ir_node *block, void *env) {
1980 const ir_edge_t *edge;
1982 phase_init(&rss->ph, "rss block preprocessor", rss->irg, PHASE_DEFAULT_GROWTH, init_rss_irn);
1984 DBG((rss->dbg, LEVEL_1, "preprocessing block %+F\n", block));
1987 /* build an index map for all nodes in the current block */
1989 n = get_irn_n_edges(block);
1990 NEW_ARR_A(int *, rss->idx_map, n);
1991 foreach_out_edge(block, edge) {
1992 ir_node *irn = get_edge_src_irn(edge);
1993 rss->idx_map[i++] = get_irn_idx(irn);
1995 qsort(rss->idx_map, n, sizeof(rss->idx_map[0]), cmp_int);
1996 rss->max_height = heights_recompute_block(rss->h, block);
1998 /* loop over all register classes */
1999 for (i = arch_isa_get_n_reg_class(rss->arch_env->isa) - 1; i >= 0; --i) {
2000 const arch_register_class_t *cls = arch_isa_get_reg_class(rss->arch_env->isa, i);
2003 DBG((rss->dbg, LEVEL_1, "register class %s\n", arch_register_class_name(cls)));
2005 /* reset the list of interesting nodes */
2006 plist_clear(rss->nodes);
2007 plist_insert_back(rss->nodes, _sink);
2009 /* collect all nodes having a certain register class */
2010 foreach_out_edge(block, edge) {
2011 ir_node *irn = get_edge_src_irn(edge);
2015 - mode_T nodes (the projs are asked)
2016 - mode_X nodes (control flow nodes are always scheduled last)
2017 - Keeps (they are always immediately scheduled)
2018 - Phi (same as Keep)
2020 if (get_irn_mode(irn) == mode_T || be_is_Keep(irn) || is_Phi(irn))
2024 In case of a proj, we skip
2025 - Barrier (they are a Barrier :)
2027 - the Proj itself, as it's scheduled always with it's super node
2030 ir_node *pred = get_Proj_pred(irn);
2031 if (be_is_Barrier(pred) || is_Start(pred))
2035 if (! arch_irn_is(rss->arch_env, irn, ignore) && arch_get_irn_reg_class(rss->arch_env, irn, -1) == cls) {
2036 plist_insert_back(rss->nodes, skip_Proj(irn));
2038 /* calculate the descendants and consumer for each node in the block */
2039 collect_node_info(rss, skip_Proj(irn));
2043 /* compute the potential killing set PK(G) */
2044 compute_pkill_set(rss);
2046 /* compute the killing function k* */
2047 compute_killing_function(rss);
2050 Compute the heuristic value serialization and
2051 add the necessary dependencies to the irg.
2053 perform_value_serialization_heuristic(rss);
2056 phase_free(&rss->ph);
2061 * Register the options.
2063 void rss_register_options(lc_opt_entry_t *grp) {
2064 static int run_once = 0;
2065 lc_opt_entry_t *rss_grp;
2069 rss_grp = lc_opt_get_grp(grp, "rss");
2071 lc_opt_add_table(rss_grp, rss_option_table);
2074 #endif /* WITH_LIBCORE */
2077 * Preprocess the irg for scheduling.
2079 void rss_schedule_preparation(const be_irg_t *birg) {
2082 FIRM_DBG_REGISTER(rss.dbg, "firm.be.sched.rss");
2084 //firm_dbg_set_mask(rss.dbg, LEVEL_1 | LEVEL_2 | LEVEL_3);
2086 init_rss_special_nodes(birg->irg);
2088 rss.irg = birg->irg;
2089 rss.arch_env = birg->main_env->arch_env;
2090 rss.abi = birg->abi;
2091 rss.h = heights_new(birg->irg);
2092 rss.nodes = plist_new();
2093 rss.opts = &rss_options;
2094 irg_block_walk_graph(birg->irg, NULL, process_block, &rss);
2095 heights_free(rss.h);
2096 plist_free(rss.nodes);
2098 if (birg->main_env->options->dump_flags & DUMP_SCHED)
2099 be_dump(rss.irg, "-rss", dump_ir_block_graph);