2 * Implementation of a register saturating list scheduler
3 * as described in: Sid-Ahmed-Ali Touati
4 * Register Saturation in Superscalar and VLIW Codes
6 * @license This file protected by GPL - GNU GENERAL PUBLIC LICENSE.
7 * @author Christian Wuerdig
20 #include "irgraph_t.h"
22 #include "iredges_t.h"
24 #include "irphase_t.h"
30 #include "bipartite.h"
31 #include "hungarian.h"
38 #include "besched_t.h"
41 #include <libcore/lc_opts.h>
42 #include <libcore/lc_opts_enum.h>
43 #endif /* WITH_LIBCORE */
46 #define ARR_LEN_SAFE(arr) ((arr) != NULL ? ARR_LEN((arr)) : 0)
48 #define HASH_RSS_EDGE(edge) ((get_irn_node_nr((edge)->src) << 16) | (get_irn_node_nr((edge)->tgt) & 0xFFFF))
49 #define BSEARCH_IRN_ARR(val, arr) \
50 bsearch(&(val), (arr), ARR_LEN_SAFE((arr)), sizeof((arr)[0]), cmp_irn_idx)
52 #define BLOCK_IDX_MAP(rss, irn) bsearch_for_index(get_irn_idx((irn)), (rss)->idx_map, ARR_LEN_SAFE((rss)->idx_map), 1)
55 typedef struct _rss_opts_t {
59 /* Represents a child with associated costs */
60 typedef struct _child {
65 /* We need edges for several purposes. */
66 typedef struct _rss_edge {
72 /* Represents a connected bipartite component. */
74 nodeset *parents; /**< = S a set of value producers */
75 nodeset *children; /**< = T a set of value consumers */
76 pset *kill_edges; /**< = E a set of edges (t in T, s in S) such as each s in S gets killed by at least one t in T */
77 int nr; /**< a deterministic index for set insertion (used as hash) */
80 /* Represents a disjoint value DAG. */
86 /* Represents a chain of nodes. */
87 typedef struct _chain {
88 plist_t *elements; /**< List of chain elements */
89 int nr; /**< a deterministic index for set insertion (used as hash) */
92 typedef struct _rss_irn {
93 plist_t *consumer_list; /**< List of consumers */
94 ir_node **consumer; /**< Sorted consumer array (needed for faster access) */
96 plist_t *parent_list; /**< List of parents */
97 ir_node **parents; /**< Sorted parent array (needed for faster access) */
99 plist_t *descendant_list; /**< List of descendants */
100 ir_node **descendants; /**< Sorted descendant array (needed for faster access) */
102 plist_t *pkiller_list; /**< List of potential killers */
103 ir_node **pkillers; /**< Sorted pkiller array (needed for faster access) */
106 plist_t *dvg_desc_list; /**< List of all descendants in the DVG */
107 ir_node **dvg_desc; /**< Sorted dvg descendant array (needed for faster access) */
109 plist_t *dvg_pkiller_list; /**< List of potential killers in the DVG */
110 ir_node **dvg_pkiller; /**< Sorted dvg pkiller array (needed for faster access) */
112 plist_t *dvg_user_list; /**< List of users in the disjoint value DAG DVG */
114 plist_t *kill_value_list; /**< List of values getting potentially killed by this node */
116 ir_node *killer; /**< The selected unique killer */
117 ir_node *irn; /**< The corresponding firm node to this rss_irn */
119 chain_t *chain; /**< The chain, this node is associated to */
121 unsigned desc_walk; /**< visited flag for collecting descendants */
123 unsigned live_out : 1; /**< irn has consumers outside of it's block */
124 unsigned visited : 1; /**< visited flag for bipartite decomposition */
125 unsigned havecons : 1; /**< visited flag collect consumer */
126 unsigned handled : 1; /**< flag indicating whether or not the list structures have been build */
127 unsigned dumped : 1; /**< flag indication whether or not this node was dumped */
130 /* Represents a serialization edge with associated costs. */
131 typedef struct _serialization {
132 rss_irn_t *u; /* the top node */
133 rss_irn_t *v; /* the node about to be serialized after u */
134 rss_edge_t *edge; /* the edge selected for the serialization */
135 int omega1; /* estimated: available regs - RS reduction */
136 int omega2; /* increase in critical path length */
140 typedef struct _rss {
141 phase_t ph; /**< Phase to hold some data */
142 heights_t *h; /**< The current height object */
143 ir_graph *irg; /**< The irg to preprocess */
144 plist_t *nodes; /**< The list of interesting nodes */
145 const arch_env_t *arch_env; /**< The architecture environment */
146 be_abi_irg_t *abi; /**< The abi for this irg */
147 pset *cbc_set; /**< A set of connected bipartite components */
148 ir_node *block; /**< The current block in progress. */
149 int *idx_map; /**< Mapping irn indices to per block indices */
150 unsigned max_height; /**< maximum height in the current block */
151 rss_opts_t *opts; /**< The options */
152 const arch_register_class_t *cls; /**< The current register class */
153 DEBUG_ONLY(firm_dbg_module_t *dbg);
156 #define get_rss_irn(rss, irn) (phase_get_or_set_irn_data(&rss->ph, irn))
159 * We need some special nodes:
160 * a source and a sink for all live-in and live-out values of a block
169 static ir_node *_source = NULL;
170 static ir_node *_sink = NULL;
172 #define is_Source(irn) ((irn) == _source)
173 #define is_Sink(irn) ((irn) == _sink)
177 RSS_DUMP_CBC = 1 << 0,
178 RSS_DUMP_PKG = 1 << 1,
179 RSS_DUMP_KILL = 1 << 2,
180 RSS_DUMP_DVG = 1 << 3,
181 RSS_DUMP_MAXAC = 1 << 4,
182 RSS_DUMP_ALL = (RSS_DUMP_MAXAC << 1) - 1,
185 static rss_opts_t rss_options = {
190 static const lc_opt_enum_int_items_t dump_items[] = {
191 { "none", RSS_DUMP_NONE },
192 { "cbc", RSS_DUMP_CBC },
193 { "pkg", RSS_DUMP_PKG },
194 { "kill", RSS_DUMP_KILL },
195 { "dvg", RSS_DUMP_DVG },
196 { "maxac", RSS_DUMP_MAXAC },
197 { "all", RSS_DUMP_ALL },
201 static lc_opt_enum_int_var_t dump_var = {
202 &rss_options.dump_flags, dump_items
205 static const lc_opt_table_entry_t rss_option_table[] = {
206 LC_OPT_ENT_ENUM_MASK("dump", "dump phases (none, cbc, pkg, kill, dvg, maxac, all)", &dump_var),
209 #endif /* WITH_LIBCORE */
211 /******************************************************************************
213 * | | | | / _| | | (_)
214 * | |__ ___| |_ __ ___ _ __ | |_ _ _ _ __ ___| |_ _ ___ _ __ ___
215 * | '_ \ / _ \ | '_ \ / _ \ '__| | _| | | | '_ \ / __| __| |/ _ \| '_ \/ __|
216 * | | | | __/ | |_) | __/ | | | | |_| | | | | (__| |_| | (_) | | | \__ \
217 * |_| |_|\___|_| .__/ \___|_| |_| \__,_|_| |_|\___|\__|_|\___/|_| |_|___/
220 ******************************************************************************/
223 * Acquire opcodes and create source and sink nodes.
225 static void init_rss_special_nodes(ir_graph *irg) {
226 ir_node *block = get_irg_start_block(irg);
227 int iro_rss_base = get_next_ir_opcodes(iro_rss_last);
228 ir_op *op_rss_Source = new_ir_op(iro_rss_base + iro_rss_Source, "rss_Source", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
229 ir_op *op_rss_Sink = new_ir_op(iro_rss_base + iro_rss_Sink, "rss_Sink", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
230 _source = new_ir_node(NULL, irg, block, op_rss_Source, mode_ANY, 0, NULL);
231 _sink = new_ir_node(NULL, irg, block, op_rss_Sink, mode_ANY, 0, NULL);
234 static int cmp_int(const void *a, const void *b) {
238 return QSORT_CMP(*i1, *i2);
241 static int cmp_child_costs(const void *a, const void *b) {
242 const child_t *c1 = a;
243 const child_t *c2 = b;
245 return QSORT_CMP(c1->cost, c2->cost);
248 static int cmp_irn_idx(const void *a, const void *b) {
249 const ir_node *n1 = *(ir_node **)a;
250 const ir_node *n2 = *(ir_node **)b;
252 return QSORT_CMP(get_irn_idx(n1), get_irn_idx(n2));
255 static int cmp_rss_edges(const void *a, const void *b) {
256 const rss_edge_t *e1 = a;
257 const rss_edge_t *e2 = b;
259 return (e1->src != e2->src) || (e1->tgt != e2->tgt);
262 static int bsearch_for_index(int key, int *arr, size_t len, int force) {
266 while (right >= left) {
267 int idx = (left + right) / 2;
271 else if (key > arr[idx])
278 assert(0 && "Something is wrong, key not found.");
282 static ir_node **build_sorted_array_from_list(plist_t *irn_list, struct obstack *obst) {
285 int len = plist_count(irn_list);
286 ir_node **arr = NEW_ARR_D(ir_node *, obst, len);
288 /* copy the list into the array */
289 foreach_plist(irn_list, el) {
290 arr[i++] = plist_element_get_value(el);
293 /* sort the array by node index */
294 qsort(arr, len, sizeof(arr[0]), cmp_irn_idx);
299 /*****************************************************
302 * __| | ___| |__ _ _ __ _ __ _ _ _ __ __ _
303 * / _` |/ _ \ '_ \| | | |/ _` |/ _` | | '_ \ / _` |
304 * | (_| | __/ |_) | |_| | (_| | (_| | | | | | (_| |
305 * \__,_|\___|_.__/ \__,_|\__, |\__, |_|_| |_|\__, |
309 *****************************************************/
311 static void dump_nodeset(nodeset *ns, const char *prefix) {
313 foreach_nodeset(ns, irn) {
314 ir_fprintf(stderr, "%s%+F\n", prefix, irn);
318 static void build_file_name(rss_t *rss, const char *suffix, size_t suf_len, char *buf, size_t len) {
319 const char *irg_name;
322 irg_name = get_entity_name(get_irg_entity(rss->irg));
323 snprintf(buf, len - suf_len, "%s-%s-block-%ld",
324 irg_name, arch_register_class_name(rss->cls), get_irn_node_nr(rss->block));
328 /* Dumps all collected bipartite components of current irg as vcg. */
329 static void debug_vcg_dump_bipartite(rss_t *rss) {
333 static const char suffix[] = "-RSS-CBC.vcg";
335 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
336 f = fopen(file_name, "w");
338 ir_fprintf(f, "graph: { title: \"connected bipartite component graph of %+F\"\n", rss->irg);
339 fprintf(f, "display_edge_labels: no\n");
340 fprintf(f, "layoutalgorithm: mindepth\n");
341 fprintf(f, "manhattan_edges: yes\n\n");
343 foreach_pset(rss->cbc_set, cbc) {
347 fprintf(f, "graph: { titel: \"cbc %d\" label: \"cbc %d\" status:clustered color:yellow\n", cbc->nr, cbc->nr);
348 foreach_nodeset(cbc->parents, n) {
349 ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
351 foreach_nodeset(cbc->children, n) {
352 ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
354 foreach_pset(cbc->kill_edges, ke) {
355 ir_fprintf(f, "edge: { sourcename: \"n%d_%d\" targetname: \"n%d_%d\" }\n",
356 get_irn_node_nr(ke->src), cbc->nr, get_irn_node_nr(ke->tgt), cbc->nr);
364 /* Dump the computed killing function as vcg. */
365 static void debug_vcg_dump_kill(rss_t *rss) {
369 static const char suffix[] = "-RSS-KILL.vcg";
371 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
372 f = fopen(file_name, "w");
374 ir_fprintf(f, "graph: { title: \"computed kill graph of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
375 fprintf(f, "display_edge_labels: no\n");
376 fprintf(f, "layoutalgorithm: mindepth\n");
377 fprintf(f, "manhattan_edges: yes\n\n");
379 /* first: reset dumped flag of all nodes */
380 foreach_plist(rss->nodes, el) {
381 ir_node *irn = plist_element_get_value(el);
382 rss_irn_t *rirn = get_rss_irn(rss, irn);
386 /* dump all nodes and their killers */
387 foreach_plist(rss->nodes, el) {
388 ir_node *irn = plist_element_get_value(el);
389 rss_irn_t *rirn = get_rss_irn(rss, irn);
390 rss_irn_t *pk_rirn = get_rss_irn(rss, rirn->killer);
392 if (! rirn->dumped) {
393 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
397 if (! pk_rirn->dumped) {
398 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(rirn->killer), rirn->killer);
402 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
403 get_irn_node_nr(rirn->killer), get_irn_node_nr(irn));
410 /* Dumps the potential killing DAG (PKG) as vcg. */
411 static void debug_vcg_dump_pkg(rss_t *rss, nodeset *max_ac, int iteration) {
414 char *suffix = alloca(32);
415 static const char suffix1[] = "-RSS-PKG.vcg";
416 static const char suffix2[] = "-RSS-PKG-MAXAC.vcg";
420 snprintf(suffix, 32, "%s", suffix1);
423 snprintf(suffix, 32, "-%02d%s", iteration, suffix2);
426 build_file_name(rss, suffix, strlen(suffix) + 1, file_name, sizeof(file_name));
427 f = fopen(file_name, "w");
429 ir_fprintf(f, "graph: { title: \"potential killing DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
430 fprintf(f, "display_edge_labels: no\n");
431 fprintf(f, "layoutalgorithm: mindepth\n");
432 fprintf(f, "manhattan_edges: yes\n\n");
434 foreach_plist(rss->nodes, el) {
435 ir_node *irn = plist_element_get_value(el);
436 rss_irn_t *rirn = get_rss_irn(rss, irn);
438 plist_element_t *k_el;
440 /* dump selected saturating values in yellow */
441 if (max_ac && nodeset_find(max_ac, irn))
445 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F c%d\" %s }\n", get_irn_node_nr(irn), irn, rirn->chain->nr, c1);
447 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(irn), irn, c1);
450 foreach_plist(rirn->pkiller_list, k_el) {
451 ir_node *pkiller = plist_element_get_value(k_el);
452 rss_irn_t *pk_rirn = get_rss_irn(rss, pkiller);
455 if (max_ac && nodeset_find(max_ac, pkiller))
458 if (! pk_rirn->dumped) {
460 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F c%d\" %s }\n", get_irn_node_nr(pkiller), pkiller, pk_rirn->chain->nr, c2);
462 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(pkiller), pkiller, c2);
465 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
466 get_irn_node_nr(pkiller), get_irn_node_nr(irn));
473 /* Dumps the disjoint value DAG (DVG) as vcg. */
474 static void debug_vcg_dump_dvg(rss_t *rss, dvg_t *dvg) {
475 static const char suffix[] = "-RSS-DVG.vcg";
481 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
482 f = fopen(file_name, "w");
484 ir_fprintf(f, "graph: { title: \"disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
485 fprintf(f, "display_edge_labels: no\n");
486 fprintf(f, "layoutalgorithm: mindepth\n");
487 fprintf(f, "manhattan_edges: yes\n\n");
490 foreach_nodeset(dvg->nodes, irn) {
491 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
495 foreach_pset(dvg->edges, edge) {
496 rss_irn_t *src = get_rss_irn(rss, edge->src);
497 rss_irn_t *tgt = get_rss_irn(rss, edge->tgt);
499 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
500 get_irn_node_nr(edge->src), get_irn_node_nr(edge->tgt));
508 /* Dumps the PKG(DVG). */
509 static void debug_vcg_dump_dvg_pkiller(rss_t *rss, dvg_t *dvg) {
510 static const char suffix[] = "-RSS-DVG-PKG.vcg";
515 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
516 f = fopen(file_name, "w");
518 ir_fprintf(f, "graph: { title: \"PKG of disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
519 fprintf(f, "display_edge_labels: no\n");
520 fprintf(f, "layoutalgorithm: mindepth\n");
521 fprintf(f, "manhattan_edges: yes\n\n");
524 foreach_nodeset(dvg->nodes, irn) {
525 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
529 foreach_nodeset(dvg->nodes, irn) {
530 rss_irn_t *node = get_rss_irn(rss, irn);
533 foreach_plist(node->dvg_pkiller_list, el) {
534 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
535 get_irn_node_nr(plist_element_get_value(el)), get_irn_node_nr(irn));
545 * In case there is no rss information for irn, initialize it.
547 static void *init_rss_irn(phase_t *ph, ir_node *irn, void *old) {
548 rss_irn_t *res = old ? old : phase_alloc(ph, sizeof(res[0]));
550 res->descendant_list = plist_obstack_new(phase_obst(ph));
551 res->descendants = NULL;
553 res->consumer_list = plist_obstack_new(phase_obst(ph));
554 res->consumer = NULL;
556 res->pkiller_list = plist_obstack_new(phase_obst(ph));
557 res->pkillers = NULL;
559 res->parent_list = plist_obstack_new(phase_obst(ph));
562 //res->dvg_desc_list = plist_obstack_new(phase_obst(ph));
563 //res->dvg_desc = NULL;
565 res->kill_value_list = plist_obstack_new(phase_obst(ph));
566 //res->dvg_user_list = plist_obstack_new(phase_obst(ph));
567 //res->dvg_pkiller_list = plist_obstack_new(phase_obst(ph));
584 * Collect all nodes data dependent on current node.
586 static void collect_descendants(rss_t *rss, rss_irn_t *rirn, ir_node *irn, int *got_sink, unsigned cur_desc_walk) {
587 const ir_edge_t *edge;
588 rss_irn_t *cur_node = get_rss_irn(rss, irn);
589 ir_node *block = rss->block;
590 ir_edge_kind_t ekind[2] = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
593 if (cur_node->desc_walk >= cur_desc_walk)
595 cur_node->desc_walk = cur_desc_walk;
597 /* Stop at Barriers and Keeps */
598 if (be_is_Barrier(irn) || be_is_Keep(irn))
601 /* loop over normal and dependency edges */
602 for (i = 0; i < 2; ++i) {
603 foreach_out_edge_kind(irn, edge, ekind[i]) {
604 ir_node *user = get_edge_src_irn(edge);
606 /* skip ignore nodes as they do not really contribute to register pressure */
607 if (arch_irn_is(rss->arch_env, user, ignore))
611 if (get_irn_mode(user) != mode_X && arch_get_irn_reg_class(rss->arch_env, user, -1) == rss->cls)
612 collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
615 /* check if user lives in block and is not a control flow node */
616 if (get_nodes_block(user) == block) {
617 if (! plist_has_value(rirn->descendant_list, user)) {
618 plist_insert_back(rirn->descendant_list, user);
619 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", user));
621 collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
623 else if (! *got_sink) {
624 /* user lives out of block: add sink as descendant if not already done */
625 plist_insert_back(rirn->descendant_list, _sink);
627 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", _sink));
635 * Handles a single consumer.
637 static void collect_single_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *consumer, int *got_sink) {
638 ir_node *block = rss->block;
639 ir_edge_kind_t ekind[2] = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
641 assert(! is_Proj(consumer) && "Cannot handle Projs");
643 if (get_nodes_block(consumer) == block) {
644 if (! arch_irn_is(rss->arch_env, consumer, ignore)) {
645 plist_insert_back(rss_irn->consumer_list, consumer);
646 DBG((rss->dbg, LEVEL_2, "\t\tconsumer %+F\n", consumer));
650 rss_irn->live_out = 1;
651 DBG((rss->dbg, LEVEL_2, "\t\tlive out %+F", consumer));
653 plist_insert_back(rss_irn->consumer_list, _sink);
655 DB((rss->dbg, LEVEL_2, ", %+F added instead", _sink));
657 DB((rss->dbg, LEVEL_2, "\n"));
662 * Collect all nodes consuming the value(s) produced by current node.
664 static void collect_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *irn, int *got_sink) {
665 const ir_edge_t *edge;
667 ir_edge_kind_t ekind[2] = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
668 rss_irn_t *cur_node = get_rss_irn(rss, irn);
670 if (cur_node->havecons)
672 cur_node->havecons = 1;
674 for (i = 0; i < 2; ++i) {
675 foreach_out_edge_kind(irn, edge, ekind[i]) {
676 ir_node *consumer = get_edge_src_irn(edge);
678 if (is_Proj(consumer)) {
679 if (arch_get_irn_reg_class(rss->arch_env, consumer, -1) == rss->cls)
680 collect_consumer(rss, rss_irn, consumer, got_sink);
683 collect_single_consumer(rss, rss_irn, consumer, got_sink);
690 * We need to build the consumer and descendant list for _source.
692 static void collect_node_info_source(rss_t *rss) {
693 const ir_edge_t *edge;
694 rss_irn_t *rirn = get_rss_irn(rss, _source);
695 ir_node *block = rss->block;
700 foreach_out_edge(block, edge) {
701 ir_node *irn = get_edge_src_irn(edge);
704 for (i = get_irn_arity(n) - 1; i >= 0; --i) {
710 static void reset_node_info(rss_irn_t *rss_irn) {
711 /* Beware: array data resides on phase obstack, so it gets removed automatically */
713 plist_clear(rss_irn->consumer_list);
714 rss_irn->consumer = NULL;
716 plist_clear(rss_irn->parent_list);
717 rss_irn->parents = NULL;
719 plist_clear(rss_irn->descendant_list);
720 rss_irn->descendants = NULL;
722 plist_clear(rss_irn->pkiller_list);
723 rss_irn->pkillers = NULL;
725 plist_clear(rss_irn->kill_value_list);
727 rss_irn->killer = NULL;
728 rss_irn->live_out = 0;
729 rss_irn->visited = 0;
730 rss_irn->handled = 0;
735 * Collects all consumer and descendant of a irn.
737 static void collect_node_info(rss_t *rss, ir_node *irn) {
738 static unsigned cur_desc_walk = 0;
739 rss_irn_t *rss_irn = get_rss_irn(rss, irn);
742 assert(! is_Proj(irn) && "Cannot handle Projs.");
744 /* check if node info is already available */
745 if (rss_irn->handled)
746 phase_reinit_single_irn_data(&rss->ph, irn);
748 DBG((rss->dbg, LEVEL_1, "\tcomputing consumers of %+F:\n", irn));
750 /* collect all consumer */
752 collect_consumer(rss, rss_irn, irn, &got_sink);
754 /* build sorted consumer array */
755 rss_irn->consumer = build_sorted_array_from_list(rss_irn->consumer_list, phase_obst(&rss->ph));
757 DBG((rss->dbg, LEVEL_1, "\tcompute descendants of %+F:\n", irn));
759 /* collect descendants */
761 collect_descendants(rss, rss_irn, irn, &got_sink, ++cur_desc_walk);
763 /* build sorted descendant array */
764 rss_irn->descendants = build_sorted_array_from_list(rss_irn->descendant_list, phase_obst(&rss->ph));
766 rss_irn->handled = 1;
770 * Checks if v is a potential killer of u.
771 * v is in pkill(u) iff descendants(v) cut consumer(u) is v
773 * @param rss The rss object
774 * @param v The node to check for killer
775 * @param u The potentially killed value
776 * @return 1 if v is in pkill(u), 0 otherwise
778 static int is_potential_killer(rss_t *rss, rss_irn_t *v, rss_irn_t *u) {
783 assert(is_Sink(v->irn) || ((plist_count(v->descendant_list) > 0 && v->descendants) || 1));
784 assert(is_Sink(u->irn) || ((plist_count(u->consumer_list) > 0 && u->consumer) || 1));
786 /* as we loop over the list: loop over the shorter one */
787 if (plist_count(v->descendant_list) > plist_count(u->consumer_list)) {
788 list = u->consumer_list;
789 arr = v->descendants;
792 list = v->descendant_list;
796 /* for each list element: try to find element in array */
797 foreach_plist(list, el) {
798 ir_node *irn = plist_element_get_value(el);
799 ir_node *match = BSEARCH_IRN_ARR(irn, arr);
801 if (match && match != irn)
809 * Update descendants, consumer and pkiller list for the given irn.
811 static void update_node_info(rss_t *rss, ir_node *irn, ir_node *pk_irn) {
812 rss_irn_t *node = get_rss_irn(rss, irn);
813 rss_irn_t *pkiller = get_rss_irn(rss, pk_irn);
815 /* add consumer and rebuild the consumer array */
816 if (! plist_has_value(node->consumer_list, pk_irn)) {
817 plist_insert_back(node->consumer_list, pk_irn);
818 node->consumer = build_sorted_array_from_list(node->consumer_list, phase_obst(&rss->ph));
821 /* add pk_irn as descendant, add it's descendants to irn's and rebuild array */
822 if (! plist_has_value(node->descendant_list, pk_irn)) {
825 plist_insert_back(node->descendant_list, pk_irn);
827 foreach_plist(pkiller->descendant_list, el) {
828 ir_node *desc = plist_element_get_value(el);
830 if (! plist_has_value(node->descendant_list, desc))
831 plist_insert_back(node->descendant_list, desc);
834 node->descendants = build_sorted_array_from_list(node->descendant_list, phase_obst(&rss->ph));
838 /* check, if pkiller is a potential killer of irn */
839 if (is_potential_killer(rss, pkiller, node)) {
840 if (! plist_has_value(node->pkiller_list, pk_irn))
841 plist_insert_back(node->pkiller_list, pk_irn);
842 if (! plist_has_value(pkiller->kill_value_list, irn))
843 plist_insert_back(pkiller->kill_value_list, irn);
844 DBG((rss->dbg, LEVEL_1, "\tpotential killer %+F added to %+F\n", pk_irn, irn));
850 * Compute the potential killing set PK.
852 static void compute_pkill_set(rss_t *rss) {
853 plist_element_t *u_el, *v_el;
855 foreach_plist(rss->nodes, u_el) {
856 ir_node *u_irn = plist_element_get_value(u_el);
857 rss_irn_t *u = get_rss_irn(rss, u_irn);
859 DBG((rss->dbg, LEVEL_1, "\tcomputing potential killers of %+F:\n", u_irn));
861 /* check each consumer if it is a potential killer */
862 foreach_plist(u->consumer_list, v_el) {
863 ir_node *v_irn = plist_element_get_value(v_el);
864 rss_irn_t *v = get_rss_irn(rss, v_irn);
866 /* check, if v is a potential killer of u */
867 if (is_potential_killer(rss, v, u)) {
868 if (! plist_has_value(u->pkiller_list, v_irn))
869 plist_insert_back(u->pkiller_list, v_irn);
870 if (! plist_has_value(v->kill_value_list, u_irn))
871 plist_insert_back(v->kill_value_list, u_irn);
872 DBG((rss->dbg, LEVEL_2, "\t\tpotential killer %+F\n", v_irn));
879 if (rss->opts->dump_flags & RSS_DUMP_PKG)
880 debug_vcg_dump_pkg(rss, NULL, 0);
884 * Build set of killing edges (from values to their potential killers)
886 static void build_kill_edges(rss_t *rss, pset *epk) {
887 plist_element_t *el, *k_el;
889 foreach_plist(rss->nodes, el) {
890 ir_node *irn = plist_element_get_value(el);
891 rss_irn_t *rirn = get_rss_irn(rss, irn);
893 DBG((rss->dbg, LEVEL_3, "\t\tbuilding kill edges for %+F:\n", irn));
895 foreach_plist(rirn->pkiller_list, k_el) {
896 ir_node *pkiller = plist_element_get_value(k_el);
897 rss_edge_t *ke = obstack_alloc(phase_obst(&rss->ph), sizeof(*ke));
903 DBG((rss->dbg, LEVEL_3, "\t\t\ttarget %+F\n", pkiller));
905 pset_insert(epk, ke, HASH_RSS_EDGE(ke));
910 /* print the given cbc for debugging purpose */
911 static void debug_print_cbc(firm_dbg_module_t *mod, cbc_t *cbc) {
915 DBG((mod, LEVEL_3, "\t\tS = set of parents:\n"));
916 foreach_nodeset(cbc->parents, n) {
917 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
919 DBG((mod, LEVEL_3, "\t\tT = set of children:\n"));
920 foreach_nodeset(cbc->children, n) {
921 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
923 DBG((mod, LEVEL_3, "\t\tE = Edges from producers to consumers\n"));
924 foreach_pset(cbc->kill_edges, ke) {
925 DBG((mod, LEVEL_3, "\t\t\t%+F -> %+F\n", ke->src, ke->tgt));
930 * Construct the bipartite decomposition.
931 * Sid-Ahmed-Ali Touati, Phd Thesis
932 * Register Pressure in Instruction Level Parallelism, p. 71
934 static void compute_bipartite_decomposition(rss_t *rss) {
935 pset *epk = new_pset(cmp_rss_edges, 10);
940 DBG((rss->dbg, LEVEL_1, "\tcomputing bipartite decomposition:\n"));
942 build_kill_edges(rss, epk);
944 foreach_plist(rss->nodes, el) {
945 ir_node *u_irn = plist_element_get_value(el);
946 rss_irn_t *u = get_rss_irn(rss, u_irn);
951 plist_element_t *el2;
953 rss_edge_t *kedge_root = NULL;
954 ir_node *t_irn, *s_irn;
956 if (u->visited || u_irn == _sink)
959 DBG((rss->dbg, LEVEL_2, "\t\t%+F choosen:\n", u_irn));
961 cbc = obstack_alloc(phase_obst(&rss->ph), sizeof(*cbc));
964 /* initialize S_cb */
965 cbc->parents = new_nodeset(5);
966 nodeset_insert(cbc->parents, u_irn);
967 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (init)\n", u_irn));
970 cbc->kill_edges = new_pset(cmp_rss_edges, 5);
972 /* each parent gets killed by at least one value from children */
974 /* T_cb = PK_successors(u) */
975 cbc->children = new_nodeset(5);
976 foreach_plist(u->pkiller_list, el2) {
977 nodeset_insert(cbc->children, plist_element_get_value(el2));
978 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (init)\n", plist_element_get_value(el2)));
982 Now: insert the parents of all children into the parent set
983 and insert the children of all parents into the children set
984 until the sets don't change any more
986 while (p_change || c_change) {
987 p_change = c_change = 0;
989 /* accumulate parents */
990 foreach_nodeset(cbc->children, t_irn) {
991 rss_irn_t *t = get_rss_irn(rss, t_irn);
992 plist_element_t *kvl_el;
994 foreach_pset(epk, k_edge) {
995 ir_node *val = k_edge->src;
997 if (k_edge->tgt == t_irn && ! nodeset_find(cbc->parents, k_edge->src)) {
998 nodeset_insert(cbc->parents, val);
1000 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents\n", val));
1005 /* accumulate children */
1006 foreach_nodeset(cbc->parents, s_irn) {
1007 rss_irn_t *s = get_rss_irn(rss, s_irn);
1008 plist_element_t *pkl_el;
1010 foreach_plist(s->pkiller_list, pkl_el) {
1011 ir_node *val = plist_element_get_value(pkl_el);
1013 if (! nodeset_find(cbc->children, val)) {
1014 nodeset_insert(cbc->children, val);
1016 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children\n", val));
1022 /* mark all parent values as visited */
1023 foreach_nodeset(cbc->parents, s_irn) {
1024 rss_irn_t *s = get_rss_irn(rss, s_irn);
1026 /* assure bipartite property */
1027 if (nodeset_find(cbc->children, s_irn)) {
1028 nodeset_remove(cbc->children, s_irn);
1029 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F removed from to children\n", s_irn));
1034 foreach_pset(epk, k_edge) {
1035 if (nodeset_find(cbc->parents, k_edge->src) && nodeset_find(cbc->children, k_edge->tgt)) {
1036 pset_insert(cbc->kill_edges, k_edge, HASH_RSS_EDGE(k_edge));
1038 Link all k_edges which are about to be removed together.
1039 Beware: pset_remove kills the iterator.
1041 k_edge->next = kedge_root;
1042 kedge_root = k_edge;
1046 /* remove all linked k_edges */
1047 for (; kedge_root; kedge_root = kedge_root->next) {
1048 pset_remove(epk, kedge_root, HASH_RSS_EDGE(kedge_root));
1051 /* add the connected bipartite component */
1052 if (nodeset_count(cbc->parents) > 0 && nodeset_count(cbc->children) > 0 && pset_count(cbc->kill_edges) > 0)
1053 pset_insert(rss->cbc_set, cbc, (unsigned)cbc->nr);
1054 DBG((rss->dbg, LEVEL_2, "\tbipartite component %d inserted:\n", cbc->nr));
1056 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2)
1057 debug_print_cbc(rss->dbg, cbc);
1061 if (rss->opts->dump_flags & RSS_DUMP_CBC)
1062 debug_vcg_dump_bipartite(rss);
1068 * Select the child with the maximum cost.
1070 static child_t *select_child_max_cost(rss_t *rss, nodeset *x, nodeset *y, child_t *t, cbc_t *cbc) {
1072 float max_cost = -1.0f;
1074 DBG((rss->dbg, LEVEL_2, "\t\tcomputing children costs:\n"));
1076 foreach_nodeset(cbc->children, child) {
1077 rss_irn_t *r_child = get_rss_irn(rss, child);
1078 int num_unkilled_parents = 0;
1079 int num_descendants;
1083 /* get the number of unkilled parents */
1084 foreach_pset(cbc->kill_edges, k_edge) {
1085 if (k_edge->tgt == child && nodeset_find(x, k_edge->src))
1086 ++num_unkilled_parents;
1089 cost = (float)num_unkilled_parents;
1091 num_descendants = plist_count(r_child->descendant_list) + nodeset_count(y);
1093 if (num_descendants > 0)
1094 cost /= (float)num_descendants;
1096 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F, #desc %d, cost %.3f\n", child, num_descendants, cost));
1098 if (cost > max_cost) {
1109 * Remove all parents from x which are killed by t_irn.
1111 static void remove_covered_parents(rss_t *rss, nodeset *x, ir_node *t_irn, cbc_t *cbc) {
1112 rss_irn_t *t = get_rss_irn(rss, t_irn);
1115 DBG((rss->dbg, LEVEL_2, "\t\tremoving parents covered by %+F:\n", t_irn));
1117 foreach_pset(cbc->kill_edges, k_edge) {
1118 if (k_edge->tgt == t_irn && nodeset_find(x, k_edge->src)) {
1119 nodeset_remove(x, k_edge->src);
1120 plist_insert_back(t->parent_list, k_edge->src);
1121 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", k_edge->src));
1126 static void update_cumulated_descendent_values(rss_t *rss, nodeset *y, ir_node *t_irn) {
1127 rss_irn_t *t = get_rss_irn(rss, t_irn);
1128 plist_element_t *el;
1130 DBG((rss->dbg, LEVEL_2, "\t\tupdating cumulated descendant value of %+F:\n", t_irn));
1132 foreach_plist(t->descendant_list, el) {
1133 nodeset_insert(y, plist_element_get_value(el));
1134 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", plist_element_get_value(el)));
1139 * Greedy-k: a heuristics for the MMA problem
1141 static void compute_killing_function(rss_t *rss) {
1143 struct obstack obst;
1145 obstack_init(&obst);
1147 rss->cbc_set = pset_new_ptr(5);
1148 compute_bipartite_decomposition(rss);
1150 /* for all bipartite components do: */
1151 foreach_pset(rss->cbc_set, cbc) {
1153 nodeset *x = new_nodeset(10);
1154 nodeset *y = new_nodeset(10);
1155 child_t **sks = NEW_ARR_F(child_t *, 20);
1160 DBG((rss->dbg, LEVEL_1, "\tcomputing SKS for cbc %d:\n", cbc->nr));
1161 DBG((rss->dbg, LEVEL_2, "\t\tinitializing parents X:\n"));
1163 /* X = S_cb (all parents are initially uncovered) */
1164 foreach_nodeset(cbc->parents, p) {
1165 nodeset_insert(x, p);
1166 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", p));
1169 /* while X not empty */
1170 while (nodeset_count(x) > 0) {
1171 child_t *t = obstack_alloc(&obst, sizeof(*t));
1172 memset(t, 0, sizeof(*t));
1174 t = select_child_max_cost(rss, x, y, t, cbc);
1176 if (cur_len >= cur_size) {
1177 ARR_EXTO(child_t *, sks, cur_size * 2);
1181 DBG((rss->dbg, LEVEL_2, "\t\tinsert child %+F (%.3f) into SKS at pos %d\n", t->irn, t->cost, cur_len));
1184 remove_covered_parents(rss, x, t->irn, cbc);
1185 update_cumulated_descendent_values(rss, y, t->irn);
1188 ARR_SHRINKLEN(sks, cur_len);
1190 /* sort SKS in increasing cost order */
1191 qsort(sks, cur_len, sizeof(sks[0]), cmp_child_costs);
1193 DBG((rss->dbg, LEVEL_2, "\tprocessing SKS for cbc %d:\n", cbc->nr));
1195 /* build killing function */
1196 for (i = cur_len - 1; i >= 0; --i) { /* loop over sks in decreasing cost order */
1197 child_t *t = sks[i];
1198 rss_irn_t *rt = get_rss_irn(rss, t->irn);
1199 plist_element_t *p_el;
1201 DBG((rss->dbg, LEVEL_3, "\t\t\tkiller %+F (%.3f):\n", t->irn, t->cost));
1203 /* kill all unkilled parents of t */
1204 foreach_plist(rt->parent_list, p_el) {
1205 ir_node *par = plist_element_get_value(p_el);
1206 rss_irn_t *rpar = get_rss_irn(rss, par);
1208 if (is_Sink(rpar->killer)) {
1209 rpar->killer = t->irn;
1210 DBG((rss->dbg, LEVEL_2, "\t\tkill %+F\n", rpar->irn));
1213 DBG((rss->dbg, LEVEL_3, "\t\t\tkeeping %+F as killer for %+F\n", rpar->killer, rpar->irn));
1223 if (rss->opts->dump_flags & RSS_DUMP_KILL)
1224 debug_vcg_dump_kill(rss);
1226 del_pset(rss->cbc_set);
1227 obstack_free(&obst, NULL);
1231 * Adds the edge src -> tgt to the dvg. Checks if reverse edge is already there (asserts).
1233 static INLINE void add_dvg_edge(rss_t *rss, dvg_t *dvg, ir_node *src, ir_node *tgt, int have_source) {
1234 rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1238 nodeset_insert(dvg->nodes, src);
1240 assert(nodeset_find(dvg->nodes, src) != NULL && "Wrong assumption");
1242 nodeset_insert(dvg->nodes, tgt);
1244 /* add an edge to our killer */
1245 dvg_edge->src = src;
1246 dvg_edge->tgt = tgt;
1247 dvg_edge->next = NULL;
1251 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1253 /* add the edge to the DVG */
1254 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", src, tgt));
1255 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1259 * Computes the disjoint value DAG (DVG).
1260 * BEWARE: It is not made explicitly clear in the Touati paper,
1261 * but the DVG is meant to be build from the KILLING DAG
1263 static void compute_dvg(rss_t *rss, dvg_t *dvg) {
1264 plist_element_t *el;
1266 DBG((rss->dbg, LEVEL_1, "\tcomputing DVG:\n"));
1268 foreach_plist(rss->nodes, el) {
1269 ir_node *u_irn = plist_element_get_value(el);
1270 rss_irn_t *u = get_rss_irn(rss, u_irn);
1271 rss_irn_t *u_killer = get_rss_irn(rss, u->killer);
1274 /* TODO: omit nodes only having sink as pkiller and killing no one */
1276 /* add an edge to killer */
1277 add_dvg_edge(rss, dvg, u_irn, u->killer, 0);
1279 if (is_Sink(u->killer))
1282 /* We add an edge to every killer, from where we could be reached. */
1283 for (i = ARR_LEN_SAFE(u_killer->descendants) - 1; i >= 0; --i) {
1284 add_dvg_edge(rss, dvg, u_irn, u_killer->descendants[i], 1);
1289 foreach_plist(rss->nodes, el2) {
1290 ir_node *v_irn = plist_element_get_value(el2);
1293 There is an edge (u, v) in the DVG iff v is a descendant of the killer(u).
1295 if (BSEARCH_IRN_ARR(v_irn, u_kill->descendants)) {
1296 rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1299 /* insert the user into the DVG and append it to the user list of u */
1300 nodeset_insert(dvg->nodes, v_irn);
1301 if (! plist_has_value(u->dvg_user_list, v_irn))
1302 plist_insert_back(u->dvg_user_list, v_irn);
1304 dvg_edge->src = u_irn;
1305 dvg_edge->tgt = v_irn;
1306 dvg_edge->next = NULL;
1311 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1313 /* add the edge to the DVG */
1314 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", u_irn, v_irn));
1315 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1321 if (rss->opts->dump_flags & RSS_DUMP_DVG)
1322 debug_vcg_dump_dvg(rss, dvg);
1326 * Updates the dvg structure when a serialization edge from src -> tgt is added.
1328 static void update_dvg(rss_t *rss, dvg_t *dvg, rss_irn_t *src, rss_irn_t *tgt) {
1331 add_dvg_edge(rss, dvg, tgt->irn, src->irn, 0);
1333 for (i = ARR_LEN_SAFE(src->descendants) - 1; i >= 0; --i) {
1334 add_dvg_edge(rss, dvg, tgt->irn, src->descendants[i], 1);
1340 * Accumulate all descendants for root into list.
1342 static void accumulate_dvg_descendant_values(rss_t *rss, rss_irn_t *root, plist_t *list) {
1343 if (plist_count(root->dvg_user_list) > 0) {
1344 plist_element_t *el;
1346 foreach_plist(root->dvg_user_list, el) {
1347 ir_node *v_irn = plist_element_get_value(el);
1348 rss_irn_t *v = get_rss_irn(rss, v_irn);
1350 /* add v as descendant */
1351 if (! plist_has_value(list, v_irn)) {
1352 plist_insert_back(list, v_irn);
1353 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG descendant %+F\n", v_irn));
1356 /* accumulate v's descendants */
1357 accumulate_dvg_descendant_values(rss, v, list);
1363 * Builds the list of potential killers for each node
1365 * Needs the descendant list for all user as sorted array.
1367 static void build_dvg_pkiller_list(rss_t *rss, dvg_t *dvg) {
1370 foreach_nodeset(dvg->nodes, irn) {
1371 rss_irn_t *node = get_rss_irn(rss, irn);
1372 plist_element_t *el, *el2;
1374 DBG((rss->dbg, DEBUG_DVG, "\t\tbuilding pkiller list for %+F\n", irn));
1376 /* check each user */
1377 foreach_plist(node->dvg_user_list, el) {
1378 ir_node *u_irn = plist_element_get_value(el);
1380 /* is the current user u_irn not a descendant of any other user -> pkiller */
1381 foreach_plist(node->dvg_user_list, el2) {
1382 ir_node *v_irn = plist_element_get_value(el2);
1383 rss_irn_t *v = get_rss_irn(rss, v_irn);
1386 ! BSEARCH_IRN_ARR(u_irn, v->dvg_desc) &&
1387 ! plist_has_value(node->dvg_pkiller_list, u_irn))
1389 plist_insert_back(node->dvg_pkiller_list, u_irn);
1390 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG pkiller %+F\n", u_irn));
1395 node->dvg_pkiller = build_sorted_array_from_list(node->dvg_pkiller_list, phase_obst(&rss->ph));
1399 if (firm_dbg_get_mask(rss->dbg) & DEBUG_DVG)
1400 debug_vcg_dump_dvg_pkiller(rss, dvg);
1407 * Compute the maximal antichain of the current DVG.
1408 * This is a reimplementation of the MAXIMAL_ANTI_CHAIN function
1409 * from the DDG library 1.1 (DAG.cpp).
1411 static nodeset *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration) {
1412 int n = nodeset_count(dvg->nodes);
1413 int *assignment = alloca(n * sizeof(assignment[0]));
1414 int *assignment_rev = alloca(n * sizeof(assignment_rev[0]));
1415 int *idx_map = alloca(n * sizeof(idx_map[0]));
1416 hungarian_problem_t *bp;
1417 nodeset *values, *temp;
1419 int i, j, cost, cur_chain, res;
1420 rss_edge_t *dvg_edge;
1422 #define MAP_IDX(irn) bsearch_for_index(get_irn_idx(irn), idx_map, n, 1)
1424 if (pset_count(dvg->edges) == 0)
1427 bp = hungarian_new(n, n, 1, HUNGARIAN_MATCH_NORMAL);
1430 At first, we build an index map for the nodes in the DVG,
1431 because we cannot use the irn idx therefore as the resulting
1432 bipartite data structure would have around 1.2GB.
1433 So we limit the size to the number of nodes we have in the DVG
1434 and build a sorted index map for their irn indices.
1437 foreach_nodeset(dvg->nodes, u_irn) {
1438 idx_map[i++] = get_irn_idx(u_irn);
1440 qsort(idx_map, n, sizeof(idx_map[0]), cmp_int);
1442 foreach_pset(dvg->edges, dvg_edge) {
1443 int idx_u = MAP_IDX(dvg_edge->src);
1444 int idx_v = MAP_IDX(dvg_edge->tgt);
1446 /* add the entry to the bipartite data structure */
1447 hungarian_add(bp, idx_u, idx_v, 1);
1448 DBG((rss->dbg, LEVEL_3, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1449 idx_u, dvg_edge->src, idx_v, dvg_edge->tgt));
1453 Add a bipartite entry for each pair of nodes (u, v), where exists a
1454 path in the DVG from u to v, ie. connect all descendants(v) to v.
1455 desc_dvg(v) = accumulated descendants of all z in dvg_user_list(v)
1457 foreach_nodeset(dvg->nodes, u_irn) {
1458 rss_irn_t *u = get_rss_irn(rss, u_irn);
1459 int idx_u_irn = MAP_IDX(u_irn);
1461 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG descendants of %+F:\n", u_irn));
1463 //plist_clear(u->dvg_desc_list);
1464 //accumulate_dvg_descendant_values(rss, u, u->dvg_desc_list);
1467 FIXME: The array is build on the phase obstack and we cannot free the data.
1468 So the array get as many times allocated as this function is called.
1471 /* build the sorted array for faster searches */
1472 //u->dvg_desc = build_sorted_array_from_list(u->dvg_desc_list, phase_obst(&rss->ph));
1474 DBG((rss->dbg, DEBUG_MAX_AC, "\t\tadding bipartite entries of %+F:\n", u_irn));
1476 /* add the bipartite entries for all descendants */
1477 for (i = ARR_LEN_SAFE(u->dvg_desc) - 1; i >= 0; --i) {
1478 rss_irn_t *desc = get_rss_irn(rss, u->dvg_desc[i]);
1479 int idx_desc = MAP_IDX(u->dvg_desc[i]);
1481 /* add the entry to the bipartite data structure */
1482 hungarian_add(bp, idx_u_irn, idx_desc, 1);
1483 DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1484 idx_u_irn, u_irn, idx_desc, u->dvg_desc[i]));
1491 /* We want maximum cardinality matching */
1492 hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);
1495 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG pkiller:\n"));
1496 /* beware: the following function needs the dvg_desc array */
1497 build_dvg_pkiller_list(rss, dvg);
1500 DBG((rss->dbg, LEVEL_2, "\t\tcomputing bipartite matching\n"));
1502 The maximum cardinality bipartite matching gives us the minimal
1503 chain partition, which corresponds to the maximum anti chains.
1505 res = hungarian_solve(bp, assignment, &cost, 1);
1506 assert(res == 0 && "Bipartite matching failed!");
1509 memset(assignment_rev, -1, n * sizeof(assignment_rev[0]));
1511 /* build the reverse assignment, ie. foreach i -> j, add j -> i */
1512 for (i = 0; i < n; ++i) {
1513 if (assignment[i] >= 0) {
1514 int j = assignment[i];
1515 assignment_rev[j] = i;
1519 DBG((rss->dbg, LEVEL_2, "\t\tgot assignment with cost %d\n", cost));
1520 DBG((rss->dbg, LEVEL_3, "\t\t\tassignment --- reverse assignment\n", cost));
1521 for (i = 0; i < n; ++i) {
1522 DBG((rss->dbg, LEVEL_3, "\t\t\t%3d -> %3d %3d -> %3d\n", i, assignment[i], i, assignment_rev[i]));
1525 values = new_nodeset(10);
1527 /* Construction of the minimal chain partition */
1528 for (j = 0; j < n; ++j) {
1529 /* check nodes, which did not occur as target */
1530 if (assignment_rev[j] == -1) {
1531 int xj = idx_map[j];
1532 ir_node *xj_irn = get_idx_irn(rss->irg, xj);
1533 rss_irn_t *xj_rss = get_rss_irn(rss, xj_irn);
1534 chain_t *c = obstack_alloc(phase_obst(&rss->ph), sizeof(*c));
1537 /* there was no source for j -> we have a source of a new chain */
1538 nodeset_insert(values, xj_irn);
1540 c->elements = plist_obstack_new(phase_obst(&rss->ph));
1541 c->nr = cur_chain++;
1542 plist_insert_back(c->elements, xj_irn);
1546 DBG((rss->dbg, LEVEL_2, "\t\tstarting chain %d:\n", c->nr));
1547 DBG((rss->dbg, LEVEL_2, "\t\t\t%+F (%d)", xj_irn, j));
1549 /* follow chain, having j as source */
1551 while (assignment[source] >= 0) {
1552 int target = assignment[source];
1553 int irn_idx = idx_map[target];
1554 ir_node *irn = get_idx_irn(rss->irg, irn_idx);
1555 rss_irn_t *node = get_rss_irn(rss, irn);
1557 plist_insert_back(c->elements, irn);
1560 DB((rss->dbg, LEVEL_2, " -> %+F (%d)", irn, target));
1562 /* new source = last target */
1566 DB((rss->dbg, LEVEL_2, "\n"));
1571 Computing the maximal antichain: Select an element from each
1572 chain such, such it is parallel with the others.
1574 DBG((rss->dbg, LEVEL_1, "\t\tcomputing set of saturation values (MAX AC)\n"));
1575 DBG((rss->dbg, LEVEL_3, "\t\tstarting with:\n"));
1578 if (firm_dbg_get_mask(rss->dbg) & LEVEL_3)
1579 dump_nodeset(values, "\t\t\t");
1585 We need an explicit array for the values as
1586 we cannot iterate multiple times over the same
1587 set at the same time. :-(((((
1589 int n = nodeset_count(values);
1591 ir_node **val_arr = NEW_ARR_F(ir_node *, n);
1593 foreach_nodeset(values, u_irn)
1594 val_arr[i++] = u_irn;
1599 temp = new_nodeset(10);
1601 /* Select all nodes from current value set, having another node in the set as descendant. */
1602 for (i = 0; i < n; ++i) {
1603 rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1605 for (j = 0; j < n; ++j) {
1609 key.src = val_arr[i];
1610 key.tgt = val_arr[j];
1612 if (pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1613 /* v[j] is descendant of u -> remove u and break */
1614 nodeset_insert(temp, u->irn);
1615 nodeset_remove(values, u->irn);
1617 DBG((rss->dbg, LEVEL_3, "\t\t\tremoving %+F from values, adding it to temp\n", u->irn));
1625 /* Try to insert the chain predecessor of all selected u's */
1626 foreach_nodeset(temp, u_irn) {
1627 rss_irn_t *u = get_rss_irn(rss, u_irn);
1628 chain_t *c = u->chain;
1629 plist_element_t *el = plist_find_value(c->elements, u_irn);
1631 assert(el && "Missing element in chain!");
1633 /* If u has predecessor in chain: insert the predecessor */
1634 if (el = plist_element_get_prev(el)) {
1635 nodeset_insert(values, plist_element_get_value(el));
1636 DBG((rss->dbg, LEVEL_3, "\t\t\tadding %+F to values\n", plist_element_get_value(el)));
1642 } while (nodeset_count(temp) > 0);
1644 DBG((rss->dbg, LEVEL_2, "\t\tfinal set:\n"));
1646 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2) {
1647 dump_nodeset(values, "\t\t\t");
1651 if (rss->opts->dump_flags & RSS_DUMP_MAXAC)
1652 debug_vcg_dump_pkg(rss, values, iteration);
1662 * Computes the best serialization between two nodes of sat_vals.
1664 static serialization_t *compute_best_admissible_serialization(rss_t *rss, nodeset *sat_vals, serialization_t *ser, int num_regs) {
1665 int n = nodeset_count(sat_vals);
1666 int n_idx = ARR_LEN_SAFE(rss->idx_map);
1668 ir_node **val_arr = alloca(n * sizeof(val_arr[0]));
1669 bitset_t *bs_sv = bitset_alloca(n_idx);
1670 bitset_t *bs_vdesc = bitset_alloca(n_idx);
1671 bitset_t *bs_tmp = bitset_alloca(n_idx);
1672 bitset_t *bs_ukilldesc = bitset_alloca(n_idx);
1673 int best_benefit = INT_MAX;
1674 int best_omega2 = INT_MAX;
1675 int best_benefit_omega20 = INT_MAX;
1679 rss_edge_t min_benefit_edge;
1680 rss_edge_t min_omega20_edge;
1681 rss_irn_t *ser_u_omega1, *ser_v_omega1;
1682 rss_irn_t *ser_u_omega20, *ser_v_omega20;
1684 DBG((rss->dbg, LEVEL_1, "\tcomputing admissible serializations:\n"));
1687 We need an explicit array for the values as
1688 we cannot iterate multiple times over the same
1689 set at the same time. :-(((((
1692 foreach_nodeset(sat_vals, irn) {
1694 bitset_set(bs_sv, BLOCK_IDX_MAP(rss, irn));
1698 We build all admissible serializations and remember the best found so far.
1701 if v in pkiller(u): add edge from v to all other pkiller(u)
1702 else: for all vv in pkiller(u): add edge from v to vv if there exists no path from vv to v
1706 A node is unserializable if:
1707 - it has only one killer and this one is Sink
1708 - it kills no other values
1709 In this case there is no serialization which could
1710 reduce the registerpressure
1712 #define IS_UNSERIALIZABLE_NODE(rss_node) \
1713 ((plist_count(rss_node->pkiller_list) == 1) && \
1714 is_Sink(rss_node->killer) && \
1715 (plist_count(rss_node->kill_value_list) == 0))
1717 /* for all u in sat_vals */
1718 for (i = 0; i < n; ++i) {
1719 rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1720 int u_height = get_irn_height(rss->h, val_arr[i]);
1721 plist_element_t *el;
1723 /* ignore nodes where serialization does not help */
1724 if (IS_UNSERIALIZABLE_NODE(u)) {
1725 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", u->irn));
1729 /* accumulate all descendants of all pkiller(u) */
1730 bitset_clear_all(bs_ukilldesc);
1731 foreach_plist(u->pkiller_list, el) {
1732 ir_node *irn = plist_element_get_value(el);
1733 rss_irn_t *node = get_rss_irn(rss, irn);
1736 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, irn));
1740 for (k = ARR_LEN_SAFE(node->descendants) - 1; k >= 0; --k) {
1741 if (! is_Sink(node->descendants[k]))
1742 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, node->descendants[k]));
1746 /* for all v in sat_vals */
1747 for (j = 0; j < n; ++j) {
1748 ir_node *v_irn = val_arr[j];
1749 rss_irn_t *v = get_rss_irn(rss, v_irn);
1750 unsigned v_height = get_irn_height(rss->h, v_irn);
1751 int omega1, omega2, is_pkiller;
1753 /* v cannot be serialized with itself
1754 * ignore nodes where serialization does not help */
1755 if (i == j || IS_UNSERIALIZABLE_NODE(v)) {
1757 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", v->irn));
1761 /* get descendants of v */
1762 bitset_clear_all(bs_vdesc);
1763 bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v_irn));
1764 for (k = ARR_LEN_SAFE(v->descendants) - 1; k >= 0; --k) {
1765 if (! is_Sink(v->descendants[k]))
1766 bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v->descendants[k]));
1769 /* if v is in pkiller(u) */
1770 is_pkiller = plist_has_value(u->pkiller_list, v_irn);
1772 /* for all vv in pkiller(u) */
1773 foreach_plist(u->pkiller_list, el) {
1774 ir_node *vv_irn = plist_element_get_value(el);
1777 if (is_Sink(vv_irn))
1781 add_edge = vv_irn != v_irn && skip_Proj(vv_irn) != skip_Proj(v_irn);
1783 add_edge = ! heights_reachable_in_block(rss->h, skip_Proj(vv_irn), skip_Proj(v_irn));
1786 As we add an edge from vv -> v, we have to make sure,
1787 that there exists no path from v to vv.
1791 int vv_height = get_irn_height(rss->h, vv_irn);
1792 int mu1, mu2, critical_path_cost;
1795 mu1 = | descendants(v) cut sat_vals |
1796 the number of saturating values which cannot
1797 be simultaneously alive with u
1799 bitset_copy(bs_tmp, bs_vdesc);
1800 mu1 = bitset_popcnt(bitset_and(bs_tmp, bs_sv));
1803 mu2 = | accum_desc_all_pkiller(u) without descendants(v) |
1806 bitset_copy(bs_tmp, bs_ukilldesc);
1807 mu2 = bitset_popcnt(bitset_andnot(bs_tmp, bs_vdesc));
1813 /* omega1 = mu1 - mu2 */
1819 /* omega2 = increase of critical path */
1820 critical_path_cost =
1821 v_height /* longest path from v to sink */
1822 + rss->max_height - vv_height /* longest path from source to vv */
1826 If critical_path_cost > max_height -> the new edge
1827 would increase the longest critical path by the difference.
1829 omega2 = critical_path_cost > rss->max_height ? critical_path_cost - rss->max_height : 0;
1831 /* this keeps track of the edge with the best benefit */
1832 if (omega1 >= num_regs - n && omega1 < best_benefit) {
1833 min_benefit_edge.src = v_irn;
1834 min_benefit_edge.tgt = vv_irn;
1839 best_benefit = omega1;
1840 ser->new_killer = is_pkiller;
1843 /* this keeps track of the edge with the best omega1 costs where omega2 == 0 */
1844 if (omega2 == 0 && omega1 >= num_regs - n && omega1 < best_benefit_omega20) {
1845 min_omega20_edge.src = v_irn;
1846 min_omega20_edge.tgt = vv_irn;
1851 best_benefit_omega20 = omega1;
1852 ser->new_killer = is_pkiller;
1855 best_omega2 = MIN(best_omega2, omega2);
1857 DBG((rss->dbg, LEVEL_2, "\t\tfound %+F -> %+F (w1 %d, w2 %d)\n",
1858 v_irn, vv_irn, omega1, omega2));
1860 } /* for all vv in pkiller(u) */
1861 } /* for all v in sat_vals */
1862 } /* for all u in sat_vals */
1867 if (best_omega2 == 0) {
1868 ser->u = ser_u_omega20;
1869 ser->v = ser_v_omega20;
1870 ser->edge->src = min_omega20_edge.src;
1871 ser->edge->tgt = min_omega20_edge.tgt;
1872 ser->omega1 = best_benefit_omega20;
1873 ser->omega2 = best_omega2;
1876 ser->u = ser_u_omega1;
1877 ser->v = ser_v_omega1;
1878 ser->edge->src = min_benefit_edge.src;
1879 ser->edge->tgt = min_benefit_edge.tgt;
1880 ser->omega1 = best_benefit;
1881 ser->omega2 = best_omega2;
1886 #undef IS_UNSERIALIZABLE_NODE
1890 * Perform the value serialization heuristic and add all
1891 * computed serialization edges as dependencies to the irg.
1893 static void perform_value_serialization_heuristic(rss_t *rss) {
1894 bitset_t *arch_nonign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1895 bitset_t *abi_ign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1896 int available_regs, iteration;
1899 pset *ser_set = new_pset(cmp_rss_edges, 20);
1901 /* available_regs = R = |arch_non_ignore_regs cut ~abi_ignore_regs| */
1902 arch_put_non_ignore_regs(rss->arch_env, rss->cls, arch_nonign_bs);
1903 be_abi_put_ignore_regs(rss->abi, rss->cls, abi_ign_bs);
1904 bitset_andnot(arch_nonign_bs, abi_ign_bs);
1905 available_regs = bitset_popcnt(arch_nonign_bs);
1907 DBG((rss->dbg, LEVEL_1, "\n\t#available regs: %d\n\n", available_regs));
1910 At first we need to compute the disjoint value DAG (DVG = {V, E_dv}).
1911 V = set of all nodes we are currently interested in
1912 E_dv = there is an edge from u to v iff v is a descendant of killer(u), forall u, v in V
1914 dvg.nodes = new_nodeset(plist_count(rss->nodes));
1915 dvg.edges = new_pset(cmp_rss_edges, plist_count(rss->nodes) * 5);
1916 compute_dvg(rss, &dvg);
1919 Then we perform the heuristic serialization algorithm
1920 on the DVG which gives us all necessary serialization
1923 DBG((rss->dbg, LEVEL_1, "\tcomputing maximal antichain:\n"));
1925 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
1926 while (sat_vals && (nodeset_count(sat_vals) > available_regs)) {
1927 serialization_t *ser, best_ser;
1928 rss_edge_t *edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*edge));
1929 rss_irn_t *tgt, *src;
1930 ir_node *dep_src, *dep_tgt;
1932 best_ser.edge = edge;
1933 ser = compute_best_admissible_serialization(rss, sat_vals, &best_ser, available_regs);
1935 DBG((rss->dbg, LEVEL_1, "\tcurrent register saturation %d, target %d\n", nodeset_count(sat_vals), available_regs));
1938 DBG((rss->dbg, LEVEL_1, "\tno RS improving serialization found, breaking at iteration %d\n", iteration));
1942 /* Insert the serialization as dependency edge into the irg. */
1943 DBG((rss->dbg, LEVEL_1, "\tserializing %+F -> %+F with edge %+F -> %+F and cost %d, %d\n",
1944 ser->u->irn, ser->v->irn, ser->edge->src, ser->edge->tgt, ser->omega1, ser->omega2));
1946 if (pset_find(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge)))
1947 ir_printf("WARNING: serialization %+F -> %+F computed twice!\n", ser->edge->src, ser->edge->tgt);
1950 pset_insert(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge));
1952 /* update the dvg */
1953 update_dvg(rss, &dvg, ser->v, ser->u);
1954 update_dvg(rss, &dvg, get_rss_irn(rss, ser->edge->src), get_rss_irn(rss, ser->edge->tgt));
1955 del_nodeset(sat_vals);
1957 dep_src = skip_Proj(ser->edge->src);
1958 dep_tgt = ser->edge->tgt;
1959 add_irn_dep(dep_src, dep_tgt);
1961 /* Update descendants, consumer and pkillers of the target */
1962 update_node_info(rss, ser->edge->tgt, ser->edge->src);
1964 /* TODO: try to find a cheaper way for updating height information */
1965 rss->max_height = heights_recompute_block(rss->h, rss->block);
1967 /* Recompute the antichain for next serialization */
1968 DBG((rss->dbg, LEVEL_1, "\tre-computing maximal antichain:\n"));
1969 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
1972 del_nodeset(dvg.nodes);
1973 del_pset(dvg.edges);
1977 * Do initial calculations for a block.
1979 static void process_block(ir_node *block, void *env) {
1982 const ir_edge_t *edge;
1984 phase_init(&rss->ph, "rss block preprocessor", rss->irg, PHASE_DEFAULT_GROWTH, init_rss_irn);
1986 DBG((rss->dbg, LEVEL_1, "preprocessing block %+F\n", block));
1989 /* build an index map for all nodes in the current block */
1991 n = get_irn_n_edges(block);
1992 NEW_ARR_A(int *, rss->idx_map, n);
1993 foreach_out_edge(block, edge) {
1994 ir_node *irn = get_edge_src_irn(edge);
1995 rss->idx_map[i++] = get_irn_idx(irn);
1997 qsort(rss->idx_map, n, sizeof(rss->idx_map[0]), cmp_int);
1998 rss->max_height = heights_recompute_block(rss->h, block);
2000 /* loop over all register classes */
2001 for (i = arch_isa_get_n_reg_class(rss->arch_env->isa) - 1; i >= 0; --i) {
2002 const arch_register_class_t *cls = arch_isa_get_reg_class(rss->arch_env->isa, i);
2005 DBG((rss->dbg, LEVEL_1, "register class %s\n", arch_register_class_name(cls)));
2007 /* reset the list of interesting nodes */
2008 plist_clear(rss->nodes);
2009 plist_insert_back(rss->nodes, _sink);
2011 /* collect all nodes having a certain register class */
2012 foreach_out_edge(block, edge) {
2013 ir_node *irn = get_edge_src_irn(edge);
2017 - mode_T nodes (the projs are asked)
2018 - mode_X nodes (control flow nodes are always scheduled last)
2019 - Keeps (they are always immediately scheduled)
2020 - Phi (same as Keep)
2022 if (get_irn_mode(irn) == mode_T || be_is_Keep(irn) || is_Phi(irn))
2026 In case of a proj, we skip
2027 - Barrier (they are a Barrier :)
2029 - the Proj itself, as it's scheduled always with it's super node
2032 ir_node *pred = get_Proj_pred(irn);
2033 if (be_is_Barrier(pred) || is_Start(pred))
2037 if (! arch_irn_is(rss->arch_env, irn, ignore) && arch_get_irn_reg_class(rss->arch_env, irn, -1) == cls) {
2038 plist_insert_back(rss->nodes, skip_Proj(irn));
2040 /* calculate the descendants and consumer for each node in the block */
2041 collect_node_info(rss, skip_Proj(irn));
2045 /* compute the potential killing set PK(G) */
2046 compute_pkill_set(rss);
2048 /* compute the killing function k* */
2049 compute_killing_function(rss);
2052 Compute the heuristic value serialization and
2053 add the necessary dependencies to the irg.
2055 perform_value_serialization_heuristic(rss);
2058 phase_free(&rss->ph);
2063 * Register the options.
2065 void rss_register_options(lc_opt_entry_t *grp) {
2066 static int run_once = 0;
2067 lc_opt_entry_t *rss_grp;
2071 rss_grp = lc_opt_get_grp(grp, "rss");
2073 lc_opt_add_table(rss_grp, rss_option_table);
2076 #endif /* WITH_LIBCORE */
2079 * Preprocess the irg for scheduling.
2081 void rss_schedule_preparation(const be_irg_t *birg) {
2084 FIRM_DBG_REGISTER(rss.dbg, "firm.be.sched.rss");
2086 //firm_dbg_set_mask(rss.dbg, LEVEL_1 | LEVEL_2 | LEVEL_3);
2088 init_rss_special_nodes(birg->irg);
2090 rss.irg = birg->irg;
2091 rss.arch_env = birg->main_env->arch_env;
2092 rss.abi = birg->abi;
2093 rss.h = heights_new(birg->irg);
2094 rss.nodes = plist_new();
2095 rss.opts = &rss_options;
2096 irg_block_walk_graph(birg->irg, NULL, process_block, &rss);
2097 heights_free(rss.h);
2098 plist_free(rss.nodes);
2100 if (birg->main_env->options->dump_flags & DUMP_SCHED)
2101 be_dump(rss.irg, "-rss", dump_ir_block_graph);