2 * Implementation of a register saturating list scheduler
3 * as described in: Sid-Ahmed-Ali Touati
4 * Register Saturation in Superscalar and VLIW Codes
6 * @license This file protected by GPL - GNU GENERAL PUBLIC LICENSE.
7 * @author Christian Wuerdig
20 #include "irgraph_t.h"
22 #include "iredges_t.h"
24 #include "irphase_t.h"
30 #include "bipartite.h"
31 #include "hungarian.h"
38 #include "besched_t.h"
41 #include <libcore/lc_opts.h>
42 #include <libcore/lc_opts_enum.h>
43 #endif /* WITH_LIBCORE */
46 #define ARR_LEN_SAFE(arr) ((arr) != NULL ? ARR_LEN((arr)) : 0)
48 #define HASH_RSS_EDGE(edge) ((get_irn_node_nr((edge)->src) << 16) | (get_irn_node_nr((edge)->tgt) & 0xFFFF))
49 #define BSEARCH_IRN_ARR(val, arr) \
50 bsearch(&(val), (arr), ARR_LEN_SAFE((arr)), sizeof((arr)[0]), cmp_irn_idx)
52 #define BLOCK_IDX_MAP(rss, irn) bsearch_for_index(get_irn_idx((irn)), (rss)->idx_map, ARR_LEN_SAFE((rss)->idx_map), 1)
55 typedef struct _rss_opts_t {
59 /* Represents a child with associated costs */
60 typedef struct _child {
65 /* We need edges for several purposes. */
66 typedef struct _rss_edge {
72 /* Represents a connected bipartite component. */
74 nodeset *parents; /**< = S a set of value producers */
75 nodeset *children; /**< = T a set of value consumers */
76 pset *kill_edges; /**< = E a set of edges (t in T, s in S) such as each s in S gets killed by at least one t in T */
77 int nr; /**< a deterministic index for set insertion (used as hash) */
80 /* Represents a disjoint value DAG. */
86 /* Represents a chain of nodes. */
87 typedef struct _chain {
88 plist_t *elements; /**< List of chain elements */
89 int nr; /**< a deterministic index for set insertion (used as hash) */
92 typedef struct _rss_irn {
93 plist_t *consumer_list; /**< List of consumers */
94 ir_node **consumer; /**< Sorted consumer array (needed for faster access) */
96 plist_t *parent_list; /**< List of parents */
97 plist_t *pkiller_list; /**< List of potential killers */
99 plist_t *descendant_list; /**< List of descendants */
100 ir_node **descendants; /**< Sorted descendant array (needed for faster access) */
102 ir_node *killer; /**< The selected unique killer */
103 ir_node *irn; /**< The corresponding firm node to this rss_irn */
105 chain_t *chain; /**< The chain, this node is associated to */
107 unsigned desc_walk; /**< visited flag for collecting descendants */
108 unsigned kill_count; /**< number of nodes killed by this one */
110 unsigned live_out : 1; /**< irn has consumers outside of it's block */
111 unsigned visited : 1; /**< visited flag for bipartite decomposition */
112 unsigned havecons : 1; /**< visited flag collect consumer */
113 unsigned handled : 1; /**< flag indicating whether or not the list structures have been build */
114 unsigned dumped : 1; /**< flag indication whether or not this node was dumped */
117 /* Represents a serialization edge with associated costs. */
118 typedef struct _serialization {
119 rss_irn_t *u; /* the top node */
120 rss_irn_t *v; /* the node about to be serialized after u */
121 rss_edge_t *edge; /* the edge selected for the serialization */
122 int omega1; /* estimated: available regs - RS reduction */
123 int omega2; /* increase in critical path length */
127 typedef struct _rss {
128 phase_t ph; /**< Phase to hold some data */
129 heights_t *h; /**< The current height object */
130 ir_graph *irg; /**< The irg to preprocess */
131 plist_t *nodes; /**< The list of interesting nodes */
132 const arch_env_t *arch_env; /**< The architecture environment */
133 be_abi_irg_t *abi; /**< The abi for this irg */
134 pset *cbc_set; /**< A set of connected bipartite components */
135 ir_node *block; /**< The current block in progress. */
136 int *idx_map; /**< Mapping irn indices to per block indices */
137 unsigned max_height; /**< maximum height in the current block */
138 rss_opts_t *opts; /**< The options */
139 be_lv_t *liveness; /**< The liveness information for this irg */
140 pset *live_block; /**< Values alive at end of block */
141 const arch_register_class_t *cls; /**< The current register class */
142 DEBUG_ONLY(firm_dbg_module_t *dbg);
145 #define get_rss_irn(rss, irn) (phase_get_or_set_irn_data(&rss->ph, irn))
148 * We need some special nodes:
149 * a source and a sink for all live-in and live-out values of a block
158 static ir_node *_source = NULL;
159 static ir_node *_sink = NULL;
161 #define is_Source(irn) ((irn) == _source)
162 #define is_Sink(irn) ((irn) == _sink)
166 RSS_DUMP_CBC = 1 << 0,
167 RSS_DUMP_PKG = 1 << 1,
168 RSS_DUMP_KILL = 1 << 2,
169 RSS_DUMP_DVG = 1 << 3,
170 RSS_DUMP_MAXAC = 1 << 4,
171 RSS_DUMP_ALL = (RSS_DUMP_MAXAC << 1) - 1,
174 static rss_opts_t rss_options = {
179 static const lc_opt_enum_int_items_t dump_items[] = {
180 { "none", RSS_DUMP_NONE },
181 { "cbc", RSS_DUMP_CBC },
182 { "pkg", RSS_DUMP_PKG },
183 { "kill", RSS_DUMP_KILL },
184 { "dvg", RSS_DUMP_DVG },
185 { "maxac", RSS_DUMP_MAXAC },
186 { "all", RSS_DUMP_ALL },
190 static lc_opt_enum_int_var_t dump_var = {
191 &rss_options.dump_flags, dump_items
194 static const lc_opt_table_entry_t rss_option_table[] = {
195 LC_OPT_ENT_ENUM_MASK("dump", "dump phases", &dump_var),
198 #endif /* WITH_LIBCORE */
200 /******************************************************************************
202 * | | | | / _| | | (_)
203 * | |__ ___| |_ __ ___ _ __ | |_ _ _ _ __ ___| |_ _ ___ _ __ ___
204 * | '_ \ / _ \ | '_ \ / _ \ '__| | _| | | | '_ \ / __| __| |/ _ \| '_ \/ __|
205 * | | | | __/ | |_) | __/ | | | | |_| | | | | (__| |_| | (_) | | | \__ \
206 * |_| |_|\___|_| .__/ \___|_| |_| \__,_|_| |_|\___|\__|_|\___/|_| |_|___/
209 ******************************************************************************/
212 * Acquire opcodes and create source and sink nodes.
214 static void init_rss_special_nodes(ir_graph *irg) {
215 ir_node *block = get_irg_start_block(irg);
216 int iro_rss_base = get_next_ir_opcodes(iro_rss_last);
217 ir_op *op_rss_Source = new_ir_op(iro_rss_base + iro_rss_Source, "rss_Source", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
218 ir_op *op_rss_Sink = new_ir_op(iro_rss_base + iro_rss_Sink, "rss_Sink", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
219 _source = new_ir_node(NULL, irg, block, op_rss_Source, mode_ANY, 0, NULL);
220 _sink = new_ir_node(NULL, irg, block, op_rss_Sink, mode_ANY, 0, NULL);
223 static int cmp_int(const void *a, const void *b) {
227 return QSORT_CMP(*i1, *i2);
230 static int cmp_child_costs(const void *a, const void *b) {
231 const child_t *c1 = a;
232 const child_t *c2 = b;
234 return QSORT_CMP(c1->cost, c2->cost);
237 static int cmp_irn_idx(const void *a, const void *b) {
238 const ir_node *n1 = *(ir_node **)a;
239 const ir_node *n2 = *(ir_node **)b;
241 return QSORT_CMP(get_irn_idx(n1), get_irn_idx(n2));
244 static int cmp_rss_edges(const void *a, const void *b) {
245 const rss_edge_t *e1 = a;
246 const rss_edge_t *e2 = b;
248 return (e1->src != e2->src) || (e1->tgt != e2->tgt);
251 static int bsearch_for_index(int key, int *arr, size_t len, int force) {
255 while (right >= left) {
256 int idx = (left + right) / 2;
260 else if (key > arr[idx])
267 assert(0 && "Something is wrong, key not found.");
271 static ir_node **build_sorted_array_from_list(plist_t *irn_list, struct obstack *obst) {
274 int len = plist_count(irn_list);
275 ir_node **arr = NEW_ARR_D(ir_node *, obst, len);
277 /* copy the list into the array */
278 foreach_plist(irn_list, el) {
279 arr[i++] = plist_element_get_value(el);
282 /* sort the array by node index */
283 qsort(arr, len, sizeof(arr[0]), cmp_irn_idx);
288 /*****************************************************
291 * __| | ___| |__ _ _ __ _ __ _ _ _ __ __ _
292 * / _` |/ _ \ '_ \| | | |/ _` |/ _` | | '_ \ / _` |
293 * | (_| | __/ |_) | |_| | (_| | (_| | | | | | (_| |
294 * \__,_|\___|_.__/ \__,_|\__, |\__, |_|_| |_|\__, |
298 *****************************************************/
301 static void dump_nodeset(nodeset *ns, const char *prefix) {
303 foreach_nodeset(ns, irn) {
304 ir_fprintf(stderr, "%s%+F\n", prefix, irn);
309 static void build_file_name(rss_t *rss, const char *suffix, size_t suf_len, char *buf, size_t len) {
310 const char *irg_name;
313 irg_name = get_entity_name(get_irg_entity(rss->irg));
314 snprintf(buf, len - suf_len, "%s-%s-block-%ld",
315 irg_name, arch_register_class_name(rss->cls), get_irn_node_nr(rss->block));
319 /* Dumps all collected bipartite components of current irg as vcg. */
320 static void debug_vcg_dump_bipartite(rss_t *rss) {
324 static const char suffix[] = "-RSS-CBC.vcg";
326 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
327 f = fopen(file_name, "w");
329 ir_fprintf(f, "graph: { title: \"connected bipartite component graph of %+F\"\n", rss->irg);
330 fprintf(f, "display_edge_labels: no\n");
331 fprintf(f, "layoutalgorithm: mindepth\n");
332 fprintf(f, "manhattan_edges: yes\n\n");
334 foreach_pset(rss->cbc_set, cbc) {
338 fprintf(f, "graph: { titel: \"cbc %d\" label: \"cbc %d\" status:clustered color:yellow\n", cbc->nr, cbc->nr);
339 foreach_nodeset(cbc->parents, n) {
340 ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
342 foreach_nodeset(cbc->children, n) {
343 ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
345 foreach_pset(cbc->kill_edges, ke) {
346 ir_fprintf(f, "edge: { sourcename: \"n%d_%d\" targetname: \"n%d_%d\" }\n",
347 get_irn_node_nr(ke->src), cbc->nr, get_irn_node_nr(ke->tgt), cbc->nr);
355 /* Dump the computed killing function as vcg. */
356 static void debug_vcg_dump_kill(rss_t *rss) {
360 static const char suffix[] = "-RSS-KILL.vcg";
362 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
363 f = fopen(file_name, "w");
365 ir_fprintf(f, "graph: { title: \"computed kill graph of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
366 fprintf(f, "display_edge_labels: no\n");
367 fprintf(f, "layoutalgorithm: mindepth\n");
368 fprintf(f, "manhattan_edges: yes\n\n");
371 /* reset dump_flag */
373 foreach_phase_irn(&rss->ph, irn) {
374 rss_irn_t *node = get_rss_irn(rss, irn);
379 /* dump all nodes and their killers */
380 foreach_plist(rss->nodes, el) {
381 ir_node *irn = plist_element_get_value(el);
382 rss_irn_t *rirn = get_rss_irn(rss, irn);
383 rss_irn_t *pk_rirn = get_rss_irn(rss, rirn->killer);
385 if (! rirn->dumped) {
386 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
390 if (! pk_rirn->dumped) {
391 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(rirn->killer), rirn->killer);
395 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
396 get_irn_node_nr(rirn->killer), get_irn_node_nr(irn));
403 /* Dumps the potential killing DAG (PKG) as vcg. */
404 static void debug_vcg_dump_pkg(rss_t *rss, nodeset *max_ac, int iteration) {
407 char *suffix = alloca(32);
408 static const char suffix1[] = "-RSS-PKG.vcg";
409 static const char suffix2[] = "-RSS-PKG-MAXAC.vcg";
413 snprintf(suffix, 32, "%s", suffix1);
416 snprintf(suffix, 32, "-%02d%s", iteration, suffix2);
419 build_file_name(rss, suffix, strlen(suffix) + 1, file_name, sizeof(file_name));
420 f = fopen(file_name, "w");
422 ir_fprintf(f, "graph: { title: \"potential killing DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
423 fprintf(f, "display_edge_labels: no\n");
424 fprintf(f, "layoutalgorithm: mindepth\n");
425 fprintf(f, "manhattan_edges: yes\n\n");
428 /* reset dump_flag */
430 foreach_phase_irn(&rss->ph, irn) {
431 rss_irn_t *node = get_rss_irn(rss, irn);
436 foreach_plist(rss->nodes, el) {
437 ir_node *irn = plist_element_get_value(el);
438 rss_irn_t *rirn = get_rss_irn(rss, irn);
440 plist_element_t *k_el;
442 /* dump selected saturating values in yellow */
443 if (max_ac && nodeset_find(max_ac, irn))
446 if (! rirn->dumped) {
448 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F c%d\" %s }\n", get_irn_node_nr(irn), irn, rirn->chain->nr, c1);
450 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(irn), irn, c1);
454 foreach_plist(rirn->pkiller_list, k_el) {
455 ir_node *pkiller = plist_element_get_value(k_el);
456 rss_irn_t *pk_rirn = get_rss_irn(rss, pkiller);
459 if (max_ac && nodeset_find(max_ac, pkiller))
462 if (! pk_rirn->dumped) {
464 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F c%d\" %s }\n", get_irn_node_nr(pkiller), pkiller, pk_rirn->chain->nr, c2);
466 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(pkiller), pkiller, c2);
469 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
470 get_irn_node_nr(pkiller), get_irn_node_nr(irn));
477 /* Dumps the disjoint value DAG (DVG) as vcg. */
478 static void debug_vcg_dump_dvg(rss_t *rss, dvg_t *dvg) {
479 static const char suffix[] = "-RSS-DVG.vcg";
485 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
486 f = fopen(file_name, "w");
488 ir_fprintf(f, "graph: { title: \"disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
489 fprintf(f, "display_edge_labels: no\n");
490 fprintf(f, "layoutalgorithm: mindepth\n");
491 fprintf(f, "manhattan_edges: yes\n\n");
494 foreach_nodeset(dvg->nodes, irn) {
495 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
499 foreach_pset(dvg->edges, edge) {
500 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
501 get_irn_node_nr(edge->src), get_irn_node_nr(edge->tgt));
509 /* Dumps the PKG(DVG). */
510 static void debug_vcg_dump_dvg_pkiller(rss_t *rss, dvg_t *dvg) {
511 static const char suffix[] = "-RSS-DVG-PKG.vcg";
516 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
517 f = fopen(file_name, "w");
519 ir_fprintf(f, "graph: { title: \"PKG of disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
520 fprintf(f, "display_edge_labels: no\n");
521 fprintf(f, "layoutalgorithm: mindepth\n");
522 fprintf(f, "manhattan_edges: yes\n\n");
525 foreach_nodeset(dvg->nodes, irn) {
526 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
530 foreach_nodeset(dvg->nodes, irn) {
531 rss_irn_t *node = get_rss_irn(rss, irn);
534 foreach_plist(node->dvg_pkiller_list, el) {
535 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
536 get_irn_node_nr(plist_element_get_value(el)), get_irn_node_nr(irn));
546 * In case there is no rss information for irn, initialize it.
548 static void *init_rss_irn(phase_t *ph, ir_node *irn, void *old) {
549 rss_irn_t *res = old ? old : phase_alloc(ph, sizeof(res[0]));
551 res->descendant_list = plist_obstack_new(phase_obst(ph));
552 res->descendants = NULL;
554 res->consumer_list = plist_obstack_new(phase_obst(ph));
555 res->consumer = NULL;
557 res->pkiller_list = plist_obstack_new(phase_obst(ph));
559 res->parent_list = plist_obstack_new(phase_obst(ph));
577 * Collect all nodes data dependent on current node.
579 static void collect_descendants(rss_t *rss, rss_irn_t *rirn, ir_node *irn, int *got_sink, unsigned cur_desc_walk) {
580 const ir_edge_t *edge;
581 rss_irn_t *cur_node = get_rss_irn(rss, irn);
582 ir_node *block = rss->block;
583 ir_edge_kind_t ekind[2] = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
586 if (cur_node->desc_walk >= cur_desc_walk)
588 cur_node->desc_walk = cur_desc_walk;
590 /* Stop at Barriers */
591 if (be_is_Barrier(irn))
594 /* loop over normal and dependency edges */
595 for (i = 0; i < 2; ++i) {
596 foreach_out_edge_kind(irn, edge, ekind[i]) {
597 ir_node *user = get_edge_src_irn(edge);
599 /* skip ignore nodes as they do not really contribute to register pressure */
600 if (arch_irn_is(rss->arch_env, user, ignore))
604 (a) mode_X means Jump -> out_edge
605 (b) Phi as user of a normal node -> out-edge
607 if (get_irn_mode(user) == mode_X || is_Phi(user)) {
616 //if (arch_get_irn_reg_class(rss->arch_env, user, -1) == rss->cls)
617 collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
620 /* check if user lives in block and is not a control flow node */
621 if (get_nodes_block(user) == block) {
622 if (! plist_has_value(rirn->descendant_list, user)) {
623 plist_insert_back(rirn->descendant_list, user);
624 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", user));
626 collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
628 else if (! *got_sink) {
630 /* user lives out of block: add sink as descendant if not already done */
631 plist_insert_back(rirn->descendant_list, _sink);
633 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", _sink));
641 * Handles a single consumer.
643 static void collect_single_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *consumer, int *got_sink) {
644 ir_node *block = rss->block;
646 assert(! is_Proj(consumer) && "Cannot handle Projs");
648 if (! is_Phi(consumer) && ! is_Block(consumer) && get_nodes_block(consumer) == block) {
649 if (! arch_irn_is(rss->arch_env, consumer, ignore) && ! plist_has_value(rss_irn->consumer_list, consumer)) {
650 plist_insert_back(rss_irn->consumer_list, consumer);
651 DBG((rss->dbg, LEVEL_2, "\t\tconsumer %+F\n", consumer));
655 rss_irn->live_out = 1;
656 DBG((rss->dbg, LEVEL_2, "\t\tlive out %+F", consumer));
658 plist_insert_back(rss_irn->consumer_list, _sink);
660 DB((rss->dbg, LEVEL_2, ", %+F added instead", _sink));
662 DB((rss->dbg, LEVEL_2, "\n"));
667 * Collect all nodes consuming the value(s) produced by current node.
669 static void collect_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *irn, int *got_sink) {
670 const ir_edge_t *edge;
672 ir_edge_kind_t ekind[2] = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
673 rss_irn_t *cur_node = get_rss_irn(rss, irn);
675 if (cur_node->havecons)
677 cur_node->havecons = 1;
679 for (i = 0; i < 2; ++i) {
680 foreach_out_edge_kind(irn, edge, ekind[i]) {
681 ir_node *consumer = get_edge_src_irn(edge);
683 if (is_Proj(consumer)) {
684 //if (arch_get_irn_reg_class(rss->arch_env, consumer, -1) == rss->cls)
685 collect_consumer(rss, rss_irn, consumer, got_sink);
688 collect_single_consumer(rss, rss_irn, consumer, got_sink);
694 * Collects all consumer and descendant of a irn.
696 static void collect_node_info(rss_t *rss, ir_node *irn) {
697 static unsigned cur_desc_walk = 0;
698 rss_irn_t *rss_irn = get_rss_irn(rss, irn);
701 assert(! is_Proj(irn) && "Cannot handle Projs.");
703 /* check if node info is already available */
704 if (rss_irn->handled)
706 //phase_reinit_single_irn_data(&rss->ph, irn);
708 DBG((rss->dbg, LEVEL_1, "\tcomputing consumers of %+F:\n", irn));
710 /* collect all consumer */
712 collect_consumer(rss, rss_irn, irn, &got_sink);
714 /* build sorted consumer array */
715 rss_irn->consumer = build_sorted_array_from_list(rss_irn->consumer_list, phase_obst(&rss->ph));
717 DBG((rss->dbg, LEVEL_1, "\tcompute descendants of %+F:\n", irn));
719 /* collect descendants */
721 collect_descendants(rss, rss_irn, irn, &got_sink, ++cur_desc_walk);
723 /* build sorted descendant array */
724 rss_irn->descendants = build_sorted_array_from_list(rss_irn->descendant_list, phase_obst(&rss->ph));
726 rss_irn->handled = 1;
730 * Checks if v is a potential killer of u.
731 * v is in pkill(u) iff descendants(v) cut consumer(u) is v
733 * @param rss The rss object
734 * @param v The node to check for killer
735 * @param u The potentially killed value
736 * @return 1 if v is in pkill(u), 0 otherwise
738 static int is_potential_killer(rss_t *rss, rss_irn_t *v, rss_irn_t *u) {
743 assert(is_Sink(v->irn) || ((plist_count(v->descendant_list) > 0 && v->descendants) || 1));
744 assert(is_Sink(u->irn) || ((plist_count(u->consumer_list) > 0 && u->consumer) || 1));
746 /* as we loop over the list: loop over the shorter one */
747 if (plist_count(v->descendant_list) > plist_count(u->consumer_list)) {
748 list = u->consumer_list;
749 arr = v->descendants;
752 list = v->descendant_list;
756 /* for each list element: try to find element in array */
757 foreach_plist(list, el) {
758 ir_node *irn = plist_element_get_value(el);
759 ir_node *match = BSEARCH_IRN_ARR(irn, arr);
761 if (match && match != irn)
769 * Update descendants, consumer and pkiller list for the given irn.
771 static void update_node_info(rss_t *rss, ir_node *irn, ir_node *pk_irn) {
772 rss_irn_t *node = get_rss_irn(rss, irn);
773 rss_irn_t *pkiller = get_rss_irn(rss, pk_irn);
775 /* add consumer and rebuild the consumer array */
776 if (! plist_has_value(node->consumer_list, pk_irn)) {
777 plist_insert_back(node->consumer_list, pk_irn);
778 node->consumer = build_sorted_array_from_list(node->consumer_list, phase_obst(&rss->ph));
781 /* add pk_irn as descendant, add it's descendants to irn's and rebuild array */
782 if (! plist_has_value(node->descendant_list, pk_irn)) {
785 plist_insert_back(node->descendant_list, pk_irn);
787 foreach_plist(pkiller->descendant_list, el) {
788 ir_node *desc = plist_element_get_value(el);
790 if (! plist_has_value(node->descendant_list, desc))
791 plist_insert_back(node->descendant_list, desc);
794 node->descendants = build_sorted_array_from_list(node->descendant_list, phase_obst(&rss->ph));
800 * Compute the potential killing set PK.
802 static void compute_pkill_set(rss_t *rss) {
803 plist_element_t *u_el, *v_el;
805 foreach_plist(rss->nodes, u_el) {
806 ir_node *u_irn = plist_element_get_value(u_el);
807 rss_irn_t *u = get_rss_irn(rss, u_irn);
809 DBG((rss->dbg, LEVEL_1, "\tcomputing potential killers of %+F:\n", u_irn));
811 /* check each consumer if it is a potential killer */
812 foreach_plist(u->consumer_list, v_el) {
813 ir_node *v_irn = plist_element_get_value(v_el);
814 rss_irn_t *v = get_rss_irn(rss, v_irn);
816 /* check, if v is a potential killer of u */
817 if (is_potential_killer(rss, v, u)) {
818 if (! plist_has_value(u->pkiller_list, v_irn))
819 plist_insert_back(u->pkiller_list, v_irn);
821 DBG((rss->dbg, LEVEL_2, "\t\tpotential killer %+F\n", v_irn));
828 if (rss->opts->dump_flags & RSS_DUMP_PKG)
829 debug_vcg_dump_pkg(rss, NULL, 0);
833 * Build set of killing edges (from values to their potential killers)
835 static void build_kill_edges(rss_t *rss, pset *epk) {
836 plist_element_t *el, *k_el;
838 foreach_plist(rss->nodes, el) {
839 ir_node *irn = plist_element_get_value(el);
840 rss_irn_t *rirn = get_rss_irn(rss, irn);
842 DBG((rss->dbg, LEVEL_3, "\t\tbuilding kill edges for %+F:\n", irn));
844 foreach_plist(rirn->pkiller_list, k_el) {
845 ir_node *pkiller = plist_element_get_value(k_el);
846 rss_edge_t *ke = obstack_alloc(phase_obst(&rss->ph), sizeof(*ke));
852 DBG((rss->dbg, LEVEL_3, "\t\t\ttarget %+F\n", pkiller));
854 pset_insert(epk, ke, HASH_RSS_EDGE(ke));
860 /* print the given cbc for debugging purpose */
861 static void debug_print_cbc(firm_dbg_module_t *mod, cbc_t *cbc) {
865 DBG((mod, LEVEL_3, "\t\tS = set of parents:\n"));
866 foreach_nodeset(cbc->parents, n) {
867 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
869 DBG((mod, LEVEL_3, "\t\tT = set of children:\n"));
870 foreach_nodeset(cbc->children, n) {
871 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
873 DBG((mod, LEVEL_3, "\t\tE = Edges from producers to consumers\n"));
874 foreach_pset(cbc->kill_edges, ke) {
875 DBG((mod, LEVEL_3, "\t\t\t%+F -> %+F\n", ke->src, ke->tgt));
881 * Construct the bipartite decomposition.
882 * Sid-Ahmed-Ali Touati, Phd Thesis
883 * Register Pressure in Instruction Level Parallelism, p. 71
885 static void compute_bipartite_decomposition(rss_t *rss) {
886 pset *epk = new_pset(cmp_rss_edges, 10);
891 DBG((rss->dbg, LEVEL_1, "\tcomputing bipartite decomposition:\n"));
893 build_kill_edges(rss, epk);
895 foreach_plist(rss->nodes, el) {
896 ir_node *u_irn = plist_element_get_value(el);
897 rss_irn_t *u = get_rss_irn(rss, u_irn);
903 plist_element_t *el2;
905 rss_edge_t *kedge_root = NULL;
906 ir_node *t_irn, *s_irn;
908 if (u->visited || u_irn == _sink)
911 DBG((rss->dbg, LEVEL_2, "\t\t%+F choosen:\n", u_irn));
913 cbc = obstack_alloc(phase_obst(&rss->ph), sizeof(*cbc));
916 /* initialize S_cb */
917 cbc->parents = new_nodeset(5);
918 nodeset_insert(cbc->parents, u_irn);
919 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (init)\n", u_irn));
922 cbc->kill_edges = new_pset(cmp_rss_edges, 5);
924 /* each parent gets killed by at least one value from children */
926 /* T_cb = PK_successors(u) */
927 cbc->children = new_nodeset(5);
928 foreach_plist(u->pkiller_list, el2) {
929 nodeset_insert(cbc->children, plist_element_get_value(el2));
930 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (init)\n", plist_element_get_value(el2)));
934 Now: insert the parents of all children into the parent set
935 and insert the children of all parents into the children set
936 until the sets don't change any more
938 while (p_change || c_change) {
939 p_change = c_change = 0;
941 /* accumulate parents */
942 foreach_nodeset(cbc->children, t_irn) {
943 foreach_pset(epk, k_edge) {
944 ir_node *val = k_edge->src;
946 if (k_edge->tgt == t_irn && ! nodeset_find(cbc->parents, val)) {
947 nodeset_insert(cbc->parents, val);
949 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (killed by %+F)\n", val, t_irn));
954 /* accumulate children */
955 foreach_nodeset(cbc->parents, s_irn) {
956 foreach_pset(epk, k_edge) {
957 ir_node *val = k_edge->tgt;
959 if (k_edge->src == s_irn && ! nodeset_find(cbc->children, val)) {
960 nodeset_insert(cbc->children, val);
962 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (kills %+F)\n", val, s_irn));
968 /* mark all parent values as visited */
969 foreach_nodeset(cbc->parents, s_irn) {
970 rss_irn_t *s = get_rss_irn(rss, s_irn);
972 /* assure bipartite property */
974 if (nodeset_find(cbc->children, s_irn)) {
975 nodeset_remove(cbc->children, s_irn);
976 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F removed from to children\n", s_irn));
982 foreach_pset(epk, k_edge) {
983 if (nodeset_find(cbc->parents, k_edge->src) && nodeset_find(cbc->children, k_edge->tgt)) {
984 pset_insert(cbc->kill_edges, k_edge, HASH_RSS_EDGE(k_edge));
986 Link all k_edges which are about to be removed together.
987 Beware: pset_remove kills the iterator.
989 k_edge->next = kedge_root;
994 /* remove all linked k_edges */
995 for (; kedge_root; kedge_root = kedge_root->next) {
996 pset_remove(epk, kedge_root, HASH_RSS_EDGE(kedge_root));
1000 foreach_nodeset(cbc->parents, s_irn) {
1003 foreach_pset(cbc->kill_edges, k_edge) {
1004 if (k_edge->src == s_irn) {
1006 pset_break(cbc->kill_edges);
1013 ir_fprintf(stderr, "Warning: parent %+F is not killed in current cbc\n", s_irn);
1016 assert(vrfy_ok && "Verification of CBC failed");
1018 /* add the connected bipartite component */
1019 if (nodeset_count(cbc->parents) > 0 && nodeset_count(cbc->children) > 0 && pset_count(cbc->kill_edges) > 0)
1020 pset_insert(rss->cbc_set, cbc, (unsigned)cbc->nr);
1021 DBG((rss->dbg, LEVEL_2, "\tbipartite component %d inserted:\n", cbc->nr));
1023 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2)
1024 debug_print_cbc(rss->dbg, cbc);
1028 if (rss->opts->dump_flags & RSS_DUMP_CBC)
1029 debug_vcg_dump_bipartite(rss);
1035 * Select the child with the maximum cost.
1037 static child_t *select_child_max_cost(rss_t *rss, nodeset *x, nodeset *y, child_t *t, cbc_t *cbc) {
1039 float max_cost = -1.0f;
1041 DBG((rss->dbg, LEVEL_2, "\t\tcomputing children costs:\n"));
1043 foreach_nodeset(cbc->children, child) {
1044 rss_irn_t *r_child = get_rss_irn(rss, child);
1045 int num_unkilled_parents = 0;
1046 int num_descendants;
1050 /* get the number of unkilled parents */
1051 foreach_pset(cbc->kill_edges, k_edge) {
1052 if (k_edge->tgt == child && nodeset_find(x, k_edge->src))
1053 ++num_unkilled_parents;
1056 cost = (float)num_unkilled_parents;
1058 num_descendants = plist_count(r_child->descendant_list) + nodeset_count(y);
1060 if (num_descendants > 0)
1061 cost /= (float)num_descendants;
1063 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F, #desc %d, cost %.3f\n", child, num_descendants, cost));
1065 if (cost > max_cost) {
1076 * Remove all parents from x which are killed by t_irn.
1078 static void remove_covered_parents(rss_t *rss, nodeset *x, ir_node *t_irn, cbc_t *cbc) {
1079 rss_irn_t *t = get_rss_irn(rss, t_irn);
1082 DBG((rss->dbg, LEVEL_2, "\t\tremoving parents covered by %+F:\n", t_irn));
1084 foreach_pset(cbc->kill_edges, k_edge) {
1085 if (k_edge->tgt == t_irn && nodeset_find(x, k_edge->src)) {
1086 nodeset_remove(x, k_edge->src);
1087 plist_insert_back(t->parent_list, k_edge->src);
1088 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", k_edge->src));
1093 static void update_cumulated_descendent_values(rss_t *rss, nodeset *y, ir_node *t_irn) {
1094 rss_irn_t *t = get_rss_irn(rss, t_irn);
1095 plist_element_t *el;
1097 DBG((rss->dbg, LEVEL_2, "\t\tupdating cumulated descendant value of %+F:\n", t_irn));
1099 foreach_plist(t->descendant_list, el) {
1100 nodeset_insert(y, plist_element_get_value(el));
1101 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", plist_element_get_value(el)));
1106 * Greedy-k: a heuristics for the MMA problem
1108 static void compute_killing_function(rss_t *rss) {
1110 struct obstack obst;
1112 obstack_init(&obst);
1114 rss->cbc_set = pset_new_ptr(5);
1115 compute_bipartite_decomposition(rss);
1117 /* for all bipartite components do: */
1118 foreach_pset(rss->cbc_set, cbc) {
1120 nodeset *x = new_nodeset(10);
1121 nodeset *y = new_nodeset(10);
1122 child_t **sks = NEW_ARR_F(child_t *, 20);
1127 DBG((rss->dbg, LEVEL_1, "\tcomputing SKS for cbc %d:\n", cbc->nr));
1128 DBG((rss->dbg, LEVEL_2, "\t\tinitializing parents X:\n"));
1130 /* X = S_cb (all parents are initially uncovered) */
1131 foreach_nodeset(cbc->parents, p) {
1132 nodeset_insert(x, p);
1133 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", p));
1136 /* while X not empty */
1137 while (nodeset_count(x) > 0) {
1138 child_t *t = obstack_alloc(&obst, sizeof(*t));
1139 memset(t, 0, sizeof(*t));
1141 t = select_child_max_cost(rss, x, y, t, cbc);
1143 if (cur_len >= cur_size) {
1144 ARR_EXTO(child_t *, sks, cur_size * 2);
1148 DBG((rss->dbg, LEVEL_2, "\t\tinsert child %+F (%.3f) into SKS at pos %d\n", t->irn, t->cost, cur_len));
1151 remove_covered_parents(rss, x, t->irn, cbc);
1152 update_cumulated_descendent_values(rss, y, t->irn);
1155 ARR_SHRINKLEN(sks, cur_len);
1157 /* sort SKS in increasing cost order */
1158 qsort(sks, cur_len, sizeof(sks[0]), cmp_child_costs);
1160 DBG((rss->dbg, LEVEL_2, "\tprocessing SKS for cbc %d:\n", cbc->nr));
1162 /* build killing function */
1163 for (i = cur_len - 1; i >= 0; --i) { /* loop over sks in decreasing cost order */
1164 child_t *t = sks[i];
1165 rss_irn_t *rt = get_rss_irn(rss, t->irn);
1166 plist_element_t *p_el;
1168 DBG((rss->dbg, LEVEL_3, "\t\t\tkiller %+F (%.3f):\n", t->irn, t->cost));
1170 /* kill all unkilled parents of t */
1171 foreach_plist(rt->parent_list, p_el) {
1172 ir_node *par = plist_element_get_value(p_el);
1173 rss_irn_t *rpar = get_rss_irn(rss, par);
1175 if (is_Sink(rpar->killer)) {
1176 rpar->killer = t->irn;
1177 DBG((rss->dbg, LEVEL_2, "\t\tkill %+F\n", rpar->irn));
1180 DBG((rss->dbg, LEVEL_3, "\t\t\tkeeping %+F as killer for %+F\n", rpar->killer, rpar->irn));
1190 if (rss->opts->dump_flags & RSS_DUMP_KILL)
1191 debug_vcg_dump_kill(rss);
1193 del_pset(rss->cbc_set);
1194 obstack_free(&obst, NULL);
1198 * Adds the edge src -> tgt to the dvg. Checks if reverse edge is already there (asserts).
1200 static INLINE void add_dvg_edge(rss_t *rss, dvg_t *dvg, ir_node *src, ir_node *tgt, int have_source) {
1201 rss_edge_t *dvg_edge;
1205 nodeset_insert(dvg->nodes, src);
1207 assert(nodeset_find(dvg->nodes, src) != NULL && "Wrong assumption");
1209 nodeset_insert(dvg->nodes, tgt);
1213 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1217 if (NULL != pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1218 /* add the edge to the DVG */
1219 dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1221 dvg_edge->src = src;
1222 dvg_edge->tgt = tgt;
1223 dvg_edge->next = NULL;
1225 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", src, tgt));
1226 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1231 * Computes the disjoint value DAG (DVG).
1232 * BEWARE: It is not made explicitly clear in the Touati paper,
1233 * but the DVG is meant to be build from the KILLING DAG
1235 static void compute_dvg(rss_t *rss, dvg_t *dvg) {
1236 plist_element_t *el;
1238 DBG((rss->dbg, LEVEL_1, "\tcomputing DVG:\n"));
1240 foreach_plist(rss->nodes, el) {
1241 ir_node *u_irn = plist_element_get_value(el);
1242 rss_irn_t *u = get_rss_irn(rss, u_irn);
1243 rss_irn_t *u_killer = get_rss_irn(rss, u->killer);
1246 /* TODO: omit nodes only having sink as pkiller and killing no one */
1248 /* add an edge to killer */
1249 add_dvg_edge(rss, dvg, u_irn, u->killer, 0);
1251 if (is_Sink(u->killer))
1254 /* We add an edge to every killer, from where we could be reached. */
1255 for (i = ARR_LEN_SAFE(u_killer->descendants) - 1; i >= 0; --i) {
1256 add_dvg_edge(rss, dvg, u_irn, u_killer->descendants[i], 1);
1261 foreach_plist(rss->nodes, el2) {
1262 ir_node *v_irn = plist_element_get_value(el2);
1265 There is an edge (u, v) in the DVG iff v is a descendant of the killer(u).
1267 if (BSEARCH_IRN_ARR(v_irn, u_kill->descendants)) {
1268 rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1271 /* insert the user into the DVG and append it to the user list of u */
1272 nodeset_insert(dvg->nodes, v_irn);
1273 if (! plist_has_value(u->dvg_user_list, v_irn))
1274 plist_insert_back(u->dvg_user_list, v_irn);
1276 dvg_edge->src = u_irn;
1277 dvg_edge->tgt = v_irn;
1278 dvg_edge->next = NULL;
1283 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1285 /* add the edge to the DVG */
1286 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", u_irn, v_irn));
1287 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1293 if (rss->opts->dump_flags & RSS_DUMP_DVG)
1294 debug_vcg_dump_dvg(rss, dvg);
1298 * Updates the dvg structure when a serialization edge from src -> tgt is added.
1300 static void update_dvg(rss_t *rss, dvg_t *dvg, rss_irn_t *src, rss_irn_t *tgt) {
1303 rss_edge_t **arr = alloca(pset_count(dvg->edges) * sizeof(arr[0]));
1306 Add an edge from serialization target to serialization src:
1307 src cannot be alive with target
1309 add_dvg_edge(rss, dvg, tgt->irn, src->irn, 0);
1311 /* Add edges to src's descendants as well, they also getting serialized. */
1312 for (i = ARR_LEN_SAFE(src->descendants) - 1; i >= 0; --i) {
1313 add_dvg_edge(rss, dvg, tgt->irn, src->descendants[i], 1);
1316 /* We also have to add edges from targets predecessors in dvg */
1318 /* We cannot insert elements into set over which we iterate ... */
1319 foreach_pset(dvg->edges, edge) {
1320 if (edge->tgt == tgt->irn) {
1325 for (i = 0; i < idx; ++i) {
1327 add_dvg_edge(rss, dvg, edge->src, src->irn, 1);
1328 for (j = ARR_LEN_SAFE(src->descendants) - 1; j >= 0; --j) {
1329 add_dvg_edge(rss, dvg, edge->src, src->descendants[j], 1);
1336 * Accumulate all descendants for root into list.
1338 static void accumulate_dvg_descendant_values(rss_t *rss, rss_irn_t *root, plist_t *list) {
1339 if (plist_count(root->dvg_user_list) > 0) {
1340 plist_element_t *el;
1342 foreach_plist(root->dvg_user_list, el) {
1343 ir_node *v_irn = plist_element_get_value(el);
1344 rss_irn_t *v = get_rss_irn(rss, v_irn);
1346 /* add v as descendant */
1347 if (! plist_has_value(list, v_irn)) {
1348 plist_insert_back(list, v_irn);
1349 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG descendant %+F\n", v_irn));
1352 /* accumulate v's descendants */
1353 accumulate_dvg_descendant_values(rss, v, list);
1359 * Builds the list of potential killers for each node
1361 * Needs the descendant list for all user as sorted array.
1363 static void build_dvg_pkiller_list(rss_t *rss, dvg_t *dvg) {
1366 foreach_nodeset(dvg->nodes, irn) {
1367 rss_irn_t *node = get_rss_irn(rss, irn);
1368 plist_element_t *el, *el2;
1370 DBG((rss->dbg, DEBUG_DVG, "\t\tbuilding pkiller list for %+F\n", irn));
1372 /* check each user */
1373 foreach_plist(node->dvg_user_list, el) {
1374 ir_node *u_irn = plist_element_get_value(el);
1376 /* is the current user u_irn not a descendant of any other user -> pkiller */
1377 foreach_plist(node->dvg_user_list, el2) {
1378 ir_node *v_irn = plist_element_get_value(el2);
1379 rss_irn_t *v = get_rss_irn(rss, v_irn);
1382 ! BSEARCH_IRN_ARR(u_irn, v->dvg_desc) &&
1383 ! plist_has_value(node->dvg_pkiller_list, u_irn))
1385 plist_insert_back(node->dvg_pkiller_list, u_irn);
1386 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG pkiller %+F\n", u_irn));
1391 node->dvg_pkiller = build_sorted_array_from_list(node->dvg_pkiller_list, phase_obst(&rss->ph));
1395 if (firm_dbg_get_mask(rss->dbg) & DEBUG_DVG)
1396 debug_vcg_dump_dvg_pkiller(rss, dvg);
1403 * Compute the maximal antichain of the current DVG.
1404 * This is a reimplementation of the MAXIMAL_ANTI_CHAIN function
1405 * from the DDG library 1.1 (DAG.cpp).
1407 static nodeset *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration) {
1408 int n = nodeset_count(dvg->nodes);
1409 int *assignment = alloca(n * sizeof(assignment[0]));
1410 int *assignment_rev = alloca(n * sizeof(assignment_rev[0]));
1411 int *idx_map = alloca(n * sizeof(idx_map[0]));
1412 hungarian_problem_t *bp;
1413 nodeset *values, *temp;
1415 int i, j, cost, cur_chain, res;
1416 rss_edge_t *dvg_edge;
1418 #define MAP_IDX(irn) bsearch_for_index(get_irn_idx(irn), idx_map, n, 1)
1420 if (pset_count(dvg->edges) == 0)
1423 bp = hungarian_new(n, n, 1, HUNGARIAN_MATCH_NORMAL);
1426 At first, we build an index map for the nodes in the DVG,
1427 because we cannot use the irn idx therefore as the resulting
1428 bipartite data structure would have around 1.2GB.
1429 So we limit the size to the number of nodes we have in the DVG
1430 and build a sorted index map for their irn indices.
1433 foreach_nodeset(dvg->nodes, u_irn) {
1434 idx_map[i++] = get_irn_idx(u_irn);
1436 qsort(idx_map, n, sizeof(idx_map[0]), cmp_int);
1438 foreach_pset(dvg->edges, dvg_edge) {
1439 int idx_u = MAP_IDX(dvg_edge->src);
1440 int idx_v = MAP_IDX(dvg_edge->tgt);
1442 /* add the entry to the bipartite data structure */
1443 hungarian_add(bp, idx_u, idx_v, 1);
1444 DBG((rss->dbg, LEVEL_3, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1445 idx_u, dvg_edge->src, idx_v, dvg_edge->tgt));
1449 Add a bipartite entry for each pair of nodes (u, v), where exists a
1450 path in the DVG from u to v, ie. connect all descendants(v) to v.
1451 desc_dvg(v) = accumulated descendants of all z in dvg_user_list(v)
1453 foreach_nodeset(dvg->nodes, u_irn) {
1454 rss_irn_t *u = get_rss_irn(rss, u_irn);
1455 int idx_u_irn = MAP_IDX(u_irn);
1457 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG descendants of %+F:\n", u_irn));
1459 //plist_clear(u->dvg_desc_list);
1460 //accumulate_dvg_descendant_values(rss, u, u->dvg_desc_list);
1463 FIXME: The array is build on the phase obstack and we cannot free the data.
1464 So the array get as many times allocated as this function is called.
1467 /* build the sorted array for faster searches */
1468 //u->dvg_desc = build_sorted_array_from_list(u->dvg_desc_list, phase_obst(&rss->ph));
1470 DBG((rss->dbg, DEBUG_MAX_AC, "\t\tadding bipartite entries of %+F:\n", u_irn));
1472 /* add the bipartite entries for all descendants */
1473 for (i = ARR_LEN_SAFE(u->dvg_desc) - 1; i >= 0; --i) {
1474 rss_irn_t *desc = get_rss_irn(rss, u->dvg_desc[i]);
1475 int idx_desc = MAP_IDX(u->dvg_desc[i]);
1477 /* add the entry to the bipartite data structure */
1478 hungarian_add(bp, idx_u_irn, idx_desc, 1);
1479 DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1480 idx_u_irn, u_irn, idx_desc, u->dvg_desc[i]));
1487 /* We want maximum cardinality matching */
1488 hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);
1491 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG pkiller:\n"));
1492 /* beware: the following function needs the dvg_desc array */
1493 build_dvg_pkiller_list(rss, dvg);
1496 DBG((rss->dbg, LEVEL_2, "\t\tcomputing bipartite matching\n"));
1498 The maximum cardinality bipartite matching gives us the minimal
1499 chain partition, which corresponds to the maximum anti chains.
1501 res = hungarian_solve(bp, assignment, &cost, 1);
1502 assert(res == 0 && "Bipartite matching failed!");
1505 memset(assignment_rev, -1, n * sizeof(assignment_rev[0]));
1507 /* build the reverse assignment, ie. foreach i -> j, add j -> i */
1508 for (i = 0; i < n; ++i) {
1509 if (assignment[i] >= 0) {
1510 int j = assignment[i];
1511 assignment_rev[j] = i;
1515 DBG((rss->dbg, LEVEL_2, "\t\tgot assignment with cost %d\n", cost));
1516 DBG((rss->dbg, LEVEL_3, "\t\t\tassignment --- reverse assignment\n", cost));
1517 for (i = 0; i < n; ++i) {
1518 DBG((rss->dbg, LEVEL_3, "\t\t\t%3d -> %3d %3d -> %3d\n", i, assignment[i], i, assignment_rev[i]));
1521 values = new_nodeset(10);
1523 /* Construction of the minimal chain partition */
1524 for (j = 0; j < n; ++j) {
1525 /* check nodes, which did not occur as target */
1526 if (assignment_rev[j] == -1) {
1527 int xj = idx_map[j];
1528 ir_node *xj_irn = get_idx_irn(rss->irg, xj);
1529 rss_irn_t *xj_rss = get_rss_irn(rss, xj_irn);
1530 chain_t *c = obstack_alloc(phase_obst(&rss->ph), sizeof(*c));
1533 /* there was no source for j -> we have a source of a new chain */
1534 nodeset_insert(values, xj_irn);
1536 c->elements = plist_obstack_new(phase_obst(&rss->ph));
1537 c->nr = cur_chain++;
1538 plist_insert_back(c->elements, xj_irn);
1542 DBG((rss->dbg, LEVEL_2, "\t\tstarting chain %d:\n", c->nr));
1543 DBG((rss->dbg, LEVEL_2, "\t\t\t%+F (%d)", xj_irn, j));
1545 /* follow chain, having j as source */
1547 while (assignment[source] >= 0) {
1548 int target = assignment[source];
1549 int irn_idx = idx_map[target];
1550 ir_node *irn = get_idx_irn(rss->irg, irn_idx);
1551 rss_irn_t *node = get_rss_irn(rss, irn);
1553 plist_insert_back(c->elements, irn);
1556 DB((rss->dbg, LEVEL_2, " -> %+F (%d)", irn, target));
1558 /* new source = last target */
1562 DB((rss->dbg, LEVEL_2, "\n"));
1567 Computing the maximal antichain: Select an element from each
1568 chain such, such it is parallel with the others.
1570 DBG((rss->dbg, LEVEL_1, "\t\tcomputing set of saturation values (MAX AC)\n"));
1571 DBG((rss->dbg, LEVEL_3, "\t\tstarting with:\n"));
1574 if (firm_dbg_get_mask(rss->dbg) & LEVEL_3)
1575 dump_nodeset(values, "\t\t\t");
1581 We need an explicit array for the values as
1582 we cannot iterate multiple times over the same
1583 set at the same time. :-(((((
1585 int n = nodeset_count(values);
1587 ir_node **val_arr = NEW_ARR_F(ir_node *, n);
1589 foreach_nodeset(values, u_irn)
1590 val_arr[i++] = u_irn;
1595 temp = new_nodeset(10);
1597 /* Select all nodes from current value set, having another node in the set as descendant. */
1598 for (i = 0; i < n; ++i) {
1599 rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1601 for (j = 0; j < n; ++j) {
1605 key.src = val_arr[i];
1606 key.tgt = val_arr[j];
1608 if (pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1609 /* v[j] is descendant of u -> remove u and break */
1610 nodeset_insert(temp, u->irn);
1611 nodeset_remove(values, u->irn);
1613 DBG((rss->dbg, LEVEL_3, "\t\t\tremoving %+F from values, adding it to temp\n", u->irn));
1621 /* Try to insert the chain predecessor of all selected u's */
1622 foreach_nodeset(temp, u_irn) {
1623 rss_irn_t *u = get_rss_irn(rss, u_irn);
1624 chain_t *c = u->chain;
1625 plist_element_t *el = plist_find_value(c->elements, u_irn);
1627 assert(el && "Missing element in chain!");
1629 /* If u has predecessor in chain: insert the predecessor */
1630 if (el == plist_element_get_prev(el)) {
1631 nodeset_insert(values, plist_element_get_value(el));
1632 DBG((rss->dbg, LEVEL_3, "\t\t\tadding %+F to values\n", plist_element_get_value(el)));
1638 } while (nodeset_count(temp) > 0);
1640 DBG((rss->dbg, LEVEL_2, "\t\tfinal set:\n"));
1642 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2) {
1643 dump_nodeset(values, "\t\t\t");
1647 if (rss->opts->dump_flags & RSS_DUMP_MAXAC)
1648 debug_vcg_dump_pkg(rss, values, iteration);
1658 * Computes the best serialization between two nodes of sat_vals.
1660 static serialization_t *compute_best_admissible_serialization(rss_t *rss, nodeset *sat_vals, serialization_t *ser, int num_regs) {
1661 int n = nodeset_count(sat_vals);
1662 int n_idx = ARR_LEN_SAFE(rss->idx_map);
1664 ir_node **val_arr = alloca(n * sizeof(val_arr[0]));
1665 bitset_t *bs_sv = bitset_alloca(n_idx);
1666 bitset_t *bs_vdesc = bitset_alloca(n_idx);
1667 bitset_t *bs_tmp = bitset_alloca(n_idx);
1668 bitset_t *bs_ukilldesc = bitset_alloca(n_idx);
1669 int best_benefit = INT_MAX;
1670 int best_omega2 = INT_MAX;
1671 int best_benefit_omega20 = INT_MAX;
1675 rss_edge_t min_benefit_edge;
1676 rss_edge_t min_omega20_edge;
1677 rss_irn_t *ser_u_omega1 = NULL, *ser_v_omega1 = NULL;
1678 rss_irn_t *ser_u_omega20 = NULL, *ser_v_omega20 = NULL;
1680 DBG((rss->dbg, LEVEL_1, "\tcomputing admissible serializations:\n"));
1683 We need an explicit array for the values as
1684 we cannot iterate multiple times over the same
1685 set at the same time. :-(((((
1688 foreach_nodeset(sat_vals, irn) {
1690 bitset_set(bs_sv, BLOCK_IDX_MAP(rss, irn));
1694 We build all admissible serializations and remember the best found so far.
1697 if v in pkiller(u): add edge from v to all other pkiller(u)
1698 else: for all vv in pkiller(u): add edge from v to vv if there exists no path from vv to v
1702 A node is unserializable if:
1703 - it has only one killer and this one is Sink
1704 - it kills no other values
1705 In this case there is no serialization which could
1706 reduce the registerpressure
1708 #define IS_UNSERIALIZABLE_NODE(rss_node) \
1711 (plist_count(rss_node->pkiller_list) == 1) && \
1712 is_Sink(rss_node->killer) && \
1713 (rss_node->kill_count == 0) \
1715 be_is_Barrier(rss_node->irn) || \
1716 be_is_Keep(rss_node->irn) \
1719 /* for all u in sat_vals */
1720 for (i = 0; i < n; ++i) {
1721 rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1722 plist_element_t *el;
1724 /* ignore nodes where serialization does not help */
1725 if (IS_UNSERIALIZABLE_NODE(u)) {
1726 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", u->irn));
1730 /* accumulate all descendants of all pkiller(u) */
1731 bitset_clear_all(bs_ukilldesc);
1732 foreach_plist(u->pkiller_list, el) {
1733 ir_node *irn = plist_element_get_value(el);
1734 rss_irn_t *node = get_rss_irn(rss, irn);
1737 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, irn));
1741 for (k = ARR_LEN_SAFE(node->descendants) - 1; k >= 0; --k) {
1742 if (! is_Sink(node->descendants[k]))
1743 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, node->descendants[k]));
1747 /* for all v in sat_vals */
1748 for (j = 0; j < n; ++j) {
1749 ir_node *v_irn = val_arr[j];
1750 rss_irn_t *v = get_rss_irn(rss, v_irn);
1751 unsigned v_height = get_irn_height(rss->h, v_irn);
1752 int omega1, omega2, is_pkiller;
1754 /* v cannot be serialized with itself
1755 * ignore nodes where serialization does not help */
1756 if (i == j || IS_UNSERIALIZABLE_NODE(v)) {
1758 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", v->irn));
1762 /* get descendants of v */
1763 bitset_clear_all(bs_vdesc);
1764 bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v_irn));
1765 for (k = ARR_LEN_SAFE(v->descendants) - 1; k >= 0; --k) {
1766 if (! is_Sink(v->descendants[k]))
1767 bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v->descendants[k]));
1770 /* if v is in pkiller(u) */
1771 is_pkiller = plist_has_value(u->pkiller_list, v_irn);
1773 /* for all vv in pkiller(u) */
1774 foreach_plist(u->pkiller_list, el) {
1775 ir_node *vv_irn = plist_element_get_value(el);
1778 if (is_Sink(vv_irn) || is_cfop(vv_irn))
1782 add_edge = vv_irn != v_irn && skip_Proj(vv_irn) != skip_Proj(v_irn);
1784 add_edge = ! heights_reachable_in_block(rss->h, skip_Proj(vv_irn), skip_Proj(v_irn));
1787 As we add an edge from vv -> v, we have to make sure,
1788 that there exists no path from v to vv.
1792 int vv_height = get_irn_height(rss->h, vv_irn);
1793 int mu1, mu2, critical_path_cost;
1796 mu1 = | descendants(v) cut sat_vals |
1797 the number of saturating values which cannot
1798 be simultaneously alive with u
1800 bitset_copy(bs_tmp, bs_vdesc);
1801 mu1 = bitset_popcnt(bitset_and(bs_tmp, bs_sv));
1804 mu2 = | accum_desc_all_pkiller(u) without descendants(v) |
1807 bitset_copy(bs_tmp, bs_ukilldesc);
1808 mu2 = bitset_popcnt(bitset_andnot(bs_tmp, bs_vdesc));
1814 /* omega1 = mu1 - mu2 */
1820 /* omega2 = increase of critical path */
1821 critical_path_cost =
1822 v_height /* longest path from v to sink */
1823 + rss->max_height - vv_height /* longest path from source to vv */
1827 If critical_path_cost > max_height -> the new edge
1828 would increase the longest critical path by the difference.
1830 omega2 = critical_path_cost > rss->max_height ? critical_path_cost - rss->max_height : 0;
1832 /* this keeps track of the edge with the best benefit */
1833 if (omega1 >= num_regs - n && omega1 < best_benefit) {
1834 min_benefit_edge.src = v_irn;
1835 min_benefit_edge.tgt = vv_irn;
1840 best_benefit = omega1;
1841 ser->new_killer = is_pkiller;
1844 /* this keeps track of the edge with the best omega1 costs where omega2 == 0 */
1845 if (omega2 == 0 && omega1 >= num_regs - n && omega1 < best_benefit_omega20) {
1846 min_omega20_edge.src = v_irn;
1847 min_omega20_edge.tgt = vv_irn;
1852 best_benefit_omega20 = omega1;
1853 ser->new_killer = is_pkiller;
1856 best_omega2 = MIN(best_omega2, omega2);
1858 DBG((rss->dbg, LEVEL_2, "\t\tfound %+F -> %+F (w1 %d, w2 %d)\n",
1859 v_irn, vv_irn, omega1, omega2));
1861 } /* for all vv in pkiller(u) */
1862 } /* for all v in sat_vals */
1863 } /* for all u in sat_vals */
1868 if (best_omega2 == 0) {
1869 ser->u = ser_u_omega20;
1870 ser->v = ser_v_omega20;
1871 ser->edge->src = min_omega20_edge.src;
1872 ser->edge->tgt = min_omega20_edge.tgt;
1873 ser->omega1 = best_benefit_omega20;
1874 ser->omega2 = best_omega2;
1877 ser->u = ser_u_omega1;
1878 ser->v = ser_v_omega1;
1879 ser->edge->src = min_benefit_edge.src;
1880 ser->edge->tgt = min_benefit_edge.tgt;
1881 ser->omega1 = best_benefit;
1882 ser->omega2 = best_omega2;
1887 #undef IS_UNSERIALIZABLE_NODE
1891 * Perform the value serialization heuristic and add all
1892 * computed serialization edges as dependencies to the irg.
1894 static void perform_value_serialization_heuristic(rss_t *rss) {
1895 bitset_t *arch_nonign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1896 bitset_t *abi_ign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1897 int available_regs, iteration;
1900 pset *ser_set = new_pset(cmp_rss_edges, 20);
1902 /* available_regs = R = |arch_non_ignore_regs cut ~abi_ignore_regs| */
1903 arch_put_non_ignore_regs(rss->arch_env, rss->cls, arch_nonign_bs);
1904 be_abi_put_ignore_regs(rss->abi, rss->cls, abi_ign_bs);
1905 bitset_andnot(arch_nonign_bs, abi_ign_bs);
1906 available_regs = bitset_popcnt(arch_nonign_bs);
1907 //num_live = pset_count(rss->live_block);
1908 //available_regs -= num_live < available_regs ? num_live : 0;
1910 DBG((rss->dbg, LEVEL_1, "\n\t#available regs: %d\n\n", available_regs));
1913 At first we need to compute the disjoint value DAG (DVG = {V, E_dv}).
1914 V = set of all nodes we are currently interested in
1915 E_dv = there is an edge from u to v iff v is a descendant of killer(u), forall u, v in V
1917 dvg.nodes = new_nodeset(plist_count(rss->nodes));
1918 dvg.edges = new_pset(cmp_rss_edges, plist_count(rss->nodes) * 5);
1919 compute_dvg(rss, &dvg);
1922 Then we perform the heuristic serialization algorithm
1923 on the DVG which gives us all necessary serialization
1926 DBG((rss->dbg, LEVEL_1, "\tcomputing maximal antichain:\n"));
1928 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
1929 while (sat_vals && (nodeset_count(sat_vals) > available_regs)) {
1930 serialization_t *ser, best_ser;
1931 rss_edge_t *edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*edge));
1932 ir_node *dep_src, *dep_tgt;
1934 best_ser.edge = edge;
1935 ser = compute_best_admissible_serialization(rss, sat_vals, &best_ser, available_regs);
1937 DBG((rss->dbg, LEVEL_1, "\tcurrent register saturation %d, target %d\n", nodeset_count(sat_vals), available_regs));
1940 DBG((rss->dbg, LEVEL_1, "\tno RS improving serialization found, breaking at iteration %d\n", iteration));
1944 /* Insert the serialization as dependency edge into the irg. */
1945 DBG((rss->dbg, LEVEL_1, "\tserializing %+F -> %+F with edge %+F -> %+F and cost %d, %d\n",
1946 ser->u->irn, ser->v->irn, ser->edge->src, ser->edge->tgt, ser->omega1, ser->omega2));
1948 if (pset_find(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge)))
1949 ir_printf("WARNING: serialization %+F -> %+F computed twice!\n", ser->edge->src, ser->edge->tgt);
1952 pset_insert(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge));
1954 /* update the dvg */
1955 update_dvg(rss, &dvg, ser->v, ser->u);
1956 update_dvg(rss, &dvg, get_rss_irn(rss, ser->edge->src), get_rss_irn(rss, ser->edge->tgt));
1957 del_nodeset(sat_vals);
1959 dep_src = skip_Proj(ser->edge->src);
1960 dep_tgt = ser->edge->tgt;
1961 add_irn_dep(dep_src, dep_tgt);
1963 /* Update descendants, consumer and pkillers of the target */
1964 update_node_info(rss, ser->edge->tgt, ser->edge->src);
1966 /* TODO: try to find a cheaper way for updating height information */
1967 rss->max_height = heights_recompute_block(rss->h, rss->block);
1969 /* Recompute the antichain for next serialization */
1970 DBG((rss->dbg, LEVEL_1, "\tre-computing maximal antichain:\n"));
1971 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
1974 del_nodeset(dvg.nodes);
1975 del_pset(dvg.edges);
1979 * Do initial calculations for a block.
1981 static void process_block(ir_node *block, void *env) {
1984 const ir_edge_t *edge;
1986 phase_init(&rss->ph, "rss block preprocessor", rss->irg, PHASE_DEFAULT_GROWTH, init_rss_irn);
1988 DBG((rss->dbg, LEVEL_1, "preprocessing block %+F\n", block));
1991 /* build an index map for all nodes in the current block */
1993 n = get_irn_n_edges(block);
1994 NEW_ARR_A(int *, rss->idx_map, n);
1995 foreach_out_edge(block, edge) {
1996 ir_node *irn = get_edge_src_irn(edge);
1997 rss->idx_map[i++] = get_irn_idx(irn);
1999 qsort(rss->idx_map, n, sizeof(rss->idx_map[0]), cmp_int);
2000 rss->max_height = heights_recompute_block(rss->h, block);
2002 /* loop over all register classes */
2003 for (i = arch_isa_get_n_reg_class(rss->arch_env->isa) - 1; i >= 0; --i) {
2004 const arch_register_class_t *cls = arch_isa_get_reg_class(rss->arch_env->isa, i);
2007 DBG((rss->dbg, LEVEL_1, "register class %s\n", arch_register_class_name(cls)));
2009 /* Get all live value at end of Block having current register class */
2010 rss->live_block = pset_new_ptr(10);
2011 be_liveness_end_of_block(rss->liveness, rss->arch_env, rss->cls, rss->block, rss->live_block);
2013 /* reset the list of interesting nodes */
2014 plist_clear(rss->nodes);
2015 plist_insert_back(rss->nodes, _sink);
2017 /* collect all nodes having a certain register class */
2018 foreach_out_edge(block, edge) {
2019 ir_node *irn = get_edge_src_irn(edge);
2020 ir_mode *mode = get_irn_mode(irn);
2024 - mode_T nodes (the projs are asked)
2025 - mode_X nodes (control flow nodes are always scheduled last)
2026 - Keeps (they are always immediately scheduled)
2027 - Phi (same as Keep)
2029 if (mode == mode_T || mode == mode_X || is_Phi(irn))
2033 In case of a proj, we skip
2034 - Barrier (they are a Barrier :)
2036 - the Proj itself, as it's scheduled always with it's super node
2039 ir_node *pred = get_Proj_pred(irn);
2040 if (be_is_Barrier(pred) || is_Start(pred))
2044 /* calculate the descendants and consumer for each node in the block */
2045 collect_node_info(rss, skip_Proj(irn));
2047 if (be_is_Keep(irn))
2050 if (! arch_irn_is(rss->arch_env, irn, ignore) && arch_get_irn_reg_class(rss->arch_env, irn, -1) == cls) {
2051 plist_insert_back(rss->nodes, skip_Proj(irn));
2056 /* compute the potential killing set PK(G) */
2057 compute_pkill_set(rss);
2059 /* compute the killing function k* */
2060 compute_killing_function(rss);
2063 Compute the heuristic value serialization and
2064 add the necessary dependencies to the irg.
2066 perform_value_serialization_heuristic(rss);
2068 del_pset(rss->live_block);
2071 phase_free(&rss->ph);
2076 * Register the options.
2078 void rss_register_options(lc_opt_entry_t *grp) {
2079 static int run_once = 0;
2080 lc_opt_entry_t *rss_grp;
2084 rss_grp = lc_opt_get_grp(grp, "rss");
2086 lc_opt_add_table(rss_grp, rss_option_table);
2089 #endif /* WITH_LIBCORE */
2092 * Preprocess the irg for scheduling.
2094 void rss_schedule_preparation(const be_irg_t *birg) {
2097 FIRM_DBG_REGISTER(rss.dbg, "firm.be.sched.rss");
2099 //firm_dbg_set_mask(rss.dbg, LEVEL_1 | LEVEL_2 | LEVEL_3);
2101 init_rss_special_nodes(birg->irg);
2103 rss.irg = birg->irg;
2104 rss.arch_env = birg->main_env->arch_env;
2105 rss.abi = birg->abi;
2106 rss.h = heights_new(birg->irg);
2107 rss.nodes = plist_new();
2108 rss.opts = &rss_options;
2109 rss.liveness = be_liveness(birg->irg);
2110 irg_block_walk_graph(birg->irg, NULL, process_block, &rss);
2111 heights_free(rss.h);
2112 plist_free(rss.nodes);
2113 be_liveness_free(rss.liveness);
2115 if (birg->main_env->options->dump_flags & DUMP_SCHED)
2116 be_dump(rss.irg, "-rss", dump_ir_block_graph);