2 * Implementation of a register saturating list scheduler
3 * as described in: Sid-Ahmed-Ali Touati
4 * Register Saturation in Superscalar and VLIW Codes
6 * @license This file protected by GPL - GNU GENERAL PUBLIC LICENSE.
7 * @author Christian Wuerdig
20 #include "irgraph_t.h"
22 #include "iredges_t.h"
24 #include "irphase_t.h"
30 #include "bipartite.h"
31 #include "hungarian.h"
38 #include "besched_t.h"
41 #include <libcore/lc_opts.h>
42 #include <libcore/lc_opts_enum.h>
43 #endif /* WITH_LIBCORE */
46 #define ARR_LEN_SAFE(arr) ((arr) != NULL ? ARR_LEN((arr)) : 0)
48 #define HASH_RSS_EDGE(edge) ((get_irn_node_nr((edge)->src) << 16) | (get_irn_node_nr((edge)->tgt) & 0xFFFF))
49 #define BSEARCH_IRN_ARR(val, arr) \
50 bsearch(&(val), (arr), ARR_LEN_SAFE((arr)), sizeof((arr)[0]), cmp_irn_idx)
52 #define BLOCK_IDX_MAP(rss, irn) bsearch_for_index(get_irn_idx((irn)), (rss)->idx_map, ARR_LEN_SAFE((rss)->idx_map), 1)
55 typedef struct _rss_opts_t {
59 /* Represents a child with associated costs */
60 typedef struct _child {
65 /* We need edges for several purposes. */
66 typedef struct _rss_edge {
72 /* Represents a connected bipartite component. */
74 nodeset *parents; /**< = S a set of value producers */
75 nodeset *children; /**< = T a set of value consumers */
76 pset *kill_edges; /**< = E a set of edges (t in T, s in S) such as each s in S gets killed by at least one t in T */
77 int nr; /**< a deterministic index for set insertion (used as hash) */
80 /* Represents a disjoint value DAG. */
86 /* Represents a chain of nodes. */
87 typedef struct _chain {
88 plist_t *elements; /**< List of chain elements */
89 int nr; /**< a deterministic index for set insertion (used as hash) */
92 typedef struct _rss_irn {
93 plist_t *consumer_list; /**< List of consumers */
94 ir_node **consumer; /**< Sorted consumer array (needed for faster access) */
96 plist_t *parent_list; /**< List of parents */
97 plist_t *pkiller_list; /**< List of potential killers */
99 plist_t *descendant_list; /**< List of descendants */
100 ir_node **descendants; /**< Sorted descendant array (needed for faster access) */
102 ir_node *killer; /**< The selected unique killer */
103 ir_node *irn; /**< The corresponding firm node to this rss_irn */
105 chain_t *chain; /**< The chain, this node is associated to */
107 unsigned desc_walk; /**< visited flag for collecting descendants */
108 unsigned kill_count; /**< number of nodes killed by this one */
110 unsigned live_out : 1; /**< irn has consumers outside of it's block */
111 unsigned visited : 1; /**< visited flag for bipartite decomposition */
112 unsigned havecons : 1; /**< visited flag collect consumer */
113 unsigned handled : 1; /**< flag indicating whether or not the list structures have been build */
114 unsigned dumped : 1; /**< flag indication whether or not this node was dumped */
117 /* Represents a serialization edge with associated costs. */
118 typedef struct _serialization {
119 rss_irn_t *u; /* the top node */
120 rss_irn_t *v; /* the node about to be serialized after u */
121 rss_edge_t *edge; /* the edge selected for the serialization */
122 int omega1; /* estimated: available regs - RS reduction */
123 int omega2; /* increase in critical path length */
127 typedef struct _rss {
128 phase_t ph; /**< Phase to hold some data */
129 heights_t *h; /**< The current height object */
130 ir_graph *irg; /**< The irg to preprocess */
131 plist_t *nodes; /**< The list of interesting nodes */
132 const arch_env_t *arch_env; /**< The architecture environment */
133 be_abi_irg_t *abi; /**< The abi for this irg */
134 pset *cbc_set; /**< A set of connected bipartite components */
135 ir_node *block; /**< The current block in progress. */
136 int *idx_map; /**< Mapping irn indices to per block indices */
137 unsigned max_height; /**< maximum height in the current block */
138 rss_opts_t *opts; /**< The options */
139 be_lv_t *liveness; /**< The liveness information for this irg */
140 pset *live_block; /**< Values alive at end of block */
141 const arch_register_class_t *cls; /**< The current register class */
142 DEBUG_ONLY(firm_dbg_module_t *dbg);
145 #define get_rss_irn(rss, irn) (phase_get_or_set_irn_data(&rss->ph, irn))
148 * We need some special nodes:
149 * a source and a sink for all live-in and live-out values of a block
158 static ir_node *_source = NULL;
159 static ir_node *_sink = NULL;
161 #define is_Source(irn) ((irn) == _source)
162 #define is_Sink(irn) ((irn) == _sink)
166 RSS_DUMP_CBC = 1 << 0,
167 RSS_DUMP_PKG = 1 << 1,
168 RSS_DUMP_KILL = 1 << 2,
169 RSS_DUMP_DVG = 1 << 3,
170 RSS_DUMP_MAXAC = 1 << 4,
171 RSS_DUMP_ALL = (RSS_DUMP_MAXAC << 1) - 1,
174 static rss_opts_t rss_options = {
179 static const lc_opt_enum_int_items_t dump_items[] = {
180 { "none", RSS_DUMP_NONE },
181 { "cbc", RSS_DUMP_CBC },
182 { "pkg", RSS_DUMP_PKG },
183 { "kill", RSS_DUMP_KILL },
184 { "dvg", RSS_DUMP_DVG },
185 { "maxac", RSS_DUMP_MAXAC },
186 { "all", RSS_DUMP_ALL },
190 static lc_opt_enum_int_var_t dump_var = {
191 &rss_options.dump_flags, dump_items
194 static const lc_opt_table_entry_t rss_option_table[] = {
195 LC_OPT_ENT_ENUM_MASK("dump", "dump phases", &dump_var),
198 #endif /* WITH_LIBCORE */
200 /******************************************************************************
202 * | | | | / _| | | (_)
203 * | |__ ___| |_ __ ___ _ __ | |_ _ _ _ __ ___| |_ _ ___ _ __ ___
204 * | '_ \ / _ \ | '_ \ / _ \ '__| | _| | | | '_ \ / __| __| |/ _ \| '_ \/ __|
205 * | | | | __/ | |_) | __/ | | | | |_| | | | | (__| |_| | (_) | | | \__ \
206 * |_| |_|\___|_| .__/ \___|_| |_| \__,_|_| |_|\___|\__|_|\___/|_| |_|___/
209 ******************************************************************************/
212 * Acquire opcodes and create source and sink nodes.
214 static void init_rss_special_nodes(ir_graph *irg) {
215 ir_node *block = get_irg_start_block(irg);
216 int iro_rss_base = get_next_ir_opcodes(iro_rss_last);
217 ir_op *op_rss_Source = new_ir_op(iro_rss_base + iro_rss_Source, "rss_Source", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
218 ir_op *op_rss_Sink = new_ir_op(iro_rss_base + iro_rss_Sink, "rss_Sink", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
219 _source = new_ir_node(NULL, irg, block, op_rss_Source, mode_ANY, 0, NULL);
220 _sink = new_ir_node(NULL, irg, block, op_rss_Sink, mode_ANY, 0, NULL);
223 static int cmp_int(const void *a, const void *b) {
227 return QSORT_CMP(*i1, *i2);
230 static int cmp_child_costs(const void *a, const void *b) {
231 const child_t *c1 = a;
232 const child_t *c2 = b;
234 return QSORT_CMP(c1->cost, c2->cost);
237 static int cmp_irn_idx(const void *a, const void *b) {
238 const ir_node *n1 = *(ir_node **)a;
239 const ir_node *n2 = *(ir_node **)b;
241 return QSORT_CMP(get_irn_idx(n1), get_irn_idx(n2));
244 static int cmp_rss_edges(const void *a, const void *b) {
245 const rss_edge_t *e1 = a;
246 const rss_edge_t *e2 = b;
248 return (e1->src != e2->src) || (e1->tgt != e2->tgt);
251 static int bsearch_for_index(int key, int *arr, size_t len, int force) {
255 while (right >= left) {
256 int idx = (left + right) / 2;
260 else if (key > arr[idx])
267 assert(0 && "Something is wrong, key not found.");
271 static ir_node **build_sorted_array_from_list(plist_t *irn_list, struct obstack *obst) {
274 int len = plist_count(irn_list);
275 ir_node **arr = NEW_ARR_D(ir_node *, obst, len);
277 /* copy the list into the array */
278 foreach_plist(irn_list, el) {
279 arr[i++] = plist_element_get_value(el);
282 /* sort the array by node index */
283 qsort(arr, len, sizeof(arr[0]), cmp_irn_idx);
288 /*****************************************************
291 * __| | ___| |__ _ _ __ _ __ _ _ _ __ __ _
292 * / _` |/ _ \ '_ \| | | |/ _` |/ _` | | '_ \ / _` |
293 * | (_| | __/ |_) | |_| | (_| | (_| | | | | | (_| |
294 * \__,_|\___|_.__/ \__,_|\__, |\__, |_|_| |_|\__, |
298 *****************************************************/
300 static void dump_nodeset(nodeset *ns, const char *prefix) {
302 foreach_nodeset(ns, irn) {
303 ir_fprintf(stderr, "%s%+F\n", prefix, irn);
307 static void build_file_name(rss_t *rss, const char *suffix, size_t suf_len, char *buf, size_t len) {
308 const char *irg_name;
311 irg_name = get_entity_name(get_irg_entity(rss->irg));
312 snprintf(buf, len - suf_len, "%s-%s-block-%ld",
313 irg_name, arch_register_class_name(rss->cls), get_irn_node_nr(rss->block));
317 /* Dumps all collected bipartite components of current irg as vcg. */
318 static void debug_vcg_dump_bipartite(rss_t *rss) {
322 static const char suffix[] = "-RSS-CBC.vcg";
324 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
325 f = fopen(file_name, "w");
327 ir_fprintf(f, "graph: { title: \"connected bipartite component graph of %+F\"\n", rss->irg);
328 fprintf(f, "display_edge_labels: no\n");
329 fprintf(f, "layoutalgorithm: mindepth\n");
330 fprintf(f, "manhattan_edges: yes\n\n");
332 foreach_pset(rss->cbc_set, cbc) {
336 fprintf(f, "graph: { titel: \"cbc %d\" label: \"cbc %d\" status:clustered color:yellow\n", cbc->nr, cbc->nr);
337 foreach_nodeset(cbc->parents, n) {
338 ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
340 foreach_nodeset(cbc->children, n) {
341 ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
343 foreach_pset(cbc->kill_edges, ke) {
344 ir_fprintf(f, "edge: { sourcename: \"n%d_%d\" targetname: \"n%d_%d\" }\n",
345 get_irn_node_nr(ke->src), cbc->nr, get_irn_node_nr(ke->tgt), cbc->nr);
353 /* Dump the computed killing function as vcg. */
354 static void debug_vcg_dump_kill(rss_t *rss) {
358 static const char suffix[] = "-RSS-KILL.vcg";
360 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
361 f = fopen(file_name, "w");
363 ir_fprintf(f, "graph: { title: \"computed kill graph of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
364 fprintf(f, "display_edge_labels: no\n");
365 fprintf(f, "layoutalgorithm: mindepth\n");
366 fprintf(f, "manhattan_edges: yes\n\n");
369 /* reset dump_flag */
371 foreach_phase_irn(&rss->ph, irn) {
372 rss_irn_t *node = get_rss_irn(rss, irn);
377 /* dump all nodes and their killers */
378 foreach_plist(rss->nodes, el) {
379 ir_node *irn = plist_element_get_value(el);
380 rss_irn_t *rirn = get_rss_irn(rss, irn);
381 rss_irn_t *pk_rirn = get_rss_irn(rss, rirn->killer);
383 if (! rirn->dumped) {
384 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
388 if (! pk_rirn->dumped) {
389 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(rirn->killer), rirn->killer);
393 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
394 get_irn_node_nr(rirn->killer), get_irn_node_nr(irn));
401 /* Dumps the potential killing DAG (PKG) as vcg. */
402 static void debug_vcg_dump_pkg(rss_t *rss, nodeset *max_ac, int iteration) {
405 char *suffix = alloca(32);
406 static const char suffix1[] = "-RSS-PKG.vcg";
407 static const char suffix2[] = "-RSS-PKG-MAXAC.vcg";
411 snprintf(suffix, 32, "%s", suffix1);
414 snprintf(suffix, 32, "-%02d%s", iteration, suffix2);
417 build_file_name(rss, suffix, strlen(suffix) + 1, file_name, sizeof(file_name));
418 f = fopen(file_name, "w");
420 ir_fprintf(f, "graph: { title: \"potential killing DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
421 fprintf(f, "display_edge_labels: no\n");
422 fprintf(f, "layoutalgorithm: mindepth\n");
423 fprintf(f, "manhattan_edges: yes\n\n");
426 /* reset dump_flag */
428 foreach_phase_irn(&rss->ph, irn) {
429 rss_irn_t *node = get_rss_irn(rss, irn);
434 foreach_plist(rss->nodes, el) {
435 ir_node *irn = plist_element_get_value(el);
436 rss_irn_t *rirn = get_rss_irn(rss, irn);
438 plist_element_t *k_el;
440 /* dump selected saturating values in yellow */
441 if (max_ac && nodeset_find(max_ac, irn))
444 if (! rirn->dumped) {
446 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F c%d\" %s }\n", get_irn_node_nr(irn), irn, rirn->chain->nr, c1);
448 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(irn), irn, c1);
452 foreach_plist(rirn->pkiller_list, k_el) {
453 ir_node *pkiller = plist_element_get_value(k_el);
454 rss_irn_t *pk_rirn = get_rss_irn(rss, pkiller);
457 if (max_ac && nodeset_find(max_ac, pkiller))
460 if (! pk_rirn->dumped) {
462 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F c%d\" %s }\n", get_irn_node_nr(pkiller), pkiller, pk_rirn->chain->nr, c2);
464 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(pkiller), pkiller, c2);
467 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
468 get_irn_node_nr(pkiller), get_irn_node_nr(irn));
475 /* Dumps the disjoint value DAG (DVG) as vcg. */
476 static void debug_vcg_dump_dvg(rss_t *rss, dvg_t *dvg) {
477 static const char suffix[] = "-RSS-DVG.vcg";
483 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
484 f = fopen(file_name, "w");
486 ir_fprintf(f, "graph: { title: \"disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
487 fprintf(f, "display_edge_labels: no\n");
488 fprintf(f, "layoutalgorithm: mindepth\n");
489 fprintf(f, "manhattan_edges: yes\n\n");
492 foreach_nodeset(dvg->nodes, irn) {
493 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
497 foreach_pset(dvg->edges, edge) {
498 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
499 get_irn_node_nr(edge->src), get_irn_node_nr(edge->tgt));
507 /* Dumps the PKG(DVG). */
508 static void debug_vcg_dump_dvg_pkiller(rss_t *rss, dvg_t *dvg) {
509 static const char suffix[] = "-RSS-DVG-PKG.vcg";
514 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
515 f = fopen(file_name, "w");
517 ir_fprintf(f, "graph: { title: \"PKG of disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
518 fprintf(f, "display_edge_labels: no\n");
519 fprintf(f, "layoutalgorithm: mindepth\n");
520 fprintf(f, "manhattan_edges: yes\n\n");
523 foreach_nodeset(dvg->nodes, irn) {
524 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
528 foreach_nodeset(dvg->nodes, irn) {
529 rss_irn_t *node = get_rss_irn(rss, irn);
532 foreach_plist(node->dvg_pkiller_list, el) {
533 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
534 get_irn_node_nr(plist_element_get_value(el)), get_irn_node_nr(irn));
544 * In case there is no rss information for irn, initialize it.
546 static void *init_rss_irn(phase_t *ph, ir_node *irn, void *old) {
547 rss_irn_t *res = old ? old : phase_alloc(ph, sizeof(res[0]));
549 res->descendant_list = plist_obstack_new(phase_obst(ph));
550 res->descendants = NULL;
552 res->consumer_list = plist_obstack_new(phase_obst(ph));
553 res->consumer = NULL;
555 res->pkiller_list = plist_obstack_new(phase_obst(ph));
557 res->parent_list = plist_obstack_new(phase_obst(ph));
575 * Collect all nodes data dependent on current node.
577 static void collect_descendants(rss_t *rss, rss_irn_t *rirn, ir_node *irn, int *got_sink, unsigned cur_desc_walk) {
578 const ir_edge_t *edge;
579 rss_irn_t *cur_node = get_rss_irn(rss, irn);
580 ir_node *block = rss->block;
581 ir_edge_kind_t ekind[2] = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
584 if (cur_node->desc_walk >= cur_desc_walk)
586 cur_node->desc_walk = cur_desc_walk;
588 /* Stop at Barriers */
589 if (be_is_Barrier(irn))
592 /* loop over normal and dependency edges */
593 for (i = 0; i < 2; ++i) {
594 foreach_out_edge_kind(irn, edge, ekind[i]) {
595 ir_node *user = get_edge_src_irn(edge);
597 /* skip ignore nodes as they do not really contribute to register pressure */
598 if (arch_irn_is(rss->arch_env, user, ignore))
602 (a) mode_X means Jump -> out_edge
603 (b) Phi as user of a normal node -> out-edge
605 if (get_irn_mode(user) == mode_X || is_Phi(user)) {
614 //if (arch_get_irn_reg_class(rss->arch_env, user, -1) == rss->cls)
615 collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
618 /* check if user lives in block and is not a control flow node */
619 if (get_nodes_block(user) == block) {
620 if (! plist_has_value(rirn->descendant_list, user)) {
621 plist_insert_back(rirn->descendant_list, user);
622 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", user));
624 collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
626 else if (! *got_sink) {
628 /* user lives out of block: add sink as descendant if not already done */
629 plist_insert_back(rirn->descendant_list, _sink);
631 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", _sink));
639 * Handles a single consumer.
641 static void collect_single_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *consumer, int *got_sink) {
642 ir_node *block = rss->block;
644 assert(! is_Proj(consumer) && "Cannot handle Projs");
646 if (! is_Phi(consumer) && ! is_Block(consumer) && get_nodes_block(consumer) == block) {
647 if (! arch_irn_is(rss->arch_env, consumer, ignore) && ! plist_has_value(rss_irn->consumer_list, consumer)) {
648 plist_insert_back(rss_irn->consumer_list, consumer);
649 DBG((rss->dbg, LEVEL_2, "\t\tconsumer %+F\n", consumer));
653 rss_irn->live_out = 1;
654 DBG((rss->dbg, LEVEL_2, "\t\tlive out %+F", consumer));
656 plist_insert_back(rss_irn->consumer_list, _sink);
658 DB((rss->dbg, LEVEL_2, ", %+F added instead", _sink));
660 DB((rss->dbg, LEVEL_2, "\n"));
665 * Collect all nodes consuming the value(s) produced by current node.
667 static void collect_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *irn, int *got_sink) {
668 const ir_edge_t *edge;
670 ir_edge_kind_t ekind[2] = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
671 rss_irn_t *cur_node = get_rss_irn(rss, irn);
673 if (cur_node->havecons)
675 cur_node->havecons = 1;
677 for (i = 0; i < 2; ++i) {
678 foreach_out_edge_kind(irn, edge, ekind[i]) {
679 ir_node *consumer = get_edge_src_irn(edge);
681 if (is_Proj(consumer)) {
682 //if (arch_get_irn_reg_class(rss->arch_env, consumer, -1) == rss->cls)
683 collect_consumer(rss, rss_irn, consumer, got_sink);
686 collect_single_consumer(rss, rss_irn, consumer, got_sink);
692 * Collects all consumer and descendant of a irn.
694 static void collect_node_info(rss_t *rss, ir_node *irn) {
695 static unsigned cur_desc_walk = 0;
696 rss_irn_t *rss_irn = get_rss_irn(rss, irn);
699 assert(! is_Proj(irn) && "Cannot handle Projs.");
701 /* check if node info is already available */
702 if (rss_irn->handled)
704 //phase_reinit_single_irn_data(&rss->ph, irn);
706 DBG((rss->dbg, LEVEL_1, "\tcomputing consumers of %+F:\n", irn));
708 /* collect all consumer */
710 collect_consumer(rss, rss_irn, irn, &got_sink);
712 /* build sorted consumer array */
713 rss_irn->consumer = build_sorted_array_from_list(rss_irn->consumer_list, phase_obst(&rss->ph));
715 DBG((rss->dbg, LEVEL_1, "\tcompute descendants of %+F:\n", irn));
717 /* collect descendants */
719 collect_descendants(rss, rss_irn, irn, &got_sink, ++cur_desc_walk);
721 /* build sorted descendant array */
722 rss_irn->descendants = build_sorted_array_from_list(rss_irn->descendant_list, phase_obst(&rss->ph));
724 rss_irn->handled = 1;
728 * Checks if v is a potential killer of u.
729 * v is in pkill(u) iff descendants(v) cut consumer(u) is v
731 * @param rss The rss object
732 * @param v The node to check for killer
733 * @param u The potentially killed value
734 * @return 1 if v is in pkill(u), 0 otherwise
736 static int is_potential_killer(rss_t *rss, rss_irn_t *v, rss_irn_t *u) {
741 assert(is_Sink(v->irn) || ((plist_count(v->descendant_list) > 0 && v->descendants) || 1));
742 assert(is_Sink(u->irn) || ((plist_count(u->consumer_list) > 0 && u->consumer) || 1));
744 /* as we loop over the list: loop over the shorter one */
745 if (plist_count(v->descendant_list) > plist_count(u->consumer_list)) {
746 list = u->consumer_list;
747 arr = v->descendants;
750 list = v->descendant_list;
754 /* for each list element: try to find element in array */
755 foreach_plist(list, el) {
756 ir_node *irn = plist_element_get_value(el);
757 ir_node *match = BSEARCH_IRN_ARR(irn, arr);
759 if (match && match != irn)
767 * Update descendants, consumer and pkiller list for the given irn.
769 static void update_node_info(rss_t *rss, ir_node *irn, ir_node *pk_irn) {
770 rss_irn_t *node = get_rss_irn(rss, irn);
771 rss_irn_t *pkiller = get_rss_irn(rss, pk_irn);
773 /* add consumer and rebuild the consumer array */
774 if (! plist_has_value(node->consumer_list, pk_irn)) {
775 plist_insert_back(node->consumer_list, pk_irn);
776 node->consumer = build_sorted_array_from_list(node->consumer_list, phase_obst(&rss->ph));
779 /* add pk_irn as descendant, add it's descendants to irn's and rebuild array */
780 if (! plist_has_value(node->descendant_list, pk_irn)) {
783 plist_insert_back(node->descendant_list, pk_irn);
785 foreach_plist(pkiller->descendant_list, el) {
786 ir_node *desc = plist_element_get_value(el);
788 if (! plist_has_value(node->descendant_list, desc))
789 plist_insert_back(node->descendant_list, desc);
792 node->descendants = build_sorted_array_from_list(node->descendant_list, phase_obst(&rss->ph));
798 * Compute the potential killing set PK.
800 static void compute_pkill_set(rss_t *rss) {
801 plist_element_t *u_el, *v_el;
803 foreach_plist(rss->nodes, u_el) {
804 ir_node *u_irn = plist_element_get_value(u_el);
805 rss_irn_t *u = get_rss_irn(rss, u_irn);
807 DBG((rss->dbg, LEVEL_1, "\tcomputing potential killers of %+F:\n", u_irn));
809 /* check each consumer if it is a potential killer */
810 foreach_plist(u->consumer_list, v_el) {
811 ir_node *v_irn = plist_element_get_value(v_el);
812 rss_irn_t *v = get_rss_irn(rss, v_irn);
814 /* check, if v is a potential killer of u */
815 if (is_potential_killer(rss, v, u)) {
816 if (! plist_has_value(u->pkiller_list, v_irn))
817 plist_insert_back(u->pkiller_list, v_irn);
819 DBG((rss->dbg, LEVEL_2, "\t\tpotential killer %+F\n", v_irn));
826 if (rss->opts->dump_flags & RSS_DUMP_PKG)
827 debug_vcg_dump_pkg(rss, NULL, 0);
831 * Build set of killing edges (from values to their potential killers)
833 static void build_kill_edges(rss_t *rss, pset *epk) {
834 plist_element_t *el, *k_el;
836 foreach_plist(rss->nodes, el) {
837 ir_node *irn = plist_element_get_value(el);
838 rss_irn_t *rirn = get_rss_irn(rss, irn);
840 DBG((rss->dbg, LEVEL_3, "\t\tbuilding kill edges for %+F:\n", irn));
842 foreach_plist(rirn->pkiller_list, k_el) {
843 ir_node *pkiller = plist_element_get_value(k_el);
844 rss_edge_t *ke = obstack_alloc(phase_obst(&rss->ph), sizeof(*ke));
850 DBG((rss->dbg, LEVEL_3, "\t\t\ttarget %+F\n", pkiller));
852 pset_insert(epk, ke, HASH_RSS_EDGE(ke));
857 /* print the given cbc for debugging purpose */
858 static void debug_print_cbc(firm_dbg_module_t *mod, cbc_t *cbc) {
862 DBG((mod, LEVEL_3, "\t\tS = set of parents:\n"));
863 foreach_nodeset(cbc->parents, n) {
864 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
866 DBG((mod, LEVEL_3, "\t\tT = set of children:\n"));
867 foreach_nodeset(cbc->children, n) {
868 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
870 DBG((mod, LEVEL_3, "\t\tE = Edges from producers to consumers\n"));
871 foreach_pset(cbc->kill_edges, ke) {
872 DBG((mod, LEVEL_3, "\t\t\t%+F -> %+F\n", ke->src, ke->tgt));
877 * Construct the bipartite decomposition.
878 * Sid-Ahmed-Ali Touati, Phd Thesis
879 * Register Pressure in Instruction Level Parallelism, p. 71
881 static void compute_bipartite_decomposition(rss_t *rss) {
882 pset *epk = new_pset(cmp_rss_edges, 10);
887 DBG((rss->dbg, LEVEL_1, "\tcomputing bipartite decomposition:\n"));
889 build_kill_edges(rss, epk);
891 foreach_plist(rss->nodes, el) {
892 ir_node *u_irn = plist_element_get_value(el);
893 rss_irn_t *u = get_rss_irn(rss, u_irn);
899 plist_element_t *el2;
901 rss_edge_t *kedge_root = NULL;
902 ir_node *t_irn, *s_irn;
904 if (u->visited || u_irn == _sink)
907 DBG((rss->dbg, LEVEL_2, "\t\t%+F choosen:\n", u_irn));
909 cbc = obstack_alloc(phase_obst(&rss->ph), sizeof(*cbc));
912 /* initialize S_cb */
913 cbc->parents = new_nodeset(5);
914 nodeset_insert(cbc->parents, u_irn);
915 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (init)\n", u_irn));
918 cbc->kill_edges = new_pset(cmp_rss_edges, 5);
920 /* each parent gets killed by at least one value from children */
922 /* T_cb = PK_successors(u) */
923 cbc->children = new_nodeset(5);
924 foreach_plist(u->pkiller_list, el2) {
925 nodeset_insert(cbc->children, plist_element_get_value(el2));
926 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (init)\n", plist_element_get_value(el2)));
930 Now: insert the parents of all children into the parent set
931 and insert the children of all parents into the children set
932 until the sets don't change any more
934 while (p_change || c_change) {
935 p_change = c_change = 0;
937 /* accumulate parents */
938 foreach_nodeset(cbc->children, t_irn) {
939 foreach_pset(epk, k_edge) {
940 ir_node *val = k_edge->src;
942 if (k_edge->tgt == t_irn && ! nodeset_find(cbc->parents, val)) {
943 nodeset_insert(cbc->parents, val);
945 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (killed by %+F)\n", val, t_irn));
950 /* accumulate children */
951 foreach_nodeset(cbc->parents, s_irn) {
952 foreach_pset(epk, k_edge) {
953 ir_node *val = k_edge->tgt;
955 if (k_edge->src == s_irn && ! nodeset_find(cbc->children, val)) {
956 nodeset_insert(cbc->children, val);
958 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (kills %+F)\n", val, s_irn));
964 /* mark all parent values as visited */
965 foreach_nodeset(cbc->parents, s_irn) {
966 rss_irn_t *s = get_rss_irn(rss, s_irn);
968 /* assure bipartite property */
970 if (nodeset_find(cbc->children, s_irn)) {
971 nodeset_remove(cbc->children, s_irn);
972 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F removed from to children\n", s_irn));
978 foreach_pset(epk, k_edge) {
979 if (nodeset_find(cbc->parents, k_edge->src) && nodeset_find(cbc->children, k_edge->tgt)) {
980 pset_insert(cbc->kill_edges, k_edge, HASH_RSS_EDGE(k_edge));
982 Link all k_edges which are about to be removed together.
983 Beware: pset_remove kills the iterator.
985 k_edge->next = kedge_root;
990 /* remove all linked k_edges */
991 for (; kedge_root; kedge_root = kedge_root->next) {
992 pset_remove(epk, kedge_root, HASH_RSS_EDGE(kedge_root));
996 foreach_nodeset(cbc->parents, s_irn) {
999 foreach_pset(cbc->kill_edges, k_edge) {
1000 if (k_edge->src == s_irn) {
1002 pset_break(cbc->kill_edges);
1009 ir_fprintf(stderr, "Warning: parent %+F is not killed in current cbc\n", s_irn);
1012 assert(vrfy_ok && "Verification of CBC failed");
1014 /* add the connected bipartite component */
1015 if (nodeset_count(cbc->parents) > 0 && nodeset_count(cbc->children) > 0 && pset_count(cbc->kill_edges) > 0)
1016 pset_insert(rss->cbc_set, cbc, (unsigned)cbc->nr);
1017 DBG((rss->dbg, LEVEL_2, "\tbipartite component %d inserted:\n", cbc->nr));
1019 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2)
1020 debug_print_cbc(rss->dbg, cbc);
1024 if (rss->opts->dump_flags & RSS_DUMP_CBC)
1025 debug_vcg_dump_bipartite(rss);
1031 * Select the child with the maximum cost.
1033 static child_t *select_child_max_cost(rss_t *rss, nodeset *x, nodeset *y, child_t *t, cbc_t *cbc) {
1035 float max_cost = -1.0f;
1037 DBG((rss->dbg, LEVEL_2, "\t\tcomputing children costs:\n"));
1039 foreach_nodeset(cbc->children, child) {
1040 rss_irn_t *r_child = get_rss_irn(rss, child);
1041 int num_unkilled_parents = 0;
1042 int num_descendants;
1046 /* get the number of unkilled parents */
1047 foreach_pset(cbc->kill_edges, k_edge) {
1048 if (k_edge->tgt == child && nodeset_find(x, k_edge->src))
1049 ++num_unkilled_parents;
1052 cost = (float)num_unkilled_parents;
1054 num_descendants = plist_count(r_child->descendant_list) + nodeset_count(y);
1056 if (num_descendants > 0)
1057 cost /= (float)num_descendants;
1059 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F, #desc %d, cost %.3f\n", child, num_descendants, cost));
1061 if (cost > max_cost) {
1072 * Remove all parents from x which are killed by t_irn.
1074 static void remove_covered_parents(rss_t *rss, nodeset *x, ir_node *t_irn, cbc_t *cbc) {
1075 rss_irn_t *t = get_rss_irn(rss, t_irn);
1078 DBG((rss->dbg, LEVEL_2, "\t\tremoving parents covered by %+F:\n", t_irn));
1080 foreach_pset(cbc->kill_edges, k_edge) {
1081 if (k_edge->tgt == t_irn && nodeset_find(x, k_edge->src)) {
1082 nodeset_remove(x, k_edge->src);
1083 plist_insert_back(t->parent_list, k_edge->src);
1084 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", k_edge->src));
1089 static void update_cumulated_descendent_values(rss_t *rss, nodeset *y, ir_node *t_irn) {
1090 rss_irn_t *t = get_rss_irn(rss, t_irn);
1091 plist_element_t *el;
1093 DBG((rss->dbg, LEVEL_2, "\t\tupdating cumulated descendant value of %+F:\n", t_irn));
1095 foreach_plist(t->descendant_list, el) {
1096 nodeset_insert(y, plist_element_get_value(el));
1097 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", plist_element_get_value(el)));
1102 * Greedy-k: a heuristics for the MMA problem
1104 static void compute_killing_function(rss_t *rss) {
1106 struct obstack obst;
1108 obstack_init(&obst);
1110 rss->cbc_set = pset_new_ptr(5);
1111 compute_bipartite_decomposition(rss);
1113 /* for all bipartite components do: */
1114 foreach_pset(rss->cbc_set, cbc) {
1116 nodeset *x = new_nodeset(10);
1117 nodeset *y = new_nodeset(10);
1118 child_t **sks = NEW_ARR_F(child_t *, 20);
1123 DBG((rss->dbg, LEVEL_1, "\tcomputing SKS for cbc %d:\n", cbc->nr));
1124 DBG((rss->dbg, LEVEL_2, "\t\tinitializing parents X:\n"));
1126 /* X = S_cb (all parents are initially uncovered) */
1127 foreach_nodeset(cbc->parents, p) {
1128 nodeset_insert(x, p);
1129 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", p));
1132 /* while X not empty */
1133 while (nodeset_count(x) > 0) {
1134 child_t *t = obstack_alloc(&obst, sizeof(*t));
1135 memset(t, 0, sizeof(*t));
1137 t = select_child_max_cost(rss, x, y, t, cbc);
1139 if (cur_len >= cur_size) {
1140 ARR_EXTO(child_t *, sks, cur_size * 2);
1144 DBG((rss->dbg, LEVEL_2, "\t\tinsert child %+F (%.3f) into SKS at pos %d\n", t->irn, t->cost, cur_len));
1147 remove_covered_parents(rss, x, t->irn, cbc);
1148 update_cumulated_descendent_values(rss, y, t->irn);
1151 ARR_SHRINKLEN(sks, cur_len);
1153 /* sort SKS in increasing cost order */
1154 qsort(sks, cur_len, sizeof(sks[0]), cmp_child_costs);
1156 DBG((rss->dbg, LEVEL_2, "\tprocessing SKS for cbc %d:\n", cbc->nr));
1158 /* build killing function */
1159 for (i = cur_len - 1; i >= 0; --i) { /* loop over sks in decreasing cost order */
1160 child_t *t = sks[i];
1161 rss_irn_t *rt = get_rss_irn(rss, t->irn);
1162 plist_element_t *p_el;
1164 DBG((rss->dbg, LEVEL_3, "\t\t\tkiller %+F (%.3f):\n", t->irn, t->cost));
1166 /* kill all unkilled parents of t */
1167 foreach_plist(rt->parent_list, p_el) {
1168 ir_node *par = plist_element_get_value(p_el);
1169 rss_irn_t *rpar = get_rss_irn(rss, par);
1171 if (is_Sink(rpar->killer)) {
1172 rpar->killer = t->irn;
1173 DBG((rss->dbg, LEVEL_2, "\t\tkill %+F\n", rpar->irn));
1176 DBG((rss->dbg, LEVEL_3, "\t\t\tkeeping %+F as killer for %+F\n", rpar->killer, rpar->irn));
1186 if (rss->opts->dump_flags & RSS_DUMP_KILL)
1187 debug_vcg_dump_kill(rss);
1189 del_pset(rss->cbc_set);
1190 obstack_free(&obst, NULL);
1194 * Adds the edge src -> tgt to the dvg. Checks if reverse edge is already there (asserts).
1196 static INLINE void add_dvg_edge(rss_t *rss, dvg_t *dvg, ir_node *src, ir_node *tgt, int have_source) {
1197 rss_edge_t *dvg_edge;
1201 nodeset_insert(dvg->nodes, src);
1203 assert(nodeset_find(dvg->nodes, src) != NULL && "Wrong assumption");
1205 nodeset_insert(dvg->nodes, tgt);
1209 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1213 if (NULL != pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1214 /* add the edge to the DVG */
1215 dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1217 dvg_edge->src = src;
1218 dvg_edge->tgt = tgt;
1219 dvg_edge->next = NULL;
1221 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", src, tgt));
1222 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1227 * Computes the disjoint value DAG (DVG).
1228 * BEWARE: It is not made explicitly clear in the Touati paper,
1229 * but the DVG is meant to be build from the KILLING DAG
1231 static void compute_dvg(rss_t *rss, dvg_t *dvg) {
1232 plist_element_t *el;
1234 DBG((rss->dbg, LEVEL_1, "\tcomputing DVG:\n"));
1236 foreach_plist(rss->nodes, el) {
1237 ir_node *u_irn = plist_element_get_value(el);
1238 rss_irn_t *u = get_rss_irn(rss, u_irn);
1239 rss_irn_t *u_killer = get_rss_irn(rss, u->killer);
1242 /* TODO: omit nodes only having sink as pkiller and killing no one */
1244 /* add an edge to killer */
1245 add_dvg_edge(rss, dvg, u_irn, u->killer, 0);
1247 if (is_Sink(u->killer))
1250 /* We add an edge to every killer, from where we could be reached. */
1251 for (i = ARR_LEN_SAFE(u_killer->descendants) - 1; i >= 0; --i) {
1252 add_dvg_edge(rss, dvg, u_irn, u_killer->descendants[i], 1);
1257 foreach_plist(rss->nodes, el2) {
1258 ir_node *v_irn = plist_element_get_value(el2);
1261 There is an edge (u, v) in the DVG iff v is a descendant of the killer(u).
1263 if (BSEARCH_IRN_ARR(v_irn, u_kill->descendants)) {
1264 rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1267 /* insert the user into the DVG and append it to the user list of u */
1268 nodeset_insert(dvg->nodes, v_irn);
1269 if (! plist_has_value(u->dvg_user_list, v_irn))
1270 plist_insert_back(u->dvg_user_list, v_irn);
1272 dvg_edge->src = u_irn;
1273 dvg_edge->tgt = v_irn;
1274 dvg_edge->next = NULL;
1279 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1281 /* add the edge to the DVG */
1282 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", u_irn, v_irn));
1283 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1289 if (rss->opts->dump_flags & RSS_DUMP_DVG)
1290 debug_vcg_dump_dvg(rss, dvg);
1294 * Updates the dvg structure when a serialization edge from src -> tgt is added.
1296 static void update_dvg(rss_t *rss, dvg_t *dvg, rss_irn_t *src, rss_irn_t *tgt) {
1299 rss_edge_t **arr = alloca(pset_count(dvg->edges) * sizeof(arr[0]));
1302 Add an edge from serialization target to serialization src:
1303 src cannot be alive with target
1305 add_dvg_edge(rss, dvg, tgt->irn, src->irn, 0);
1307 /* Add edges to src's descendants as well, they also getting serialized. */
1308 for (i = ARR_LEN_SAFE(src->descendants) - 1; i >= 0; --i) {
1309 add_dvg_edge(rss, dvg, tgt->irn, src->descendants[i], 1);
1312 /* We also have to add edges from targets predecessors in dvg */
1314 /* We cannot insert elements into set over which we iterate ... */
1315 foreach_pset(dvg->edges, edge) {
1316 if (edge->tgt == tgt->irn) {
1321 for (i = 0; i < idx; ++i) {
1323 add_dvg_edge(rss, dvg, edge->src, src->irn, 1);
1324 for (j = ARR_LEN_SAFE(src->descendants) - 1; j >= 0; --j) {
1325 add_dvg_edge(rss, dvg, edge->src, src->descendants[j], 1);
1332 * Accumulate all descendants for root into list.
1334 static void accumulate_dvg_descendant_values(rss_t *rss, rss_irn_t *root, plist_t *list) {
1335 if (plist_count(root->dvg_user_list) > 0) {
1336 plist_element_t *el;
1338 foreach_plist(root->dvg_user_list, el) {
1339 ir_node *v_irn = plist_element_get_value(el);
1340 rss_irn_t *v = get_rss_irn(rss, v_irn);
1342 /* add v as descendant */
1343 if (! plist_has_value(list, v_irn)) {
1344 plist_insert_back(list, v_irn);
1345 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG descendant %+F\n", v_irn));
1348 /* accumulate v's descendants */
1349 accumulate_dvg_descendant_values(rss, v, list);
1355 * Builds the list of potential killers for each node
1357 * Needs the descendant list for all user as sorted array.
1359 static void build_dvg_pkiller_list(rss_t *rss, dvg_t *dvg) {
1362 foreach_nodeset(dvg->nodes, irn) {
1363 rss_irn_t *node = get_rss_irn(rss, irn);
1364 plist_element_t *el, *el2;
1366 DBG((rss->dbg, DEBUG_DVG, "\t\tbuilding pkiller list for %+F\n", irn));
1368 /* check each user */
1369 foreach_plist(node->dvg_user_list, el) {
1370 ir_node *u_irn = plist_element_get_value(el);
1372 /* is the current user u_irn not a descendant of any other user -> pkiller */
1373 foreach_plist(node->dvg_user_list, el2) {
1374 ir_node *v_irn = plist_element_get_value(el2);
1375 rss_irn_t *v = get_rss_irn(rss, v_irn);
1378 ! BSEARCH_IRN_ARR(u_irn, v->dvg_desc) &&
1379 ! plist_has_value(node->dvg_pkiller_list, u_irn))
1381 plist_insert_back(node->dvg_pkiller_list, u_irn);
1382 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG pkiller %+F\n", u_irn));
1387 node->dvg_pkiller = build_sorted_array_from_list(node->dvg_pkiller_list, phase_obst(&rss->ph));
1391 if (firm_dbg_get_mask(rss->dbg) & DEBUG_DVG)
1392 debug_vcg_dump_dvg_pkiller(rss, dvg);
1399 * Compute the maximal antichain of the current DVG.
1400 * This is a reimplementation of the MAXIMAL_ANTI_CHAIN function
1401 * from the DDG library 1.1 (DAG.cpp).
1403 static nodeset *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration) {
1404 int n = nodeset_count(dvg->nodes);
1405 int *assignment = alloca(n * sizeof(assignment[0]));
1406 int *assignment_rev = alloca(n * sizeof(assignment_rev[0]));
1407 int *idx_map = alloca(n * sizeof(idx_map[0]));
1408 hungarian_problem_t *bp;
1409 nodeset *values, *temp;
1411 int i, j, cost, cur_chain, res;
1412 rss_edge_t *dvg_edge;
1414 #define MAP_IDX(irn) bsearch_for_index(get_irn_idx(irn), idx_map, n, 1)
1416 if (pset_count(dvg->edges) == 0)
1419 bp = hungarian_new(n, n, 1, HUNGARIAN_MATCH_NORMAL);
1422 At first, we build an index map for the nodes in the DVG,
1423 because we cannot use the irn idx therefore as the resulting
1424 bipartite data structure would have around 1.2GB.
1425 So we limit the size to the number of nodes we have in the DVG
1426 and build a sorted index map for their irn indices.
1429 foreach_nodeset(dvg->nodes, u_irn) {
1430 idx_map[i++] = get_irn_idx(u_irn);
1432 qsort(idx_map, n, sizeof(idx_map[0]), cmp_int);
1434 foreach_pset(dvg->edges, dvg_edge) {
1435 int idx_u = MAP_IDX(dvg_edge->src);
1436 int idx_v = MAP_IDX(dvg_edge->tgt);
1438 /* add the entry to the bipartite data structure */
1439 hungarian_add(bp, idx_u, idx_v, 1);
1440 DBG((rss->dbg, LEVEL_3, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1441 idx_u, dvg_edge->src, idx_v, dvg_edge->tgt));
1445 Add a bipartite entry for each pair of nodes (u, v), where exists a
1446 path in the DVG from u to v, ie. connect all descendants(v) to v.
1447 desc_dvg(v) = accumulated descendants of all z in dvg_user_list(v)
1449 foreach_nodeset(dvg->nodes, u_irn) {
1450 rss_irn_t *u = get_rss_irn(rss, u_irn);
1451 int idx_u_irn = MAP_IDX(u_irn);
1453 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG descendants of %+F:\n", u_irn));
1455 //plist_clear(u->dvg_desc_list);
1456 //accumulate_dvg_descendant_values(rss, u, u->dvg_desc_list);
1459 FIXME: The array is build on the phase obstack and we cannot free the data.
1460 So the array get as many times allocated as this function is called.
1463 /* build the sorted array for faster searches */
1464 //u->dvg_desc = build_sorted_array_from_list(u->dvg_desc_list, phase_obst(&rss->ph));
1466 DBG((rss->dbg, DEBUG_MAX_AC, "\t\tadding bipartite entries of %+F:\n", u_irn));
1468 /* add the bipartite entries for all descendants */
1469 for (i = ARR_LEN_SAFE(u->dvg_desc) - 1; i >= 0; --i) {
1470 rss_irn_t *desc = get_rss_irn(rss, u->dvg_desc[i]);
1471 int idx_desc = MAP_IDX(u->dvg_desc[i]);
1473 /* add the entry to the bipartite data structure */
1474 hungarian_add(bp, idx_u_irn, idx_desc, 1);
1475 DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1476 idx_u_irn, u_irn, idx_desc, u->dvg_desc[i]));
1483 /* We want maximum cardinality matching */
1484 hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);
1487 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG pkiller:\n"));
1488 /* beware: the following function needs the dvg_desc array */
1489 build_dvg_pkiller_list(rss, dvg);
1492 DBG((rss->dbg, LEVEL_2, "\t\tcomputing bipartite matching\n"));
1494 The maximum cardinality bipartite matching gives us the minimal
1495 chain partition, which corresponds to the maximum anti chains.
1497 res = hungarian_solve(bp, assignment, &cost, 1);
1498 assert(res == 0 && "Bipartite matching failed!");
1501 memset(assignment_rev, -1, n * sizeof(assignment_rev[0]));
1503 /* build the reverse assignment, ie. foreach i -> j, add j -> i */
1504 for (i = 0; i < n; ++i) {
1505 if (assignment[i] >= 0) {
1506 int j = assignment[i];
1507 assignment_rev[j] = i;
1511 DBG((rss->dbg, LEVEL_2, "\t\tgot assignment with cost %d\n", cost));
1512 DBG((rss->dbg, LEVEL_3, "\t\t\tassignment --- reverse assignment\n", cost));
1513 for (i = 0; i < n; ++i) {
1514 DBG((rss->dbg, LEVEL_3, "\t\t\t%3d -> %3d %3d -> %3d\n", i, assignment[i], i, assignment_rev[i]));
1517 values = new_nodeset(10);
1519 /* Construction of the minimal chain partition */
1520 for (j = 0; j < n; ++j) {
1521 /* check nodes, which did not occur as target */
1522 if (assignment_rev[j] == -1) {
1523 int xj = idx_map[j];
1524 ir_node *xj_irn = get_idx_irn(rss->irg, xj);
1525 rss_irn_t *xj_rss = get_rss_irn(rss, xj_irn);
1526 chain_t *c = obstack_alloc(phase_obst(&rss->ph), sizeof(*c));
1529 /* there was no source for j -> we have a source of a new chain */
1530 nodeset_insert(values, xj_irn);
1532 c->elements = plist_obstack_new(phase_obst(&rss->ph));
1533 c->nr = cur_chain++;
1534 plist_insert_back(c->elements, xj_irn);
1538 DBG((rss->dbg, LEVEL_2, "\t\tstarting chain %d:\n", c->nr));
1539 DBG((rss->dbg, LEVEL_2, "\t\t\t%+F (%d)", xj_irn, j));
1541 /* follow chain, having j as source */
1543 while (assignment[source] >= 0) {
1544 int target = assignment[source];
1545 int irn_idx = idx_map[target];
1546 ir_node *irn = get_idx_irn(rss->irg, irn_idx);
1547 rss_irn_t *node = get_rss_irn(rss, irn);
1549 plist_insert_back(c->elements, irn);
1552 DB((rss->dbg, LEVEL_2, " -> %+F (%d)", irn, target));
1554 /* new source = last target */
1558 DB((rss->dbg, LEVEL_2, "\n"));
1563 Computing the maximal antichain: Select an element from each
1564 chain such, such it is parallel with the others.
1566 DBG((rss->dbg, LEVEL_1, "\t\tcomputing set of saturation values (MAX AC)\n"));
1567 DBG((rss->dbg, LEVEL_3, "\t\tstarting with:\n"));
1570 if (firm_dbg_get_mask(rss->dbg) & LEVEL_3)
1571 dump_nodeset(values, "\t\t\t");
1577 We need an explicit array for the values as
1578 we cannot iterate multiple times over the same
1579 set at the same time. :-(((((
1581 int n = nodeset_count(values);
1583 ir_node **val_arr = NEW_ARR_F(ir_node *, n);
1585 foreach_nodeset(values, u_irn)
1586 val_arr[i++] = u_irn;
1591 temp = new_nodeset(10);
1593 /* Select all nodes from current value set, having another node in the set as descendant. */
1594 for (i = 0; i < n; ++i) {
1595 rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1597 for (j = 0; j < n; ++j) {
1601 key.src = val_arr[i];
1602 key.tgt = val_arr[j];
1604 if (pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1605 /* v[j] is descendant of u -> remove u and break */
1606 nodeset_insert(temp, u->irn);
1607 nodeset_remove(values, u->irn);
1609 DBG((rss->dbg, LEVEL_3, "\t\t\tremoving %+F from values, adding it to temp\n", u->irn));
1617 /* Try to insert the chain predecessor of all selected u's */
1618 foreach_nodeset(temp, u_irn) {
1619 rss_irn_t *u = get_rss_irn(rss, u_irn);
1620 chain_t *c = u->chain;
1621 plist_element_t *el = plist_find_value(c->elements, u_irn);
1623 assert(el && "Missing element in chain!");
1625 /* If u has predecessor in chain: insert the predecessor */
1626 if (el == plist_element_get_prev(el)) {
1627 nodeset_insert(values, plist_element_get_value(el));
1628 DBG((rss->dbg, LEVEL_3, "\t\t\tadding %+F to values\n", plist_element_get_value(el)));
1634 } while (nodeset_count(temp) > 0);
1636 DBG((rss->dbg, LEVEL_2, "\t\tfinal set:\n"));
1638 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2) {
1639 dump_nodeset(values, "\t\t\t");
1643 if (rss->opts->dump_flags & RSS_DUMP_MAXAC)
1644 debug_vcg_dump_pkg(rss, values, iteration);
1654 * Computes the best serialization between two nodes of sat_vals.
1656 static serialization_t *compute_best_admissible_serialization(rss_t *rss, nodeset *sat_vals, serialization_t *ser, int num_regs) {
1657 int n = nodeset_count(sat_vals);
1658 int n_idx = ARR_LEN_SAFE(rss->idx_map);
1660 ir_node **val_arr = alloca(n * sizeof(val_arr[0]));
1661 bitset_t *bs_sv = bitset_alloca(n_idx);
1662 bitset_t *bs_vdesc = bitset_alloca(n_idx);
1663 bitset_t *bs_tmp = bitset_alloca(n_idx);
1664 bitset_t *bs_ukilldesc = bitset_alloca(n_idx);
1665 int best_benefit = INT_MAX;
1666 int best_omega2 = INT_MAX;
1667 int best_benefit_omega20 = INT_MAX;
1671 rss_edge_t min_benefit_edge;
1672 rss_edge_t min_omega20_edge;
1673 rss_irn_t *ser_u_omega1, *ser_v_omega1;
1674 rss_irn_t *ser_u_omega20, *ser_v_omega20;
1676 DBG((rss->dbg, LEVEL_1, "\tcomputing admissible serializations:\n"));
1679 We need an explicit array for the values as
1680 we cannot iterate multiple times over the same
1681 set at the same time. :-(((((
1684 foreach_nodeset(sat_vals, irn) {
1686 bitset_set(bs_sv, BLOCK_IDX_MAP(rss, irn));
1690 We build all admissible serializations and remember the best found so far.
1693 if v in pkiller(u): add edge from v to all other pkiller(u)
1694 else: for all vv in pkiller(u): add edge from v to vv if there exists no path from vv to v
1698 A node is unserializable if:
1699 - it has only one killer and this one is Sink
1700 - it kills no other values
1701 In this case there is no serialization which could
1702 reduce the registerpressure
1704 #define IS_UNSERIALIZABLE_NODE(rss_node) \
1707 (plist_count(rss_node->pkiller_list) == 1) && \
1708 is_Sink(rss_node->killer) && \
1709 (rss_node->kill_count == 0) \
1711 be_is_Barrier(rss_node->irn) || \
1712 be_is_Keep(rss_node->irn) \
1715 /* for all u in sat_vals */
1716 for (i = 0; i < n; ++i) {
1717 rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1718 plist_element_t *el;
1720 /* ignore nodes where serialization does not help */
1721 if (IS_UNSERIALIZABLE_NODE(u)) {
1722 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", u->irn));
1726 /* accumulate all descendants of all pkiller(u) */
1727 bitset_clear_all(bs_ukilldesc);
1728 foreach_plist(u->pkiller_list, el) {
1729 ir_node *irn = plist_element_get_value(el);
1730 rss_irn_t *node = get_rss_irn(rss, irn);
1733 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, irn));
1737 for (k = ARR_LEN_SAFE(node->descendants) - 1; k >= 0; --k) {
1738 if (! is_Sink(node->descendants[k]))
1739 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, node->descendants[k]));
1743 /* for all v in sat_vals */
1744 for (j = 0; j < n; ++j) {
1745 ir_node *v_irn = val_arr[j];
1746 rss_irn_t *v = get_rss_irn(rss, v_irn);
1747 unsigned v_height = get_irn_height(rss->h, v_irn);
1748 int omega1, omega2, is_pkiller;
1750 /* v cannot be serialized with itself
1751 * ignore nodes where serialization does not help */
1752 if (i == j || IS_UNSERIALIZABLE_NODE(v)) {
1754 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", v->irn));
1758 /* get descendants of v */
1759 bitset_clear_all(bs_vdesc);
1760 bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v_irn));
1761 for (k = ARR_LEN_SAFE(v->descendants) - 1; k >= 0; --k) {
1762 if (! is_Sink(v->descendants[k]))
1763 bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v->descendants[k]));
1766 /* if v is in pkiller(u) */
1767 is_pkiller = plist_has_value(u->pkiller_list, v_irn);
1769 /* for all vv in pkiller(u) */
1770 foreach_plist(u->pkiller_list, el) {
1771 ir_node *vv_irn = plist_element_get_value(el);
1774 if (is_Sink(vv_irn) || is_cfop(vv_irn))
1778 add_edge = vv_irn != v_irn && skip_Proj(vv_irn) != skip_Proj(v_irn);
1780 add_edge = ! heights_reachable_in_block(rss->h, skip_Proj(vv_irn), skip_Proj(v_irn));
1783 As we add an edge from vv -> v, we have to make sure,
1784 that there exists no path from v to vv.
1788 int vv_height = get_irn_height(rss->h, vv_irn);
1789 int mu1, mu2, critical_path_cost;
1792 mu1 = | descendants(v) cut sat_vals |
1793 the number of saturating values which cannot
1794 be simultaneously alive with u
1796 bitset_copy(bs_tmp, bs_vdesc);
1797 mu1 = bitset_popcnt(bitset_and(bs_tmp, bs_sv));
1800 mu2 = | accum_desc_all_pkiller(u) without descendants(v) |
1803 bitset_copy(bs_tmp, bs_ukilldesc);
1804 mu2 = bitset_popcnt(bitset_andnot(bs_tmp, bs_vdesc));
1810 /* omega1 = mu1 - mu2 */
1816 /* omega2 = increase of critical path */
1817 critical_path_cost =
1818 v_height /* longest path from v to sink */
1819 + rss->max_height - vv_height /* longest path from source to vv */
1823 If critical_path_cost > max_height -> the new edge
1824 would increase the longest critical path by the difference.
1826 omega2 = critical_path_cost > rss->max_height ? critical_path_cost - rss->max_height : 0;
1828 /* this keeps track of the edge with the best benefit */
1829 if (omega1 >= num_regs - n && omega1 < best_benefit) {
1830 min_benefit_edge.src = v_irn;
1831 min_benefit_edge.tgt = vv_irn;
1836 best_benefit = omega1;
1837 ser->new_killer = is_pkiller;
1840 /* this keeps track of the edge with the best omega1 costs where omega2 == 0 */
1841 if (omega2 == 0 && omega1 >= num_regs - n && omega1 < best_benefit_omega20) {
1842 min_omega20_edge.src = v_irn;
1843 min_omega20_edge.tgt = vv_irn;
1848 best_benefit_omega20 = omega1;
1849 ser->new_killer = is_pkiller;
1852 best_omega2 = MIN(best_omega2, omega2);
1854 DBG((rss->dbg, LEVEL_2, "\t\tfound %+F -> %+F (w1 %d, w2 %d)\n",
1855 v_irn, vv_irn, omega1, omega2));
1857 } /* for all vv in pkiller(u) */
1858 } /* for all v in sat_vals */
1859 } /* for all u in sat_vals */
1864 if (best_omega2 == 0) {
1865 ser->u = ser_u_omega20;
1866 ser->v = ser_v_omega20;
1867 ser->edge->src = min_omega20_edge.src;
1868 ser->edge->tgt = min_omega20_edge.tgt;
1869 ser->omega1 = best_benefit_omega20;
1870 ser->omega2 = best_omega2;
1873 ser->u = ser_u_omega1;
1874 ser->v = ser_v_omega1;
1875 ser->edge->src = min_benefit_edge.src;
1876 ser->edge->tgt = min_benefit_edge.tgt;
1877 ser->omega1 = best_benefit;
1878 ser->omega2 = best_omega2;
1883 #undef IS_UNSERIALIZABLE_NODE
1887 * Perform the value serialization heuristic and add all
1888 * computed serialization edges as dependencies to the irg.
1890 static void perform_value_serialization_heuristic(rss_t *rss) {
1891 bitset_t *arch_nonign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1892 bitset_t *abi_ign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1893 int available_regs, iteration;
1896 pset *ser_set = new_pset(cmp_rss_edges, 20);
1898 /* available_regs = R = |arch_non_ignore_regs cut ~abi_ignore_regs| */
1899 arch_put_non_ignore_regs(rss->arch_env, rss->cls, arch_nonign_bs);
1900 be_abi_put_ignore_regs(rss->abi, rss->cls, abi_ign_bs);
1901 bitset_andnot(arch_nonign_bs, abi_ign_bs);
1902 available_regs = bitset_popcnt(arch_nonign_bs);
1903 //num_live = pset_count(rss->live_block);
1904 //available_regs -= num_live < available_regs ? num_live : 0;
1906 DBG((rss->dbg, LEVEL_1, "\n\t#available regs: %d\n\n", available_regs));
1909 At first we need to compute the disjoint value DAG (DVG = {V, E_dv}).
1910 V = set of all nodes we are currently interested in
1911 E_dv = there is an edge from u to v iff v is a descendant of killer(u), forall u, v in V
1913 dvg.nodes = new_nodeset(plist_count(rss->nodes));
1914 dvg.edges = new_pset(cmp_rss_edges, plist_count(rss->nodes) * 5);
1915 compute_dvg(rss, &dvg);
1918 Then we perform the heuristic serialization algorithm
1919 on the DVG which gives us all necessary serialization
1922 DBG((rss->dbg, LEVEL_1, "\tcomputing maximal antichain:\n"));
1924 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
1925 while (sat_vals && (nodeset_count(sat_vals) > available_regs)) {
1926 serialization_t *ser, best_ser;
1927 rss_edge_t *edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*edge));
1928 ir_node *dep_src, *dep_tgt;
1930 best_ser.edge = edge;
1931 ser = compute_best_admissible_serialization(rss, sat_vals, &best_ser, available_regs);
1933 DBG((rss->dbg, LEVEL_1, "\tcurrent register saturation %d, target %d\n", nodeset_count(sat_vals), available_regs));
1936 DBG((rss->dbg, LEVEL_1, "\tno RS improving serialization found, breaking at iteration %d\n", iteration));
1940 /* Insert the serialization as dependency edge into the irg. */
1941 DBG((rss->dbg, LEVEL_1, "\tserializing %+F -> %+F with edge %+F -> %+F and cost %d, %d\n",
1942 ser->u->irn, ser->v->irn, ser->edge->src, ser->edge->tgt, ser->omega1, ser->omega2));
1944 if (pset_find(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge)))
1945 ir_printf("WARNING: serialization %+F -> %+F computed twice!\n", ser->edge->src, ser->edge->tgt);
1948 pset_insert(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge));
1950 /* update the dvg */
1951 update_dvg(rss, &dvg, ser->v, ser->u);
1952 update_dvg(rss, &dvg, get_rss_irn(rss, ser->edge->src), get_rss_irn(rss, ser->edge->tgt));
1953 del_nodeset(sat_vals);
1955 dep_src = skip_Proj(ser->edge->src);
1956 dep_tgt = ser->edge->tgt;
1957 add_irn_dep(dep_src, dep_tgt);
1959 /* Update descendants, consumer and pkillers of the target */
1960 update_node_info(rss, ser->edge->tgt, ser->edge->src);
1962 /* TODO: try to find a cheaper way for updating height information */
1963 rss->max_height = heights_recompute_block(rss->h, rss->block);
1965 /* Recompute the antichain for next serialization */
1966 DBG((rss->dbg, LEVEL_1, "\tre-computing maximal antichain:\n"));
1967 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
1970 del_nodeset(dvg.nodes);
1971 del_pset(dvg.edges);
1975 * Do initial calculations for a block.
1977 static void process_block(ir_node *block, void *env) {
1980 const ir_edge_t *edge;
1982 phase_init(&rss->ph, "rss block preprocessor", rss->irg, PHASE_DEFAULT_GROWTH, init_rss_irn);
1984 DBG((rss->dbg, LEVEL_1, "preprocessing block %+F\n", block));
1987 /* build an index map for all nodes in the current block */
1989 n = get_irn_n_edges(block);
1990 NEW_ARR_A(int *, rss->idx_map, n);
1991 foreach_out_edge(block, edge) {
1992 ir_node *irn = get_edge_src_irn(edge);
1993 rss->idx_map[i++] = get_irn_idx(irn);
1995 qsort(rss->idx_map, n, sizeof(rss->idx_map[0]), cmp_int);
1996 rss->max_height = heights_recompute_block(rss->h, block);
1998 /* loop over all register classes */
1999 for (i = arch_isa_get_n_reg_class(rss->arch_env->isa) - 1; i >= 0; --i) {
2000 const arch_register_class_t *cls = arch_isa_get_reg_class(rss->arch_env->isa, i);
2003 DBG((rss->dbg, LEVEL_1, "register class %s\n", arch_register_class_name(cls)));
2005 /* Get all live value at end of Block having current register class */
2006 rss->live_block = pset_new_ptr(10);
2007 be_liveness_end_of_block(rss->liveness, rss->arch_env, rss->cls, rss->block, rss->live_block);
2009 /* reset the list of interesting nodes */
2010 plist_clear(rss->nodes);
2011 plist_insert_back(rss->nodes, _sink);
2013 /* collect all nodes having a certain register class */
2014 foreach_out_edge(block, edge) {
2015 ir_node *irn = get_edge_src_irn(edge);
2016 ir_mode *mode = get_irn_mode(irn);
2020 - mode_T nodes (the projs are asked)
2021 - mode_X nodes (control flow nodes are always scheduled last)
2022 - Keeps (they are always immediately scheduled)
2023 - Phi (same as Keep)
2025 if (mode == mode_T || mode == mode_X || is_Phi(irn))
2029 In case of a proj, we skip
2030 - Barrier (they are a Barrier :)
2032 - the Proj itself, as it's scheduled always with it's super node
2035 ir_node *pred = get_Proj_pred(irn);
2036 if (be_is_Barrier(pred) || is_Start(pred))
2040 /* calculate the descendants and consumer for each node in the block */
2041 collect_node_info(rss, skip_Proj(irn));
2043 if (be_is_Keep(irn))
2046 if (! arch_irn_is(rss->arch_env, irn, ignore) && arch_get_irn_reg_class(rss->arch_env, irn, -1) == cls) {
2047 plist_insert_back(rss->nodes, skip_Proj(irn));
2052 /* compute the potential killing set PK(G) */
2053 compute_pkill_set(rss);
2055 /* compute the killing function k* */
2056 compute_killing_function(rss);
2059 Compute the heuristic value serialization and
2060 add the necessary dependencies to the irg.
2062 perform_value_serialization_heuristic(rss);
2064 del_pset(rss->live_block);
2067 phase_free(&rss->ph);
2072 * Register the options.
2074 void rss_register_options(lc_opt_entry_t *grp) {
2075 static int run_once = 0;
2076 lc_opt_entry_t *rss_grp;
2080 rss_grp = lc_opt_get_grp(grp, "rss");
2082 lc_opt_add_table(rss_grp, rss_option_table);
2085 #endif /* WITH_LIBCORE */
2088 * Preprocess the irg for scheduling.
2090 void rss_schedule_preparation(const be_irg_t *birg) {
2093 FIRM_DBG_REGISTER(rss.dbg, "firm.be.sched.rss");
2095 //firm_dbg_set_mask(rss.dbg, LEVEL_1 | LEVEL_2 | LEVEL_3);
2097 init_rss_special_nodes(birg->irg);
2099 rss.irg = birg->irg;
2100 rss.arch_env = birg->main_env->arch_env;
2101 rss.abi = birg->abi;
2102 rss.h = heights_new(birg->irg);
2103 rss.nodes = plist_new();
2104 rss.opts = &rss_options;
2105 rss.liveness = be_liveness(birg->irg);
2106 irg_block_walk_graph(birg->irg, NULL, process_block, &rss);
2107 heights_free(rss.h);
2108 plist_free(rss.nodes);
2109 be_liveness_free(rss.liveness);
2111 if (birg->main_env->options->dump_flags & DUMP_SCHED)
2112 be_dump(rss.irg, "-rss", dump_ir_block_graph);