2 * Implementation of a register saturating list scheduler
3 * as described in: Sid-Ahmed-Ali Touati
4 * Register Saturation in Superscalar and VLIW Codes
6 * @license This file protected by GPL - GNU GENERAL PUBLIC LICENSE.
7 * @author Christian Wuerdig
20 #include "irgraph_t.h"
22 #include "iredges_t.h"
24 #include "irphase_t.h"
30 #include "bipartite.h"
31 #include "hungarian.h"
38 #include "besched_t.h"
41 #include <libcore/lc_opts.h>
42 #include <libcore/lc_opts_enum.h>
43 #endif /* WITH_LIBCORE */
46 #define ARR_LEN_SAFE(arr) ((arr) != NULL ? ARR_LEN((arr)) : 0)
48 #define HASH_RSS_EDGE(edge) ((get_irn_node_nr((edge)->src) << 16) | (get_irn_node_nr((edge)->tgt) & 0xFFFF))
49 #define BSEARCH_IRN_ARR(val, arr) \
50 bsearch(&(val), (arr), ARR_LEN_SAFE((arr)), sizeof((arr)[0]), cmp_irn_idx)
52 #define BLOCK_IDX_MAP(rss, irn) bsearch_for_index(get_irn_idx((irn)), (rss)->idx_map, ARR_LEN_SAFE((rss)->idx_map), 1)
55 typedef struct _rss_opts_t {
59 /* Represents a child with associated costs */
60 typedef struct _child {
65 /* We need edges for several purposes. */
66 typedef struct _rss_edge {
72 /* Represents a connected bipartite component. */
74 nodeset *parents; /**< = S a set of value producers */
75 nodeset *children; /**< = T a set of value consumers */
76 pset *kill_edges; /**< = E a set of edges (t in T, s in S) such as each s in S gets killed by at least one t in T */
77 int nr; /**< a deterministic index for set insertion (used as hash) */
80 /* Represents a disjoint value DAG. */
86 /* Represents a chain of nodes. */
87 typedef struct _chain {
88 plist_t *elements; /**< List of chain elements */
89 int nr; /**< a deterministic index for set insertion (used as hash) */
92 typedef struct _rss_irn {
93 plist_t *consumer_list; /**< List of consumers */
94 ir_node **consumer; /**< Sorted consumer array (needed for faster access) */
96 plist_t *parent_list; /**< List of parents */
97 plist_t *pkiller_list; /**< List of potential killers */
99 plist_t *descendant_list; /**< List of descendants */
100 ir_node **descendants; /**< Sorted descendant array (needed for faster access) */
102 ir_node *killer; /**< The selected unique killer */
103 ir_node *irn; /**< The corresponding firm node to this rss_irn */
105 chain_t *chain; /**< The chain, this node is associated to */
107 unsigned desc_walk; /**< visited flag for collecting descendants */
108 unsigned kill_count; /**< number of nodes killed by this one */
110 unsigned live_out : 1; /**< irn has consumers outside of it's block */
111 unsigned visited : 1; /**< visited flag for bipartite decomposition */
112 unsigned havecons : 1; /**< visited flag collect consumer */
113 unsigned handled : 1; /**< flag indicating whether or not the list structures have been build */
114 unsigned dumped : 1; /**< flag indication whether or not this node was dumped */
117 /* Represents a serialization edge with associated costs. */
118 typedef struct _serialization {
119 rss_irn_t *u; /* the top node */
120 rss_irn_t *v; /* the node about to be serialized after u */
121 rss_edge_t *edge; /* the edge selected for the serialization */
122 int omega1; /* estimated: available regs - RS reduction */
123 int omega2; /* increase in critical path length */
127 typedef struct _rss {
128 phase_t ph; /**< Phase to hold some data */
129 heights_t *h; /**< The current height object */
130 ir_graph *irg; /**< The irg to preprocess */
131 plist_t *nodes; /**< The list of interesting nodes */
132 const arch_env_t *arch_env; /**< The architecture environment */
133 be_abi_irg_t *abi; /**< The abi for this irg */
134 pset *cbc_set; /**< A set of connected bipartite components */
135 ir_node *block; /**< The current block in progress. */
136 int *idx_map; /**< Mapping irn indices to per block indices */
137 unsigned max_height; /**< maximum height in the current block */
138 rss_opts_t *opts; /**< The options */
139 be_lv_t *liveness; /**< The liveness information for this irg */
140 pset *live_block; /**< Values alive at end of block */
141 const arch_register_class_t *cls; /**< The current register class */
142 DEBUG_ONLY(firm_dbg_module_t *dbg);
145 #define get_rss_irn(rss, irn) (phase_get_or_set_irn_data(&rss->ph, irn))
148 * We need some special nodes:
149 * a source and a sink for all live-in and live-out values of a block
158 static ir_node *_source = NULL;
159 static ir_node *_sink = NULL;
161 #define is_Source(irn) ((irn) == _source)
162 #define is_Sink(irn) ((irn) == _sink)
166 RSS_DUMP_CBC = 1 << 0,
167 RSS_DUMP_PKG = 1 << 1,
168 RSS_DUMP_KILL = 1 << 2,
169 RSS_DUMP_DVG = 1 << 3,
170 RSS_DUMP_MAXAC = 1 << 4,
171 RSS_DUMP_ALL = (RSS_DUMP_MAXAC << 1) - 1,
174 static rss_opts_t rss_options = {
179 static const lc_opt_enum_int_items_t dump_items[] = {
180 { "none", RSS_DUMP_NONE },
181 { "cbc", RSS_DUMP_CBC },
182 { "pkg", RSS_DUMP_PKG },
183 { "kill", RSS_DUMP_KILL },
184 { "dvg", RSS_DUMP_DVG },
185 { "maxac", RSS_DUMP_MAXAC },
186 { "all", RSS_DUMP_ALL },
190 static lc_opt_enum_int_var_t dump_var = {
191 &rss_options.dump_flags, dump_items
194 static const lc_opt_table_entry_t rss_option_table[] = {
195 LC_OPT_ENT_ENUM_MASK("dump", "dump phases", &dump_var),
198 #endif /* WITH_LIBCORE */
200 /******************************************************************************
202 * | | | | / _| | | (_)
203 * | |__ ___| |_ __ ___ _ __ | |_ _ _ _ __ ___| |_ _ ___ _ __ ___
204 * | '_ \ / _ \ | '_ \ / _ \ '__| | _| | | | '_ \ / __| __| |/ _ \| '_ \/ __|
205 * | | | | __/ | |_) | __/ | | | | |_| | | | | (__| |_| | (_) | | | \__ \
206 * |_| |_|\___|_| .__/ \___|_| |_| \__,_|_| |_|\___|\__|_|\___/|_| |_|___/
209 ******************************************************************************/
212 * Acquire opcodes and create source and sink nodes.
214 static void init_rss_special_nodes(ir_graph *irg) {
215 ir_node *block = get_irg_start_block(irg);
216 int iro_rss_base = get_next_ir_opcodes(iro_rss_last);
217 ir_op *op_rss_Source = new_ir_op(iro_rss_base + iro_rss_Source, "rss_Source", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
218 ir_op *op_rss_Sink = new_ir_op(iro_rss_base + iro_rss_Sink, "rss_Sink", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
219 _source = new_ir_node(NULL, irg, block, op_rss_Source, mode_ANY, 0, NULL);
220 _sink = new_ir_node(NULL, irg, block, op_rss_Sink, mode_ANY, 0, NULL);
223 static int cmp_int(const void *a, const void *b) {
227 return QSORT_CMP(*i1, *i2);
230 static int cmp_child_costs(const void *a, const void *b) {
231 const child_t *c1 = a;
232 const child_t *c2 = b;
234 return QSORT_CMP(c1->cost, c2->cost);
237 static int cmp_irn_idx(const void *a, const void *b) {
238 const ir_node *n1 = *(ir_node **)a;
239 const ir_node *n2 = *(ir_node **)b;
241 return QSORT_CMP(get_irn_idx(n1), get_irn_idx(n2));
244 static int cmp_rss_edges(const void *a, const void *b) {
245 const rss_edge_t *e1 = a;
246 const rss_edge_t *e2 = b;
248 return (e1->src != e2->src) || (e1->tgt != e2->tgt);
251 static int bsearch_for_index(int key, int *arr, size_t len, int force) {
255 while (right >= left) {
256 int idx = (left + right) / 2;
260 else if (key > arr[idx])
267 assert(0 && "Something is wrong, key not found.");
271 static ir_node **build_sorted_array_from_list(plist_t *irn_list, struct obstack *obst) {
274 int len = plist_count(irn_list);
275 ir_node **arr = NEW_ARR_D(ir_node *, obst, len);
277 /* copy the list into the array */
278 foreach_plist(irn_list, el) {
279 arr[i++] = plist_element_get_value(el);
282 /* sort the array by node index */
283 qsort(arr, len, sizeof(arr[0]), cmp_irn_idx);
288 /*****************************************************
291 * __| | ___| |__ _ _ __ _ __ _ _ _ __ __ _
292 * / _` |/ _ \ '_ \| | | |/ _` |/ _` | | '_ \ / _` |
293 * | (_| | __/ |_) | |_| | (_| | (_| | | | | | (_| |
294 * \__,_|\___|_.__/ \__,_|\__, |\__, |_|_| |_|\__, |
298 *****************************************************/
300 static void dump_nodeset(nodeset *ns, const char *prefix) {
302 foreach_nodeset(ns, irn) {
303 ir_fprintf(stderr, "%s%+F\n", prefix, irn);
307 static void build_file_name(rss_t *rss, const char *suffix, size_t suf_len, char *buf, size_t len) {
308 const char *irg_name;
311 irg_name = get_entity_name(get_irg_entity(rss->irg));
312 snprintf(buf, len - suf_len, "%s-%s-block-%ld",
313 irg_name, arch_register_class_name(rss->cls), get_irn_node_nr(rss->block));
317 /* Dumps all collected bipartite components of current irg as vcg. */
318 static void debug_vcg_dump_bipartite(rss_t *rss) {
322 static const char suffix[] = "-RSS-CBC.vcg";
324 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
325 f = fopen(file_name, "w");
327 ir_fprintf(f, "graph: { title: \"connected bipartite component graph of %+F\"\n", rss->irg);
328 fprintf(f, "display_edge_labels: no\n");
329 fprintf(f, "layoutalgorithm: mindepth\n");
330 fprintf(f, "manhattan_edges: yes\n\n");
332 foreach_pset(rss->cbc_set, cbc) {
336 fprintf(f, "graph: { titel: \"cbc %d\" label: \"cbc %d\" status:clustered color:yellow\n", cbc->nr, cbc->nr);
337 foreach_nodeset(cbc->parents, n) {
338 ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
340 foreach_nodeset(cbc->children, n) {
341 ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
343 foreach_pset(cbc->kill_edges, ke) {
344 ir_fprintf(f, "edge: { sourcename: \"n%d_%d\" targetname: \"n%d_%d\" }\n",
345 get_irn_node_nr(ke->src), cbc->nr, get_irn_node_nr(ke->tgt), cbc->nr);
353 /* Dump the computed killing function as vcg. */
354 static void debug_vcg_dump_kill(rss_t *rss) {
358 static const char suffix[] = "-RSS-KILL.vcg";
360 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
361 f = fopen(file_name, "w");
363 ir_fprintf(f, "graph: { title: \"computed kill graph of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
364 fprintf(f, "display_edge_labels: no\n");
365 fprintf(f, "layoutalgorithm: mindepth\n");
366 fprintf(f, "manhattan_edges: yes\n\n");
369 /* reset dump_flag */
371 foreach_phase_irn(&rss->ph, irn) {
372 rss_irn_t *node = get_rss_irn(rss, irn);
377 /* dump all nodes and their killers */
378 foreach_plist(rss->nodes, el) {
379 ir_node *irn = plist_element_get_value(el);
380 rss_irn_t *rirn = get_rss_irn(rss, irn);
381 rss_irn_t *pk_rirn = get_rss_irn(rss, rirn->killer);
383 if (! rirn->dumped) {
384 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
388 if (! pk_rirn->dumped) {
389 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(rirn->killer), rirn->killer);
393 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
394 get_irn_node_nr(rirn->killer), get_irn_node_nr(irn));
401 /* Dumps the potential killing DAG (PKG) as vcg. */
402 static void debug_vcg_dump_pkg(rss_t *rss, nodeset *max_ac, int iteration) {
405 char *suffix = alloca(32);
406 static const char suffix1[] = "-RSS-PKG.vcg";
407 static const char suffix2[] = "-RSS-PKG-MAXAC.vcg";
411 snprintf(suffix, 32, "%s", suffix1);
414 snprintf(suffix, 32, "-%02d%s", iteration, suffix2);
417 build_file_name(rss, suffix, strlen(suffix) + 1, file_name, sizeof(file_name));
418 f = fopen(file_name, "w");
420 ir_fprintf(f, "graph: { title: \"potential killing DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
421 fprintf(f, "display_edge_labels: no\n");
422 fprintf(f, "layoutalgorithm: mindepth\n");
423 fprintf(f, "manhattan_edges: yes\n\n");
426 /* reset dump_flag */
428 foreach_phase_irn(&rss->ph, irn) {
429 rss_irn_t *node = get_rss_irn(rss, irn);
434 foreach_plist(rss->nodes, el) {
435 ir_node *irn = plist_element_get_value(el);
436 rss_irn_t *rirn = get_rss_irn(rss, irn);
438 plist_element_t *k_el;
440 /* dump selected saturating values in yellow */
441 if (max_ac && nodeset_find(max_ac, irn))
444 if (! rirn->dumped) {
446 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F c%d\" %s }\n", get_irn_node_nr(irn), irn, rirn->chain->nr, c1);
448 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(irn), irn, c1);
452 foreach_plist(rirn->pkiller_list, k_el) {
453 ir_node *pkiller = plist_element_get_value(k_el);
454 rss_irn_t *pk_rirn = get_rss_irn(rss, pkiller);
457 if (max_ac && nodeset_find(max_ac, pkiller))
460 if (! pk_rirn->dumped) {
462 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F c%d\" %s }\n", get_irn_node_nr(pkiller), pkiller, pk_rirn->chain->nr, c2);
464 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(pkiller), pkiller, c2);
467 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
468 get_irn_node_nr(pkiller), get_irn_node_nr(irn));
475 /* Dumps the disjoint value DAG (DVG) as vcg. */
476 static void debug_vcg_dump_dvg(rss_t *rss, dvg_t *dvg) {
477 static const char suffix[] = "-RSS-DVG.vcg";
483 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
484 f = fopen(file_name, "w");
486 ir_fprintf(f, "graph: { title: \"disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
487 fprintf(f, "display_edge_labels: no\n");
488 fprintf(f, "layoutalgorithm: mindepth\n");
489 fprintf(f, "manhattan_edges: yes\n\n");
492 foreach_nodeset(dvg->nodes, irn) {
493 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
497 foreach_pset(dvg->edges, edge) {
498 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
499 get_irn_node_nr(edge->src), get_irn_node_nr(edge->tgt));
507 /* Dumps the PKG(DVG). */
508 static void debug_vcg_dump_dvg_pkiller(rss_t *rss, dvg_t *dvg) {
509 static const char suffix[] = "-RSS-DVG-PKG.vcg";
514 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
515 f = fopen(file_name, "w");
517 ir_fprintf(f, "graph: { title: \"PKG of disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
518 fprintf(f, "display_edge_labels: no\n");
519 fprintf(f, "layoutalgorithm: mindepth\n");
520 fprintf(f, "manhattan_edges: yes\n\n");
523 foreach_nodeset(dvg->nodes, irn) {
524 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
528 foreach_nodeset(dvg->nodes, irn) {
529 rss_irn_t *node = get_rss_irn(rss, irn);
532 foreach_plist(node->dvg_pkiller_list, el) {
533 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
534 get_irn_node_nr(plist_element_get_value(el)), get_irn_node_nr(irn));
544 * In case there is no rss information for irn, initialize it.
546 static void *init_rss_irn(phase_t *ph, ir_node *irn, void *old) {
547 rss_irn_t *res = old ? old : phase_alloc(ph, sizeof(res[0]));
549 res->descendant_list = plist_obstack_new(phase_obst(ph));
550 res->descendants = NULL;
552 res->consumer_list = plist_obstack_new(phase_obst(ph));
553 res->consumer = NULL;
555 res->pkiller_list = plist_obstack_new(phase_obst(ph));
557 res->parent_list = plist_obstack_new(phase_obst(ph));
575 * Collect all nodes data dependent on current node.
577 static void collect_descendants(rss_t *rss, rss_irn_t *rirn, ir_node *irn, int *got_sink, unsigned cur_desc_walk) {
578 const ir_edge_t *edge;
579 rss_irn_t *cur_node = get_rss_irn(rss, irn);
580 ir_node *block = rss->block;
581 ir_edge_kind_t ekind[2] = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
584 if (cur_node->desc_walk >= cur_desc_walk)
586 cur_node->desc_walk = cur_desc_walk;
588 /* Stop at Barriers */
589 if (be_is_Barrier(irn))
592 /* loop over normal and dependency edges */
593 for (i = 0; i < 2; ++i) {
594 foreach_out_edge_kind(irn, edge, ekind[i]) {
595 ir_node *user = get_edge_src_irn(edge);
597 /* skip ignore nodes as they do not really contribute to register pressure */
598 if (arch_irn_is(rss->arch_env, user, ignore))
601 if (get_irn_mode(user) == mode_X) {
610 //if (arch_get_irn_reg_class(rss->arch_env, user, -1) == rss->cls)
611 collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
614 /* check if user lives in block and is not a control flow node */
615 if (get_nodes_block(user) == block) {
616 if (! plist_has_value(rirn->descendant_list, user)) {
617 plist_insert_back(rirn->descendant_list, user);
618 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", user));
620 collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
622 else if (! *got_sink) {
624 /* user lives out of block: add sink as descendant if not already done */
625 plist_insert_back(rirn->descendant_list, _sink);
627 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", _sink));
635 * Handles a single consumer.
637 static void collect_single_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *consumer, int *got_sink) {
638 ir_node *block = rss->block;
640 assert(! is_Proj(consumer) && "Cannot handle Projs");
642 if (! is_Block(consumer) && get_nodes_block(consumer) == block) {
643 if (! arch_irn_is(rss->arch_env, consumer, ignore) && ! plist_has_value(rss_irn->consumer_list, consumer)) {
644 plist_insert_back(rss_irn->consumer_list, consumer);
645 DBG((rss->dbg, LEVEL_2, "\t\tconsumer %+F\n", consumer));
649 rss_irn->live_out = 1;
650 DBG((rss->dbg, LEVEL_2, "\t\tlive out %+F", consumer));
652 plist_insert_back(rss_irn->consumer_list, _sink);
654 DB((rss->dbg, LEVEL_2, ", %+F added instead", _sink));
656 DB((rss->dbg, LEVEL_2, "\n"));
661 * Collect all nodes consuming the value(s) produced by current node.
663 static void collect_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *irn, int *got_sink) {
664 const ir_edge_t *edge;
666 ir_edge_kind_t ekind[2] = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
667 rss_irn_t *cur_node = get_rss_irn(rss, irn);
669 if (cur_node->havecons)
671 cur_node->havecons = 1;
673 for (i = 0; i < 2; ++i) {
674 foreach_out_edge_kind(irn, edge, ekind[i]) {
675 ir_node *consumer = get_edge_src_irn(edge);
677 if (is_Proj(consumer)) {
678 //if (arch_get_irn_reg_class(rss->arch_env, consumer, -1) == rss->cls)
679 collect_consumer(rss, rss_irn, consumer, got_sink);
682 collect_single_consumer(rss, rss_irn, consumer, got_sink);
688 * Collects all consumer and descendant of a irn.
690 static void collect_node_info(rss_t *rss, ir_node *irn) {
691 static unsigned cur_desc_walk = 0;
692 rss_irn_t *rss_irn = get_rss_irn(rss, irn);
695 assert(! is_Proj(irn) && "Cannot handle Projs.");
697 /* check if node info is already available */
698 if (rss_irn->handled)
700 //phase_reinit_single_irn_data(&rss->ph, irn);
702 DBG((rss->dbg, LEVEL_1, "\tcomputing consumers of %+F:\n", irn));
704 /* collect all consumer */
706 collect_consumer(rss, rss_irn, irn, &got_sink);
708 /* build sorted consumer array */
709 rss_irn->consumer = build_sorted_array_from_list(rss_irn->consumer_list, phase_obst(&rss->ph));
711 DBG((rss->dbg, LEVEL_1, "\tcompute descendants of %+F:\n", irn));
713 /* collect descendants */
715 collect_descendants(rss, rss_irn, irn, &got_sink, ++cur_desc_walk);
717 /* build sorted descendant array */
718 rss_irn->descendants = build_sorted_array_from_list(rss_irn->descendant_list, phase_obst(&rss->ph));
720 rss_irn->handled = 1;
724 * Checks if v is a potential killer of u.
725 * v is in pkill(u) iff descendants(v) cut consumer(u) is v
727 * @param rss The rss object
728 * @param v The node to check for killer
729 * @param u The potentially killed value
730 * @return 1 if v is in pkill(u), 0 otherwise
732 static int is_potential_killer(rss_t *rss, rss_irn_t *v, rss_irn_t *u) {
737 assert(is_Sink(v->irn) || ((plist_count(v->descendant_list) > 0 && v->descendants) || 1));
738 assert(is_Sink(u->irn) || ((plist_count(u->consumer_list) > 0 && u->consumer) || 1));
740 /* as we loop over the list: loop over the shorter one */
741 if (plist_count(v->descendant_list) > plist_count(u->consumer_list)) {
742 list = u->consumer_list;
743 arr = v->descendants;
746 list = v->descendant_list;
750 /* for each list element: try to find element in array */
751 foreach_plist(list, el) {
752 ir_node *irn = plist_element_get_value(el);
753 ir_node *match = BSEARCH_IRN_ARR(irn, arr);
755 if (match && match != irn)
763 * Update descendants, consumer and pkiller list for the given irn.
765 static void update_node_info(rss_t *rss, ir_node *irn, ir_node *pk_irn) {
766 rss_irn_t *node = get_rss_irn(rss, irn);
767 rss_irn_t *pkiller = get_rss_irn(rss, pk_irn);
769 /* add consumer and rebuild the consumer array */
770 if (! plist_has_value(node->consumer_list, pk_irn)) {
771 plist_insert_back(node->consumer_list, pk_irn);
772 node->consumer = build_sorted_array_from_list(node->consumer_list, phase_obst(&rss->ph));
775 /* add pk_irn as descendant, add it's descendants to irn's and rebuild array */
776 if (! plist_has_value(node->descendant_list, pk_irn)) {
779 plist_insert_back(node->descendant_list, pk_irn);
781 foreach_plist(pkiller->descendant_list, el) {
782 ir_node *desc = plist_element_get_value(el);
784 if (! plist_has_value(node->descendant_list, desc))
785 plist_insert_back(node->descendant_list, desc);
788 node->descendants = build_sorted_array_from_list(node->descendant_list, phase_obst(&rss->ph));
794 * Compute the potential killing set PK.
796 static void compute_pkill_set(rss_t *rss) {
797 plist_element_t *u_el, *v_el;
799 foreach_plist(rss->nodes, u_el) {
800 ir_node *u_irn = plist_element_get_value(u_el);
801 rss_irn_t *u = get_rss_irn(rss, u_irn);
803 DBG((rss->dbg, LEVEL_1, "\tcomputing potential killers of %+F:\n", u_irn));
805 /* check each consumer if it is a potential killer */
806 foreach_plist(u->consumer_list, v_el) {
807 ir_node *v_irn = plist_element_get_value(v_el);
808 rss_irn_t *v = get_rss_irn(rss, v_irn);
810 /* check, if v is a potential killer of u */
811 if (is_potential_killer(rss, v, u)) {
812 if (! plist_has_value(u->pkiller_list, v_irn))
813 plist_insert_back(u->pkiller_list, v_irn);
815 DBG((rss->dbg, LEVEL_2, "\t\tpotential killer %+F\n", v_irn));
822 if (rss->opts->dump_flags & RSS_DUMP_PKG)
823 debug_vcg_dump_pkg(rss, NULL, 0);
827 * Build set of killing edges (from values to their potential killers)
829 static void build_kill_edges(rss_t *rss, pset *epk) {
830 plist_element_t *el, *k_el;
832 foreach_plist(rss->nodes, el) {
833 ir_node *irn = plist_element_get_value(el);
834 rss_irn_t *rirn = get_rss_irn(rss, irn);
836 DBG((rss->dbg, LEVEL_3, "\t\tbuilding kill edges for %+F:\n", irn));
838 foreach_plist(rirn->pkiller_list, k_el) {
839 ir_node *pkiller = plist_element_get_value(k_el);
840 rss_edge_t *ke = obstack_alloc(phase_obst(&rss->ph), sizeof(*ke));
846 DBG((rss->dbg, LEVEL_3, "\t\t\ttarget %+F\n", pkiller));
848 pset_insert(epk, ke, HASH_RSS_EDGE(ke));
853 /* print the given cbc for debugging purpose */
854 static void debug_print_cbc(firm_dbg_module_t *mod, cbc_t *cbc) {
858 DBG((mod, LEVEL_3, "\t\tS = set of parents:\n"));
859 foreach_nodeset(cbc->parents, n) {
860 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
862 DBG((mod, LEVEL_3, "\t\tT = set of children:\n"));
863 foreach_nodeset(cbc->children, n) {
864 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
866 DBG((mod, LEVEL_3, "\t\tE = Edges from producers to consumers\n"));
867 foreach_pset(cbc->kill_edges, ke) {
868 DBG((mod, LEVEL_3, "\t\t\t%+F -> %+F\n", ke->src, ke->tgt));
873 * Construct the bipartite decomposition.
874 * Sid-Ahmed-Ali Touati, Phd Thesis
875 * Register Pressure in Instruction Level Parallelism, p. 71
877 static void compute_bipartite_decomposition(rss_t *rss) {
878 pset *epk = new_pset(cmp_rss_edges, 10);
883 DBG((rss->dbg, LEVEL_1, "\tcomputing bipartite decomposition:\n"));
885 build_kill_edges(rss, epk);
887 foreach_plist(rss->nodes, el) {
888 ir_node *u_irn = plist_element_get_value(el);
889 rss_irn_t *u = get_rss_irn(rss, u_irn);
895 plist_element_t *el2;
897 rss_edge_t *kedge_root = NULL;
898 ir_node *t_irn, *s_irn;
900 if (u->visited || u_irn == _sink)
903 DBG((rss->dbg, LEVEL_2, "\t\t%+F choosen:\n", u_irn));
905 cbc = obstack_alloc(phase_obst(&rss->ph), sizeof(*cbc));
908 /* initialize S_cb */
909 cbc->parents = new_nodeset(5);
910 nodeset_insert(cbc->parents, u_irn);
911 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (init)\n", u_irn));
914 cbc->kill_edges = new_pset(cmp_rss_edges, 5);
916 /* each parent gets killed by at least one value from children */
918 /* T_cb = PK_successors(u) */
919 cbc->children = new_nodeset(5);
920 foreach_plist(u->pkiller_list, el2) {
921 nodeset_insert(cbc->children, plist_element_get_value(el2));
922 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (init)\n", plist_element_get_value(el2)));
926 Now: insert the parents of all children into the parent set
927 and insert the children of all parents into the children set
928 until the sets don't change any more
930 while (p_change || c_change) {
931 p_change = c_change = 0;
933 /* accumulate parents */
934 foreach_nodeset(cbc->children, t_irn) {
935 foreach_pset(epk, k_edge) {
936 ir_node *val = k_edge->src;
938 if (k_edge->tgt == t_irn && ! nodeset_find(cbc->parents, val)) {
939 nodeset_insert(cbc->parents, val);
941 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (killed by %+F)\n", val, t_irn));
946 /* accumulate children */
947 foreach_nodeset(cbc->parents, s_irn) {
948 foreach_pset(epk, k_edge) {
949 ir_node *val = k_edge->tgt;
951 if (k_edge->src == s_irn && ! nodeset_find(cbc->children, val)) {
952 nodeset_insert(cbc->children, val);
954 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (kills %+F)\n", val, s_irn));
960 /* mark all parent values as visited */
961 foreach_nodeset(cbc->parents, s_irn) {
962 rss_irn_t *s = get_rss_irn(rss, s_irn);
964 /* assure bipartite property */
966 if (nodeset_find(cbc->children, s_irn)) {
967 nodeset_remove(cbc->children, s_irn);
968 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F removed from to children\n", s_irn));
974 foreach_pset(epk, k_edge) {
975 if (nodeset_find(cbc->parents, k_edge->src) && nodeset_find(cbc->children, k_edge->tgt)) {
976 pset_insert(cbc->kill_edges, k_edge, HASH_RSS_EDGE(k_edge));
978 Link all k_edges which are about to be removed together.
979 Beware: pset_remove kills the iterator.
981 k_edge->next = kedge_root;
986 /* remove all linked k_edges */
987 for (; kedge_root; kedge_root = kedge_root->next) {
988 pset_remove(epk, kedge_root, HASH_RSS_EDGE(kedge_root));
992 foreach_nodeset(cbc->parents, s_irn) {
995 foreach_pset(cbc->kill_edges, k_edge) {
996 if (k_edge->src == s_irn) {
998 pset_break(cbc->kill_edges);
1005 ir_fprintf(stderr, "Warning: parent %+F is not killed in current cbc\n", s_irn);
1008 assert(vrfy_ok && "Verification of CBC failed");
1010 /* add the connected bipartite component */
1011 if (nodeset_count(cbc->parents) > 0 && nodeset_count(cbc->children) > 0 && pset_count(cbc->kill_edges) > 0)
1012 pset_insert(rss->cbc_set, cbc, (unsigned)cbc->nr);
1013 DBG((rss->dbg, LEVEL_2, "\tbipartite component %d inserted:\n", cbc->nr));
1015 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2)
1016 debug_print_cbc(rss->dbg, cbc);
1020 if (rss->opts->dump_flags & RSS_DUMP_CBC)
1021 debug_vcg_dump_bipartite(rss);
1027 * Select the child with the maximum cost.
1029 static child_t *select_child_max_cost(rss_t *rss, nodeset *x, nodeset *y, child_t *t, cbc_t *cbc) {
1031 float max_cost = -1.0f;
1033 DBG((rss->dbg, LEVEL_2, "\t\tcomputing children costs:\n"));
1035 foreach_nodeset(cbc->children, child) {
1036 rss_irn_t *r_child = get_rss_irn(rss, child);
1037 int num_unkilled_parents = 0;
1038 int num_descendants;
1042 /* get the number of unkilled parents */
1043 foreach_pset(cbc->kill_edges, k_edge) {
1044 if (k_edge->tgt == child && nodeset_find(x, k_edge->src))
1045 ++num_unkilled_parents;
1048 cost = (float)num_unkilled_parents;
1050 num_descendants = plist_count(r_child->descendant_list) + nodeset_count(y);
1052 if (num_descendants > 0)
1053 cost /= (float)num_descendants;
1055 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F, #desc %d, cost %.3f\n", child, num_descendants, cost));
1057 if (cost > max_cost) {
1068 * Remove all parents from x which are killed by t_irn.
1070 static void remove_covered_parents(rss_t *rss, nodeset *x, ir_node *t_irn, cbc_t *cbc) {
1071 rss_irn_t *t = get_rss_irn(rss, t_irn);
1074 DBG((rss->dbg, LEVEL_2, "\t\tremoving parents covered by %+F:\n", t_irn));
1076 foreach_pset(cbc->kill_edges, k_edge) {
1077 if (k_edge->tgt == t_irn && nodeset_find(x, k_edge->src)) {
1078 nodeset_remove(x, k_edge->src);
1079 plist_insert_back(t->parent_list, k_edge->src);
1080 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", k_edge->src));
1085 static void update_cumulated_descendent_values(rss_t *rss, nodeset *y, ir_node *t_irn) {
1086 rss_irn_t *t = get_rss_irn(rss, t_irn);
1087 plist_element_t *el;
1089 DBG((rss->dbg, LEVEL_2, "\t\tupdating cumulated descendant value of %+F:\n", t_irn));
1091 foreach_plist(t->descendant_list, el) {
1092 nodeset_insert(y, plist_element_get_value(el));
1093 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", plist_element_get_value(el)));
1098 * Greedy-k: a heuristics for the MMA problem
1100 static void compute_killing_function(rss_t *rss) {
1102 struct obstack obst;
1104 obstack_init(&obst);
1106 rss->cbc_set = pset_new_ptr(5);
1107 compute_bipartite_decomposition(rss);
1109 /* for all bipartite components do: */
1110 foreach_pset(rss->cbc_set, cbc) {
1112 nodeset *x = new_nodeset(10);
1113 nodeset *y = new_nodeset(10);
1114 child_t **sks = NEW_ARR_F(child_t *, 20);
1119 DBG((rss->dbg, LEVEL_1, "\tcomputing SKS for cbc %d:\n", cbc->nr));
1120 DBG((rss->dbg, LEVEL_2, "\t\tinitializing parents X:\n"));
1122 /* X = S_cb (all parents are initially uncovered) */
1123 foreach_nodeset(cbc->parents, p) {
1124 nodeset_insert(x, p);
1125 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", p));
1128 /* while X not empty */
1129 while (nodeset_count(x) > 0) {
1130 child_t *t = obstack_alloc(&obst, sizeof(*t));
1131 memset(t, 0, sizeof(*t));
1133 t = select_child_max_cost(rss, x, y, t, cbc);
1135 if (cur_len >= cur_size) {
1136 ARR_EXTO(child_t *, sks, cur_size * 2);
1140 DBG((rss->dbg, LEVEL_2, "\t\tinsert child %+F (%.3f) into SKS at pos %d\n", t->irn, t->cost, cur_len));
1143 remove_covered_parents(rss, x, t->irn, cbc);
1144 update_cumulated_descendent_values(rss, y, t->irn);
1147 ARR_SHRINKLEN(sks, cur_len);
1149 /* sort SKS in increasing cost order */
1150 qsort(sks, cur_len, sizeof(sks[0]), cmp_child_costs);
1152 DBG((rss->dbg, LEVEL_2, "\tprocessing SKS for cbc %d:\n", cbc->nr));
1154 /* build killing function */
1155 for (i = cur_len - 1; i >= 0; --i) { /* loop over sks in decreasing cost order */
1156 child_t *t = sks[i];
1157 rss_irn_t *rt = get_rss_irn(rss, t->irn);
1158 plist_element_t *p_el;
1160 DBG((rss->dbg, LEVEL_3, "\t\t\tkiller %+F (%.3f):\n", t->irn, t->cost));
1162 /* kill all unkilled parents of t */
1163 foreach_plist(rt->parent_list, p_el) {
1164 ir_node *par = plist_element_get_value(p_el);
1165 rss_irn_t *rpar = get_rss_irn(rss, par);
1167 if (is_Sink(rpar->killer)) {
1168 rpar->killer = t->irn;
1169 DBG((rss->dbg, LEVEL_2, "\t\tkill %+F\n", rpar->irn));
1172 DBG((rss->dbg, LEVEL_3, "\t\t\tkeeping %+F as killer for %+F\n", rpar->killer, rpar->irn));
1182 if (rss->opts->dump_flags & RSS_DUMP_KILL)
1183 debug_vcg_dump_kill(rss);
1185 del_pset(rss->cbc_set);
1186 obstack_free(&obst, NULL);
1190 * Adds the edge src -> tgt to the dvg. Checks if reverse edge is already there (asserts).
1192 static INLINE void add_dvg_edge(rss_t *rss, dvg_t *dvg, ir_node *src, ir_node *tgt, int have_source) {
1193 rss_edge_t *dvg_edge;
1197 nodeset_insert(dvg->nodes, src);
1199 assert(nodeset_find(dvg->nodes, src) != NULL && "Wrong assumption");
1201 nodeset_insert(dvg->nodes, tgt);
1205 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1209 if (NULL != pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1210 /* add the edge to the DVG */
1211 dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1213 dvg_edge->src = src;
1214 dvg_edge->tgt = tgt;
1215 dvg_edge->next = NULL;
1217 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", src, tgt));
1218 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1223 * Computes the disjoint value DAG (DVG).
1224 * BEWARE: It is not made explicitly clear in the Touati paper,
1225 * but the DVG is meant to be build from the KILLING DAG
1227 static void compute_dvg(rss_t *rss, dvg_t *dvg) {
1228 plist_element_t *el;
1230 DBG((rss->dbg, LEVEL_1, "\tcomputing DVG:\n"));
1232 foreach_plist(rss->nodes, el) {
1233 ir_node *u_irn = plist_element_get_value(el);
1234 rss_irn_t *u = get_rss_irn(rss, u_irn);
1235 rss_irn_t *u_killer = get_rss_irn(rss, u->killer);
1238 /* TODO: omit nodes only having sink as pkiller and killing no one */
1240 /* add an edge to killer */
1241 add_dvg_edge(rss, dvg, u_irn, u->killer, 0);
1243 if (is_Sink(u->killer))
1246 /* We add an edge to every killer, from where we could be reached. */
1247 for (i = ARR_LEN_SAFE(u_killer->descendants) - 1; i >= 0; --i) {
1248 add_dvg_edge(rss, dvg, u_irn, u_killer->descendants[i], 1);
1253 foreach_plist(rss->nodes, el2) {
1254 ir_node *v_irn = plist_element_get_value(el2);
1257 There is an edge (u, v) in the DVG iff v is a descendant of the killer(u).
1259 if (BSEARCH_IRN_ARR(v_irn, u_kill->descendants)) {
1260 rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1263 /* insert the user into the DVG and append it to the user list of u */
1264 nodeset_insert(dvg->nodes, v_irn);
1265 if (! plist_has_value(u->dvg_user_list, v_irn))
1266 plist_insert_back(u->dvg_user_list, v_irn);
1268 dvg_edge->src = u_irn;
1269 dvg_edge->tgt = v_irn;
1270 dvg_edge->next = NULL;
1275 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1277 /* add the edge to the DVG */
1278 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", u_irn, v_irn));
1279 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1285 if (rss->opts->dump_flags & RSS_DUMP_DVG)
1286 debug_vcg_dump_dvg(rss, dvg);
1290 * Updates the dvg structure when a serialization edge from src -> tgt is added.
1292 static void update_dvg(rss_t *rss, dvg_t *dvg, rss_irn_t *src, rss_irn_t *tgt) {
1295 rss_edge_t **arr = alloca(pset_count(dvg->edges) * sizeof(arr[0]));
1298 Add an edge from serialization target to serialization src:
1299 src cannot be alive with target
1301 add_dvg_edge(rss, dvg, tgt->irn, src->irn, 0);
1303 /* Add edges to src's descendants as well, they also getting serialized. */
1304 for (i = ARR_LEN_SAFE(src->descendants) - 1; i >= 0; --i) {
1305 add_dvg_edge(rss, dvg, tgt->irn, src->descendants[i], 1);
1308 /* We also have to add edges from targets predecessors in dvg */
1310 /* We cannot insert elements into set over which we iterate ... */
1311 foreach_pset(dvg->edges, edge) {
1312 if (edge->tgt == tgt->irn) {
1317 for (i = 0; i < idx; ++i) {
1319 add_dvg_edge(rss, dvg, edge->src, src->irn, 1);
1320 for (j = ARR_LEN_SAFE(src->descendants) - 1; j >= 0; --j) {
1321 add_dvg_edge(rss, dvg, edge->src, src->descendants[j], 1);
1328 * Accumulate all descendants for root into list.
1330 static void accumulate_dvg_descendant_values(rss_t *rss, rss_irn_t *root, plist_t *list) {
1331 if (plist_count(root->dvg_user_list) > 0) {
1332 plist_element_t *el;
1334 foreach_plist(root->dvg_user_list, el) {
1335 ir_node *v_irn = plist_element_get_value(el);
1336 rss_irn_t *v = get_rss_irn(rss, v_irn);
1338 /* add v as descendant */
1339 if (! plist_has_value(list, v_irn)) {
1340 plist_insert_back(list, v_irn);
1341 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG descendant %+F\n", v_irn));
1344 /* accumulate v's descendants */
1345 accumulate_dvg_descendant_values(rss, v, list);
1351 * Builds the list of potential killers for each node
1353 * Needs the descendant list for all user as sorted array.
1355 static void build_dvg_pkiller_list(rss_t *rss, dvg_t *dvg) {
1358 foreach_nodeset(dvg->nodes, irn) {
1359 rss_irn_t *node = get_rss_irn(rss, irn);
1360 plist_element_t *el, *el2;
1362 DBG((rss->dbg, DEBUG_DVG, "\t\tbuilding pkiller list for %+F\n", irn));
1364 /* check each user */
1365 foreach_plist(node->dvg_user_list, el) {
1366 ir_node *u_irn = plist_element_get_value(el);
1368 /* is the current user u_irn not a descendant of any other user -> pkiller */
1369 foreach_plist(node->dvg_user_list, el2) {
1370 ir_node *v_irn = plist_element_get_value(el2);
1371 rss_irn_t *v = get_rss_irn(rss, v_irn);
1374 ! BSEARCH_IRN_ARR(u_irn, v->dvg_desc) &&
1375 ! plist_has_value(node->dvg_pkiller_list, u_irn))
1377 plist_insert_back(node->dvg_pkiller_list, u_irn);
1378 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG pkiller %+F\n", u_irn));
1383 node->dvg_pkiller = build_sorted_array_from_list(node->dvg_pkiller_list, phase_obst(&rss->ph));
1387 if (firm_dbg_get_mask(rss->dbg) & DEBUG_DVG)
1388 debug_vcg_dump_dvg_pkiller(rss, dvg);
1395 * Compute the maximal antichain of the current DVG.
1396 * This is a reimplementation of the MAXIMAL_ANTI_CHAIN function
1397 * from the DDG library 1.1 (DAG.cpp).
1399 static nodeset *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration) {
1400 int n = nodeset_count(dvg->nodes);
1401 int *assignment = alloca(n * sizeof(assignment[0]));
1402 int *assignment_rev = alloca(n * sizeof(assignment_rev[0]));
1403 int *idx_map = alloca(n * sizeof(idx_map[0]));
1404 hungarian_problem_t *bp;
1405 nodeset *values, *temp;
1407 int i, j, cost, cur_chain, res;
1408 rss_edge_t *dvg_edge;
1410 #define MAP_IDX(irn) bsearch_for_index(get_irn_idx(irn), idx_map, n, 1)
1412 if (pset_count(dvg->edges) == 0)
1415 bp = hungarian_new(n, n, 1, HUNGARIAN_MATCH_NORMAL);
1418 At first, we build an index map for the nodes in the DVG,
1419 because we cannot use the irn idx therefore as the resulting
1420 bipartite data structure would have around 1.2GB.
1421 So we limit the size to the number of nodes we have in the DVG
1422 and build a sorted index map for their irn indices.
1425 foreach_nodeset(dvg->nodes, u_irn) {
1426 idx_map[i++] = get_irn_idx(u_irn);
1428 qsort(idx_map, n, sizeof(idx_map[0]), cmp_int);
1430 foreach_pset(dvg->edges, dvg_edge) {
1431 int idx_u = MAP_IDX(dvg_edge->src);
1432 int idx_v = MAP_IDX(dvg_edge->tgt);
1434 /* add the entry to the bipartite data structure */
1435 hungarian_add(bp, idx_u, idx_v, 1);
1436 DBG((rss->dbg, LEVEL_3, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1437 idx_u, dvg_edge->src, idx_v, dvg_edge->tgt));
1441 Add a bipartite entry for each pair of nodes (u, v), where exists a
1442 path in the DVG from u to v, ie. connect all descendants(v) to v.
1443 desc_dvg(v) = accumulated descendants of all z in dvg_user_list(v)
1445 foreach_nodeset(dvg->nodes, u_irn) {
1446 rss_irn_t *u = get_rss_irn(rss, u_irn);
1447 int idx_u_irn = MAP_IDX(u_irn);
1449 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG descendants of %+F:\n", u_irn));
1451 //plist_clear(u->dvg_desc_list);
1452 //accumulate_dvg_descendant_values(rss, u, u->dvg_desc_list);
1455 FIXME: The array is build on the phase obstack and we cannot free the data.
1456 So the array get as many times allocated as this function is called.
1459 /* build the sorted array for faster searches */
1460 //u->dvg_desc = build_sorted_array_from_list(u->dvg_desc_list, phase_obst(&rss->ph));
1462 DBG((rss->dbg, DEBUG_MAX_AC, "\t\tadding bipartite entries of %+F:\n", u_irn));
1464 /* add the bipartite entries for all descendants */
1465 for (i = ARR_LEN_SAFE(u->dvg_desc) - 1; i >= 0; --i) {
1466 rss_irn_t *desc = get_rss_irn(rss, u->dvg_desc[i]);
1467 int idx_desc = MAP_IDX(u->dvg_desc[i]);
1469 /* add the entry to the bipartite data structure */
1470 hungarian_add(bp, idx_u_irn, idx_desc, 1);
1471 DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1472 idx_u_irn, u_irn, idx_desc, u->dvg_desc[i]));
1479 /* We want maximum cardinality matching */
1480 hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);
1483 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG pkiller:\n"));
1484 /* beware: the following function needs the dvg_desc array */
1485 build_dvg_pkiller_list(rss, dvg);
1488 DBG((rss->dbg, LEVEL_2, "\t\tcomputing bipartite matching\n"));
1490 The maximum cardinality bipartite matching gives us the minimal
1491 chain partition, which corresponds to the maximum anti chains.
1493 res = hungarian_solve(bp, assignment, &cost, 1);
1494 assert(res == 0 && "Bipartite matching failed!");
1497 memset(assignment_rev, -1, n * sizeof(assignment_rev[0]));
1499 /* build the reverse assignment, ie. foreach i -> j, add j -> i */
1500 for (i = 0; i < n; ++i) {
1501 if (assignment[i] >= 0) {
1502 int j = assignment[i];
1503 assignment_rev[j] = i;
1507 DBG((rss->dbg, LEVEL_2, "\t\tgot assignment with cost %d\n", cost));
1508 DBG((rss->dbg, LEVEL_3, "\t\t\tassignment --- reverse assignment\n", cost));
1509 for (i = 0; i < n; ++i) {
1510 DBG((rss->dbg, LEVEL_3, "\t\t\t%3d -> %3d %3d -> %3d\n", i, assignment[i], i, assignment_rev[i]));
1513 values = new_nodeset(10);
1515 /* Construction of the minimal chain partition */
1516 for (j = 0; j < n; ++j) {
1517 /* check nodes, which did not occur as target */
1518 if (assignment_rev[j] == -1) {
1519 int xj = idx_map[j];
1520 ir_node *xj_irn = get_idx_irn(rss->irg, xj);
1521 rss_irn_t *xj_rss = get_rss_irn(rss, xj_irn);
1522 chain_t *c = obstack_alloc(phase_obst(&rss->ph), sizeof(*c));
1525 /* there was no source for j -> we have a source of a new chain */
1526 nodeset_insert(values, xj_irn);
1528 c->elements = plist_obstack_new(phase_obst(&rss->ph));
1529 c->nr = cur_chain++;
1530 plist_insert_back(c->elements, xj_irn);
1534 DBG((rss->dbg, LEVEL_2, "\t\tstarting chain %d:\n", c->nr));
1535 DBG((rss->dbg, LEVEL_2, "\t\t\t%+F (%d)", xj_irn, j));
1537 /* follow chain, having j as source */
1539 while (assignment[source] >= 0) {
1540 int target = assignment[source];
1541 int irn_idx = idx_map[target];
1542 ir_node *irn = get_idx_irn(rss->irg, irn_idx);
1543 rss_irn_t *node = get_rss_irn(rss, irn);
1545 plist_insert_back(c->elements, irn);
1548 DB((rss->dbg, LEVEL_2, " -> %+F (%d)", irn, target));
1550 /* new source = last target */
1554 DB((rss->dbg, LEVEL_2, "\n"));
1559 Computing the maximal antichain: Select an element from each
1560 chain such, such it is parallel with the others.
1562 DBG((rss->dbg, LEVEL_1, "\t\tcomputing set of saturation values (MAX AC)\n"));
1563 DBG((rss->dbg, LEVEL_3, "\t\tstarting with:\n"));
1566 if (firm_dbg_get_mask(rss->dbg) & LEVEL_3)
1567 dump_nodeset(values, "\t\t\t");
1573 We need an explicit array for the values as
1574 we cannot iterate multiple times over the same
1575 set at the same time. :-(((((
1577 int n = nodeset_count(values);
1579 ir_node **val_arr = NEW_ARR_F(ir_node *, n);
1581 foreach_nodeset(values, u_irn)
1582 val_arr[i++] = u_irn;
1587 temp = new_nodeset(10);
1589 /* Select all nodes from current value set, having another node in the set as descendant. */
1590 for (i = 0; i < n; ++i) {
1591 rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1593 for (j = 0; j < n; ++j) {
1597 key.src = val_arr[i];
1598 key.tgt = val_arr[j];
1600 if (pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1601 /* v[j] is descendant of u -> remove u and break */
1602 nodeset_insert(temp, u->irn);
1603 nodeset_remove(values, u->irn);
1605 DBG((rss->dbg, LEVEL_3, "\t\t\tremoving %+F from values, adding it to temp\n", u->irn));
1613 /* Try to insert the chain predecessor of all selected u's */
1614 foreach_nodeset(temp, u_irn) {
1615 rss_irn_t *u = get_rss_irn(rss, u_irn);
1616 chain_t *c = u->chain;
1617 plist_element_t *el = plist_find_value(c->elements, u_irn);
1619 assert(el && "Missing element in chain!");
1621 /* If u has predecessor in chain: insert the predecessor */
1622 if (el == plist_element_get_prev(el)) {
1623 nodeset_insert(values, plist_element_get_value(el));
1624 DBG((rss->dbg, LEVEL_3, "\t\t\tadding %+F to values\n", plist_element_get_value(el)));
1630 } while (nodeset_count(temp) > 0);
1632 DBG((rss->dbg, LEVEL_2, "\t\tfinal set:\n"));
1634 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2) {
1635 dump_nodeset(values, "\t\t\t");
1639 if (rss->opts->dump_flags & RSS_DUMP_MAXAC)
1640 debug_vcg_dump_pkg(rss, values, iteration);
1650 * Computes the best serialization between two nodes of sat_vals.
1652 static serialization_t *compute_best_admissible_serialization(rss_t *rss, nodeset *sat_vals, serialization_t *ser, int num_regs) {
1653 int n = nodeset_count(sat_vals);
1654 int n_idx = ARR_LEN_SAFE(rss->idx_map);
1656 ir_node **val_arr = alloca(n * sizeof(val_arr[0]));
1657 bitset_t *bs_sv = bitset_alloca(n_idx);
1658 bitset_t *bs_vdesc = bitset_alloca(n_idx);
1659 bitset_t *bs_tmp = bitset_alloca(n_idx);
1660 bitset_t *bs_ukilldesc = bitset_alloca(n_idx);
1661 int best_benefit = INT_MAX;
1662 int best_omega2 = INT_MAX;
1663 int best_benefit_omega20 = INT_MAX;
1667 rss_edge_t min_benefit_edge;
1668 rss_edge_t min_omega20_edge;
1669 rss_irn_t *ser_u_omega1, *ser_v_omega1;
1670 rss_irn_t *ser_u_omega20, *ser_v_omega20;
1672 DBG((rss->dbg, LEVEL_1, "\tcomputing admissible serializations:\n"));
1675 We need an explicit array for the values as
1676 we cannot iterate multiple times over the same
1677 set at the same time. :-(((((
1680 foreach_nodeset(sat_vals, irn) {
1682 bitset_set(bs_sv, BLOCK_IDX_MAP(rss, irn));
1686 We build all admissible serializations and remember the best found so far.
1689 if v in pkiller(u): add edge from v to all other pkiller(u)
1690 else: for all vv in pkiller(u): add edge from v to vv if there exists no path from vv to v
1694 A node is unserializable if:
1695 - it has only one killer and this one is Sink
1696 - it kills no other values
1697 In this case there is no serialization which could
1698 reduce the registerpressure
1700 #define IS_UNSERIALIZABLE_NODE(rss_node) \
1703 (plist_count(rss_node->pkiller_list) == 1) && \
1704 is_Sink(rss_node->killer) && \
1705 (rss_node->kill_count == 0) \
1707 be_is_Barrier(rss_node->irn) || \
1708 be_is_Keep(rss_node->irn) \
1711 /* for all u in sat_vals */
1712 for (i = 0; i < n; ++i) {
1713 rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1714 plist_element_t *el;
1716 /* ignore nodes where serialization does not help */
1717 if (IS_UNSERIALIZABLE_NODE(u)) {
1718 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", u->irn));
1722 /* accumulate all descendants of all pkiller(u) */
1723 bitset_clear_all(bs_ukilldesc);
1724 foreach_plist(u->pkiller_list, el) {
1725 ir_node *irn = plist_element_get_value(el);
1726 rss_irn_t *node = get_rss_irn(rss, irn);
1729 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, irn));
1733 for (k = ARR_LEN_SAFE(node->descendants) - 1; k >= 0; --k) {
1734 if (! is_Sink(node->descendants[k]))
1735 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, node->descendants[k]));
1739 /* for all v in sat_vals */
1740 for (j = 0; j < n; ++j) {
1741 ir_node *v_irn = val_arr[j];
1742 rss_irn_t *v = get_rss_irn(rss, v_irn);
1743 unsigned v_height = get_irn_height(rss->h, v_irn);
1744 int omega1, omega2, is_pkiller;
1746 /* v cannot be serialized with itself
1747 * ignore nodes where serialization does not help */
1748 if (i == j || IS_UNSERIALIZABLE_NODE(v)) {
1750 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", v->irn));
1754 /* get descendants of v */
1755 bitset_clear_all(bs_vdesc);
1756 bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v_irn));
1757 for (k = ARR_LEN_SAFE(v->descendants) - 1; k >= 0; --k) {
1758 if (! is_Sink(v->descendants[k]))
1759 bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v->descendants[k]));
1762 /* if v is in pkiller(u) */
1763 is_pkiller = plist_has_value(u->pkiller_list, v_irn);
1765 /* for all vv in pkiller(u) */
1766 foreach_plist(u->pkiller_list, el) {
1767 ir_node *vv_irn = plist_element_get_value(el);
1770 if (is_Sink(vv_irn) || is_cfop(vv_irn))
1774 add_edge = vv_irn != v_irn && skip_Proj(vv_irn) != skip_Proj(v_irn);
1776 add_edge = ! heights_reachable_in_block(rss->h, skip_Proj(vv_irn), skip_Proj(v_irn));
1779 As we add an edge from vv -> v, we have to make sure,
1780 that there exists no path from v to vv.
1784 int vv_height = get_irn_height(rss->h, vv_irn);
1785 int mu1, mu2, critical_path_cost;
1788 mu1 = | descendants(v) cut sat_vals |
1789 the number of saturating values which cannot
1790 be simultaneously alive with u
1792 bitset_copy(bs_tmp, bs_vdesc);
1793 mu1 = bitset_popcnt(bitset_and(bs_tmp, bs_sv));
1796 mu2 = | accum_desc_all_pkiller(u) without descendants(v) |
1799 bitset_copy(bs_tmp, bs_ukilldesc);
1800 mu2 = bitset_popcnt(bitset_andnot(bs_tmp, bs_vdesc));
1806 /* omega1 = mu1 - mu2 */
1812 /* omega2 = increase of critical path */
1813 critical_path_cost =
1814 v_height /* longest path from v to sink */
1815 + rss->max_height - vv_height /* longest path from source to vv */
1819 If critical_path_cost > max_height -> the new edge
1820 would increase the longest critical path by the difference.
1822 omega2 = critical_path_cost > rss->max_height ? critical_path_cost - rss->max_height : 0;
1824 /* this keeps track of the edge with the best benefit */
1825 if (omega1 >= num_regs - n && omega1 < best_benefit) {
1826 min_benefit_edge.src = v_irn;
1827 min_benefit_edge.tgt = vv_irn;
1832 best_benefit = omega1;
1833 ser->new_killer = is_pkiller;
1836 /* this keeps track of the edge with the best omega1 costs where omega2 == 0 */
1837 if (omega2 == 0 && omega1 >= num_regs - n && omega1 < best_benefit_omega20) {
1838 min_omega20_edge.src = v_irn;
1839 min_omega20_edge.tgt = vv_irn;
1844 best_benefit_omega20 = omega1;
1845 ser->new_killer = is_pkiller;
1848 best_omega2 = MIN(best_omega2, omega2);
1850 DBG((rss->dbg, LEVEL_2, "\t\tfound %+F -> %+F (w1 %d, w2 %d)\n",
1851 v_irn, vv_irn, omega1, omega2));
1853 } /* for all vv in pkiller(u) */
1854 } /* for all v in sat_vals */
1855 } /* for all u in sat_vals */
1860 if (best_omega2 == 0) {
1861 ser->u = ser_u_omega20;
1862 ser->v = ser_v_omega20;
1863 ser->edge->src = min_omega20_edge.src;
1864 ser->edge->tgt = min_omega20_edge.tgt;
1865 ser->omega1 = best_benefit_omega20;
1866 ser->omega2 = best_omega2;
1869 ser->u = ser_u_omega1;
1870 ser->v = ser_v_omega1;
1871 ser->edge->src = min_benefit_edge.src;
1872 ser->edge->tgt = min_benefit_edge.tgt;
1873 ser->omega1 = best_benefit;
1874 ser->omega2 = best_omega2;
1879 #undef IS_UNSERIALIZABLE_NODE
1883 * Perform the value serialization heuristic and add all
1884 * computed serialization edges as dependencies to the irg.
1886 static void perform_value_serialization_heuristic(rss_t *rss) {
1887 bitset_t *arch_nonign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1888 bitset_t *abi_ign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1889 int available_regs, iteration, num_live;
1892 pset *ser_set = new_pset(cmp_rss_edges, 20);
1894 /* available_regs = R = |arch_non_ignore_regs cut ~abi_ignore_regs| */
1895 arch_put_non_ignore_regs(rss->arch_env, rss->cls, arch_nonign_bs);
1896 be_abi_put_ignore_regs(rss->abi, rss->cls, abi_ign_bs);
1897 bitset_andnot(arch_nonign_bs, abi_ign_bs);
1898 available_regs = bitset_popcnt(arch_nonign_bs);
1899 //um_live = pset_count(rss->live_block);
1900 //available_regs -= num_live < available_regs ? num_live : 0;
1902 DBG((rss->dbg, LEVEL_1, "\n\t#available regs: %d\n\n", available_regs));
1905 At first we need to compute the disjoint value DAG (DVG = {V, E_dv}).
1906 V = set of all nodes we are currently interested in
1907 E_dv = there is an edge from u to v iff v is a descendant of killer(u), forall u, v in V
1909 dvg.nodes = new_nodeset(plist_count(rss->nodes));
1910 dvg.edges = new_pset(cmp_rss_edges, plist_count(rss->nodes) * 5);
1911 compute_dvg(rss, &dvg);
1914 Then we perform the heuristic serialization algorithm
1915 on the DVG which gives us all necessary serialization
1918 DBG((rss->dbg, LEVEL_1, "\tcomputing maximal antichain:\n"));
1920 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
1921 while (sat_vals && (nodeset_count(sat_vals) > available_regs)) {
1922 serialization_t *ser, best_ser;
1923 rss_edge_t *edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*edge));
1924 ir_node *dep_src, *dep_tgt;
1926 best_ser.edge = edge;
1927 ser = compute_best_admissible_serialization(rss, sat_vals, &best_ser, available_regs);
1929 DBG((rss->dbg, LEVEL_1, "\tcurrent register saturation %d, target %d\n", nodeset_count(sat_vals), available_regs));
1932 DBG((rss->dbg, LEVEL_1, "\tno RS improving serialization found, breaking at iteration %d\n", iteration));
1936 /* Insert the serialization as dependency edge into the irg. */
1937 DBG((rss->dbg, LEVEL_1, "\tserializing %+F -> %+F with edge %+F -> %+F and cost %d, %d\n",
1938 ser->u->irn, ser->v->irn, ser->edge->src, ser->edge->tgt, ser->omega1, ser->omega2));
1940 if (pset_find(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge)))
1941 ir_printf("WARNING: serialization %+F -> %+F computed twice!\n", ser->edge->src, ser->edge->tgt);
1944 pset_insert(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge));
1946 /* update the dvg */
1947 update_dvg(rss, &dvg, ser->v, ser->u);
1948 update_dvg(rss, &dvg, get_rss_irn(rss, ser->edge->src), get_rss_irn(rss, ser->edge->tgt));
1949 del_nodeset(sat_vals);
1951 dep_src = skip_Proj(ser->edge->src);
1952 dep_tgt = ser->edge->tgt;
1953 add_irn_dep(dep_src, dep_tgt);
1955 /* Update descendants, consumer and pkillers of the target */
1956 update_node_info(rss, ser->edge->tgt, ser->edge->src);
1958 /* TODO: try to find a cheaper way for updating height information */
1959 rss->max_height = heights_recompute_block(rss->h, rss->block);
1961 /* Recompute the antichain for next serialization */
1962 DBG((rss->dbg, LEVEL_1, "\tre-computing maximal antichain:\n"));
1963 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
1966 del_nodeset(dvg.nodes);
1967 del_pset(dvg.edges);
1971 * Do initial calculations for a block.
1973 static void process_block(ir_node *block, void *env) {
1976 const ir_edge_t *edge;
1978 phase_init(&rss->ph, "rss block preprocessor", rss->irg, PHASE_DEFAULT_GROWTH, init_rss_irn);
1980 DBG((rss->dbg, LEVEL_1, "preprocessing block %+F\n", block));
1983 /* build an index map for all nodes in the current block */
1985 n = get_irn_n_edges(block);
1986 NEW_ARR_A(int *, rss->idx_map, n);
1987 foreach_out_edge(block, edge) {
1988 ir_node *irn = get_edge_src_irn(edge);
1989 rss->idx_map[i++] = get_irn_idx(irn);
1991 qsort(rss->idx_map, n, sizeof(rss->idx_map[0]), cmp_int);
1992 rss->max_height = heights_recompute_block(rss->h, block);
1994 /* loop over all register classes */
1995 for (i = arch_isa_get_n_reg_class(rss->arch_env->isa) - 1; i >= 0; --i) {
1996 const arch_register_class_t *cls = arch_isa_get_reg_class(rss->arch_env->isa, i);
1999 DBG((rss->dbg, LEVEL_1, "register class %s\n", arch_register_class_name(cls)));
2001 /* Get all live value at end of Block having current register class */
2002 rss->live_block = pset_new_ptr(10);
2003 be_liveness_end_of_block(rss->liveness, rss->arch_env, rss->cls, rss->block, rss->live_block);
2005 /* reset the list of interesting nodes */
2006 plist_clear(rss->nodes);
2007 plist_insert_back(rss->nodes, _sink);
2009 /* collect all nodes having a certain register class */
2010 foreach_out_edge(block, edge) {
2011 ir_node *irn = get_edge_src_irn(edge);
2012 ir_mode *mode = get_irn_mode(irn);
2016 - mode_T nodes (the projs are asked)
2017 - mode_X nodes (control flow nodes are always scheduled last)
2018 - Keeps (they are always immediately scheduled)
2019 - Phi (same as Keep)
2021 if (mode == mode_T || mode == mode_X || is_Phi(irn))
2025 In case of a proj, we skip
2026 - Barrier (they are a Barrier :)
2028 - the Proj itself, as it's scheduled always with it's super node
2031 ir_node *pred = get_Proj_pred(irn);
2032 if (be_is_Barrier(pred) || is_Start(pred))
2036 /* calculate the descendants and consumer for each node in the block */
2037 collect_node_info(rss, skip_Proj(irn));
2039 if (be_is_Keep(irn))
2042 if (! arch_irn_is(rss->arch_env, irn, ignore) && arch_get_irn_reg_class(rss->arch_env, irn, -1) == cls) {
2043 plist_insert_back(rss->nodes, skip_Proj(irn));
2048 /* compute the potential killing set PK(G) */
2049 compute_pkill_set(rss);
2051 /* compute the killing function k* */
2052 compute_killing_function(rss);
2055 Compute the heuristic value serialization and
2056 add the necessary dependencies to the irg.
2058 perform_value_serialization_heuristic(rss);
2060 del_pset(rss->live_block);
2063 phase_free(&rss->ph);
2068 * Register the options.
2070 void rss_register_options(lc_opt_entry_t *grp) {
2071 static int run_once = 0;
2072 lc_opt_entry_t *rss_grp;
2076 rss_grp = lc_opt_get_grp(grp, "rss");
2078 lc_opt_add_table(rss_grp, rss_option_table);
2081 #endif /* WITH_LIBCORE */
2084 * Preprocess the irg for scheduling.
2086 void rss_schedule_preparation(const be_irg_t *birg) {
2089 FIRM_DBG_REGISTER(rss.dbg, "firm.be.sched.rss");
2091 //firm_dbg_set_mask(rss.dbg, LEVEL_1 | LEVEL_2 | LEVEL_3);
2093 init_rss_special_nodes(birg->irg);
2095 rss.irg = birg->irg;
2096 rss.arch_env = birg->main_env->arch_env;
2097 rss.abi = birg->abi;
2098 rss.h = heights_new(birg->irg);
2099 rss.nodes = plist_new();
2100 rss.opts = &rss_options;
2101 rss.liveness = be_liveness(birg->irg);
2102 irg_block_walk_graph(birg->irg, NULL, process_block, &rss);
2103 heights_free(rss.h);
2104 plist_free(rss.nodes);
2105 be_liveness_free(rss.liveness);
2107 if (birg->main_env->options->dump_flags & DUMP_SCHED)
2108 be_dump(rss.irg, "-rss", dump_ir_block_graph);