2 * Implementation of a register saturating list scheduler
3 * as described in: Sid-Ahmed-Ali Touati
4 * Register Saturation in Superscalar and VLIW Codes
6 * @license This file protected by GPL - GNU GENERAL PUBLIC LICENSE.
7 * @author Christian Wuerdig
20 #include "irgraph_t.h"
22 #include "iredges_t.h"
24 #include "irphase_t.h"
30 #include "bipartite.h"
31 #include "hungarian.h"
39 #include "besched_t.h"
42 #include <libcore/lc_opts.h>
43 #include <libcore/lc_opts_enum.h>
44 #endif /* WITH_LIBCORE */
47 #define ARR_LEN_SAFE(arr) ((arr) != NULL ? ARR_LEN((arr)) : 0)
49 #define HASH_RSS_EDGE(edge) ((get_irn_node_nr((edge)->src) << 16) | (get_irn_node_nr((edge)->tgt) & 0xFFFF))
50 #define BSEARCH_IRN_ARR(val, arr) \
51 bsearch(&(val), (arr), ARR_LEN_SAFE((arr)), sizeof((arr)[0]), cmp_irn_idx)
53 #define BLOCK_IDX_MAP(rss, irn) bsearch_for_index(get_irn_idx((irn)), (rss)->idx_map, ARR_LEN_SAFE((rss)->idx_map), 1)
56 typedef struct _rss_opts_t {
60 /* Represents a child with associated costs */
61 typedef struct _child {
66 /* We need edges for several purposes. */
67 typedef struct _rss_edge {
73 /* Represents a connected bipartite component. */
75 nodeset *parents; /**< = S a set of value producers */
76 nodeset *children; /**< = T a set of value consumers */
77 pset *kill_edges; /**< = E a set of edges (t in T, s in S) such as each s in S gets killed by at least one t in T */
78 int nr; /**< a deterministic index for set insertion (used as hash) */
81 /* Represents a disjoint value DAG. */
87 /* Represents a chain of nodes. */
88 typedef struct _chain {
89 plist_t *elements; /**< List of chain elements */
90 int nr; /**< a deterministic index for set insertion (used as hash) */
93 typedef struct _rss_irn {
94 plist_t *consumer_list; /**< List of consumers */
95 ir_node **consumer; /**< Sorted consumer array (needed for faster access) */
97 plist_t *parent_list; /**< List of parents */
98 plist_t *pkiller_list; /**< List of potential killers */
100 plist_t *descendant_list; /**< List of descendants */
101 ir_node **descendants; /**< Sorted descendant array (needed for faster access) */
103 ir_node *killer; /**< The selected unique killer */
104 ir_node *irn; /**< The corresponding firm node to this rss_irn */
106 chain_t *chain; /**< The chain, this node is associated to */
108 unsigned desc_walk; /**< visited flag for collecting descendants */
109 unsigned kill_count; /**< number of nodes killed by this one */
111 unsigned live_out : 1; /**< irn has consumers outside of it's block */
112 unsigned visited : 1; /**< visited flag for bipartite decomposition */
113 unsigned havecons : 1; /**< visited flag collect consumer */
114 unsigned handled : 1; /**< flag indicating whether or not the list structures have been build */
115 unsigned dumped : 1; /**< flag indication whether or not this node was dumped */
118 /* Represents a serialization edge with associated costs. */
119 typedef struct _serialization {
120 rss_irn_t *u; /* the top node */
121 rss_irn_t *v; /* the node about to be serialized after u */
122 rss_edge_t *edge; /* the edge selected for the serialization */
123 int omega1; /* estimated: available regs - RS reduction */
124 int omega2; /* increase in critical path length */
128 typedef struct _rss {
129 phase_t ph; /**< Phase to hold some data */
130 heights_t *h; /**< The current height object */
131 ir_graph *irg; /**< The irg to preprocess */
132 plist_t *nodes; /**< The list of interesting nodes */
133 const arch_env_t *arch_env; /**< The architecture environment */
134 be_abi_irg_t *abi; /**< The abi for this irg */
135 pset *cbc_set; /**< A set of connected bipartite components */
136 ir_node *block; /**< The current block in progress. */
137 int *idx_map; /**< Mapping irn indices to per block indices */
138 unsigned max_height; /**< maximum height in the current block */
139 rss_opts_t *opts; /**< The options */
140 be_lv_t *liveness; /**< The liveness information for this irg */
141 pset *live_block; /**< Values alive at end of block */
142 const arch_register_class_t *cls; /**< The current register class */
143 DEBUG_ONLY(firm_dbg_module_t *dbg);
146 #define get_rss_irn(rss, irn) (phase_get_or_set_irn_data(&rss->ph, irn))
149 * We need some special nodes:
150 * a source and a sink for all live-in and live-out values of a block
159 static ir_node *_source = NULL;
160 static ir_node *_sink = NULL;
162 #define is_Source(irn) ((irn) == _source)
163 #define is_Sink(irn) ((irn) == _sink)
167 RSS_DUMP_CBC = 1 << 0,
168 RSS_DUMP_PKG = 1 << 1,
169 RSS_DUMP_KILL = 1 << 2,
170 RSS_DUMP_DVG = 1 << 3,
171 RSS_DUMP_MAXAC = 1 << 4,
172 RSS_DUMP_ALL = (RSS_DUMP_MAXAC << 1) - 1,
175 static rss_opts_t rss_options = {
180 static const lc_opt_enum_int_items_t dump_items[] = {
181 { "none", RSS_DUMP_NONE },
182 { "cbc", RSS_DUMP_CBC },
183 { "pkg", RSS_DUMP_PKG },
184 { "kill", RSS_DUMP_KILL },
185 { "dvg", RSS_DUMP_DVG },
186 { "maxac", RSS_DUMP_MAXAC },
187 { "all", RSS_DUMP_ALL },
191 static lc_opt_enum_int_var_t dump_var = {
192 &rss_options.dump_flags, dump_items
195 static const lc_opt_table_entry_t rss_option_table[] = {
196 LC_OPT_ENT_ENUM_MASK("dump", "dump phases", &dump_var),
199 #endif /* WITH_LIBCORE */
201 /******************************************************************************
203 * | | | | / _| | | (_)
204 * | |__ ___| |_ __ ___ _ __ | |_ _ _ _ __ ___| |_ _ ___ _ __ ___
205 * | '_ \ / _ \ | '_ \ / _ \ '__| | _| | | | '_ \ / __| __| |/ _ \| '_ \/ __|
206 * | | | | __/ | |_) | __/ | | | | |_| | | | | (__| |_| | (_) | | | \__ \
207 * |_| |_|\___|_| .__/ \___|_| |_| \__,_|_| |_|\___|\__|_|\___/|_| |_|___/
210 ******************************************************************************/
213 * Acquire opcodes and create source and sink nodes.
215 static void init_rss_special_nodes(ir_graph *irg) {
216 ir_node *block = get_irg_start_block(irg);
217 int iro_rss_base = get_next_ir_opcodes(iro_rss_last);
218 ir_op *op_rss_Source = new_ir_op(iro_rss_base + iro_rss_Source, "rss_Source", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
219 ir_op *op_rss_Sink = new_ir_op(iro_rss_base + iro_rss_Sink, "rss_Sink", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
220 _source = new_ir_node(NULL, irg, block, op_rss_Source, mode_ANY, 0, NULL);
221 _sink = new_ir_node(NULL, irg, block, op_rss_Sink, mode_ANY, 0, NULL);
224 static int cmp_int(const void *a, const void *b) {
228 return QSORT_CMP(*i1, *i2);
231 static int cmp_child_costs(const void *a, const void *b) {
232 const child_t *c1 = a;
233 const child_t *c2 = b;
235 return QSORT_CMP(c1->cost, c2->cost);
238 static int cmp_irn_idx(const void *a, const void *b) {
239 const ir_node *n1 = *(ir_node **)a;
240 const ir_node *n2 = *(ir_node **)b;
242 return QSORT_CMP(get_irn_idx(n1), get_irn_idx(n2));
245 static int cmp_rss_edges(const void *a, const void *b) {
246 const rss_edge_t *e1 = a;
247 const rss_edge_t *e2 = b;
249 return (e1->src != e2->src) || (e1->tgt != e2->tgt);
252 static int bsearch_for_index(int key, int *arr, size_t len, int force) {
256 while (right >= left) {
257 int idx = (left + right) / 2;
261 else if (key > arr[idx])
268 assert(0 && "Something is wrong, key not found.");
272 static ir_node **build_sorted_array_from_list(plist_t *irn_list, struct obstack *obst) {
275 int len = plist_count(irn_list);
276 ir_node **arr = NEW_ARR_D(ir_node *, obst, len);
278 /* copy the list into the array */
279 foreach_plist(irn_list, el) {
280 arr[i++] = plist_element_get_value(el);
283 /* sort the array by node index */
284 qsort(arr, len, sizeof(arr[0]), cmp_irn_idx);
289 /*****************************************************
292 * __| | ___| |__ _ _ __ _ __ _ _ _ __ __ _
293 * / _` |/ _ \ '_ \| | | |/ _` |/ _` | | '_ \ / _` |
294 * | (_| | __/ |_) | |_| | (_| | (_| | | | | | (_| |
295 * \__,_|\___|_.__/ \__,_|\__, |\__, |_|_| |_|\__, |
299 *****************************************************/
302 static void dump_nodeset(nodeset *ns, const char *prefix) {
304 foreach_nodeset(ns, irn) {
305 ir_fprintf(stderr, "%s%+F\n", prefix, irn);
310 static void build_file_name(rss_t *rss, const char *suffix, size_t suf_len, char *buf, size_t len) {
311 const char *irg_name;
314 irg_name = get_entity_name(get_irg_entity(rss->irg));
315 snprintf(buf, len - suf_len, "%s-%s-block-%ld",
316 irg_name, arch_register_class_name(rss->cls), get_irn_node_nr(rss->block));
320 /* Dumps all collected bipartite components of current irg as vcg. */
321 static void debug_vcg_dump_bipartite(rss_t *rss) {
325 static const char suffix[] = "-RSS-CBC.vcg";
327 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
328 f = fopen(file_name, "w");
330 ir_fprintf(f, "graph: { title: \"connected bipartite component graph of %+F\"\n", rss->irg);
331 fprintf(f, "display_edge_labels: no\n");
332 fprintf(f, "layoutalgorithm: mindepth\n");
333 fprintf(f, "manhattan_edges: yes\n\n");
335 foreach_pset(rss->cbc_set, cbc) {
339 fprintf(f, "graph: { titel: \"cbc %d\" label: \"cbc %d\" status:clustered color:yellow\n", cbc->nr, cbc->nr);
340 foreach_nodeset(cbc->parents, n) {
341 ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
343 foreach_nodeset(cbc->children, n) {
344 ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
346 foreach_pset(cbc->kill_edges, ke) {
347 ir_fprintf(f, "edge: { sourcename: \"n%d_%d\" targetname: \"n%d_%d\" }\n",
348 get_irn_node_nr(ke->src), cbc->nr, get_irn_node_nr(ke->tgt), cbc->nr);
356 /* Dump the computed killing function as vcg. */
357 static void debug_vcg_dump_kill(rss_t *rss) {
361 static const char suffix[] = "-RSS-KILL.vcg";
363 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
364 f = fopen(file_name, "w");
366 ir_fprintf(f, "graph: { title: \"computed kill graph of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
367 fprintf(f, "display_edge_labels: no\n");
368 fprintf(f, "layoutalgorithm: mindepth\n");
369 fprintf(f, "manhattan_edges: yes\n\n");
372 /* reset dump_flag */
374 foreach_phase_irn(&rss->ph, irn) {
375 rss_irn_t *node = get_rss_irn(rss, irn);
380 /* dump all nodes and their killers */
381 foreach_plist(rss->nodes, el) {
382 ir_node *irn = plist_element_get_value(el);
383 rss_irn_t *rirn = get_rss_irn(rss, irn);
384 rss_irn_t *pk_rirn = get_rss_irn(rss, rirn->killer);
386 if (! rirn->dumped) {
387 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
391 if (! pk_rirn->dumped) {
392 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(rirn->killer), rirn->killer);
396 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
397 get_irn_node_nr(rirn->killer), get_irn_node_nr(irn));
404 /* Dumps the potential killing DAG (PKG) as vcg. */
405 static void debug_vcg_dump_pkg(rss_t *rss, nodeset *max_ac, int iteration) {
408 char *suffix = alloca(32);
409 static const char suffix1[] = "-RSS-PKG.vcg";
410 static const char suffix2[] = "-RSS-PKG-MAXAC.vcg";
414 snprintf(suffix, 32, "%s", suffix1);
417 snprintf(suffix, 32, "-%02d%s", iteration, suffix2);
420 build_file_name(rss, suffix, strlen(suffix) + 1, file_name, sizeof(file_name));
421 f = fopen(file_name, "w");
423 ir_fprintf(f, "graph: { title: \"potential killing DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
424 fprintf(f, "display_edge_labels: no\n");
425 fprintf(f, "layoutalgorithm: mindepth\n");
426 fprintf(f, "manhattan_edges: yes\n\n");
429 /* reset dump_flag */
431 foreach_phase_irn(&rss->ph, irn) {
432 rss_irn_t *node = get_rss_irn(rss, irn);
437 foreach_plist(rss->nodes, el) {
438 ir_node *irn = plist_element_get_value(el);
439 rss_irn_t *rirn = get_rss_irn(rss, irn);
441 plist_element_t *k_el;
443 /* dump selected saturating values in yellow */
444 if (max_ac && nodeset_find(max_ac, irn))
447 if (! rirn->dumped) {
449 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F c%d\" %s }\n", get_irn_node_nr(irn), irn, rirn->chain->nr, c1);
451 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(irn), irn, c1);
455 foreach_plist(rirn->pkiller_list, k_el) {
456 ir_node *pkiller = plist_element_get_value(k_el);
457 rss_irn_t *pk_rirn = get_rss_irn(rss, pkiller);
460 if (max_ac && nodeset_find(max_ac, pkiller))
463 if (! pk_rirn->dumped) {
465 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F c%d\" %s }\n", get_irn_node_nr(pkiller), pkiller, pk_rirn->chain->nr, c2);
467 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(pkiller), pkiller, c2);
470 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
471 get_irn_node_nr(pkiller), get_irn_node_nr(irn));
478 /* Dumps the disjoint value DAG (DVG) as vcg. */
479 static void debug_vcg_dump_dvg(rss_t *rss, dvg_t *dvg) {
480 static const char suffix[] = "-RSS-DVG.vcg";
486 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
487 f = fopen(file_name, "w");
489 ir_fprintf(f, "graph: { title: \"disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
490 fprintf(f, "display_edge_labels: no\n");
491 fprintf(f, "layoutalgorithm: mindepth\n");
492 fprintf(f, "manhattan_edges: yes\n\n");
495 foreach_nodeset(dvg->nodes, irn) {
496 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
500 foreach_pset(dvg->edges, edge) {
501 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
502 get_irn_node_nr(edge->src), get_irn_node_nr(edge->tgt));
510 /* Dumps the PKG(DVG). */
511 static void debug_vcg_dump_dvg_pkiller(rss_t *rss, dvg_t *dvg) {
512 static const char suffix[] = "-RSS-DVG-PKG.vcg";
517 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
518 f = fopen(file_name, "w");
520 ir_fprintf(f, "graph: { title: \"PKG of disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
521 fprintf(f, "display_edge_labels: no\n");
522 fprintf(f, "layoutalgorithm: mindepth\n");
523 fprintf(f, "manhattan_edges: yes\n\n");
526 foreach_nodeset(dvg->nodes, irn) {
527 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
531 foreach_nodeset(dvg->nodes, irn) {
532 rss_irn_t *node = get_rss_irn(rss, irn);
535 foreach_plist(node->dvg_pkiller_list, el) {
536 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
537 get_irn_node_nr(plist_element_get_value(el)), get_irn_node_nr(irn));
547 * In case there is no rss information for irn, initialize it.
549 static void *init_rss_irn(phase_t *ph, ir_node *irn, void *old) {
550 rss_irn_t *res = old ? old : phase_alloc(ph, sizeof(res[0]));
552 res->descendant_list = plist_obstack_new(phase_obst(ph));
553 res->descendants = NULL;
555 res->consumer_list = plist_obstack_new(phase_obst(ph));
556 res->consumer = NULL;
558 res->pkiller_list = plist_obstack_new(phase_obst(ph));
560 res->parent_list = plist_obstack_new(phase_obst(ph));
578 * Collect all nodes data dependent on current node.
580 static void collect_descendants(rss_t *rss, rss_irn_t *rirn, ir_node *irn, int *got_sink, unsigned cur_desc_walk) {
581 const ir_edge_t *edge;
582 rss_irn_t *cur_node = get_rss_irn(rss, irn);
583 ir_node *block = rss->block;
584 ir_edge_kind_t ekind[2] = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
587 if (cur_node->desc_walk >= cur_desc_walk)
589 cur_node->desc_walk = cur_desc_walk;
591 /* Stop at Barriers */
592 if (be_is_Barrier(irn))
595 /* loop over normal and dependency edges */
596 for (i = 0; i < 2; ++i) {
597 foreach_out_edge_kind(irn, edge, ekind[i]) {
598 ir_node *user = get_edge_src_irn(edge);
600 /* skip ignore nodes as they do not really contribute to register pressure */
601 if (arch_irn_is(rss->arch_env, user, ignore))
605 (a) mode_X means Jump -> out_edge
606 (b) Phi as user of a normal node -> out-edge
608 if (get_irn_mode(user) == mode_X || is_Phi(user)) {
617 //if (arch_get_irn_reg_class(rss->arch_env, user, -1) == rss->cls)
618 collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
621 /* check if user lives in block and is not a control flow node */
622 if (get_nodes_block(user) == block) {
623 if (! plist_has_value(rirn->descendant_list, user)) {
624 plist_insert_back(rirn->descendant_list, user);
625 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", user));
627 collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
629 else if (! *got_sink) {
631 /* user lives out of block: add sink as descendant if not already done */
632 plist_insert_back(rirn->descendant_list, _sink);
634 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", _sink));
642 * Handles a single consumer.
644 static void collect_single_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *consumer, int *got_sink) {
645 ir_node *block = rss->block;
647 assert(! is_Proj(consumer) && "Cannot handle Projs");
649 if (! is_Phi(consumer) && ! is_Block(consumer) && get_nodes_block(consumer) == block) {
650 if (! arch_irn_is(rss->arch_env, consumer, ignore) && ! plist_has_value(rss_irn->consumer_list, consumer)) {
651 plist_insert_back(rss_irn->consumer_list, consumer);
652 DBG((rss->dbg, LEVEL_2, "\t\tconsumer %+F\n", consumer));
656 rss_irn->live_out = 1;
657 DBG((rss->dbg, LEVEL_2, "\t\tlive out %+F", consumer));
659 plist_insert_back(rss_irn->consumer_list, _sink);
661 DB((rss->dbg, LEVEL_2, ", %+F added instead", _sink));
663 DB((rss->dbg, LEVEL_2, "\n"));
668 * Collect all nodes consuming the value(s) produced by current node.
670 static void collect_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *irn, int *got_sink) {
671 const ir_edge_t *edge;
673 ir_edge_kind_t ekind[2] = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
674 rss_irn_t *cur_node = get_rss_irn(rss, irn);
676 if (cur_node->havecons)
678 cur_node->havecons = 1;
680 for (i = 0; i < 2; ++i) {
681 foreach_out_edge_kind(irn, edge, ekind[i]) {
682 ir_node *consumer = get_edge_src_irn(edge);
684 if (is_Proj(consumer)) {
685 //if (arch_get_irn_reg_class(rss->arch_env, consumer, -1) == rss->cls)
686 collect_consumer(rss, rss_irn, consumer, got_sink);
689 collect_single_consumer(rss, rss_irn, consumer, got_sink);
695 * Collects all consumer and descendant of a irn.
697 static void collect_node_info(rss_t *rss, ir_node *irn) {
698 static unsigned cur_desc_walk = 0;
699 rss_irn_t *rss_irn = get_rss_irn(rss, irn);
702 assert(! is_Proj(irn) && "Cannot handle Projs.");
704 /* check if node info is already available */
705 if (rss_irn->handled)
707 //phase_reinit_single_irn_data(&rss->ph, irn);
709 DBG((rss->dbg, LEVEL_1, "\tcomputing consumers of %+F:\n", irn));
711 /* collect all consumer */
713 collect_consumer(rss, rss_irn, irn, &got_sink);
715 /* build sorted consumer array */
716 rss_irn->consumer = build_sorted_array_from_list(rss_irn->consumer_list, phase_obst(&rss->ph));
718 DBG((rss->dbg, LEVEL_1, "\tcompute descendants of %+F:\n", irn));
720 /* collect descendants */
722 collect_descendants(rss, rss_irn, irn, &got_sink, ++cur_desc_walk);
724 /* build sorted descendant array */
725 rss_irn->descendants = build_sorted_array_from_list(rss_irn->descendant_list, phase_obst(&rss->ph));
727 rss_irn->handled = 1;
731 * Checks if v is a potential killer of u.
732 * v is in pkill(u) iff descendants(v) cut consumer(u) is v
734 * @param rss The rss object
735 * @param v The node to check for killer
736 * @param u The potentially killed value
737 * @return 1 if v is in pkill(u), 0 otherwise
739 static int is_potential_killer(rss_t *rss, rss_irn_t *v, rss_irn_t *u) {
744 assert(is_Sink(v->irn) || ((plist_count(v->descendant_list) > 0 && v->descendants) || 1));
745 assert(is_Sink(u->irn) || ((plist_count(u->consumer_list) > 0 && u->consumer) || 1));
747 /* as we loop over the list: loop over the shorter one */
748 if (plist_count(v->descendant_list) > plist_count(u->consumer_list)) {
749 list = u->consumer_list;
750 arr = v->descendants;
753 list = v->descendant_list;
757 /* for each list element: try to find element in array */
758 foreach_plist(list, el) {
759 ir_node *irn = plist_element_get_value(el);
760 ir_node *match = BSEARCH_IRN_ARR(irn, arr);
762 if (match && match != irn)
770 * Update descendants, consumer and pkiller list for the given irn.
772 static void update_node_info(rss_t *rss, ir_node *irn, ir_node *pk_irn) {
773 rss_irn_t *node = get_rss_irn(rss, irn);
774 rss_irn_t *pkiller = get_rss_irn(rss, pk_irn);
776 /* add consumer and rebuild the consumer array */
777 if (! plist_has_value(node->consumer_list, pk_irn)) {
778 plist_insert_back(node->consumer_list, pk_irn);
779 node->consumer = build_sorted_array_from_list(node->consumer_list, phase_obst(&rss->ph));
782 /* add pk_irn as descendant, add it's descendants to irn's and rebuild array */
783 if (! plist_has_value(node->descendant_list, pk_irn)) {
786 plist_insert_back(node->descendant_list, pk_irn);
788 foreach_plist(pkiller->descendant_list, el) {
789 ir_node *desc = plist_element_get_value(el);
791 if (! plist_has_value(node->descendant_list, desc))
792 plist_insert_back(node->descendant_list, desc);
795 node->descendants = build_sorted_array_from_list(node->descendant_list, phase_obst(&rss->ph));
801 * Compute the potential killing set PK.
803 static void compute_pkill_set(rss_t *rss) {
804 plist_element_t *u_el, *v_el;
806 foreach_plist(rss->nodes, u_el) {
807 ir_node *u_irn = plist_element_get_value(u_el);
808 rss_irn_t *u = get_rss_irn(rss, u_irn);
810 DBG((rss->dbg, LEVEL_1, "\tcomputing potential killers of %+F:\n", u_irn));
812 /* check each consumer if it is a potential killer */
813 foreach_plist(u->consumer_list, v_el) {
814 ir_node *v_irn = plist_element_get_value(v_el);
815 rss_irn_t *v = get_rss_irn(rss, v_irn);
817 /* check, if v is a potential killer of u */
818 if (is_potential_killer(rss, v, u)) {
819 if (! plist_has_value(u->pkiller_list, v_irn))
820 plist_insert_back(u->pkiller_list, v_irn);
822 DBG((rss->dbg, LEVEL_2, "\t\tpotential killer %+F\n", v_irn));
829 if (rss->opts->dump_flags & RSS_DUMP_PKG)
830 debug_vcg_dump_pkg(rss, NULL, 0);
834 * Build set of killing edges (from values to their potential killers)
836 static void build_kill_edges(rss_t *rss, pset *epk) {
837 plist_element_t *el, *k_el;
839 foreach_plist(rss->nodes, el) {
840 ir_node *irn = plist_element_get_value(el);
841 rss_irn_t *rirn = get_rss_irn(rss, irn);
843 DBG((rss->dbg, LEVEL_3, "\t\tbuilding kill edges for %+F:\n", irn));
845 foreach_plist(rirn->pkiller_list, k_el) {
846 ir_node *pkiller = plist_element_get_value(k_el);
847 rss_edge_t *ke = obstack_alloc(phase_obst(&rss->ph), sizeof(*ke));
853 DBG((rss->dbg, LEVEL_3, "\t\t\ttarget %+F\n", pkiller));
855 pset_insert(epk, ke, HASH_RSS_EDGE(ke));
861 /* print the given cbc for debugging purpose */
862 static void debug_print_cbc(firm_dbg_module_t *mod, cbc_t *cbc) {
866 DBG((mod, LEVEL_3, "\t\tS = set of parents:\n"));
867 foreach_nodeset(cbc->parents, n) {
868 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
870 DBG((mod, LEVEL_3, "\t\tT = set of children:\n"));
871 foreach_nodeset(cbc->children, n) {
872 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
874 DBG((mod, LEVEL_3, "\t\tE = Edges from producers to consumers\n"));
875 foreach_pset(cbc->kill_edges, ke) {
876 DBG((mod, LEVEL_3, "\t\t\t%+F -> %+F\n", ke->src, ke->tgt));
882 * Construct the bipartite decomposition.
883 * Sid-Ahmed-Ali Touati, Phd Thesis
884 * Register Pressure in Instruction Level Parallelism, p. 71
886 static void compute_bipartite_decomposition(rss_t *rss) {
887 pset *epk = new_pset(cmp_rss_edges, 10);
892 DBG((rss->dbg, LEVEL_1, "\tcomputing bipartite decomposition:\n"));
894 build_kill_edges(rss, epk);
896 foreach_plist(rss->nodes, el) {
897 ir_node *u_irn = plist_element_get_value(el);
898 rss_irn_t *u = get_rss_irn(rss, u_irn);
904 plist_element_t *el2;
906 rss_edge_t *kedge_root = NULL;
907 ir_node *t_irn, *s_irn;
909 if (u->visited || u_irn == _sink)
912 DBG((rss->dbg, LEVEL_2, "\t\t%+F choosen:\n", u_irn));
914 cbc = obstack_alloc(phase_obst(&rss->ph), sizeof(*cbc));
917 /* initialize S_cb */
918 cbc->parents = new_nodeset(5);
919 nodeset_insert(cbc->parents, u_irn);
920 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (init)\n", u_irn));
923 cbc->kill_edges = new_pset(cmp_rss_edges, 5);
925 /* each parent gets killed by at least one value from children */
927 /* T_cb = PK_successors(u) */
928 cbc->children = new_nodeset(5);
929 foreach_plist(u->pkiller_list, el2) {
930 nodeset_insert(cbc->children, plist_element_get_value(el2));
931 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (init)\n", plist_element_get_value(el2)));
935 Now: insert the parents of all children into the parent set
936 and insert the children of all parents into the children set
937 until the sets don't change any more
939 while (p_change || c_change) {
940 p_change = c_change = 0;
942 /* accumulate parents */
943 foreach_nodeset(cbc->children, t_irn) {
944 foreach_pset(epk, k_edge) {
945 ir_node *val = k_edge->src;
947 if (k_edge->tgt == t_irn && ! nodeset_find(cbc->parents, val)) {
948 nodeset_insert(cbc->parents, val);
950 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (killed by %+F)\n", val, t_irn));
955 /* accumulate children */
956 foreach_nodeset(cbc->parents, s_irn) {
957 foreach_pset(epk, k_edge) {
958 ir_node *val = k_edge->tgt;
960 if (k_edge->src == s_irn && ! nodeset_find(cbc->children, val)) {
961 nodeset_insert(cbc->children, val);
963 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (kills %+F)\n", val, s_irn));
969 /* mark all parent values as visited */
970 foreach_nodeset(cbc->parents, s_irn) {
971 rss_irn_t *s = get_rss_irn(rss, s_irn);
973 /* assure bipartite property */
975 if (nodeset_find(cbc->children, s_irn)) {
976 nodeset_remove(cbc->children, s_irn);
977 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F removed from to children\n", s_irn));
983 foreach_pset(epk, k_edge) {
984 if (nodeset_find(cbc->parents, k_edge->src) && nodeset_find(cbc->children, k_edge->tgt)) {
985 pset_insert(cbc->kill_edges, k_edge, HASH_RSS_EDGE(k_edge));
987 Link all k_edges which are about to be removed together.
988 Beware: pset_remove kills the iterator.
990 k_edge->next = kedge_root;
995 /* remove all linked k_edges */
996 for (; kedge_root; kedge_root = kedge_root->next) {
997 pset_remove(epk, kedge_root, HASH_RSS_EDGE(kedge_root));
1000 /* verify the cbc */
1001 foreach_nodeset(cbc->parents, s_irn) {
1004 foreach_pset(cbc->kill_edges, k_edge) {
1005 if (k_edge->src == s_irn) {
1007 pset_break(cbc->kill_edges);
1014 ir_fprintf(stderr, "Warning: parent %+F is not killed in current cbc\n", s_irn);
1017 assert(vrfy_ok && "Verification of CBC failed");
1019 /* add the connected bipartite component */
1020 if (nodeset_count(cbc->parents) > 0 && nodeset_count(cbc->children) > 0 && pset_count(cbc->kill_edges) > 0)
1021 pset_insert(rss->cbc_set, cbc, (unsigned)cbc->nr);
1022 DBG((rss->dbg, LEVEL_2, "\tbipartite component %d inserted:\n", cbc->nr));
1024 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2)
1025 debug_print_cbc(rss->dbg, cbc);
1029 if (rss->opts->dump_flags & RSS_DUMP_CBC)
1030 debug_vcg_dump_bipartite(rss);
1036 * Select the child with the maximum cost.
1038 static child_t *select_child_max_cost(rss_t *rss, nodeset *x, nodeset *y, child_t *t, cbc_t *cbc) {
1040 float max_cost = -1.0f;
1042 DBG((rss->dbg, LEVEL_2, "\t\tcomputing children costs:\n"));
1044 foreach_nodeset(cbc->children, child) {
1045 rss_irn_t *r_child = get_rss_irn(rss, child);
1046 int num_unkilled_parents = 0;
1047 int num_descendants;
1051 /* get the number of unkilled parents */
1052 foreach_pset(cbc->kill_edges, k_edge) {
1053 if (k_edge->tgt == child && nodeset_find(x, k_edge->src))
1054 ++num_unkilled_parents;
1057 cost = (float)num_unkilled_parents;
1059 num_descendants = plist_count(r_child->descendant_list) + nodeset_count(y);
1061 if (num_descendants > 0)
1062 cost /= (float)num_descendants;
1064 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F, #desc %d, cost %.3f\n", child, num_descendants, cost));
1066 if (cost > max_cost) {
1077 * Remove all parents from x which are killed by t_irn.
1079 static void remove_covered_parents(rss_t *rss, nodeset *x, ir_node *t_irn, cbc_t *cbc) {
1080 rss_irn_t *t = get_rss_irn(rss, t_irn);
1083 DBG((rss->dbg, LEVEL_2, "\t\tremoving parents covered by %+F:\n", t_irn));
1085 foreach_pset(cbc->kill_edges, k_edge) {
1086 if (k_edge->tgt == t_irn && nodeset_find(x, k_edge->src)) {
1087 nodeset_remove(x, k_edge->src);
1088 plist_insert_back(t->parent_list, k_edge->src);
1089 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", k_edge->src));
1094 static void update_cumulated_descendent_values(rss_t *rss, nodeset *y, ir_node *t_irn) {
1095 rss_irn_t *t = get_rss_irn(rss, t_irn);
1096 plist_element_t *el;
1098 DBG((rss->dbg, LEVEL_2, "\t\tupdating cumulated descendant value of %+F:\n", t_irn));
1100 foreach_plist(t->descendant_list, el) {
1101 nodeset_insert(y, plist_element_get_value(el));
1102 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", plist_element_get_value(el)));
1107 * Greedy-k: a heuristics for the MMA problem
1109 static void compute_killing_function(rss_t *rss) {
1111 struct obstack obst;
1113 obstack_init(&obst);
1115 rss->cbc_set = pset_new_ptr(5);
1116 compute_bipartite_decomposition(rss);
1118 /* for all bipartite components do: */
1119 foreach_pset(rss->cbc_set, cbc) {
1121 nodeset *x = new_nodeset(10);
1122 nodeset *y = new_nodeset(10);
1123 child_t **sks = NEW_ARR_F(child_t *, 20);
1128 DBG((rss->dbg, LEVEL_1, "\tcomputing SKS for cbc %d:\n", cbc->nr));
1129 DBG((rss->dbg, LEVEL_2, "\t\tinitializing parents X:\n"));
1131 /* X = S_cb (all parents are initially uncovered) */
1132 foreach_nodeset(cbc->parents, p) {
1133 nodeset_insert(x, p);
1134 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", p));
1137 /* while X not empty */
1138 while (nodeset_count(x) > 0) {
1139 child_t *t = obstack_alloc(&obst, sizeof(*t));
1140 memset(t, 0, sizeof(*t));
1142 t = select_child_max_cost(rss, x, y, t, cbc);
1144 if (cur_len >= cur_size) {
1145 ARR_EXTO(child_t *, sks, cur_size * 2);
1149 DBG((rss->dbg, LEVEL_2, "\t\tinsert child %+F (%.3f) into SKS at pos %d\n", t->irn, t->cost, cur_len));
1152 remove_covered_parents(rss, x, t->irn, cbc);
1153 update_cumulated_descendent_values(rss, y, t->irn);
1156 ARR_SHRINKLEN(sks, cur_len);
1158 /* sort SKS in increasing cost order */
1159 qsort(sks, cur_len, sizeof(sks[0]), cmp_child_costs);
1161 DBG((rss->dbg, LEVEL_2, "\tprocessing SKS for cbc %d:\n", cbc->nr));
1163 /* build killing function */
1164 for (i = cur_len - 1; i >= 0; --i) { /* loop over sks in decreasing cost order */
1165 child_t *t = sks[i];
1166 rss_irn_t *rt = get_rss_irn(rss, t->irn);
1167 plist_element_t *p_el;
1169 DBG((rss->dbg, LEVEL_3, "\t\t\tkiller %+F (%.3f):\n", t->irn, t->cost));
1171 /* kill all unkilled parents of t */
1172 foreach_plist(rt->parent_list, p_el) {
1173 ir_node *par = plist_element_get_value(p_el);
1174 rss_irn_t *rpar = get_rss_irn(rss, par);
1176 if (is_Sink(rpar->killer)) {
1177 rpar->killer = t->irn;
1178 DBG((rss->dbg, LEVEL_2, "\t\tkill %+F\n", rpar->irn));
1181 DBG((rss->dbg, LEVEL_3, "\t\t\tkeeping %+F as killer for %+F\n", rpar->killer, rpar->irn));
1191 if (rss->opts->dump_flags & RSS_DUMP_KILL)
1192 debug_vcg_dump_kill(rss);
1194 del_pset(rss->cbc_set);
1195 obstack_free(&obst, NULL);
1199 * Adds the edge src -> tgt to the dvg. Checks if reverse edge is already there (asserts).
1201 static INLINE void add_dvg_edge(rss_t *rss, dvg_t *dvg, ir_node *src, ir_node *tgt, int have_source) {
1202 rss_edge_t *dvg_edge;
1206 nodeset_insert(dvg->nodes, src);
1208 assert(nodeset_find(dvg->nodes, src) != NULL && "Wrong assumption");
1210 nodeset_insert(dvg->nodes, tgt);
1214 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1218 if (NULL != pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1219 /* add the edge to the DVG */
1220 dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1222 dvg_edge->src = src;
1223 dvg_edge->tgt = tgt;
1224 dvg_edge->next = NULL;
1226 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", src, tgt));
1227 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1232 * Computes the disjoint value DAG (DVG).
1233 * BEWARE: It is not made explicitly clear in the Touati paper,
1234 * but the DVG is meant to be build from the KILLING DAG
1236 static void compute_dvg(rss_t *rss, dvg_t *dvg) {
1237 plist_element_t *el;
1239 DBG((rss->dbg, LEVEL_1, "\tcomputing DVG:\n"));
1241 foreach_plist(rss->nodes, el) {
1242 ir_node *u_irn = plist_element_get_value(el);
1243 rss_irn_t *u = get_rss_irn(rss, u_irn);
1244 rss_irn_t *u_killer = get_rss_irn(rss, u->killer);
1247 /* TODO: omit nodes only having sink as pkiller and killing no one */
1249 /* add an edge to killer */
1250 add_dvg_edge(rss, dvg, u_irn, u->killer, 0);
1252 if (is_Sink(u->killer))
1255 /* We add an edge to every killer, from where we could be reached. */
1256 for (i = ARR_LEN_SAFE(u_killer->descendants) - 1; i >= 0; --i) {
1257 add_dvg_edge(rss, dvg, u_irn, u_killer->descendants[i], 1);
1262 foreach_plist(rss->nodes, el2) {
1263 ir_node *v_irn = plist_element_get_value(el2);
1266 There is an edge (u, v) in the DVG iff v is a descendant of the killer(u).
1268 if (BSEARCH_IRN_ARR(v_irn, u_kill->descendants)) {
1269 rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1272 /* insert the user into the DVG and append it to the user list of u */
1273 nodeset_insert(dvg->nodes, v_irn);
1274 if (! plist_has_value(u->dvg_user_list, v_irn))
1275 plist_insert_back(u->dvg_user_list, v_irn);
1277 dvg_edge->src = u_irn;
1278 dvg_edge->tgt = v_irn;
1279 dvg_edge->next = NULL;
1284 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1286 /* add the edge to the DVG */
1287 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", u_irn, v_irn));
1288 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1294 if (rss->opts->dump_flags & RSS_DUMP_DVG)
1295 debug_vcg_dump_dvg(rss, dvg);
1299 * Updates the dvg structure when a serialization edge from src -> tgt is added.
1301 static void update_dvg(rss_t *rss, dvg_t *dvg, rss_irn_t *src, rss_irn_t *tgt) {
1304 rss_edge_t **arr = alloca(pset_count(dvg->edges) * sizeof(arr[0]));
1307 Add an edge from serialization target to serialization src:
1308 src cannot be alive with target
1310 add_dvg_edge(rss, dvg, tgt->irn, src->irn, 0);
1312 /* Add edges to src's descendants as well, they also getting serialized. */
1313 for (i = ARR_LEN_SAFE(src->descendants) - 1; i >= 0; --i) {
1314 add_dvg_edge(rss, dvg, tgt->irn, src->descendants[i], 1);
1317 /* We also have to add edges from targets predecessors in dvg */
1319 /* We cannot insert elements into set over which we iterate ... */
1320 foreach_pset(dvg->edges, edge) {
1321 if (edge->tgt == tgt->irn) {
1326 for (i = 0; i < idx; ++i) {
1328 add_dvg_edge(rss, dvg, edge->src, src->irn, 1);
1329 for (j = ARR_LEN_SAFE(src->descendants) - 1; j >= 0; --j) {
1330 add_dvg_edge(rss, dvg, edge->src, src->descendants[j], 1);
1337 * Accumulate all descendants for root into list.
1339 static void accumulate_dvg_descendant_values(rss_t *rss, rss_irn_t *root, plist_t *list) {
1340 if (plist_count(root->dvg_user_list) > 0) {
1341 plist_element_t *el;
1343 foreach_plist(root->dvg_user_list, el) {
1344 ir_node *v_irn = plist_element_get_value(el);
1345 rss_irn_t *v = get_rss_irn(rss, v_irn);
1347 /* add v as descendant */
1348 if (! plist_has_value(list, v_irn)) {
1349 plist_insert_back(list, v_irn);
1350 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG descendant %+F\n", v_irn));
1353 /* accumulate v's descendants */
1354 accumulate_dvg_descendant_values(rss, v, list);
1360 * Builds the list of potential killers for each node
1362 * Needs the descendant list for all user as sorted array.
1364 static void build_dvg_pkiller_list(rss_t *rss, dvg_t *dvg) {
1367 foreach_nodeset(dvg->nodes, irn) {
1368 rss_irn_t *node = get_rss_irn(rss, irn);
1369 plist_element_t *el, *el2;
1371 DBG((rss->dbg, DEBUG_DVG, "\t\tbuilding pkiller list for %+F\n", irn));
1373 /* check each user */
1374 foreach_plist(node->dvg_user_list, el) {
1375 ir_node *u_irn = plist_element_get_value(el);
1377 /* is the current user u_irn not a descendant of any other user -> pkiller */
1378 foreach_plist(node->dvg_user_list, el2) {
1379 ir_node *v_irn = plist_element_get_value(el2);
1380 rss_irn_t *v = get_rss_irn(rss, v_irn);
1383 ! BSEARCH_IRN_ARR(u_irn, v->dvg_desc) &&
1384 ! plist_has_value(node->dvg_pkiller_list, u_irn))
1386 plist_insert_back(node->dvg_pkiller_list, u_irn);
1387 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG pkiller %+F\n", u_irn));
1392 node->dvg_pkiller = build_sorted_array_from_list(node->dvg_pkiller_list, phase_obst(&rss->ph));
1396 if (firm_dbg_get_mask(rss->dbg) & DEBUG_DVG)
1397 debug_vcg_dump_dvg_pkiller(rss, dvg);
1404 * Compute the maximal antichain of the current DVG.
1405 * This is a reimplementation of the MAXIMAL_ANTI_CHAIN function
1406 * from the DDG library 1.1 (DAG.cpp).
1408 static nodeset *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration) {
1409 int n = nodeset_count(dvg->nodes);
1410 int *assignment = alloca(n * sizeof(assignment[0]));
1411 int *assignment_rev = alloca(n * sizeof(assignment_rev[0]));
1412 int *idx_map = alloca(n * sizeof(idx_map[0]));
1413 hungarian_problem_t *bp;
1414 nodeset *values, *temp;
1416 int i, j, cost, cur_chain, res;
1417 rss_edge_t *dvg_edge;
1419 #define MAP_IDX(irn) bsearch_for_index(get_irn_idx(irn), idx_map, n, 1)
1421 if (pset_count(dvg->edges) == 0)
1424 bp = hungarian_new(n, n, 1, HUNGARIAN_MATCH_NORMAL);
1427 At first, we build an index map for the nodes in the DVG,
1428 because we cannot use the irn idx therefore as the resulting
1429 bipartite data structure would have around 1.2GB.
1430 So we limit the size to the number of nodes we have in the DVG
1431 and build a sorted index map for their irn indices.
1434 foreach_nodeset(dvg->nodes, u_irn) {
1435 idx_map[i++] = get_irn_idx(u_irn);
1437 qsort(idx_map, n, sizeof(idx_map[0]), cmp_int);
1439 foreach_pset(dvg->edges, dvg_edge) {
1440 int idx_u = MAP_IDX(dvg_edge->src);
1441 int idx_v = MAP_IDX(dvg_edge->tgt);
1443 /* add the entry to the bipartite data structure */
1444 hungarian_add(bp, idx_u, idx_v, 1);
1445 DBG((rss->dbg, LEVEL_3, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1446 idx_u, dvg_edge->src, idx_v, dvg_edge->tgt));
1450 Add a bipartite entry for each pair of nodes (u, v), where exists a
1451 path in the DVG from u to v, ie. connect all descendants(v) to v.
1452 desc_dvg(v) = accumulated descendants of all z in dvg_user_list(v)
1454 foreach_nodeset(dvg->nodes, u_irn) {
1455 rss_irn_t *u = get_rss_irn(rss, u_irn);
1456 int idx_u_irn = MAP_IDX(u_irn);
1458 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG descendants of %+F:\n", u_irn));
1460 //plist_clear(u->dvg_desc_list);
1461 //accumulate_dvg_descendant_values(rss, u, u->dvg_desc_list);
1464 FIXME: The array is build on the phase obstack and we cannot free the data.
1465 So the array get as many times allocated as this function is called.
1468 /* build the sorted array for faster searches */
1469 //u->dvg_desc = build_sorted_array_from_list(u->dvg_desc_list, phase_obst(&rss->ph));
1471 DBG((rss->dbg, DEBUG_MAX_AC, "\t\tadding bipartite entries of %+F:\n", u_irn));
1473 /* add the bipartite entries for all descendants */
1474 for (i = ARR_LEN_SAFE(u->dvg_desc) - 1; i >= 0; --i) {
1475 rss_irn_t *desc = get_rss_irn(rss, u->dvg_desc[i]);
1476 int idx_desc = MAP_IDX(u->dvg_desc[i]);
1478 /* add the entry to the bipartite data structure */
1479 hungarian_add(bp, idx_u_irn, idx_desc, 1);
1480 DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1481 idx_u_irn, u_irn, idx_desc, u->dvg_desc[i]));
1488 /* We want maximum cardinality matching */
1489 hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);
1492 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG pkiller:\n"));
1493 /* beware: the following function needs the dvg_desc array */
1494 build_dvg_pkiller_list(rss, dvg);
1497 DBG((rss->dbg, LEVEL_2, "\t\tcomputing bipartite matching\n"));
1499 The maximum cardinality bipartite matching gives us the minimal
1500 chain partition, which corresponds to the maximum anti chains.
1502 res = hungarian_solve(bp, assignment, &cost, 1);
1503 assert(res == 0 && "Bipartite matching failed!");
1506 memset(assignment_rev, -1, n * sizeof(assignment_rev[0]));
1508 /* build the reverse assignment, ie. foreach i -> j, add j -> i */
1509 for (i = 0; i < n; ++i) {
1510 if (assignment[i] >= 0) {
1511 int j = assignment[i];
1512 assignment_rev[j] = i;
1516 DBG((rss->dbg, LEVEL_2, "\t\tgot assignment with cost %d\n", cost));
1517 DBG((rss->dbg, LEVEL_3, "\t\t\tassignment --- reverse assignment\n", cost));
1518 for (i = 0; i < n; ++i) {
1519 DBG((rss->dbg, LEVEL_3, "\t\t\t%3d -> %3d %3d -> %3d\n", i, assignment[i], i, assignment_rev[i]));
1522 values = new_nodeset(10);
1524 /* Construction of the minimal chain partition */
1525 for (j = 0; j < n; ++j) {
1526 /* check nodes, which did not occur as target */
1527 if (assignment_rev[j] == -1) {
1528 int xj = idx_map[j];
1529 ir_node *xj_irn = get_idx_irn(rss->irg, xj);
1530 rss_irn_t *xj_rss = get_rss_irn(rss, xj_irn);
1531 chain_t *c = obstack_alloc(phase_obst(&rss->ph), sizeof(*c));
1534 /* there was no source for j -> we have a source of a new chain */
1535 nodeset_insert(values, xj_irn);
1537 c->elements = plist_obstack_new(phase_obst(&rss->ph));
1538 c->nr = cur_chain++;
1539 plist_insert_back(c->elements, xj_irn);
1543 DBG((rss->dbg, LEVEL_2, "\t\tstarting chain %d:\n", c->nr));
1544 DBG((rss->dbg, LEVEL_2, "\t\t\t%+F (%d)", xj_irn, j));
1546 /* follow chain, having j as source */
1548 while (assignment[source] >= 0) {
1549 int target = assignment[source];
1550 int irn_idx = idx_map[target];
1551 ir_node *irn = get_idx_irn(rss->irg, irn_idx);
1552 rss_irn_t *node = get_rss_irn(rss, irn);
1554 plist_insert_back(c->elements, irn);
1557 DB((rss->dbg, LEVEL_2, " -> %+F (%d)", irn, target));
1559 /* new source = last target */
1563 DB((rss->dbg, LEVEL_2, "\n"));
1568 Computing the maximal antichain: Select an element from each
1569 chain such, such it is parallel with the others.
1571 DBG((rss->dbg, LEVEL_1, "\t\tcomputing set of saturation values (MAX AC)\n"));
1572 DBG((rss->dbg, LEVEL_3, "\t\tstarting with:\n"));
1575 if (firm_dbg_get_mask(rss->dbg) & LEVEL_3)
1576 dump_nodeset(values, "\t\t\t");
1582 We need an explicit array for the values as
1583 we cannot iterate multiple times over the same
1584 set at the same time. :-(((((
1586 int n = nodeset_count(values);
1588 ir_node **val_arr = NEW_ARR_F(ir_node *, n);
1590 foreach_nodeset(values, u_irn)
1591 val_arr[i++] = u_irn;
1596 temp = new_nodeset(10);
1598 /* Select all nodes from current value set, having another node in the set as descendant. */
1599 for (i = 0; i < n; ++i) {
1600 rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1602 for (j = 0; j < n; ++j) {
1606 key.src = val_arr[i];
1607 key.tgt = val_arr[j];
1609 if (pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1610 /* v[j] is descendant of u -> remove u and break */
1611 nodeset_insert(temp, u->irn);
1612 nodeset_remove(values, u->irn);
1614 DBG((rss->dbg, LEVEL_3, "\t\t\tremoving %+F from values, adding it to temp\n", u->irn));
1622 /* Try to insert the chain predecessor of all selected u's */
1623 foreach_nodeset(temp, u_irn) {
1624 rss_irn_t *u = get_rss_irn(rss, u_irn);
1625 chain_t *c = u->chain;
1626 plist_element_t *el = plist_find_value(c->elements, u_irn);
1628 assert(el && "Missing element in chain!");
1630 /* If u has predecessor in chain: insert the predecessor */
1631 if (el == plist_element_get_prev(el)) {
1632 nodeset_insert(values, plist_element_get_value(el));
1633 DBG((rss->dbg, LEVEL_3, "\t\t\tadding %+F to values\n", plist_element_get_value(el)));
1639 } while (nodeset_count(temp) > 0);
1641 DBG((rss->dbg, LEVEL_2, "\t\tfinal set:\n"));
1643 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2) {
1644 dump_nodeset(values, "\t\t\t");
1648 if (rss->opts->dump_flags & RSS_DUMP_MAXAC)
1649 debug_vcg_dump_pkg(rss, values, iteration);
1659 * Computes the best serialization between two nodes of sat_vals.
1661 static serialization_t *compute_best_admissible_serialization(rss_t *rss, nodeset *sat_vals, serialization_t *ser, int num_regs) {
1662 int n = nodeset_count(sat_vals);
1663 int n_idx = ARR_LEN_SAFE(rss->idx_map);
1665 ir_node **val_arr = alloca(n * sizeof(val_arr[0]));
1666 bitset_t *bs_sv = bitset_alloca(n_idx);
1667 bitset_t *bs_vdesc = bitset_alloca(n_idx);
1668 bitset_t *bs_tmp = bitset_alloca(n_idx);
1669 bitset_t *bs_ukilldesc = bitset_alloca(n_idx);
1670 int best_benefit = INT_MAX;
1671 int best_omega2 = INT_MAX;
1672 int best_benefit_omega20 = INT_MAX;
1676 rss_edge_t min_benefit_edge;
1677 rss_edge_t min_omega20_edge;
1678 rss_irn_t *ser_u_omega1 = NULL, *ser_v_omega1 = NULL;
1679 rss_irn_t *ser_u_omega20 = NULL, *ser_v_omega20 = NULL;
1681 DBG((rss->dbg, LEVEL_1, "\tcomputing admissible serializations:\n"));
1684 We need an explicit array for the values as
1685 we cannot iterate multiple times over the same
1686 set at the same time. :-(((((
1689 foreach_nodeset(sat_vals, irn) {
1691 bitset_set(bs_sv, BLOCK_IDX_MAP(rss, irn));
1695 We build all admissible serializations and remember the best found so far.
1698 if v in pkiller(u): add edge from v to all other pkiller(u)
1699 else: for all vv in pkiller(u): add edge from v to vv if there exists no path from vv to v
1703 A node is unserializable if:
1704 - it has only one killer and this one is Sink
1705 - it kills no other values
1706 In this case there is no serialization which could
1707 reduce the registerpressure
1709 #define IS_UNSERIALIZABLE_NODE(rss_node) \
1712 (plist_count(rss_node->pkiller_list) == 1) && \
1713 is_Sink(rss_node->killer) && \
1714 (rss_node->kill_count == 0) \
1716 be_is_Barrier(rss_node->irn) || \
1717 be_is_Keep(rss_node->irn) \
1720 /* for all u in sat_vals */
1721 for (i = 0; i < n; ++i) {
1722 rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1723 plist_element_t *el;
1725 /* ignore nodes where serialization does not help */
1726 if (IS_UNSERIALIZABLE_NODE(u)) {
1727 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", u->irn));
1731 /* accumulate all descendants of all pkiller(u) */
1732 bitset_clear_all(bs_ukilldesc);
1733 foreach_plist(u->pkiller_list, el) {
1734 ir_node *irn = plist_element_get_value(el);
1735 rss_irn_t *node = get_rss_irn(rss, irn);
1738 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, irn));
1742 for (k = ARR_LEN_SAFE(node->descendants) - 1; k >= 0; --k) {
1743 if (! is_Sink(node->descendants[k]))
1744 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, node->descendants[k]));
1748 /* for all v in sat_vals */
1749 for (j = 0; j < n; ++j) {
1750 ir_node *v_irn = val_arr[j];
1751 rss_irn_t *v = get_rss_irn(rss, v_irn);
1752 unsigned v_height = get_irn_height(rss->h, v_irn);
1753 int omega1, omega2, is_pkiller;
1755 /* v cannot be serialized with itself
1756 * ignore nodes where serialization does not help */
1757 if (i == j || IS_UNSERIALIZABLE_NODE(v)) {
1759 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", v->irn));
1763 /* get descendants of v */
1764 bitset_clear_all(bs_vdesc);
1765 bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v_irn));
1766 for (k = ARR_LEN_SAFE(v->descendants) - 1; k >= 0; --k) {
1767 if (! is_Sink(v->descendants[k]))
1768 bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v->descendants[k]));
1771 /* if v is in pkiller(u) */
1772 is_pkiller = plist_has_value(u->pkiller_list, v_irn);
1774 /* for all vv in pkiller(u) */
1775 foreach_plist(u->pkiller_list, el) {
1776 ir_node *vv_irn = plist_element_get_value(el);
1779 if (is_Sink(vv_irn) || is_cfop(vv_irn))
1783 add_edge = vv_irn != v_irn && skip_Proj(vv_irn) != skip_Proj(v_irn);
1785 add_edge = ! heights_reachable_in_block(rss->h, skip_Proj(vv_irn), skip_Proj(v_irn));
1788 As we add an edge from vv -> v, we have to make sure,
1789 that there exists no path from v to vv.
1793 int vv_height = get_irn_height(rss->h, vv_irn);
1794 int mu1, mu2, critical_path_cost;
1797 mu1 = | descendants(v) cut sat_vals |
1798 the number of saturating values which cannot
1799 be simultaneously alive with u
1801 bitset_copy(bs_tmp, bs_vdesc);
1802 mu1 = bitset_popcnt(bitset_and(bs_tmp, bs_sv));
1805 mu2 = | accum_desc_all_pkiller(u) without descendants(v) |
1808 bitset_copy(bs_tmp, bs_ukilldesc);
1809 mu2 = bitset_popcnt(bitset_andnot(bs_tmp, bs_vdesc));
1815 /* omega1 = mu1 - mu2 */
1821 /* omega2 = increase of critical path */
1822 critical_path_cost =
1823 v_height /* longest path from v to sink */
1824 + rss->max_height - vv_height /* longest path from source to vv */
1828 If critical_path_cost > max_height -> the new edge
1829 would increase the longest critical path by the difference.
1831 omega2 = critical_path_cost > rss->max_height ? critical_path_cost - rss->max_height : 0;
1833 /* this keeps track of the edge with the best benefit */
1834 if (omega1 >= num_regs - n && omega1 < best_benefit) {
1835 min_benefit_edge.src = v_irn;
1836 min_benefit_edge.tgt = vv_irn;
1841 best_benefit = omega1;
1842 ser->new_killer = is_pkiller;
1845 /* this keeps track of the edge with the best omega1 costs where omega2 == 0 */
1846 if (omega2 == 0 && omega1 >= num_regs - n && omega1 < best_benefit_omega20) {
1847 min_omega20_edge.src = v_irn;
1848 min_omega20_edge.tgt = vv_irn;
1853 best_benefit_omega20 = omega1;
1854 ser->new_killer = is_pkiller;
1857 best_omega2 = MIN(best_omega2, omega2);
1859 DBG((rss->dbg, LEVEL_2, "\t\tfound %+F -> %+F (w1 %d, w2 %d)\n",
1860 v_irn, vv_irn, omega1, omega2));
1862 } /* for all vv in pkiller(u) */
1863 } /* for all v in sat_vals */
1864 } /* for all u in sat_vals */
1869 if (best_omega2 == 0) {
1870 ser->u = ser_u_omega20;
1871 ser->v = ser_v_omega20;
1872 ser->edge->src = min_omega20_edge.src;
1873 ser->edge->tgt = min_omega20_edge.tgt;
1874 ser->omega1 = best_benefit_omega20;
1875 ser->omega2 = best_omega2;
1878 ser->u = ser_u_omega1;
1879 ser->v = ser_v_omega1;
1880 ser->edge->src = min_benefit_edge.src;
1881 ser->edge->tgt = min_benefit_edge.tgt;
1882 ser->omega1 = best_benefit;
1883 ser->omega2 = best_omega2;
1888 #undef IS_UNSERIALIZABLE_NODE
1892 * Perform the value serialization heuristic and add all
1893 * computed serialization edges as dependencies to the irg.
1895 static void perform_value_serialization_heuristic(rss_t *rss) {
1896 bitset_t *arch_nonign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1897 bitset_t *abi_ign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1898 int available_regs, iteration;
1901 pset *ser_set = new_pset(cmp_rss_edges, 20);
1903 /* available_regs = R = |arch_non_ignore_regs cut ~abi_ignore_regs| */
1904 arch_put_non_ignore_regs(rss->arch_env, rss->cls, arch_nonign_bs);
1905 be_abi_put_ignore_regs(rss->abi, rss->cls, abi_ign_bs);
1906 bitset_andnot(arch_nonign_bs, abi_ign_bs);
1907 available_regs = bitset_popcnt(arch_nonign_bs);
1908 //num_live = pset_count(rss->live_block);
1909 //available_regs -= num_live < available_regs ? num_live : 0;
1911 DBG((rss->dbg, LEVEL_1, "\n\t#available regs: %d\n\n", available_regs));
1914 At first we need to compute the disjoint value DAG (DVG = {V, E_dv}).
1915 V = set of all nodes we are currently interested in
1916 E_dv = there is an edge from u to v iff v is a descendant of killer(u), forall u, v in V
1918 dvg.nodes = new_nodeset(plist_count(rss->nodes));
1919 dvg.edges = new_pset(cmp_rss_edges, plist_count(rss->nodes) * 5);
1920 compute_dvg(rss, &dvg);
1923 Then we perform the heuristic serialization algorithm
1924 on the DVG which gives us all necessary serialization
1927 DBG((rss->dbg, LEVEL_1, "\tcomputing maximal antichain:\n"));
1929 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
1930 while (sat_vals && (nodeset_count(sat_vals) > available_regs)) {
1931 serialization_t *ser, best_ser;
1932 rss_edge_t *edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*edge));
1933 ir_node *dep_src, *dep_tgt;
1935 best_ser.edge = edge;
1936 ser = compute_best_admissible_serialization(rss, sat_vals, &best_ser, available_regs);
1938 DBG((rss->dbg, LEVEL_1, "\tcurrent register saturation %d, target %d\n", nodeset_count(sat_vals), available_regs));
1941 DBG((rss->dbg, LEVEL_1, "\tno RS improving serialization found, breaking at iteration %d\n", iteration));
1945 /* Insert the serialization as dependency edge into the irg. */
1946 DBG((rss->dbg, LEVEL_1, "\tserializing %+F -> %+F with edge %+F -> %+F and cost %d, %d\n",
1947 ser->u->irn, ser->v->irn, ser->edge->src, ser->edge->tgt, ser->omega1, ser->omega2));
1949 if (pset_find(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge)))
1950 ir_printf("WARNING: serialization %+F -> %+F computed twice!\n", ser->edge->src, ser->edge->tgt);
1953 pset_insert(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge));
1955 /* update the dvg */
1956 update_dvg(rss, &dvg, ser->v, ser->u);
1957 update_dvg(rss, &dvg, get_rss_irn(rss, ser->edge->src), get_rss_irn(rss, ser->edge->tgt));
1958 del_nodeset(sat_vals);
1960 dep_src = skip_Proj(ser->edge->src);
1961 dep_tgt = ser->edge->tgt;
1962 add_irn_dep(dep_src, dep_tgt);
1964 /* Update descendants, consumer and pkillers of the target */
1965 update_node_info(rss, ser->edge->tgt, ser->edge->src);
1967 /* TODO: try to find a cheaper way for updating height information */
1968 rss->max_height = heights_recompute_block(rss->h, rss->block);
1970 /* Recompute the antichain for next serialization */
1971 DBG((rss->dbg, LEVEL_1, "\tre-computing maximal antichain:\n"));
1972 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
1975 del_nodeset(dvg.nodes);
1976 del_pset(dvg.edges);
1980 * Do initial calculations for a block.
1982 static void process_block(ir_node *block, void *env) {
1985 const ir_edge_t *edge;
1987 phase_init(&rss->ph, "rss block preprocessor", rss->irg, PHASE_DEFAULT_GROWTH, init_rss_irn);
1989 DBG((rss->dbg, LEVEL_1, "preprocessing block %+F\n", block));
1992 /* build an index map for all nodes in the current block */
1994 n = get_irn_n_edges(block);
1995 NEW_ARR_A(int *, rss->idx_map, n);
1996 foreach_out_edge(block, edge) {
1997 ir_node *irn = get_edge_src_irn(edge);
1998 rss->idx_map[i++] = get_irn_idx(irn);
2000 qsort(rss->idx_map, n, sizeof(rss->idx_map[0]), cmp_int);
2001 rss->max_height = heights_recompute_block(rss->h, block);
2003 /* loop over all register classes */
2004 for (i = arch_isa_get_n_reg_class(rss->arch_env->isa) - 1; i >= 0; --i) {
2005 const arch_register_class_t *cls = arch_isa_get_reg_class(rss->arch_env->isa, i);
2008 DBG((rss->dbg, LEVEL_1, "register class %s\n", arch_register_class_name(cls)));
2010 /* Get all live value at end of Block having current register class */
2011 rss->live_block = pset_new_ptr(10);
2012 be_liveness_end_of_block(rss->liveness, rss->arch_env, rss->cls, rss->block, rss->live_block);
2014 /* reset the list of interesting nodes */
2015 plist_clear(rss->nodes);
2016 plist_insert_back(rss->nodes, _sink);
2018 /* collect all nodes having a certain register class */
2019 foreach_out_edge(block, edge) {
2020 ir_node *irn = get_edge_src_irn(edge);
2021 ir_mode *mode = get_irn_mode(irn);
2025 - mode_T nodes (the projs are asked)
2026 - mode_X nodes (control flow nodes are always scheduled last)
2027 - Keeps (they are always immediately scheduled)
2028 - Phi (same as Keep)
2030 if (mode == mode_T || mode == mode_X || is_Phi(irn))
2034 In case of a proj, we skip
2035 - Barrier (they are a Barrier :)
2037 - the Proj itself, as it's scheduled always with it's super node
2040 ir_node *pred = get_Proj_pred(irn);
2041 if (be_is_Barrier(pred) || is_Start(pred))
2045 /* calculate the descendants and consumer for each node in the block */
2046 collect_node_info(rss, skip_Proj(irn));
2048 if (be_is_Keep(irn))
2051 if (! arch_irn_is(rss->arch_env, irn, ignore) && arch_get_irn_reg_class(rss->arch_env, irn, -1) == cls) {
2052 plist_insert_back(rss->nodes, skip_Proj(irn));
2057 /* compute the potential killing set PK(G) */
2058 compute_pkill_set(rss);
2060 /* compute the killing function k* */
2061 compute_killing_function(rss);
2064 Compute the heuristic value serialization and
2065 add the necessary dependencies to the irg.
2067 perform_value_serialization_heuristic(rss);
2069 del_pset(rss->live_block);
2072 phase_free(&rss->ph);
2077 * Register the options.
2079 void be_init_schedrss(void) {
2080 lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
2081 lc_opt_entry_t *sched_grp = lc_opt_get_grp(be_grp, "sched");
2082 lc_opt_entry_t *rss_grp = lc_opt_get_grp(sched_grp, "rss");
2084 lc_opt_add_table(rss_grp, rss_option_table);
2087 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_schedrss);
2088 #endif /* WITH_LIBCORE */
2091 * Preprocess the irg for scheduling.
2093 void rss_schedule_preparation(const be_irg_t *birg) {
2096 FIRM_DBG_REGISTER(rss.dbg, "firm.be.sched.rss");
2098 //firm_dbg_set_mask(rss.dbg, LEVEL_1 | LEVEL_2 | LEVEL_3);
2100 init_rss_special_nodes(birg->irg);
2102 rss.irg = birg->irg;
2103 rss.arch_env = birg->main_env->arch_env;
2104 rss.abi = birg->abi;
2105 rss.h = heights_new(birg->irg);
2106 rss.nodes = plist_new();
2107 rss.opts = &rss_options;
2108 rss.liveness = be_liveness(birg->irg);
2109 irg_block_walk_graph(birg->irg, NULL, process_block, &rss);
2110 heights_free(rss.h);
2111 plist_free(rss.nodes);
2112 be_liveness_free(rss.liveness);
2114 if (birg->main_env->options->dump_flags & DUMP_SCHED)
2115 be_dump(rss.irg, "-rss", dump_ir_block_graph);