2 * Implementation of a register saturating list scheduler
3 * as described in: Sid-Ahmed-Ali Touati
4 * Register Saturation in Superscalar and VLIW Codes
6 * @license This file protected by GPL - GNU GENERAL PUBLIC LICENSE.
7 * @author Christian Wuerdig
20 #include "irgraph_t.h"
22 #include "iredges_t.h"
24 #include "irphase_t.h"
30 #include "bipartite.h"
31 #include "hungarian.h"
39 #include "besched_t.h"
42 #include <libcore/lc_opts.h>
43 #include <libcore/lc_opts_enum.h>
44 #endif /* WITH_LIBCORE */
47 #define ARR_LEN_SAFE(arr) ((arr) != NULL ? ARR_LEN((arr)) : 0)
49 #define HASH_RSS_EDGE(edge) ((get_irn_node_nr((edge)->src) << 16) | (get_irn_node_nr((edge)->tgt) & 0xFFFF))
50 #define BSEARCH_IRN_ARR(val, arr) \
51 bsearch(&(val), (arr), ARR_LEN_SAFE((arr)), sizeof((arr)[0]), cmp_irn_idx)
53 #define BLOCK_IDX_MAP(rss, irn) bsearch_for_index(get_irn_idx((irn)), (rss)->idx_map, ARR_LEN_SAFE((rss)->idx_map), 1)
56 typedef struct _rss_opts_t {
60 /* Represents a child with associated costs */
61 typedef struct _child {
66 /* We need edges for several purposes. */
67 typedef struct _rss_edge {
73 /* Represents a connected bipartite component. */
75 nodeset *parents; /**< = S a set of value producers */
76 nodeset *children; /**< = T a set of value consumers */
77 pset *kill_edges; /**< = E a set of edges (t in T, s in S) such as each s in S gets killed by at least one t in T */
78 int nr; /**< a deterministic index for set insertion (used as hash) */
81 /* Represents a disjoint value DAG. */
87 /* Represents a chain of nodes. */
88 typedef struct _chain {
89 plist_t *elements; /**< List of chain elements */
90 int nr; /**< a deterministic index for set insertion (used as hash) */
93 typedef struct _rss_irn {
94 plist_t *consumer_list; /**< List of consumers */
95 ir_node **consumer; /**< Sorted consumer array (needed for faster access) */
97 plist_t *parent_list; /**< List of parents */
98 plist_t *pkiller_list; /**< List of potential killers */
100 plist_t *descendant_list; /**< List of descendants */
101 ir_node **descendants; /**< Sorted descendant array (needed for faster access) */
103 ir_node *killer; /**< The selected unique killer */
104 ir_node *irn; /**< The corresponding firm node to this rss_irn */
106 chain_t *chain; /**< The chain, this node is associated to */
108 unsigned desc_walk; /**< visited flag for collecting descendants */
109 unsigned kill_count; /**< number of nodes killed by this one */
111 unsigned live_out : 1; /**< irn has consumers outside of it's block */
112 unsigned visited : 1; /**< visited flag for bipartite decomposition */
113 unsigned havecons : 1; /**< visited flag collect consumer */
114 unsigned handled : 1; /**< flag indicating whether or not the list structures have been build */
115 unsigned dumped : 1; /**< flag indication whether or not this node was dumped */
118 /* Represents a serialization edge with associated costs. */
119 typedef struct _serialization {
120 rss_irn_t *u; /* the top node */
121 rss_irn_t *v; /* the node about to be serialized after u */
122 rss_edge_t *edge; /* the edge selected for the serialization */
123 int omega1; /* estimated: available regs - RS reduction */
124 int omega2; /* increase in critical path length */
128 typedef struct _rss {
129 phase_t ph; /**< Phase to hold some data */
130 heights_t *h; /**< The current height object */
131 ir_graph *irg; /**< The irg to preprocess */
132 plist_t *nodes; /**< The list of interesting nodes */
133 const arch_env_t *arch_env; /**< The architecture environment */
134 be_abi_irg_t *abi; /**< The abi for this irg */
135 pset *cbc_set; /**< A set of connected bipartite components */
136 ir_node *block; /**< The current block in progress. */
137 int *idx_map; /**< Mapping irn indices to per block indices */
138 unsigned max_height; /**< maximum height in the current block */
139 rss_opts_t *opts; /**< The options */
140 be_lv_t *liveness; /**< The liveness information for this irg */
141 pset *live_block; /**< Values alive at end of block */
142 const arch_register_class_t *cls; /**< The current register class */
143 DEBUG_ONLY(firm_dbg_module_t *dbg);
146 #define get_rss_irn(rss, irn) (phase_get_or_set_irn_data(&rss->ph, irn))
149 * We need some special nodes:
150 * a source and a sink for all live-in and live-out values of a block
158 /** The opcode of the rss_Source node once allocated. */
159 static ir_op *op_rss_Source;
160 /** The opcode of the rss_Sink node once allocated. */
161 static ir_op *op_rss_Sink;
163 /** The rss_Source node of the current graph. */
164 static ir_node *_source = NULL;
165 /** The rss_Sink node of the current graph. */
166 static ir_node *_sink = NULL;
168 #define is_Source(irn) ((irn) == _source)
169 #define is_Sink(irn) ((irn) == _sink)
173 RSS_DUMP_CBC = 1 << 0,
174 RSS_DUMP_PKG = 1 << 1,
175 RSS_DUMP_KILL = 1 << 2,
176 RSS_DUMP_DVG = 1 << 3,
177 RSS_DUMP_MAXAC = 1 << 4,
178 RSS_DUMP_ALL = (RSS_DUMP_MAXAC << 1) - 1,
181 static rss_opts_t rss_options = {
186 static const lc_opt_enum_int_items_t dump_items[] = {
187 { "none", RSS_DUMP_NONE },
188 { "cbc", RSS_DUMP_CBC },
189 { "pkg", RSS_DUMP_PKG },
190 { "kill", RSS_DUMP_KILL },
191 { "dvg", RSS_DUMP_DVG },
192 { "maxac", RSS_DUMP_MAXAC },
193 { "all", RSS_DUMP_ALL },
197 static lc_opt_enum_int_var_t dump_var = {
198 &rss_options.dump_flags, dump_items
201 static const lc_opt_table_entry_t rss_option_table[] = {
202 LC_OPT_ENT_ENUM_MASK("dump", "dump phases", &dump_var),
205 #endif /* WITH_LIBCORE */
207 /******************************************************************************
209 * | | | | / _| | | (_)
210 * | |__ ___| |_ __ ___ _ __ | |_ _ _ _ __ ___| |_ _ ___ _ __ ___
211 * | '_ \ / _ \ | '_ \ / _ \ '__| | _| | | | '_ \ / __| __| |/ _ \| '_ \/ __|
212 * | | | | __/ | |_) | __/ | | | | |_| | | | | (__| |_| | (_) | | | \__ \
213 * |_| |_|\___|_| .__/ \___|_| |_| \__,_|_| |_|\___|\__|_|\___/|_| |_|___/
216 ******************************************************************************/
219 * Acquire opcodes if needed and create source and sink nodes.
221 static void init_rss_special_nodes(ir_graph *irg) {
224 if (op_rss_Source == NULL) {
225 int iro_rss_base = get_next_ir_opcodes(iro_rss_last);
226 op_rss_Source = new_ir_op(iro_rss_base + iro_rss_Source, "rss_Source", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
227 op_rss_Sink = new_ir_op(iro_rss_base + iro_rss_Sink, "rss_Sink", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
229 block = get_irg_start_block(irg);
230 _source = new_ir_node(NULL, irg, block, op_rss_Source, mode_ANY, 0, NULL);
231 _sink = new_ir_node(NULL, irg, block, op_rss_Sink, mode_ANY, 0, NULL);
234 static int cmp_int(const void *a, const void *b) {
238 return QSORT_CMP(*i1, *i2);
241 static int cmp_child_costs(const void *a, const void *b) {
242 const child_t *c1 = a;
243 const child_t *c2 = b;
245 return QSORT_CMP(c1->cost, c2->cost);
248 static int cmp_irn_idx(const void *a, const void *b) {
249 const ir_node *n1 = *(ir_node **)a;
250 const ir_node *n2 = *(ir_node **)b;
252 return QSORT_CMP(get_irn_idx(n1), get_irn_idx(n2));
255 static int cmp_rss_edges(const void *a, const void *b) {
256 const rss_edge_t *e1 = a;
257 const rss_edge_t *e2 = b;
259 return (e1->src != e2->src) || (e1->tgt != e2->tgt);
262 static int bsearch_for_index(int key, int *arr, size_t len, int force) {
266 while (right >= left) {
267 int idx = (left + right) / 2;
271 else if (key > arr[idx])
278 assert(0 && "Something is wrong, key not found.");
282 static ir_node **build_sorted_array_from_list(plist_t *irn_list, struct obstack *obst) {
285 int len = plist_count(irn_list);
286 ir_node **arr = NEW_ARR_D(ir_node *, obst, len);
288 /* copy the list into the array */
289 foreach_plist(irn_list, el) {
290 arr[i++] = plist_element_get_value(el);
293 /* sort the array by node index */
294 qsort(arr, len, sizeof(arr[0]), cmp_irn_idx);
299 /*****************************************************
302 * __| | ___| |__ _ _ __ _ __ _ _ _ __ __ _
303 * / _` |/ _ \ '_ \| | | |/ _` |/ _` | | '_ \ / _` |
304 * | (_| | __/ |_) | |_| | (_| | (_| | | | | | (_| |
305 * \__,_|\___|_.__/ \__,_|\__, |\__, |_|_| |_|\__, |
309 *****************************************************/
312 static void dump_nodeset(nodeset *ns, const char *prefix) {
314 foreach_nodeset(ns, irn) {
315 ir_fprintf(stderr, "%s%+F\n", prefix, irn);
320 static void build_file_name(rss_t *rss, const char *suffix, size_t suf_len, char *buf, size_t len) {
321 const char *irg_name;
324 irg_name = get_entity_name(get_irg_entity(rss->irg));
325 snprintf(buf, len - suf_len, "%s-%s-block-%ld",
326 irg_name, arch_register_class_name(rss->cls), get_irn_node_nr(rss->block));
330 /* Dumps all collected bipartite components of current irg as vcg. */
331 static void debug_vcg_dump_bipartite(rss_t *rss) {
335 static const char suffix[] = "-RSS-CBC.vcg";
337 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
338 f = fopen(file_name, "w");
340 ir_fprintf(f, "graph: { title: \"connected bipartite component graph of %+F\"\n", rss->irg);
341 fprintf(f, "display_edge_labels: no\n");
342 fprintf(f, "layoutalgorithm: mindepth\n");
343 fprintf(f, "manhattan_edges: yes\n\n");
345 foreach_pset(rss->cbc_set, cbc) {
349 fprintf(f, "graph: { titel: \"cbc %d\" label: \"cbc %d\" status:clustered color:yellow\n", cbc->nr, cbc->nr);
350 foreach_nodeset(cbc->parents, n) {
351 ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
353 foreach_nodeset(cbc->children, n) {
354 ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
356 foreach_pset(cbc->kill_edges, ke) {
357 ir_fprintf(f, "edge: { sourcename: \"n%d_%d\" targetname: \"n%d_%d\" }\n",
358 get_irn_node_nr(ke->src), cbc->nr, get_irn_node_nr(ke->tgt), cbc->nr);
366 /* Dump the computed killing function as vcg. */
367 static void debug_vcg_dump_kill(rss_t *rss) {
371 static const char suffix[] = "-RSS-KILL.vcg";
373 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
374 f = fopen(file_name, "w");
376 ir_fprintf(f, "graph: { title: \"computed kill graph of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
377 fprintf(f, "display_edge_labels: no\n");
378 fprintf(f, "layoutalgorithm: mindepth\n");
379 fprintf(f, "manhattan_edges: yes\n\n");
382 /* reset dump_flag */
384 foreach_phase_irn(&rss->ph, irn) {
385 rss_irn_t *node = get_rss_irn(rss, irn);
390 /* dump all nodes and their killers */
391 foreach_plist(rss->nodes, el) {
392 ir_node *irn = plist_element_get_value(el);
393 rss_irn_t *rirn = get_rss_irn(rss, irn);
394 rss_irn_t *pk_rirn = get_rss_irn(rss, rirn->killer);
396 if (! rirn->dumped) {
397 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
401 if (! pk_rirn->dumped) {
402 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(rirn->killer), rirn->killer);
406 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
407 get_irn_node_nr(rirn->killer), get_irn_node_nr(irn));
414 /* Dumps the potential killing DAG (PKG) as vcg. */
415 static void debug_vcg_dump_pkg(rss_t *rss, nodeset *max_ac, int iteration) {
418 char *suffix = alloca(32);
419 static const char suffix1[] = "-RSS-PKG.vcg";
420 static const char suffix2[] = "-RSS-PKG-MAXAC.vcg";
424 snprintf(suffix, 32, "%s", suffix1);
427 snprintf(suffix, 32, "-%02d%s", iteration, suffix2);
430 build_file_name(rss, suffix, strlen(suffix) + 1, file_name, sizeof(file_name));
431 f = fopen(file_name, "w");
433 ir_fprintf(f, "graph: { title: \"potential killing DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
434 fprintf(f, "display_edge_labels: no\n");
435 fprintf(f, "layoutalgorithm: mindepth\n");
436 fprintf(f, "manhattan_edges: yes\n\n");
439 /* reset dump_flag */
441 foreach_phase_irn(&rss->ph, irn) {
442 rss_irn_t *node = get_rss_irn(rss, irn);
447 foreach_plist(rss->nodes, el) {
448 ir_node *irn = plist_element_get_value(el);
449 rss_irn_t *rirn = get_rss_irn(rss, irn);
451 plist_element_t *k_el;
453 /* dump selected saturating values in yellow */
454 if (max_ac && nodeset_find(max_ac, irn))
457 if (! rirn->dumped) {
459 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F c%d\" %s }\n", get_irn_node_nr(irn), irn, rirn->chain->nr, c1);
461 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(irn), irn, c1);
465 foreach_plist(rirn->pkiller_list, k_el) {
466 ir_node *pkiller = plist_element_get_value(k_el);
467 rss_irn_t *pk_rirn = get_rss_irn(rss, pkiller);
470 if (max_ac && nodeset_find(max_ac, pkiller))
473 if (! pk_rirn->dumped) {
475 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F c%d\" %s }\n", get_irn_node_nr(pkiller), pkiller, pk_rirn->chain->nr, c2);
477 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(pkiller), pkiller, c2);
480 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
481 get_irn_node_nr(pkiller), get_irn_node_nr(irn));
488 /* Dumps the disjoint value DAG (DVG) as vcg. */
489 static void debug_vcg_dump_dvg(rss_t *rss, dvg_t *dvg) {
490 static const char suffix[] = "-RSS-DVG.vcg";
496 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
497 f = fopen(file_name, "w");
499 ir_fprintf(f, "graph: { title: \"disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
500 fprintf(f, "display_edge_labels: no\n");
501 fprintf(f, "layoutalgorithm: mindepth\n");
502 fprintf(f, "manhattan_edges: yes\n\n");
505 foreach_nodeset(dvg->nodes, irn) {
506 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
510 foreach_pset(dvg->edges, edge) {
511 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
512 get_irn_node_nr(edge->src), get_irn_node_nr(edge->tgt));
520 /* Dumps the PKG(DVG). */
521 static void debug_vcg_dump_dvg_pkiller(rss_t *rss, dvg_t *dvg) {
522 static const char suffix[] = "-RSS-DVG-PKG.vcg";
527 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
528 f = fopen(file_name, "w");
530 ir_fprintf(f, "graph: { title: \"PKG of disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
531 fprintf(f, "display_edge_labels: no\n");
532 fprintf(f, "layoutalgorithm: mindepth\n");
533 fprintf(f, "manhattan_edges: yes\n\n");
536 foreach_nodeset(dvg->nodes, irn) {
537 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
541 foreach_nodeset(dvg->nodes, irn) {
542 rss_irn_t *node = get_rss_irn(rss, irn);
545 foreach_plist(node->dvg_pkiller_list, el) {
546 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
547 get_irn_node_nr(plist_element_get_value(el)), get_irn_node_nr(irn));
557 * In case there is no rss information for irn, initialize it.
559 static void *init_rss_irn(phase_t *ph, ir_node *irn, void *old) {
560 rss_irn_t *res = old ? old : phase_alloc(ph, sizeof(res[0]));
562 res->descendant_list = plist_obstack_new(phase_obst(ph));
563 res->descendants = NULL;
565 res->consumer_list = plist_obstack_new(phase_obst(ph));
566 res->consumer = NULL;
568 res->pkiller_list = plist_obstack_new(phase_obst(ph));
570 res->parent_list = plist_obstack_new(phase_obst(ph));
588 * Collect all nodes data dependent on current node.
590 static void collect_descendants(rss_t *rss, rss_irn_t *rirn, ir_node *irn, int *got_sink, unsigned cur_desc_walk) {
591 const ir_edge_t *edge;
592 rss_irn_t *cur_node = get_rss_irn(rss, irn);
593 ir_node *block = rss->block;
594 ir_edge_kind_t ekind[2] = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
597 if (cur_node->desc_walk >= cur_desc_walk)
599 cur_node->desc_walk = cur_desc_walk;
601 /* Stop at Barriers */
602 if (be_is_Barrier(irn))
605 /* loop over normal and dependency edges */
606 for (i = 0; i < 2; ++i) {
607 foreach_out_edge_kind(irn, edge, ekind[i]) {
608 ir_node *user = get_edge_src_irn(edge);
610 /* skip ignore nodes as they do not really contribute to register pressure */
611 if (arch_irn_is(rss->arch_env, user, ignore))
615 (a) mode_X means Jump -> out_edge
616 (b) Phi as user of a normal node -> out-edge
618 if (get_irn_mode(user) == mode_X || is_Phi(user)) {
627 //if (arch_get_irn_reg_class(rss->arch_env, user, -1) == rss->cls)
628 collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
631 /* check if user lives in block and is not a control flow node */
632 if (get_nodes_block(user) == block) {
633 if (! plist_has_value(rirn->descendant_list, user)) {
634 plist_insert_back(rirn->descendant_list, user);
635 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", user));
637 collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
639 else if (! *got_sink) {
641 /* user lives out of block: add sink as descendant if not already done */
642 plist_insert_back(rirn->descendant_list, _sink);
644 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", _sink));
652 * Handles a single consumer.
654 static void collect_single_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *consumer, int *got_sink) {
655 ir_node *block = rss->block;
657 assert(! is_Proj(consumer) && "Cannot handle Projs");
659 if (! is_Phi(consumer) && ! is_Block(consumer) && get_nodes_block(consumer) == block) {
660 if (! arch_irn_is(rss->arch_env, consumer, ignore) && ! plist_has_value(rss_irn->consumer_list, consumer)) {
661 plist_insert_back(rss_irn->consumer_list, consumer);
662 DBG((rss->dbg, LEVEL_2, "\t\tconsumer %+F\n", consumer));
666 rss_irn->live_out = 1;
667 DBG((rss->dbg, LEVEL_2, "\t\tlive out %+F", consumer));
669 plist_insert_back(rss_irn->consumer_list, _sink);
671 DB((rss->dbg, LEVEL_2, ", %+F added instead", _sink));
673 DB((rss->dbg, LEVEL_2, "\n"));
678 * Collect all nodes consuming the value(s) produced by current node.
680 static void collect_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *irn, int *got_sink) {
681 const ir_edge_t *edge;
683 ir_edge_kind_t ekind[2] = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
684 rss_irn_t *cur_node = get_rss_irn(rss, irn);
686 if (cur_node->havecons)
688 cur_node->havecons = 1;
690 for (i = 0; i < 2; ++i) {
691 foreach_out_edge_kind(irn, edge, ekind[i]) {
692 ir_node *consumer = get_edge_src_irn(edge);
694 if (is_Proj(consumer)) {
695 //if (arch_get_irn_reg_class(rss->arch_env, consumer, -1) == rss->cls)
696 collect_consumer(rss, rss_irn, consumer, got_sink);
699 collect_single_consumer(rss, rss_irn, consumer, got_sink);
705 * Collects all consumer and descendant of a irn.
707 static void collect_node_info(rss_t *rss, ir_node *irn) {
708 static unsigned cur_desc_walk = 0;
709 rss_irn_t *rss_irn = get_rss_irn(rss, irn);
712 assert(! is_Proj(irn) && "Cannot handle Projs.");
714 /* check if node info is already available */
715 if (rss_irn->handled)
717 //phase_reinit_single_irn_data(&rss->ph, irn);
719 DBG((rss->dbg, LEVEL_1, "\tcomputing consumers of %+F:\n", irn));
721 /* collect all consumer */
723 collect_consumer(rss, rss_irn, irn, &got_sink);
725 /* build sorted consumer array */
726 rss_irn->consumer = build_sorted_array_from_list(rss_irn->consumer_list, phase_obst(&rss->ph));
728 DBG((rss->dbg, LEVEL_1, "\tcompute descendants of %+F:\n", irn));
730 /* collect descendants */
732 collect_descendants(rss, rss_irn, irn, &got_sink, ++cur_desc_walk);
734 /* build sorted descendant array */
735 rss_irn->descendants = build_sorted_array_from_list(rss_irn->descendant_list, phase_obst(&rss->ph));
737 rss_irn->handled = 1;
741 * Checks if v is a potential killer of u.
742 * v is in pkill(u) iff descendants(v) cut consumer(u) is v
744 * @param rss The rss object
745 * @param v The node to check for killer
746 * @param u The potentially killed value
747 * @return 1 if v is in pkill(u), 0 otherwise
749 static int is_potential_killer(rss_t *rss, rss_irn_t *v, rss_irn_t *u) {
754 assert(is_Sink(v->irn) || ((plist_count(v->descendant_list) > 0 && v->descendants) || 1));
755 assert(is_Sink(u->irn) || ((plist_count(u->consumer_list) > 0 && u->consumer) || 1));
757 /* as we loop over the list: loop over the shorter one */
758 if (plist_count(v->descendant_list) > plist_count(u->consumer_list)) {
759 list = u->consumer_list;
760 arr = v->descendants;
763 list = v->descendant_list;
767 /* for each list element: try to find element in array */
768 foreach_plist(list, el) {
769 ir_node *irn = plist_element_get_value(el);
770 ir_node *match = BSEARCH_IRN_ARR(irn, arr);
772 if (match && match != irn)
780 * Update descendants, consumer and pkiller list for the given irn.
782 static void update_node_info(rss_t *rss, ir_node *irn, ir_node *pk_irn) {
783 rss_irn_t *node = get_rss_irn(rss, irn);
784 rss_irn_t *pkiller = get_rss_irn(rss, pk_irn);
786 /* add consumer and rebuild the consumer array */
787 if (! plist_has_value(node->consumer_list, pk_irn)) {
788 plist_insert_back(node->consumer_list, pk_irn);
789 node->consumer = build_sorted_array_from_list(node->consumer_list, phase_obst(&rss->ph));
792 /* add pk_irn as descendant, add it's descendants to irn's and rebuild array */
793 if (! plist_has_value(node->descendant_list, pk_irn)) {
796 plist_insert_back(node->descendant_list, pk_irn);
798 foreach_plist(pkiller->descendant_list, el) {
799 ir_node *desc = plist_element_get_value(el);
801 if (! plist_has_value(node->descendant_list, desc))
802 plist_insert_back(node->descendant_list, desc);
805 node->descendants = build_sorted_array_from_list(node->descendant_list, phase_obst(&rss->ph));
811 * Compute the potential killing set PK.
813 static void compute_pkill_set(rss_t *rss) {
814 plist_element_t *u_el, *v_el;
816 foreach_plist(rss->nodes, u_el) {
817 ir_node *u_irn = plist_element_get_value(u_el);
818 rss_irn_t *u = get_rss_irn(rss, u_irn);
820 DBG((rss->dbg, LEVEL_1, "\tcomputing potential killers of %+F:\n", u_irn));
822 /* check each consumer if it is a potential killer */
823 foreach_plist(u->consumer_list, v_el) {
824 ir_node *v_irn = plist_element_get_value(v_el);
825 rss_irn_t *v = get_rss_irn(rss, v_irn);
827 /* check, if v is a potential killer of u */
828 if (is_potential_killer(rss, v, u)) {
829 if (! plist_has_value(u->pkiller_list, v_irn))
830 plist_insert_back(u->pkiller_list, v_irn);
832 DBG((rss->dbg, LEVEL_2, "\t\tpotential killer %+F\n", v_irn));
839 if (rss->opts->dump_flags & RSS_DUMP_PKG)
840 debug_vcg_dump_pkg(rss, NULL, 0);
844 * Build set of killing edges (from values to their potential killers)
846 static void build_kill_edges(rss_t *rss, pset *epk) {
847 plist_element_t *el, *k_el;
849 foreach_plist(rss->nodes, el) {
850 ir_node *irn = plist_element_get_value(el);
851 rss_irn_t *rirn = get_rss_irn(rss, irn);
853 DBG((rss->dbg, LEVEL_3, "\t\tbuilding kill edges for %+F:\n", irn));
855 foreach_plist(rirn->pkiller_list, k_el) {
856 ir_node *pkiller = plist_element_get_value(k_el);
857 rss_edge_t *ke = obstack_alloc(phase_obst(&rss->ph), sizeof(*ke));
863 DBG((rss->dbg, LEVEL_3, "\t\t\ttarget %+F\n", pkiller));
865 pset_insert(epk, ke, HASH_RSS_EDGE(ke));
871 /* print the given cbc for debugging purpose */
872 static void debug_print_cbc(firm_dbg_module_t *mod, cbc_t *cbc) {
876 DBG((mod, LEVEL_3, "\t\tS = set of parents:\n"));
877 foreach_nodeset(cbc->parents, n) {
878 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
880 DBG((mod, LEVEL_3, "\t\tT = set of children:\n"));
881 foreach_nodeset(cbc->children, n) {
882 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
884 DBG((mod, LEVEL_3, "\t\tE = Edges from producers to consumers\n"));
885 foreach_pset(cbc->kill_edges, ke) {
886 DBG((mod, LEVEL_3, "\t\t\t%+F -> %+F\n", ke->src, ke->tgt));
892 * Construct the bipartite decomposition.
893 * Sid-Ahmed-Ali Touati, Phd Thesis
894 * Register Pressure in Instruction Level Parallelism, p. 71
896 static void compute_bipartite_decomposition(rss_t *rss) {
897 pset *epk = new_pset(cmp_rss_edges, 10);
902 DBG((rss->dbg, LEVEL_1, "\tcomputing bipartite decomposition:\n"));
904 build_kill_edges(rss, epk);
906 foreach_plist(rss->nodes, el) {
907 ir_node *u_irn = plist_element_get_value(el);
908 rss_irn_t *u = get_rss_irn(rss, u_irn);
914 plist_element_t *el2;
916 rss_edge_t *kedge_root = NULL;
917 ir_node *t_irn, *s_irn;
919 if (u->visited || u_irn == _sink)
922 DBG((rss->dbg, LEVEL_2, "\t\t%+F choosen:\n", u_irn));
924 cbc = obstack_alloc(phase_obst(&rss->ph), sizeof(*cbc));
927 /* initialize S_cb */
928 cbc->parents = new_nodeset(5);
929 nodeset_insert(cbc->parents, u_irn);
930 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (init)\n", u_irn));
933 cbc->kill_edges = new_pset(cmp_rss_edges, 5);
935 /* each parent gets killed by at least one value from children */
937 /* T_cb = PK_successors(u) */
938 cbc->children = new_nodeset(5);
939 foreach_plist(u->pkiller_list, el2) {
940 nodeset_insert(cbc->children, plist_element_get_value(el2));
941 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (init)\n", plist_element_get_value(el2)));
945 Now: insert the parents of all children into the parent set
946 and insert the children of all parents into the children set
947 until the sets don't change any more
949 while (p_change || c_change) {
950 p_change = c_change = 0;
952 /* accumulate parents */
953 foreach_nodeset(cbc->children, t_irn) {
954 foreach_pset(epk, k_edge) {
955 ir_node *val = k_edge->src;
957 if (k_edge->tgt == t_irn && ! nodeset_find(cbc->parents, val)) {
958 nodeset_insert(cbc->parents, val);
960 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (killed by %+F)\n", val, t_irn));
965 /* accumulate children */
966 foreach_nodeset(cbc->parents, s_irn) {
967 foreach_pset(epk, k_edge) {
968 ir_node *val = k_edge->tgt;
970 if (k_edge->src == s_irn && ! nodeset_find(cbc->children, val)) {
971 nodeset_insert(cbc->children, val);
973 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (kills %+F)\n", val, s_irn));
979 /* mark all parent values as visited */
980 foreach_nodeset(cbc->parents, s_irn) {
981 rss_irn_t *s = get_rss_irn(rss, s_irn);
983 /* assure bipartite property */
985 if (nodeset_find(cbc->children, s_irn)) {
986 nodeset_remove(cbc->children, s_irn);
987 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F removed from to children\n", s_irn));
993 foreach_pset(epk, k_edge) {
994 if (nodeset_find(cbc->parents, k_edge->src) && nodeset_find(cbc->children, k_edge->tgt)) {
995 pset_insert(cbc->kill_edges, k_edge, HASH_RSS_EDGE(k_edge));
997 Link all k_edges which are about to be removed together.
998 Beware: pset_remove kills the iterator.
1000 k_edge->next = kedge_root;
1001 kedge_root = k_edge;
1005 /* remove all linked k_edges */
1006 for (; kedge_root; kedge_root = kedge_root->next) {
1007 pset_remove(epk, kedge_root, HASH_RSS_EDGE(kedge_root));
1010 /* verify the cbc */
1011 foreach_nodeset(cbc->parents, s_irn) {
1014 foreach_pset(cbc->kill_edges, k_edge) {
1015 if (k_edge->src == s_irn) {
1017 pset_break(cbc->kill_edges);
1024 ir_fprintf(stderr, "Warning: parent %+F is not killed in current cbc\n", s_irn);
1027 assert(vrfy_ok && "Verification of CBC failed");
1029 /* add the connected bipartite component */
1030 if (nodeset_count(cbc->parents) > 0 && nodeset_count(cbc->children) > 0 && pset_count(cbc->kill_edges) > 0)
1031 pset_insert(rss->cbc_set, cbc, (unsigned)cbc->nr);
1032 DBG((rss->dbg, LEVEL_2, "\tbipartite component %d inserted:\n", cbc->nr));
1034 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2)
1035 debug_print_cbc(rss->dbg, cbc);
1039 if (rss->opts->dump_flags & RSS_DUMP_CBC)
1040 debug_vcg_dump_bipartite(rss);
1046 * Select the child with the maximum cost.
1048 static child_t *select_child_max_cost(rss_t *rss, nodeset *x, nodeset *y, child_t *t, cbc_t *cbc) {
1050 float max_cost = -1.0f;
1052 DBG((rss->dbg, LEVEL_2, "\t\tcomputing children costs:\n"));
1054 foreach_nodeset(cbc->children, child) {
1055 rss_irn_t *r_child = get_rss_irn(rss, child);
1056 int num_unkilled_parents = 0;
1057 int num_descendants;
1061 /* get the number of unkilled parents */
1062 foreach_pset(cbc->kill_edges, k_edge) {
1063 if (k_edge->tgt == child && nodeset_find(x, k_edge->src))
1064 ++num_unkilled_parents;
1067 cost = (float)num_unkilled_parents;
1069 num_descendants = plist_count(r_child->descendant_list) + nodeset_count(y);
1071 if (num_descendants > 0)
1072 cost /= (float)num_descendants;
1074 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F, #desc %d, cost %.3f\n", child, num_descendants, cost));
1076 if (cost > max_cost) {
1087 * Remove all parents from x which are killed by t_irn.
1089 static void remove_covered_parents(rss_t *rss, nodeset *x, ir_node *t_irn, cbc_t *cbc) {
1090 rss_irn_t *t = get_rss_irn(rss, t_irn);
1093 DBG((rss->dbg, LEVEL_2, "\t\tremoving parents covered by %+F:\n", t_irn));
1095 foreach_pset(cbc->kill_edges, k_edge) {
1096 if (k_edge->tgt == t_irn && nodeset_find(x, k_edge->src)) {
1097 nodeset_remove(x, k_edge->src);
1098 plist_insert_back(t->parent_list, k_edge->src);
1099 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", k_edge->src));
1104 static void update_cumulated_descendent_values(rss_t *rss, nodeset *y, ir_node *t_irn) {
1105 rss_irn_t *t = get_rss_irn(rss, t_irn);
1106 plist_element_t *el;
1108 DBG((rss->dbg, LEVEL_2, "\t\tupdating cumulated descendant value of %+F:\n", t_irn));
1110 foreach_plist(t->descendant_list, el) {
1111 nodeset_insert(y, plist_element_get_value(el));
1112 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", plist_element_get_value(el)));
1117 * Greedy-k: a heuristics for the MMA problem
1119 static void compute_killing_function(rss_t *rss) {
1121 struct obstack obst;
1123 obstack_init(&obst);
1125 rss->cbc_set = pset_new_ptr(5);
1126 compute_bipartite_decomposition(rss);
1128 /* for all bipartite components do: */
1129 foreach_pset(rss->cbc_set, cbc) {
1131 nodeset *x = new_nodeset(10);
1132 nodeset *y = new_nodeset(10);
1133 child_t **sks = NEW_ARR_F(child_t *, 20);
1138 DBG((rss->dbg, LEVEL_1, "\tcomputing SKS for cbc %d:\n", cbc->nr));
1139 DBG((rss->dbg, LEVEL_2, "\t\tinitializing parents X:\n"));
1141 /* X = S_cb (all parents are initially uncovered) */
1142 foreach_nodeset(cbc->parents, p) {
1143 nodeset_insert(x, p);
1144 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", p));
1147 /* while X not empty */
1148 while (nodeset_count(x) > 0) {
1149 child_t *t = obstack_alloc(&obst, sizeof(*t));
1150 memset(t, 0, sizeof(*t));
1152 t = select_child_max_cost(rss, x, y, t, cbc);
1154 if (cur_len >= cur_size) {
1155 ARR_EXTO(child_t *, sks, cur_size * 2);
1159 DBG((rss->dbg, LEVEL_2, "\t\tinsert child %+F (%.3f) into SKS at pos %d\n", t->irn, t->cost, cur_len));
1162 remove_covered_parents(rss, x, t->irn, cbc);
1163 update_cumulated_descendent_values(rss, y, t->irn);
1166 ARR_SHRINKLEN(sks, cur_len);
1168 /* sort SKS in increasing cost order */
1169 qsort(sks, cur_len, sizeof(sks[0]), cmp_child_costs);
1171 DBG((rss->dbg, LEVEL_2, "\tprocessing SKS for cbc %d:\n", cbc->nr));
1173 /* build killing function */
1174 for (i = cur_len - 1; i >= 0; --i) { /* loop over sks in decreasing cost order */
1175 child_t *t = sks[i];
1176 rss_irn_t *rt = get_rss_irn(rss, t->irn);
1177 plist_element_t *p_el;
1179 DBG((rss->dbg, LEVEL_3, "\t\t\tkiller %+F (%.3f):\n", t->irn, t->cost));
1181 /* kill all unkilled parents of t */
1182 foreach_plist(rt->parent_list, p_el) {
1183 ir_node *par = plist_element_get_value(p_el);
1184 rss_irn_t *rpar = get_rss_irn(rss, par);
1186 if (is_Sink(rpar->killer)) {
1187 rpar->killer = t->irn;
1188 DBG((rss->dbg, LEVEL_2, "\t\tkill %+F\n", rpar->irn));
1191 DBG((rss->dbg, LEVEL_3, "\t\t\tkeeping %+F as killer for %+F\n", rpar->killer, rpar->irn));
1201 if (rss->opts->dump_flags & RSS_DUMP_KILL)
1202 debug_vcg_dump_kill(rss);
1204 del_pset(rss->cbc_set);
1205 obstack_free(&obst, NULL);
1209 * Adds the edge src -> tgt to the dvg. Checks if reverse edge is already there (asserts).
1211 static INLINE void add_dvg_edge(rss_t *rss, dvg_t *dvg, ir_node *src, ir_node *tgt, int have_source) {
1212 rss_edge_t *dvg_edge;
1216 nodeset_insert(dvg->nodes, src);
1218 assert(nodeset_find(dvg->nodes, src) != NULL && "Wrong assumption");
1220 nodeset_insert(dvg->nodes, tgt);
1224 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1228 if (NULL != pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1229 /* add the edge to the DVG */
1230 dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1232 dvg_edge->src = src;
1233 dvg_edge->tgt = tgt;
1234 dvg_edge->next = NULL;
1236 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", src, tgt));
1237 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1242 * Computes the disjoint value DAG (DVG).
1243 * BEWARE: It is not made explicitly clear in the Touati paper,
1244 * but the DVG is meant to be build from the KILLING DAG
1246 static void compute_dvg(rss_t *rss, dvg_t *dvg) {
1247 plist_element_t *el;
1249 DBG((rss->dbg, LEVEL_1, "\tcomputing DVG:\n"));
1251 foreach_plist(rss->nodes, el) {
1252 ir_node *u_irn = plist_element_get_value(el);
1253 rss_irn_t *u = get_rss_irn(rss, u_irn);
1254 rss_irn_t *u_killer = get_rss_irn(rss, u->killer);
1257 /* TODO: omit nodes only having sink as pkiller and killing no one */
1259 /* add an edge to killer */
1260 add_dvg_edge(rss, dvg, u_irn, u->killer, 0);
1262 if (is_Sink(u->killer))
1265 /* We add an edge to every killer, from where we could be reached. */
1266 for (i = ARR_LEN_SAFE(u_killer->descendants) - 1; i >= 0; --i) {
1267 add_dvg_edge(rss, dvg, u_irn, u_killer->descendants[i], 1);
1272 foreach_plist(rss->nodes, el2) {
1273 ir_node *v_irn = plist_element_get_value(el2);
1276 There is an edge (u, v) in the DVG iff v is a descendant of the killer(u).
1278 if (BSEARCH_IRN_ARR(v_irn, u_kill->descendants)) {
1279 rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1282 /* insert the user into the DVG and append it to the user list of u */
1283 nodeset_insert(dvg->nodes, v_irn);
1284 if (! plist_has_value(u->dvg_user_list, v_irn))
1285 plist_insert_back(u->dvg_user_list, v_irn);
1287 dvg_edge->src = u_irn;
1288 dvg_edge->tgt = v_irn;
1289 dvg_edge->next = NULL;
1294 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1296 /* add the edge to the DVG */
1297 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", u_irn, v_irn));
1298 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1304 if (rss->opts->dump_flags & RSS_DUMP_DVG)
1305 debug_vcg_dump_dvg(rss, dvg);
1309 * Updates the dvg structure when a serialization edge from src -> tgt is added.
1311 static void update_dvg(rss_t *rss, dvg_t *dvg, rss_irn_t *src, rss_irn_t *tgt) {
1314 rss_edge_t **arr = alloca(pset_count(dvg->edges) * sizeof(arr[0]));
1317 Add an edge from serialization target to serialization src:
1318 src cannot be alive with target
1320 add_dvg_edge(rss, dvg, tgt->irn, src->irn, 0);
1322 /* Add edges to src's descendants as well, they also getting serialized. */
1323 for (i = ARR_LEN_SAFE(src->descendants) - 1; i >= 0; --i) {
1324 add_dvg_edge(rss, dvg, tgt->irn, src->descendants[i], 1);
1327 /* We also have to add edges from targets predecessors in dvg */
1329 /* We cannot insert elements into set over which we iterate ... */
1330 foreach_pset(dvg->edges, edge) {
1331 if (edge->tgt == tgt->irn) {
1336 for (i = 0; i < idx; ++i) {
1338 add_dvg_edge(rss, dvg, edge->src, src->irn, 1);
1339 for (j = ARR_LEN_SAFE(src->descendants) - 1; j >= 0; --j) {
1340 add_dvg_edge(rss, dvg, edge->src, src->descendants[j], 1);
1347 * Accumulate all descendants for root into list.
1349 static void accumulate_dvg_descendant_values(rss_t *rss, rss_irn_t *root, plist_t *list) {
1350 if (plist_count(root->dvg_user_list) > 0) {
1351 plist_element_t *el;
1353 foreach_plist(root->dvg_user_list, el) {
1354 ir_node *v_irn = plist_element_get_value(el);
1355 rss_irn_t *v = get_rss_irn(rss, v_irn);
1357 /* add v as descendant */
1358 if (! plist_has_value(list, v_irn)) {
1359 plist_insert_back(list, v_irn);
1360 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG descendant %+F\n", v_irn));
1363 /* accumulate v's descendants */
1364 accumulate_dvg_descendant_values(rss, v, list);
1370 * Builds the list of potential killers for each node
1372 * Needs the descendant list for all user as sorted array.
1374 static void build_dvg_pkiller_list(rss_t *rss, dvg_t *dvg) {
1377 foreach_nodeset(dvg->nodes, irn) {
1378 rss_irn_t *node = get_rss_irn(rss, irn);
1379 plist_element_t *el, *el2;
1381 DBG((rss->dbg, DEBUG_DVG, "\t\tbuilding pkiller list for %+F\n", irn));
1383 /* check each user */
1384 foreach_plist(node->dvg_user_list, el) {
1385 ir_node *u_irn = plist_element_get_value(el);
1387 /* is the current user u_irn not a descendant of any other user -> pkiller */
1388 foreach_plist(node->dvg_user_list, el2) {
1389 ir_node *v_irn = plist_element_get_value(el2);
1390 rss_irn_t *v = get_rss_irn(rss, v_irn);
1393 ! BSEARCH_IRN_ARR(u_irn, v->dvg_desc) &&
1394 ! plist_has_value(node->dvg_pkiller_list, u_irn))
1396 plist_insert_back(node->dvg_pkiller_list, u_irn);
1397 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG pkiller %+F\n", u_irn));
1402 node->dvg_pkiller = build_sorted_array_from_list(node->dvg_pkiller_list, phase_obst(&rss->ph));
1406 if (firm_dbg_get_mask(rss->dbg) & DEBUG_DVG)
1407 debug_vcg_dump_dvg_pkiller(rss, dvg);
1414 * Compute the maximal antichain of the current DVG.
1415 * This is a reimplementation of the MAXIMAL_ANTI_CHAIN function
1416 * from the DDG library 1.1 (DAG.cpp).
1418 static nodeset *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration) {
1419 int n = nodeset_count(dvg->nodes);
1420 int *assignment = alloca(n * sizeof(assignment[0]));
1421 int *assignment_rev = alloca(n * sizeof(assignment_rev[0]));
1422 int *idx_map = alloca(n * sizeof(idx_map[0]));
1423 hungarian_problem_t *bp;
1424 nodeset *values, *temp;
1426 int i, j, cost, cur_chain, res;
1427 rss_edge_t *dvg_edge;
1429 #define MAP_IDX(irn) bsearch_for_index(get_irn_idx(irn), idx_map, n, 1)
1431 if (pset_count(dvg->edges) == 0)
1434 bp = hungarian_new(n, n, 1, HUNGARIAN_MATCH_NORMAL);
1437 At first, we build an index map for the nodes in the DVG,
1438 because we cannot use the irn idx therefore as the resulting
1439 bipartite data structure would have around 1.2GB.
1440 So we limit the size to the number of nodes we have in the DVG
1441 and build a sorted index map for their irn indices.
1444 foreach_nodeset(dvg->nodes, u_irn) {
1445 idx_map[i++] = get_irn_idx(u_irn);
1447 qsort(idx_map, n, sizeof(idx_map[0]), cmp_int);
1449 foreach_pset(dvg->edges, dvg_edge) {
1450 int idx_u = MAP_IDX(dvg_edge->src);
1451 int idx_v = MAP_IDX(dvg_edge->tgt);
1453 /* add the entry to the bipartite data structure */
1454 hungarian_add(bp, idx_u, idx_v, 1);
1455 DBG((rss->dbg, LEVEL_3, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1456 idx_u, dvg_edge->src, idx_v, dvg_edge->tgt));
1460 Add a bipartite entry for each pair of nodes (u, v), where exists a
1461 path in the DVG from u to v, ie. connect all descendants(v) to v.
1462 desc_dvg(v) = accumulated descendants of all z in dvg_user_list(v)
1464 foreach_nodeset(dvg->nodes, u_irn) {
1465 rss_irn_t *u = get_rss_irn(rss, u_irn);
1466 int idx_u_irn = MAP_IDX(u_irn);
1468 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG descendants of %+F:\n", u_irn));
1470 //plist_clear(u->dvg_desc_list);
1471 //accumulate_dvg_descendant_values(rss, u, u->dvg_desc_list);
1474 FIXME: The array is build on the phase obstack and we cannot free the data.
1475 So the array get as many times allocated as this function is called.
1478 /* build the sorted array for faster searches */
1479 //u->dvg_desc = build_sorted_array_from_list(u->dvg_desc_list, phase_obst(&rss->ph));
1481 DBG((rss->dbg, DEBUG_MAX_AC, "\t\tadding bipartite entries of %+F:\n", u_irn));
1483 /* add the bipartite entries for all descendants */
1484 for (i = ARR_LEN_SAFE(u->dvg_desc) - 1; i >= 0; --i) {
1485 rss_irn_t *desc = get_rss_irn(rss, u->dvg_desc[i]);
1486 int idx_desc = MAP_IDX(u->dvg_desc[i]);
1488 /* add the entry to the bipartite data structure */
1489 hungarian_add(bp, idx_u_irn, idx_desc, 1);
1490 DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1491 idx_u_irn, u_irn, idx_desc, u->dvg_desc[i]));
1498 /* We want maximum cardinality matching */
1499 hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);
1502 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG pkiller:\n"));
1503 /* beware: the following function needs the dvg_desc array */
1504 build_dvg_pkiller_list(rss, dvg);
1507 DBG((rss->dbg, LEVEL_2, "\t\tcomputing bipartite matching\n"));
1509 The maximum cardinality bipartite matching gives us the minimal
1510 chain partition, which corresponds to the maximum anti chains.
1512 res = hungarian_solve(bp, assignment, &cost, 1);
1513 assert(res == 0 && "Bipartite matching failed!");
1516 memset(assignment_rev, -1, n * sizeof(assignment_rev[0]));
1518 /* build the reverse assignment, ie. foreach i -> j, add j -> i */
1519 for (i = 0; i < n; ++i) {
1520 if (assignment[i] >= 0) {
1521 int j = assignment[i];
1522 assignment_rev[j] = i;
1526 DBG((rss->dbg, LEVEL_2, "\t\tgot assignment with cost %d\n", cost));
1527 DBG((rss->dbg, LEVEL_3, "\t\t\tassignment --- reverse assignment\n", cost));
1528 for (i = 0; i < n; ++i) {
1529 DBG((rss->dbg, LEVEL_3, "\t\t\t%3d -> %3d %3d -> %3d\n", i, assignment[i], i, assignment_rev[i]));
1532 values = new_nodeset(10);
1534 /* Construction of the minimal chain partition */
1535 for (j = 0; j < n; ++j) {
1536 /* check nodes, which did not occur as target */
1537 if (assignment_rev[j] == -1) {
1538 int xj = idx_map[j];
1539 ir_node *xj_irn = get_idx_irn(rss->irg, xj);
1540 rss_irn_t *xj_rss = get_rss_irn(rss, xj_irn);
1541 chain_t *c = obstack_alloc(phase_obst(&rss->ph), sizeof(*c));
1544 /* there was no source for j -> we have a source of a new chain */
1545 nodeset_insert(values, xj_irn);
1547 c->elements = plist_obstack_new(phase_obst(&rss->ph));
1548 c->nr = cur_chain++;
1549 plist_insert_back(c->elements, xj_irn);
1553 DBG((rss->dbg, LEVEL_2, "\t\tstarting chain %d:\n", c->nr));
1554 DBG((rss->dbg, LEVEL_2, "\t\t\t%+F (%d)", xj_irn, j));
1556 /* follow chain, having j as source */
1558 while (assignment[source] >= 0) {
1559 int target = assignment[source];
1560 int irn_idx = idx_map[target];
1561 ir_node *irn = get_idx_irn(rss->irg, irn_idx);
1562 rss_irn_t *node = get_rss_irn(rss, irn);
1564 plist_insert_back(c->elements, irn);
1567 DB((rss->dbg, LEVEL_2, " -> %+F (%d)", irn, target));
1569 /* new source = last target */
1573 DB((rss->dbg, LEVEL_2, "\n"));
1578 Computing the maximal antichain: Select an element from each
1579 chain such, such it is parallel with the others.
1581 DBG((rss->dbg, LEVEL_1, "\t\tcomputing set of saturation values (MAX AC)\n"));
1582 DBG((rss->dbg, LEVEL_3, "\t\tstarting with:\n"));
1585 if (firm_dbg_get_mask(rss->dbg) & LEVEL_3)
1586 dump_nodeset(values, "\t\t\t");
1592 We need an explicit array for the values as
1593 we cannot iterate multiple times over the same
1594 set at the same time. :-(((((
1596 int n = nodeset_count(values);
1598 ir_node **val_arr = NEW_ARR_F(ir_node *, n);
1600 foreach_nodeset(values, u_irn)
1601 val_arr[i++] = u_irn;
1606 temp = new_nodeset(10);
1608 /* Select all nodes from current value set, having another node in the set as descendant. */
1609 for (i = 0; i < n; ++i) {
1610 rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1612 for (j = 0; j < n; ++j) {
1616 key.src = val_arr[i];
1617 key.tgt = val_arr[j];
1619 if (pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1620 /* v[j] is descendant of u -> remove u and break */
1621 nodeset_insert(temp, u->irn);
1622 nodeset_remove(values, u->irn);
1624 DBG((rss->dbg, LEVEL_3, "\t\t\tremoving %+F from values, adding it to temp\n", u->irn));
1632 /* Try to insert the chain predecessor of all selected u's */
1633 foreach_nodeset(temp, u_irn) {
1634 rss_irn_t *u = get_rss_irn(rss, u_irn);
1635 chain_t *c = u->chain;
1636 plist_element_t *el = plist_find_value(c->elements, u_irn);
1638 assert(el && "Missing element in chain!");
1640 /* If u has predecessor in chain: insert the predecessor */
1641 if (el == plist_element_get_prev(el)) {
1642 nodeset_insert(values, plist_element_get_value(el));
1643 DBG((rss->dbg, LEVEL_3, "\t\t\tadding %+F to values\n", plist_element_get_value(el)));
1649 } while (nodeset_count(temp) > 0);
1651 DBG((rss->dbg, LEVEL_2, "\t\tfinal set:\n"));
1653 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2) {
1654 dump_nodeset(values, "\t\t\t");
1658 if (rss->opts->dump_flags & RSS_DUMP_MAXAC)
1659 debug_vcg_dump_pkg(rss, values, iteration);
1669 * Computes the best serialization between two nodes of sat_vals.
1671 static serialization_t *compute_best_admissible_serialization(rss_t *rss, nodeset *sat_vals, serialization_t *ser, int num_regs) {
1672 int n = nodeset_count(sat_vals);
1673 int n_idx = ARR_LEN_SAFE(rss->idx_map);
1675 ir_node **val_arr = alloca(n * sizeof(val_arr[0]));
1676 bitset_t *bs_sv = bitset_alloca(n_idx);
1677 bitset_t *bs_vdesc = bitset_alloca(n_idx);
1678 bitset_t *bs_tmp = bitset_alloca(n_idx);
1679 bitset_t *bs_ukilldesc = bitset_alloca(n_idx);
1680 int best_benefit = INT_MAX;
1681 int best_omega2 = INT_MAX;
1682 int best_benefit_omega20 = INT_MAX;
1686 rss_edge_t min_benefit_edge;
1687 rss_edge_t min_omega20_edge;
1688 rss_irn_t *ser_u_omega1 = NULL, *ser_v_omega1 = NULL;
1689 rss_irn_t *ser_u_omega20 = NULL, *ser_v_omega20 = NULL;
1691 DBG((rss->dbg, LEVEL_1, "\tcomputing admissible serializations:\n"));
1694 We need an explicit array for the values as
1695 we cannot iterate multiple times over the same
1696 set at the same time. :-(((((
1699 foreach_nodeset(sat_vals, irn) {
1701 bitset_set(bs_sv, BLOCK_IDX_MAP(rss, irn));
1705 We build all admissible serializations and remember the best found so far.
1708 if v in pkiller(u): add edge from v to all other pkiller(u)
1709 else: for all vv in pkiller(u): add edge from v to vv if there exists no path from vv to v
1713 A node is unserializable if:
1714 - it has only one killer and this one is Sink
1715 - it kills no other values
1716 In this case there is no serialization which could
1717 reduce the registerpressure
1719 #define IS_UNSERIALIZABLE_NODE(rss_node) \
1722 (plist_count(rss_node->pkiller_list) == 1) && \
1723 is_Sink(rss_node->killer) && \
1724 (rss_node->kill_count == 0) \
1726 be_is_Barrier(rss_node->irn) || \
1727 be_is_Keep(rss_node->irn) \
1730 /* for all u in sat_vals */
1731 for (i = 0; i < n; ++i) {
1732 rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1733 plist_element_t *el;
1735 /* ignore nodes where serialization does not help */
1736 if (IS_UNSERIALIZABLE_NODE(u)) {
1737 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", u->irn));
1741 /* accumulate all descendants of all pkiller(u) */
1742 bitset_clear_all(bs_ukilldesc);
1743 foreach_plist(u->pkiller_list, el) {
1744 ir_node *irn = plist_element_get_value(el);
1745 rss_irn_t *node = get_rss_irn(rss, irn);
1748 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, irn));
1752 for (k = ARR_LEN_SAFE(node->descendants) - 1; k >= 0; --k) {
1753 if (! is_Sink(node->descendants[k]))
1754 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, node->descendants[k]));
1758 /* for all v in sat_vals */
1759 for (j = 0; j < n; ++j) {
1760 ir_node *v_irn = val_arr[j];
1761 rss_irn_t *v = get_rss_irn(rss, v_irn);
1762 unsigned v_height = get_irn_height(rss->h, v_irn);
1763 int omega1, omega2, is_pkiller;
1765 /* v cannot be serialized with itself
1766 * ignore nodes where serialization does not help */
1767 if (i == j || IS_UNSERIALIZABLE_NODE(v)) {
1769 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", v->irn));
1773 /* get descendants of v */
1774 bitset_clear_all(bs_vdesc);
1775 bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v_irn));
1776 for (k = ARR_LEN_SAFE(v->descendants) - 1; k >= 0; --k) {
1777 if (! is_Sink(v->descendants[k]))
1778 bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v->descendants[k]));
1781 /* if v is in pkiller(u) */
1782 is_pkiller = plist_has_value(u->pkiller_list, v_irn);
1784 /* for all vv in pkiller(u) */
1785 foreach_plist(u->pkiller_list, el) {
1786 ir_node *vv_irn = plist_element_get_value(el);
1789 if (is_Sink(vv_irn) || is_cfop(vv_irn))
1793 add_edge = vv_irn != v_irn && skip_Proj(vv_irn) != skip_Proj(v_irn);
1795 add_edge = ! heights_reachable_in_block(rss->h, skip_Proj(vv_irn), skip_Proj(v_irn));
1798 As we add an edge from vv -> v, we have to make sure,
1799 that there exists no path from v to vv.
1803 int vv_height = get_irn_height(rss->h, vv_irn);
1804 int mu1, mu2, critical_path_cost;
1807 mu1 = | descendants(v) cut sat_vals |
1808 the number of saturating values which cannot
1809 be simultaneously alive with u
1811 bitset_copy(bs_tmp, bs_vdesc);
1812 mu1 = bitset_popcnt(bitset_and(bs_tmp, bs_sv));
1815 mu2 = | accum_desc_all_pkiller(u) without descendants(v) |
1818 bitset_copy(bs_tmp, bs_ukilldesc);
1819 mu2 = bitset_popcnt(bitset_andnot(bs_tmp, bs_vdesc));
1825 /* omega1 = mu1 - mu2 */
1831 /* omega2 = increase of critical path */
1832 critical_path_cost =
1833 v_height /* longest path from v to sink */
1834 + rss->max_height - vv_height /* longest path from source to vv */
1838 If critical_path_cost > max_height -> the new edge
1839 would increase the longest critical path by the difference.
1841 omega2 = critical_path_cost > rss->max_height ? critical_path_cost - rss->max_height : 0;
1843 /* this keeps track of the edge with the best benefit */
1844 if (omega1 >= num_regs - n && omega1 < best_benefit) {
1845 min_benefit_edge.src = v_irn;
1846 min_benefit_edge.tgt = vv_irn;
1851 best_benefit = omega1;
1852 ser->new_killer = is_pkiller;
1855 /* this keeps track of the edge with the best omega1 costs where omega2 == 0 */
1856 if (omega2 == 0 && omega1 >= num_regs - n && omega1 < best_benefit_omega20) {
1857 min_omega20_edge.src = v_irn;
1858 min_omega20_edge.tgt = vv_irn;
1863 best_benefit_omega20 = omega1;
1864 ser->new_killer = is_pkiller;
1867 best_omega2 = MIN(best_omega2, omega2);
1869 DBG((rss->dbg, LEVEL_2, "\t\tfound %+F -> %+F (w1 %d, w2 %d)\n",
1870 v_irn, vv_irn, omega1, omega2));
1872 } /* for all vv in pkiller(u) */
1873 } /* for all v in sat_vals */
1874 } /* for all u in sat_vals */
1879 if (best_omega2 == 0) {
1880 ser->u = ser_u_omega20;
1881 ser->v = ser_v_omega20;
1882 ser->edge->src = min_omega20_edge.src;
1883 ser->edge->tgt = min_omega20_edge.tgt;
1884 ser->omega1 = best_benefit_omega20;
1885 ser->omega2 = best_omega2;
1888 ser->u = ser_u_omega1;
1889 ser->v = ser_v_omega1;
1890 ser->edge->src = min_benefit_edge.src;
1891 ser->edge->tgt = min_benefit_edge.tgt;
1892 ser->omega1 = best_benefit;
1893 ser->omega2 = best_omega2;
1898 #undef IS_UNSERIALIZABLE_NODE
1902 * Perform the value serialization heuristic and add all
1903 * computed serialization edges as dependencies to the irg.
1905 static void perform_value_serialization_heuristic(rss_t *rss) {
1906 bitset_t *arch_nonign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1907 bitset_t *abi_ign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1908 int available_regs, iteration;
1911 pset *ser_set = new_pset(cmp_rss_edges, 20);
1913 /* available_regs = R = |arch_non_ignore_regs cut ~abi_ignore_regs| */
1914 arch_put_non_ignore_regs(rss->arch_env, rss->cls, arch_nonign_bs);
1915 be_abi_put_ignore_regs(rss->abi, rss->cls, abi_ign_bs);
1916 bitset_andnot(arch_nonign_bs, abi_ign_bs);
1917 available_regs = bitset_popcnt(arch_nonign_bs);
1918 //num_live = pset_count(rss->live_block);
1919 //available_regs -= num_live < available_regs ? num_live : 0;
1921 DBG((rss->dbg, LEVEL_1, "\n\t#available regs: %d\n\n", available_regs));
1924 At first we need to compute the disjoint value DAG (DVG = {V, E_dv}).
1925 V = set of all nodes we are currently interested in
1926 E_dv = there is an edge from u to v iff v is a descendant of killer(u), forall u, v in V
1928 dvg.nodes = new_nodeset(plist_count(rss->nodes));
1929 dvg.edges = new_pset(cmp_rss_edges, plist_count(rss->nodes) * 5);
1930 compute_dvg(rss, &dvg);
1933 Then we perform the heuristic serialization algorithm
1934 on the DVG which gives us all necessary serialization
1937 DBG((rss->dbg, LEVEL_1, "\tcomputing maximal antichain:\n"));
1939 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
1940 while (sat_vals && (nodeset_count(sat_vals) > available_regs)) {
1941 serialization_t *ser, best_ser;
1942 rss_edge_t *edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*edge));
1943 ir_node *dep_src, *dep_tgt;
1945 best_ser.edge = edge;
1946 ser = compute_best_admissible_serialization(rss, sat_vals, &best_ser, available_regs);
1948 DBG((rss->dbg, LEVEL_1, "\tcurrent register saturation %d, target %d\n", nodeset_count(sat_vals), available_regs));
1951 DBG((rss->dbg, LEVEL_1, "\tno RS improving serialization found, breaking at iteration %d\n", iteration));
1955 /* Insert the serialization as dependency edge into the irg. */
1956 DBG((rss->dbg, LEVEL_1, "\tserializing %+F -> %+F with edge %+F -> %+F and cost %d, %d\n",
1957 ser->u->irn, ser->v->irn, ser->edge->src, ser->edge->tgt, ser->omega1, ser->omega2));
1959 if (pset_find(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge)))
1960 ir_printf("WARNING: serialization %+F -> %+F computed twice!\n", ser->edge->src, ser->edge->tgt);
1963 pset_insert(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge));
1965 /* update the dvg */
1966 update_dvg(rss, &dvg, ser->v, ser->u);
1967 update_dvg(rss, &dvg, get_rss_irn(rss, ser->edge->src), get_rss_irn(rss, ser->edge->tgt));
1968 del_nodeset(sat_vals);
1970 dep_src = skip_Proj(ser->edge->src);
1971 dep_tgt = ser->edge->tgt;
1972 add_irn_dep(dep_src, dep_tgt);
1974 /* Update descendants, consumer and pkillers of the target */
1975 update_node_info(rss, ser->edge->tgt, ser->edge->src);
1977 /* TODO: try to find a cheaper way for updating height information */
1978 rss->max_height = heights_recompute_block(rss->h, rss->block);
1980 /* Recompute the antichain for next serialization */
1981 DBG((rss->dbg, LEVEL_1, "\tre-computing maximal antichain:\n"));
1982 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
1985 del_nodeset(dvg.nodes);
1986 del_pset(dvg.edges);
1990 * Do initial calculations for a block.
1992 static void process_block(ir_node *block, void *env) {
1995 const ir_edge_t *edge;
1997 phase_init(&rss->ph, "rss block preprocessor", rss->irg, PHASE_DEFAULT_GROWTH, init_rss_irn);
1999 DBG((rss->dbg, LEVEL_1, "preprocessing block %+F\n", block));
2002 /* build an index map for all nodes in the current block */
2004 n = get_irn_n_edges(block);
2005 NEW_ARR_A(int *, rss->idx_map, n);
2006 foreach_out_edge(block, edge) {
2007 ir_node *irn = get_edge_src_irn(edge);
2008 rss->idx_map[i++] = get_irn_idx(irn);
2010 qsort(rss->idx_map, n, sizeof(rss->idx_map[0]), cmp_int);
2011 rss->max_height = heights_recompute_block(rss->h, block);
2013 /* loop over all register classes */
2014 for (i = arch_isa_get_n_reg_class(rss->arch_env->isa) - 1; i >= 0; --i) {
2015 const arch_register_class_t *cls = arch_isa_get_reg_class(rss->arch_env->isa, i);
2018 DBG((rss->dbg, LEVEL_1, "register class %s\n", arch_register_class_name(cls)));
2020 /* Get all live value at end of Block having current register class */
2021 rss->live_block = pset_new_ptr(10);
2022 be_liveness_end_of_block(rss->liveness, rss->arch_env, rss->cls, rss->block, rss->live_block);
2024 /* reset the list of interesting nodes */
2025 plist_clear(rss->nodes);
2026 plist_insert_back(rss->nodes, _sink);
2028 /* collect all nodes having a certain register class */
2029 foreach_out_edge(block, edge) {
2030 ir_node *irn = get_edge_src_irn(edge);
2031 ir_mode *mode = get_irn_mode(irn);
2035 - mode_T nodes (the projs are asked)
2036 - mode_X nodes (control flow nodes are always scheduled last)
2037 - Keeps (they are always immediately scheduled)
2038 - Phi (same as Keep)
2040 if (mode == mode_T || mode == mode_X || is_Phi(irn))
2044 In case of a proj, we skip
2045 - Barrier (they are a Barrier :)
2047 - the Proj itself, as it's scheduled always with it's super node
2050 ir_node *pred = get_Proj_pred(irn);
2051 if (be_is_Barrier(pred) || is_Start(pred))
2055 /* calculate the descendants and consumer for each node in the block */
2056 collect_node_info(rss, skip_Proj(irn));
2058 if (be_is_Keep(irn))
2061 if (! arch_irn_is(rss->arch_env, irn, ignore) && arch_get_irn_reg_class(rss->arch_env, irn, -1) == cls) {
2062 plist_insert_back(rss->nodes, skip_Proj(irn));
2067 /* compute the potential killing set PK(G) */
2068 compute_pkill_set(rss);
2070 /* compute the killing function k* */
2071 compute_killing_function(rss);
2074 Compute the heuristic value serialization and
2075 add the necessary dependencies to the irg.
2077 perform_value_serialization_heuristic(rss);
2079 del_pset(rss->live_block);
2082 phase_free(&rss->ph);
2087 * Register the options.
2089 void be_init_schedrss(void) {
2090 lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
2091 lc_opt_entry_t *sched_grp = lc_opt_get_grp(be_grp, "sched");
2092 lc_opt_entry_t *rss_grp = lc_opt_get_grp(sched_grp, "rss");
2094 lc_opt_add_table(rss_grp, rss_option_table);
2097 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_schedrss);
2098 #endif /* WITH_LIBCORE */
2101 * Preprocess the irg for scheduling.
2103 void rss_schedule_preparation(const be_irg_t *birg) {
2106 FIRM_DBG_REGISTER(rss.dbg, "firm.be.sched.rss");
2108 //firm_dbg_set_mask(rss.dbg, LEVEL_1 | LEVEL_2 | LEVEL_3);
2110 init_rss_special_nodes(birg->irg);
2112 rss.irg = birg->irg;
2113 rss.arch_env = birg->main_env->arch_env;
2114 rss.abi = birg->abi;
2115 rss.h = heights_new(birg->irg);
2116 rss.nodes = plist_new();
2117 rss.opts = &rss_options;
2118 rss.liveness = be_liveness(birg->irg);
2119 irg_block_walk_graph(birg->irg, NULL, process_block, &rss);
2120 heights_free(rss.h);
2121 plist_free(rss.nodes);
2122 be_liveness_free(rss.liveness);
2124 if (birg->main_env->options->dump_flags & DUMP_SCHED)
2125 be_dump(rss.irg, "-rss", dump_ir_block_graph);