2 * Implementation of a register saturating list scheduler
3 * as described in: Sid-Ahmed-Ali Touati
4 * Register Saturation in Superscalar and VLIW Codes
6 * @license This file protected by GPL - GNU GENERAL PUBLIC LICENSE.
7 * @author Christian Wuerdig
20 #include "irgraph_t.h"
22 #include "iredges_t.h"
24 #include "irphase_t.h"
30 #include "bipartite.h"
31 #include "hungarian.h"
38 #include "besched_t.h"
41 #include <libcore/lc_opts.h>
42 #include <libcore/lc_opts_enum.h>
43 #endif /* WITH_LIBCORE */
46 #define ARR_LEN_SAFE(arr) ((arr) != NULL ? ARR_LEN((arr)) : 0)
48 #define HASH_RSS_EDGE(edge) ((get_irn_node_nr((edge)->src) << 16) | (get_irn_node_nr((edge)->tgt) & 0xFFFF))
49 #define BSEARCH_IRN_ARR(val, arr) \
50 bsearch(&(val), (arr), ARR_LEN_SAFE((arr)), sizeof((arr)[0]), cmp_irn_idx)
52 #define BLOCK_IDX_MAP(rss, irn) bsearch_for_index(get_irn_idx((irn)), (rss)->idx_map, ARR_LEN_SAFE((rss)->idx_map), 1)
55 typedef struct _rss_opts_t {
59 /* Represents a child with associated costs */
60 typedef struct _child {
65 /* We need edges for several purposes. */
66 typedef struct _rss_edge {
72 /* Represents a connected bipartite component. */
74 nodeset *parents; /**< = S a set of value producers */
75 nodeset *children; /**< = T a set of value consumers */
76 pset *kill_edges; /**< = E a set of edges (t in T, s in S) such as each s in S gets killed by at least one t in T */
77 int nr; /**< a deterministic index for set insertion (used as hash) */
80 /* Represents a disjoint value DAG. */
86 /* Represents a chain of nodes. */
87 typedef struct _chain {
88 plist_t *elements; /**< List of chain elements */
89 int nr; /**< a deterministic index for set insertion (used as hash) */
92 typedef struct _rss_irn {
93 plist_t *consumer_list; /**< List of consumers */
94 ir_node **consumer; /**< Sorted consumer array (needed for faster access) */
96 plist_t *parent_list; /**< List of parents */
97 plist_t *pkiller_list; /**< List of potential killers */
99 plist_t *descendant_list; /**< List of descendants */
100 ir_node **descendants; /**< Sorted descendant array (needed for faster access) */
102 ir_node *killer; /**< The selected unique killer */
103 ir_node *irn; /**< The corresponding firm node to this rss_irn */
105 chain_t *chain; /**< The chain, this node is associated to */
107 unsigned desc_walk; /**< visited flag for collecting descendants */
108 unsigned kill_count; /**< number of nodes killed by this one */
110 unsigned live_out : 1; /**< irn has consumers outside of it's block */
111 unsigned visited : 1; /**< visited flag for bipartite decomposition */
112 unsigned havecons : 1; /**< visited flag collect consumer */
113 unsigned handled : 1; /**< flag indicating whether or not the list structures have been build */
114 unsigned dumped : 1; /**< flag indication whether or not this node was dumped */
117 /* Represents a serialization edge with associated costs. */
118 typedef struct _serialization {
119 rss_irn_t *u; /* the top node */
120 rss_irn_t *v; /* the node about to be serialized after u */
121 rss_edge_t *edge; /* the edge selected for the serialization */
122 int omega1; /* estimated: available regs - RS reduction */
123 int omega2; /* increase in critical path length */
127 typedef struct _rss {
128 phase_t ph; /**< Phase to hold some data */
129 heights_t *h; /**< The current height object */
130 ir_graph *irg; /**< The irg to preprocess */
131 plist_t *nodes; /**< The list of interesting nodes */
132 const arch_env_t *arch_env; /**< The architecture environment */
133 be_abi_irg_t *abi; /**< The abi for this irg */
134 pset *cbc_set; /**< A set of connected bipartite components */
135 ir_node *block; /**< The current block in progress. */
136 int *idx_map; /**< Mapping irn indices to per block indices */
137 unsigned max_height; /**< maximum height in the current block */
138 rss_opts_t *opts; /**< The options */
139 be_lv_t *liveness; /**< The liveness information for this irg */
140 pset *live_block; /**< Values alive at end of block */
141 const arch_register_class_t *cls; /**< The current register class */
142 DEBUG_ONLY(firm_dbg_module_t *dbg);
145 #define get_rss_irn(rss, irn) (phase_get_or_set_irn_data(&rss->ph, irn))
148 * We need some special nodes:
149 * a source and a sink for all live-in and live-out values of a block
158 static ir_node *_source = NULL;
159 static ir_node *_sink = NULL;
161 #define is_Source(irn) ((irn) == _source)
162 #define is_Sink(irn) ((irn) == _sink)
166 RSS_DUMP_CBC = 1 << 0,
167 RSS_DUMP_PKG = 1 << 1,
168 RSS_DUMP_KILL = 1 << 2,
169 RSS_DUMP_DVG = 1 << 3,
170 RSS_DUMP_MAXAC = 1 << 4,
171 RSS_DUMP_ALL = (RSS_DUMP_MAXAC << 1) - 1,
174 static rss_opts_t rss_options = {
179 static const lc_opt_enum_int_items_t dump_items[] = {
180 { "none", RSS_DUMP_NONE },
181 { "cbc", RSS_DUMP_CBC },
182 { "pkg", RSS_DUMP_PKG },
183 { "kill", RSS_DUMP_KILL },
184 { "dvg", RSS_DUMP_DVG },
185 { "maxac", RSS_DUMP_MAXAC },
186 { "all", RSS_DUMP_ALL },
190 static lc_opt_enum_int_var_t dump_var = {
191 &rss_options.dump_flags, dump_items
194 static const lc_opt_table_entry_t rss_option_table[] = {
195 LC_OPT_ENT_ENUM_MASK("dump", "dump phases", &dump_var),
198 #endif /* WITH_LIBCORE */
200 /******************************************************************************
202 * | | | | / _| | | (_)
203 * | |__ ___| |_ __ ___ _ __ | |_ _ _ _ __ ___| |_ _ ___ _ __ ___
204 * | '_ \ / _ \ | '_ \ / _ \ '__| | _| | | | '_ \ / __| __| |/ _ \| '_ \/ __|
205 * | | | | __/ | |_) | __/ | | | | |_| | | | | (__| |_| | (_) | | | \__ \
206 * |_| |_|\___|_| .__/ \___|_| |_| \__,_|_| |_|\___|\__|_|\___/|_| |_|___/
209 ******************************************************************************/
212 * Acquire opcodes and create source and sink nodes.
214 static void init_rss_special_nodes(ir_graph *irg) {
215 ir_node *block = get_irg_start_block(irg);
216 int iro_rss_base = get_next_ir_opcodes(iro_rss_last);
217 ir_op *op_rss_Source = new_ir_op(iro_rss_base + iro_rss_Source, "rss_Source", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
218 ir_op *op_rss_Sink = new_ir_op(iro_rss_base + iro_rss_Sink, "rss_Sink", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
219 _source = new_ir_node(NULL, irg, block, op_rss_Source, mode_ANY, 0, NULL);
220 _sink = new_ir_node(NULL, irg, block, op_rss_Sink, mode_ANY, 0, NULL);
223 static int cmp_int(const void *a, const void *b) {
227 return QSORT_CMP(*i1, *i2);
230 static int cmp_child_costs(const void *a, const void *b) {
231 const child_t *c1 = a;
232 const child_t *c2 = b;
234 return QSORT_CMP(c1->cost, c2->cost);
237 static int cmp_irn_idx(const void *a, const void *b) {
238 const ir_node *n1 = *(ir_node **)a;
239 const ir_node *n2 = *(ir_node **)b;
241 return QSORT_CMP(get_irn_idx(n1), get_irn_idx(n2));
244 static int cmp_rss_edges(const void *a, const void *b) {
245 const rss_edge_t *e1 = a;
246 const rss_edge_t *e2 = b;
248 return (e1->src != e2->src) || (e1->tgt != e2->tgt);
251 static int bsearch_for_index(int key, int *arr, size_t len, int force) {
255 while (right >= left) {
256 int idx = (left + right) / 2;
260 else if (key > arr[idx])
267 assert(0 && "Something is wrong, key not found.");
271 static ir_node **build_sorted_array_from_list(plist_t *irn_list, struct obstack *obst) {
274 int len = plist_count(irn_list);
275 ir_node **arr = NEW_ARR_D(ir_node *, obst, len);
277 /* copy the list into the array */
278 foreach_plist(irn_list, el) {
279 arr[i++] = plist_element_get_value(el);
282 /* sort the array by node index */
283 qsort(arr, len, sizeof(arr[0]), cmp_irn_idx);
288 /*****************************************************
291 * __| | ___| |__ _ _ __ _ __ _ _ _ __ __ _
292 * / _` |/ _ \ '_ \| | | |/ _` |/ _` | | '_ \ / _` |
293 * | (_| | __/ |_) | |_| | (_| | (_| | | | | | (_| |
294 * \__,_|\___|_.__/ \__,_|\__, |\__, |_|_| |_|\__, |
298 *****************************************************/
300 static void dump_nodeset(nodeset *ns, const char *prefix) {
302 foreach_nodeset(ns, irn) {
303 ir_fprintf(stderr, "%s%+F\n", prefix, irn);
307 static void build_file_name(rss_t *rss, const char *suffix, size_t suf_len, char *buf, size_t len) {
308 const char *irg_name;
311 irg_name = get_entity_name(get_irg_entity(rss->irg));
312 snprintf(buf, len - suf_len, "%s-%s-block-%ld",
313 irg_name, arch_register_class_name(rss->cls), get_irn_node_nr(rss->block));
317 /* Dumps all collected bipartite components of current irg as vcg. */
318 static void debug_vcg_dump_bipartite(rss_t *rss) {
322 static const char suffix[] = "-RSS-CBC.vcg";
324 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
325 f = fopen(file_name, "w");
327 ir_fprintf(f, "graph: { title: \"connected bipartite component graph of %+F\"\n", rss->irg);
328 fprintf(f, "display_edge_labels: no\n");
329 fprintf(f, "layoutalgorithm: mindepth\n");
330 fprintf(f, "manhattan_edges: yes\n\n");
332 foreach_pset(rss->cbc_set, cbc) {
336 fprintf(f, "graph: { titel: \"cbc %d\" label: \"cbc %d\" status:clustered color:yellow\n", cbc->nr, cbc->nr);
337 foreach_nodeset(cbc->parents, n) {
338 ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
340 foreach_nodeset(cbc->children, n) {
341 ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
343 foreach_pset(cbc->kill_edges, ke) {
344 ir_fprintf(f, "edge: { sourcename: \"n%d_%d\" targetname: \"n%d_%d\" }\n",
345 get_irn_node_nr(ke->src), cbc->nr, get_irn_node_nr(ke->tgt), cbc->nr);
353 /* Dump the computed killing function as vcg. */
354 static void debug_vcg_dump_kill(rss_t *rss) {
358 static const char suffix[] = "-RSS-KILL.vcg";
360 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
361 f = fopen(file_name, "w");
363 ir_fprintf(f, "graph: { title: \"computed kill graph of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
364 fprintf(f, "display_edge_labels: no\n");
365 fprintf(f, "layoutalgorithm: mindepth\n");
366 fprintf(f, "manhattan_edges: yes\n\n");
368 /* first: reset dumped flag of all nodes */
369 foreach_plist(rss->nodes, el) {
370 ir_node *irn = plist_element_get_value(el);
371 rss_irn_t *rirn = get_rss_irn(rss, irn);
375 /* dump all nodes and their killers */
376 foreach_plist(rss->nodes, el) {
377 ir_node *irn = plist_element_get_value(el);
378 rss_irn_t *rirn = get_rss_irn(rss, irn);
379 rss_irn_t *pk_rirn = get_rss_irn(rss, rirn->killer);
381 if (! rirn->dumped) {
382 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
386 if (! pk_rirn->dumped) {
387 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(rirn->killer), rirn->killer);
391 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
392 get_irn_node_nr(rirn->killer), get_irn_node_nr(irn));
399 /* Dumps the potential killing DAG (PKG) as vcg. */
400 static void debug_vcg_dump_pkg(rss_t *rss, nodeset *max_ac, int iteration) {
403 char *suffix = alloca(32);
404 static const char suffix1[] = "-RSS-PKG.vcg";
405 static const char suffix2[] = "-RSS-PKG-MAXAC.vcg";
409 snprintf(suffix, 32, "%s", suffix1);
412 snprintf(suffix, 32, "-%02d%s", iteration, suffix2);
415 build_file_name(rss, suffix, strlen(suffix) + 1, file_name, sizeof(file_name));
416 f = fopen(file_name, "w");
418 ir_fprintf(f, "graph: { title: \"potential killing DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
419 fprintf(f, "display_edge_labels: no\n");
420 fprintf(f, "layoutalgorithm: mindepth\n");
421 fprintf(f, "manhattan_edges: yes\n\n");
423 foreach_plist(rss->nodes, el) {
424 ir_node *irn = plist_element_get_value(el);
425 rss_irn_t *rirn = get_rss_irn(rss, irn);
427 plist_element_t *k_el;
429 /* dump selected saturating values in yellow */
430 if (max_ac && nodeset_find(max_ac, irn))
434 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F c%d\" %s }\n", get_irn_node_nr(irn), irn, rirn->chain->nr, c1);
436 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(irn), irn, c1);
439 foreach_plist(rirn->pkiller_list, k_el) {
440 ir_node *pkiller = plist_element_get_value(k_el);
441 rss_irn_t *pk_rirn = get_rss_irn(rss, pkiller);
444 if (max_ac && nodeset_find(max_ac, pkiller))
447 if (! pk_rirn->dumped) {
449 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F c%d\" %s }\n", get_irn_node_nr(pkiller), pkiller, pk_rirn->chain->nr, c2);
451 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(pkiller), pkiller, c2);
454 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
455 get_irn_node_nr(pkiller), get_irn_node_nr(irn));
462 /* Dumps the disjoint value DAG (DVG) as vcg. */
463 static void debug_vcg_dump_dvg(rss_t *rss, dvg_t *dvg) {
464 static const char suffix[] = "-RSS-DVG.vcg";
470 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
471 f = fopen(file_name, "w");
473 ir_fprintf(f, "graph: { title: \"disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
474 fprintf(f, "display_edge_labels: no\n");
475 fprintf(f, "layoutalgorithm: mindepth\n");
476 fprintf(f, "manhattan_edges: yes\n\n");
479 foreach_nodeset(dvg->nodes, irn) {
480 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
484 foreach_pset(dvg->edges, edge) {
485 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
486 get_irn_node_nr(edge->src), get_irn_node_nr(edge->tgt));
494 /* Dumps the PKG(DVG). */
495 static void debug_vcg_dump_dvg_pkiller(rss_t *rss, dvg_t *dvg) {
496 static const char suffix[] = "-RSS-DVG-PKG.vcg";
501 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
502 f = fopen(file_name, "w");
504 ir_fprintf(f, "graph: { title: \"PKG of disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
505 fprintf(f, "display_edge_labels: no\n");
506 fprintf(f, "layoutalgorithm: mindepth\n");
507 fprintf(f, "manhattan_edges: yes\n\n");
510 foreach_nodeset(dvg->nodes, irn) {
511 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
515 foreach_nodeset(dvg->nodes, irn) {
516 rss_irn_t *node = get_rss_irn(rss, irn);
519 foreach_plist(node->dvg_pkiller_list, el) {
520 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
521 get_irn_node_nr(plist_element_get_value(el)), get_irn_node_nr(irn));
531 * In case there is no rss information for irn, initialize it.
533 static void *init_rss_irn(phase_t *ph, ir_node *irn, void *old) {
534 rss_irn_t *res = old ? old : phase_alloc(ph, sizeof(res[0]));
536 res->descendant_list = plist_obstack_new(phase_obst(ph));
537 res->descendants = NULL;
539 res->consumer_list = plist_obstack_new(phase_obst(ph));
540 res->consumer = NULL;
542 res->pkiller_list = plist_obstack_new(phase_obst(ph));
544 res->parent_list = plist_obstack_new(phase_obst(ph));
562 * Collect all nodes data dependent on current node.
564 static void collect_descendants(rss_t *rss, rss_irn_t *rirn, ir_node *irn, int *got_sink, unsigned cur_desc_walk) {
565 const ir_edge_t *edge;
566 rss_irn_t *cur_node = get_rss_irn(rss, irn);
567 ir_node *block = rss->block;
568 ir_edge_kind_t ekind[2] = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
571 if (cur_node->desc_walk >= cur_desc_walk)
573 cur_node->desc_walk = cur_desc_walk;
575 /* Stop at Barriers */
576 if (be_is_Barrier(irn))
579 /* loop over normal and dependency edges */
580 for (i = 0; i < 2; ++i) {
581 foreach_out_edge_kind(irn, edge, ekind[i]) {
582 ir_node *user = get_edge_src_irn(edge);
584 /* skip ignore nodes as they do not really contribute to register pressure */
585 if (arch_irn_is(rss->arch_env, user, ignore))
588 if (get_irn_mode(user) == mode_X) {
597 //if (arch_get_irn_reg_class(rss->arch_env, user, -1) == rss->cls)
598 collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
601 /* check if user lives in block and is not a control flow node */
602 if (get_nodes_block(user) == block) {
603 if (! plist_has_value(rirn->descendant_list, user)) {
604 plist_insert_back(rirn->descendant_list, user);
605 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", user));
607 collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
609 else if (! *got_sink) {
611 /* user lives out of block: add sink as descendant if not already done */
612 plist_insert_back(rirn->descendant_list, _sink);
614 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", _sink));
622 * Handles a single consumer.
624 static void collect_single_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *consumer, int *got_sink) {
625 ir_node *block = rss->block;
627 assert(! is_Proj(consumer) && "Cannot handle Projs");
629 if (! is_Block(consumer) && get_nodes_block(consumer) == block) {
630 if (! arch_irn_is(rss->arch_env, consumer, ignore) && ! plist_has_value(rss_irn->consumer_list, consumer)) {
631 plist_insert_back(rss_irn->consumer_list, consumer);
632 DBG((rss->dbg, LEVEL_2, "\t\tconsumer %+F\n", consumer));
636 rss_irn->live_out = 1;
637 DBG((rss->dbg, LEVEL_2, "\t\tlive out %+F", consumer));
639 plist_insert_back(rss_irn->consumer_list, _sink);
641 DB((rss->dbg, LEVEL_2, ", %+F added instead", _sink));
643 DB((rss->dbg, LEVEL_2, "\n"));
648 * Collect all nodes consuming the value(s) produced by current node.
650 static void collect_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *irn, int *got_sink) {
651 const ir_edge_t *edge;
653 ir_edge_kind_t ekind[2] = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
654 rss_irn_t *cur_node = get_rss_irn(rss, irn);
656 if (cur_node->havecons)
658 cur_node->havecons = 1;
660 for (i = 0; i < 2; ++i) {
661 foreach_out_edge_kind(irn, edge, ekind[i]) {
662 ir_node *consumer = get_edge_src_irn(edge);
664 if (is_Proj(consumer)) {
665 //if (arch_get_irn_reg_class(rss->arch_env, consumer, -1) == rss->cls)
666 collect_consumer(rss, rss_irn, consumer, got_sink);
669 collect_single_consumer(rss, rss_irn, consumer, got_sink);
675 * Collects all consumer and descendant of a irn.
677 static void collect_node_info(rss_t *rss, ir_node *irn) {
678 static unsigned cur_desc_walk = 0;
679 rss_irn_t *rss_irn = get_rss_irn(rss, irn);
682 assert(! is_Proj(irn) && "Cannot handle Projs.");
684 /* check if node info is already available */
685 if (rss_irn->handled)
687 //phase_reinit_single_irn_data(&rss->ph, irn);
689 DBG((rss->dbg, LEVEL_1, "\tcomputing consumers of %+F:\n", irn));
691 /* collect all consumer */
693 collect_consumer(rss, rss_irn, irn, &got_sink);
695 /* build sorted consumer array */
696 rss_irn->consumer = build_sorted_array_from_list(rss_irn->consumer_list, phase_obst(&rss->ph));
698 DBG((rss->dbg, LEVEL_1, "\tcompute descendants of %+F:\n", irn));
700 /* collect descendants */
702 collect_descendants(rss, rss_irn, irn, &got_sink, ++cur_desc_walk);
704 /* build sorted descendant array */
705 rss_irn->descendants = build_sorted_array_from_list(rss_irn->descendant_list, phase_obst(&rss->ph));
707 rss_irn->handled = 1;
711 * Checks if v is a potential killer of u.
712 * v is in pkill(u) iff descendants(v) cut consumer(u) is v
714 * @param rss The rss object
715 * @param v The node to check for killer
716 * @param u The potentially killed value
717 * @return 1 if v is in pkill(u), 0 otherwise
719 static int is_potential_killer(rss_t *rss, rss_irn_t *v, rss_irn_t *u) {
724 assert(is_Sink(v->irn) || ((plist_count(v->descendant_list) > 0 && v->descendants) || 1));
725 assert(is_Sink(u->irn) || ((plist_count(u->consumer_list) > 0 && u->consumer) || 1));
727 /* as we loop over the list: loop over the shorter one */
728 if (plist_count(v->descendant_list) > plist_count(u->consumer_list)) {
729 list = u->consumer_list;
730 arr = v->descendants;
733 list = v->descendant_list;
737 /* for each list element: try to find element in array */
738 foreach_plist(list, el) {
739 ir_node *irn = plist_element_get_value(el);
740 ir_node *match = BSEARCH_IRN_ARR(irn, arr);
742 if (match && match != irn)
750 * Update descendants, consumer and pkiller list for the given irn.
752 static void update_node_info(rss_t *rss, ir_node *irn, ir_node *pk_irn) {
753 rss_irn_t *node = get_rss_irn(rss, irn);
754 rss_irn_t *pkiller = get_rss_irn(rss, pk_irn);
756 /* add consumer and rebuild the consumer array */
757 if (! plist_has_value(node->consumer_list, pk_irn)) {
758 plist_insert_back(node->consumer_list, pk_irn);
759 node->consumer = build_sorted_array_from_list(node->consumer_list, phase_obst(&rss->ph));
762 /* add pk_irn as descendant, add it's descendants to irn's and rebuild array */
763 if (! plist_has_value(node->descendant_list, pk_irn)) {
766 plist_insert_back(node->descendant_list, pk_irn);
768 foreach_plist(pkiller->descendant_list, el) {
769 ir_node *desc = plist_element_get_value(el);
771 if (! plist_has_value(node->descendant_list, desc))
772 plist_insert_back(node->descendant_list, desc);
775 node->descendants = build_sorted_array_from_list(node->descendant_list, phase_obst(&rss->ph));
781 * Compute the potential killing set PK.
783 static void compute_pkill_set(rss_t *rss) {
784 plist_element_t *u_el, *v_el;
786 foreach_plist(rss->nodes, u_el) {
787 ir_node *u_irn = plist_element_get_value(u_el);
788 rss_irn_t *u = get_rss_irn(rss, u_irn);
790 DBG((rss->dbg, LEVEL_1, "\tcomputing potential killers of %+F:\n", u_irn));
792 /* check each consumer if it is a potential killer */
793 foreach_plist(u->consumer_list, v_el) {
794 ir_node *v_irn = plist_element_get_value(v_el);
795 rss_irn_t *v = get_rss_irn(rss, v_irn);
797 /* check, if v is a potential killer of u */
798 if (is_potential_killer(rss, v, u)) {
799 if (! plist_has_value(u->pkiller_list, v_irn))
800 plist_insert_back(u->pkiller_list, v_irn);
802 DBG((rss->dbg, LEVEL_2, "\t\tpotential killer %+F\n", v_irn));
809 if (rss->opts->dump_flags & RSS_DUMP_PKG)
810 debug_vcg_dump_pkg(rss, NULL, 0);
814 * Build set of killing edges (from values to their potential killers)
816 static void build_kill_edges(rss_t *rss, pset *epk) {
817 plist_element_t *el, *k_el;
819 foreach_plist(rss->nodes, el) {
820 ir_node *irn = plist_element_get_value(el);
821 rss_irn_t *rirn = get_rss_irn(rss, irn);
823 DBG((rss->dbg, LEVEL_3, "\t\tbuilding kill edges for %+F:\n", irn));
825 foreach_plist(rirn->pkiller_list, k_el) {
826 ir_node *pkiller = plist_element_get_value(k_el);
827 rss_edge_t *ke = obstack_alloc(phase_obst(&rss->ph), sizeof(*ke));
833 DBG((rss->dbg, LEVEL_3, "\t\t\ttarget %+F\n", pkiller));
835 pset_insert(epk, ke, HASH_RSS_EDGE(ke));
840 /* print the given cbc for debugging purpose */
841 static void debug_print_cbc(firm_dbg_module_t *mod, cbc_t *cbc) {
845 DBG((mod, LEVEL_3, "\t\tS = set of parents:\n"));
846 foreach_nodeset(cbc->parents, n) {
847 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
849 DBG((mod, LEVEL_3, "\t\tT = set of children:\n"));
850 foreach_nodeset(cbc->children, n) {
851 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
853 DBG((mod, LEVEL_3, "\t\tE = Edges from producers to consumers\n"));
854 foreach_pset(cbc->kill_edges, ke) {
855 DBG((mod, LEVEL_3, "\t\t\t%+F -> %+F\n", ke->src, ke->tgt));
860 * Construct the bipartite decomposition.
861 * Sid-Ahmed-Ali Touati, Phd Thesis
862 * Register Pressure in Instruction Level Parallelism, p. 71
864 static void compute_bipartite_decomposition(rss_t *rss) {
865 pset *epk = new_pset(cmp_rss_edges, 10);
870 DBG((rss->dbg, LEVEL_1, "\tcomputing bipartite decomposition:\n"));
872 build_kill_edges(rss, epk);
874 foreach_plist(rss->nodes, el) {
875 ir_node *u_irn = plist_element_get_value(el);
876 rss_irn_t *u = get_rss_irn(rss, u_irn);
881 plist_element_t *el2;
883 rss_edge_t *kedge_root = NULL;
884 ir_node *t_irn, *s_irn;
886 if (u->visited || u_irn == _sink)
889 DBG((rss->dbg, LEVEL_2, "\t\t%+F choosen:\n", u_irn));
891 cbc = obstack_alloc(phase_obst(&rss->ph), sizeof(*cbc));
894 /* initialize S_cb */
895 cbc->parents = new_nodeset(5);
896 nodeset_insert(cbc->parents, u_irn);
897 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (init)\n", u_irn));
900 cbc->kill_edges = new_pset(cmp_rss_edges, 5);
902 /* each parent gets killed by at least one value from children */
904 /* T_cb = PK_successors(u) */
905 cbc->children = new_nodeset(5);
906 foreach_plist(u->pkiller_list, el2) {
907 nodeset_insert(cbc->children, plist_element_get_value(el2));
908 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (init)\n", plist_element_get_value(el2)));
912 Now: insert the parents of all children into the parent set
913 and insert the children of all parents into the children set
914 until the sets don't change any more
916 while (p_change || c_change) {
917 p_change = c_change = 0;
919 /* accumulate parents */
920 foreach_nodeset(cbc->children, t_irn) {
921 foreach_pset(epk, k_edge) {
922 ir_node *val = k_edge->src;
924 if (k_edge->tgt == t_irn && ! nodeset_find(cbc->parents, val)) {
925 nodeset_insert(cbc->parents, val);
927 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents\n", val));
932 /* accumulate children */
933 foreach_nodeset(cbc->parents, s_irn) {
934 foreach_pset(epk, k_edge) {
935 ir_node *val = k_edge->tgt;
937 if (k_edge->src == s_irn && ! nodeset_find(cbc->children, val)) {
938 nodeset_insert(cbc->children, val);
940 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children\n", val));
946 /* mark all parent values as visited */
947 foreach_nodeset(cbc->parents, s_irn) {
948 rss_irn_t *s = get_rss_irn(rss, s_irn);
950 /* assure bipartite property */
951 if (nodeset_find(cbc->children, s_irn)) {
952 nodeset_remove(cbc->children, s_irn);
953 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F removed from to children\n", s_irn));
958 foreach_pset(epk, k_edge) {
959 if (nodeset_find(cbc->parents, k_edge->src) && nodeset_find(cbc->children, k_edge->tgt)) {
960 pset_insert(cbc->kill_edges, k_edge, HASH_RSS_EDGE(k_edge));
962 Link all k_edges which are about to be removed together.
963 Beware: pset_remove kills the iterator.
965 k_edge->next = kedge_root;
970 /* remove all linked k_edges */
971 for (; kedge_root; kedge_root = kedge_root->next) {
972 pset_remove(epk, kedge_root, HASH_RSS_EDGE(kedge_root));
975 /* add the connected bipartite component */
976 if (nodeset_count(cbc->parents) > 0 && nodeset_count(cbc->children) > 0 && pset_count(cbc->kill_edges) > 0)
977 pset_insert(rss->cbc_set, cbc, (unsigned)cbc->nr);
978 DBG((rss->dbg, LEVEL_2, "\tbipartite component %d inserted:\n", cbc->nr));
980 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2)
981 debug_print_cbc(rss->dbg, cbc);
985 if (rss->opts->dump_flags & RSS_DUMP_CBC)
986 debug_vcg_dump_bipartite(rss);
992 * Select the child with the maximum cost.
994 static child_t *select_child_max_cost(rss_t *rss, nodeset *x, nodeset *y, child_t *t, cbc_t *cbc) {
996 float max_cost = -1.0f;
998 DBG((rss->dbg, LEVEL_2, "\t\tcomputing children costs:\n"));
1000 foreach_nodeset(cbc->children, child) {
1001 rss_irn_t *r_child = get_rss_irn(rss, child);
1002 int num_unkilled_parents = 0;
1003 int num_descendants;
1007 /* get the number of unkilled parents */
1008 foreach_pset(cbc->kill_edges, k_edge) {
1009 if (k_edge->tgt == child && nodeset_find(x, k_edge->src))
1010 ++num_unkilled_parents;
1013 cost = (float)num_unkilled_parents;
1015 num_descendants = plist_count(r_child->descendant_list) + nodeset_count(y);
1017 if (num_descendants > 0)
1018 cost /= (float)num_descendants;
1020 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F, #desc %d, cost %.3f\n", child, num_descendants, cost));
1022 if (cost > max_cost) {
1033 * Remove all parents from x which are killed by t_irn.
1035 static void remove_covered_parents(rss_t *rss, nodeset *x, ir_node *t_irn, cbc_t *cbc) {
1036 rss_irn_t *t = get_rss_irn(rss, t_irn);
1039 DBG((rss->dbg, LEVEL_2, "\t\tremoving parents covered by %+F:\n", t_irn));
1041 foreach_pset(cbc->kill_edges, k_edge) {
1042 if (k_edge->tgt == t_irn && nodeset_find(x, k_edge->src)) {
1043 nodeset_remove(x, k_edge->src);
1044 plist_insert_back(t->parent_list, k_edge->src);
1045 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", k_edge->src));
1050 static void update_cumulated_descendent_values(rss_t *rss, nodeset *y, ir_node *t_irn) {
1051 rss_irn_t *t = get_rss_irn(rss, t_irn);
1052 plist_element_t *el;
1054 DBG((rss->dbg, LEVEL_2, "\t\tupdating cumulated descendant value of %+F:\n", t_irn));
1056 foreach_plist(t->descendant_list, el) {
1057 nodeset_insert(y, plist_element_get_value(el));
1058 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", plist_element_get_value(el)));
1063 * Greedy-k: a heuristics for the MMA problem
1065 static void compute_killing_function(rss_t *rss) {
1067 struct obstack obst;
1069 obstack_init(&obst);
1071 rss->cbc_set = pset_new_ptr(5);
1072 compute_bipartite_decomposition(rss);
1074 /* for all bipartite components do: */
1075 foreach_pset(rss->cbc_set, cbc) {
1077 nodeset *x = new_nodeset(10);
1078 nodeset *y = new_nodeset(10);
1079 child_t **sks = NEW_ARR_F(child_t *, 20);
1084 DBG((rss->dbg, LEVEL_1, "\tcomputing SKS for cbc %d:\n", cbc->nr));
1085 DBG((rss->dbg, LEVEL_2, "\t\tinitializing parents X:\n"));
1087 /* X = S_cb (all parents are initially uncovered) */
1088 foreach_nodeset(cbc->parents, p) {
1089 nodeset_insert(x, p);
1090 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", p));
1093 /* while X not empty */
1094 while (nodeset_count(x) > 0) {
1095 child_t *t = obstack_alloc(&obst, sizeof(*t));
1096 memset(t, 0, sizeof(*t));
1098 t = select_child_max_cost(rss, x, y, t, cbc);
1100 if (cur_len >= cur_size) {
1101 ARR_EXTO(child_t *, sks, cur_size * 2);
1105 DBG((rss->dbg, LEVEL_2, "\t\tinsert child %+F (%.3f) into SKS at pos %d\n", t->irn, t->cost, cur_len));
1108 remove_covered_parents(rss, x, t->irn, cbc);
1109 update_cumulated_descendent_values(rss, y, t->irn);
1112 ARR_SHRINKLEN(sks, cur_len);
1114 /* sort SKS in increasing cost order */
1115 qsort(sks, cur_len, sizeof(sks[0]), cmp_child_costs);
1117 DBG((rss->dbg, LEVEL_2, "\tprocessing SKS for cbc %d:\n", cbc->nr));
1119 /* build killing function */
1120 for (i = cur_len - 1; i >= 0; --i) { /* loop over sks in decreasing cost order */
1121 child_t *t = sks[i];
1122 rss_irn_t *rt = get_rss_irn(rss, t->irn);
1123 plist_element_t *p_el;
1125 DBG((rss->dbg, LEVEL_3, "\t\t\tkiller %+F (%.3f):\n", t->irn, t->cost));
1127 /* kill all unkilled parents of t */
1128 foreach_plist(rt->parent_list, p_el) {
1129 ir_node *par = plist_element_get_value(p_el);
1130 rss_irn_t *rpar = get_rss_irn(rss, par);
1132 if (is_Sink(rpar->killer)) {
1133 rpar->killer = t->irn;
1134 DBG((rss->dbg, LEVEL_2, "\t\tkill %+F\n", rpar->irn));
1137 DBG((rss->dbg, LEVEL_3, "\t\t\tkeeping %+F as killer for %+F\n", rpar->killer, rpar->irn));
1147 if (rss->opts->dump_flags & RSS_DUMP_KILL)
1148 debug_vcg_dump_kill(rss);
1150 del_pset(rss->cbc_set);
1151 obstack_free(&obst, NULL);
1155 * Adds the edge src -> tgt to the dvg. Checks if reverse edge is already there (asserts).
1157 static INLINE void add_dvg_edge(rss_t *rss, dvg_t *dvg, ir_node *src, ir_node *tgt, int have_source) {
1158 rss_edge_t *dvg_edge;
1162 nodeset_insert(dvg->nodes, src);
1164 assert(nodeset_find(dvg->nodes, src) != NULL && "Wrong assumption");
1166 nodeset_insert(dvg->nodes, tgt);
1170 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1174 if (NULL != pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1175 /* add the edge to the DVG */
1176 dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1178 dvg_edge->src = src;
1179 dvg_edge->tgt = tgt;
1180 dvg_edge->next = NULL;
1182 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", src, tgt));
1183 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1188 * Computes the disjoint value DAG (DVG).
1189 * BEWARE: It is not made explicitly clear in the Touati paper,
1190 * but the DVG is meant to be build from the KILLING DAG
1192 static void compute_dvg(rss_t *rss, dvg_t *dvg) {
1193 plist_element_t *el;
1195 DBG((rss->dbg, LEVEL_1, "\tcomputing DVG:\n"));
1197 foreach_plist(rss->nodes, el) {
1198 ir_node *u_irn = plist_element_get_value(el);
1199 rss_irn_t *u = get_rss_irn(rss, u_irn);
1200 rss_irn_t *u_killer = get_rss_irn(rss, u->killer);
1203 /* TODO: omit nodes only having sink as pkiller and killing no one */
1205 /* add an edge to killer */
1206 add_dvg_edge(rss, dvg, u_irn, u->killer, 0);
1208 if (is_Sink(u->killer))
1211 /* We add an edge to every killer, from where we could be reached. */
1212 for (i = ARR_LEN_SAFE(u_killer->descendants) - 1; i >= 0; --i) {
1213 add_dvg_edge(rss, dvg, u_irn, u_killer->descendants[i], 1);
1218 foreach_plist(rss->nodes, el2) {
1219 ir_node *v_irn = plist_element_get_value(el2);
1222 There is an edge (u, v) in the DVG iff v is a descendant of the killer(u).
1224 if (BSEARCH_IRN_ARR(v_irn, u_kill->descendants)) {
1225 rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1228 /* insert the user into the DVG and append it to the user list of u */
1229 nodeset_insert(dvg->nodes, v_irn);
1230 if (! plist_has_value(u->dvg_user_list, v_irn))
1231 plist_insert_back(u->dvg_user_list, v_irn);
1233 dvg_edge->src = u_irn;
1234 dvg_edge->tgt = v_irn;
1235 dvg_edge->next = NULL;
1240 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1242 /* add the edge to the DVG */
1243 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", u_irn, v_irn));
1244 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1250 if (rss->opts->dump_flags & RSS_DUMP_DVG)
1251 debug_vcg_dump_dvg(rss, dvg);
1255 * Updates the dvg structure when a serialization edge from src -> tgt is added.
1257 static void update_dvg(rss_t *rss, dvg_t *dvg, rss_irn_t *src, rss_irn_t *tgt) {
1260 rss_edge_t **arr = alloca(pset_count(dvg->edges) * sizeof(arr[0]));
1263 Add an edge from serialization target to serialization src:
1264 src cannot be alive with target
1266 add_dvg_edge(rss, dvg, tgt->irn, src->irn, 0);
1268 /* Add edges to src's descendants as well, they also getting serialized. */
1269 for (i = ARR_LEN_SAFE(src->descendants) - 1; i >= 0; --i) {
1270 add_dvg_edge(rss, dvg, tgt->irn, src->descendants[i], 1);
1273 /* We also have to add edges from targets predecessors in dvg */
1275 /* We cannot insert elements into set over which we iterate ... */
1276 foreach_pset(dvg->edges, edge) {
1277 if (edge->tgt == tgt->irn) {
1282 for (i = 0; i < idx; ++i) {
1284 add_dvg_edge(rss, dvg, edge->src, src->irn, 1);
1285 for (j = ARR_LEN_SAFE(src->descendants) - 1; j >= 0; --j) {
1286 add_dvg_edge(rss, dvg, edge->src, src->descendants[j], 1);
1293 * Accumulate all descendants for root into list.
1295 static void accumulate_dvg_descendant_values(rss_t *rss, rss_irn_t *root, plist_t *list) {
1296 if (plist_count(root->dvg_user_list) > 0) {
1297 plist_element_t *el;
1299 foreach_plist(root->dvg_user_list, el) {
1300 ir_node *v_irn = plist_element_get_value(el);
1301 rss_irn_t *v = get_rss_irn(rss, v_irn);
1303 /* add v as descendant */
1304 if (! plist_has_value(list, v_irn)) {
1305 plist_insert_back(list, v_irn);
1306 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG descendant %+F\n", v_irn));
1309 /* accumulate v's descendants */
1310 accumulate_dvg_descendant_values(rss, v, list);
1316 * Builds the list of potential killers for each node
1318 * Needs the descendant list for all user as sorted array.
1320 static void build_dvg_pkiller_list(rss_t *rss, dvg_t *dvg) {
1323 foreach_nodeset(dvg->nodes, irn) {
1324 rss_irn_t *node = get_rss_irn(rss, irn);
1325 plist_element_t *el, *el2;
1327 DBG((rss->dbg, DEBUG_DVG, "\t\tbuilding pkiller list for %+F\n", irn));
1329 /* check each user */
1330 foreach_plist(node->dvg_user_list, el) {
1331 ir_node *u_irn = plist_element_get_value(el);
1333 /* is the current user u_irn not a descendant of any other user -> pkiller */
1334 foreach_plist(node->dvg_user_list, el2) {
1335 ir_node *v_irn = plist_element_get_value(el2);
1336 rss_irn_t *v = get_rss_irn(rss, v_irn);
1339 ! BSEARCH_IRN_ARR(u_irn, v->dvg_desc) &&
1340 ! plist_has_value(node->dvg_pkiller_list, u_irn))
1342 plist_insert_back(node->dvg_pkiller_list, u_irn);
1343 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG pkiller %+F\n", u_irn));
1348 node->dvg_pkiller = build_sorted_array_from_list(node->dvg_pkiller_list, phase_obst(&rss->ph));
1352 if (firm_dbg_get_mask(rss->dbg) & DEBUG_DVG)
1353 debug_vcg_dump_dvg_pkiller(rss, dvg);
1360 * Compute the maximal antichain of the current DVG.
1361 * This is a reimplementation of the MAXIMAL_ANTI_CHAIN function
1362 * from the DDG library 1.1 (DAG.cpp).
1364 static nodeset *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration) {
1365 int n = nodeset_count(dvg->nodes);
1366 int *assignment = alloca(n * sizeof(assignment[0]));
1367 int *assignment_rev = alloca(n * sizeof(assignment_rev[0]));
1368 int *idx_map = alloca(n * sizeof(idx_map[0]));
1369 hungarian_problem_t *bp;
1370 nodeset *values, *temp;
1372 int i, j, cost, cur_chain, res;
1373 rss_edge_t *dvg_edge;
1375 #define MAP_IDX(irn) bsearch_for_index(get_irn_idx(irn), idx_map, n, 1)
1377 if (pset_count(dvg->edges) == 0)
1380 bp = hungarian_new(n, n, 1, HUNGARIAN_MATCH_NORMAL);
1383 At first, we build an index map for the nodes in the DVG,
1384 because we cannot use the irn idx therefore as the resulting
1385 bipartite data structure would have around 1.2GB.
1386 So we limit the size to the number of nodes we have in the DVG
1387 and build a sorted index map for their irn indices.
1390 foreach_nodeset(dvg->nodes, u_irn) {
1391 idx_map[i++] = get_irn_idx(u_irn);
1393 qsort(idx_map, n, sizeof(idx_map[0]), cmp_int);
1395 foreach_pset(dvg->edges, dvg_edge) {
1396 int idx_u = MAP_IDX(dvg_edge->src);
1397 int idx_v = MAP_IDX(dvg_edge->tgt);
1399 /* add the entry to the bipartite data structure */
1400 hungarian_add(bp, idx_u, idx_v, 1);
1401 DBG((rss->dbg, LEVEL_3, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1402 idx_u, dvg_edge->src, idx_v, dvg_edge->tgt));
1406 Add a bipartite entry for each pair of nodes (u, v), where exists a
1407 path in the DVG from u to v, ie. connect all descendants(v) to v.
1408 desc_dvg(v) = accumulated descendants of all z in dvg_user_list(v)
1410 foreach_nodeset(dvg->nodes, u_irn) {
1411 rss_irn_t *u = get_rss_irn(rss, u_irn);
1412 int idx_u_irn = MAP_IDX(u_irn);
1414 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG descendants of %+F:\n", u_irn));
1416 //plist_clear(u->dvg_desc_list);
1417 //accumulate_dvg_descendant_values(rss, u, u->dvg_desc_list);
1420 FIXME: The array is build on the phase obstack and we cannot free the data.
1421 So the array get as many times allocated as this function is called.
1424 /* build the sorted array for faster searches */
1425 //u->dvg_desc = build_sorted_array_from_list(u->dvg_desc_list, phase_obst(&rss->ph));
1427 DBG((rss->dbg, DEBUG_MAX_AC, "\t\tadding bipartite entries of %+F:\n", u_irn));
1429 /* add the bipartite entries for all descendants */
1430 for (i = ARR_LEN_SAFE(u->dvg_desc) - 1; i >= 0; --i) {
1431 rss_irn_t *desc = get_rss_irn(rss, u->dvg_desc[i]);
1432 int idx_desc = MAP_IDX(u->dvg_desc[i]);
1434 /* add the entry to the bipartite data structure */
1435 hungarian_add(bp, idx_u_irn, idx_desc, 1);
1436 DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1437 idx_u_irn, u_irn, idx_desc, u->dvg_desc[i]));
1444 /* We want maximum cardinality matching */
1445 hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);
1448 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG pkiller:\n"));
1449 /* beware: the following function needs the dvg_desc array */
1450 build_dvg_pkiller_list(rss, dvg);
1453 DBG((rss->dbg, LEVEL_2, "\t\tcomputing bipartite matching\n"));
1455 The maximum cardinality bipartite matching gives us the minimal
1456 chain partition, which corresponds to the maximum anti chains.
1458 res = hungarian_solve(bp, assignment, &cost, 1);
1459 assert(res == 0 && "Bipartite matching failed!");
1462 memset(assignment_rev, -1, n * sizeof(assignment_rev[0]));
1464 /* build the reverse assignment, ie. foreach i -> j, add j -> i */
1465 for (i = 0; i < n; ++i) {
1466 if (assignment[i] >= 0) {
1467 int j = assignment[i];
1468 assignment_rev[j] = i;
1472 DBG((rss->dbg, LEVEL_2, "\t\tgot assignment with cost %d\n", cost));
1473 DBG((rss->dbg, LEVEL_3, "\t\t\tassignment --- reverse assignment\n", cost));
1474 for (i = 0; i < n; ++i) {
1475 DBG((rss->dbg, LEVEL_3, "\t\t\t%3d -> %3d %3d -> %3d\n", i, assignment[i], i, assignment_rev[i]));
1478 values = new_nodeset(10);
1480 /* Construction of the minimal chain partition */
1481 for (j = 0; j < n; ++j) {
1482 /* check nodes, which did not occur as target */
1483 if (assignment_rev[j] == -1) {
1484 int xj = idx_map[j];
1485 ir_node *xj_irn = get_idx_irn(rss->irg, xj);
1486 rss_irn_t *xj_rss = get_rss_irn(rss, xj_irn);
1487 chain_t *c = obstack_alloc(phase_obst(&rss->ph), sizeof(*c));
1490 /* there was no source for j -> we have a source of a new chain */
1491 nodeset_insert(values, xj_irn);
1493 c->elements = plist_obstack_new(phase_obst(&rss->ph));
1494 c->nr = cur_chain++;
1495 plist_insert_back(c->elements, xj_irn);
1499 DBG((rss->dbg, LEVEL_2, "\t\tstarting chain %d:\n", c->nr));
1500 DBG((rss->dbg, LEVEL_2, "\t\t\t%+F (%d)", xj_irn, j));
1502 /* follow chain, having j as source */
1504 while (assignment[source] >= 0) {
1505 int target = assignment[source];
1506 int irn_idx = idx_map[target];
1507 ir_node *irn = get_idx_irn(rss->irg, irn_idx);
1508 rss_irn_t *node = get_rss_irn(rss, irn);
1510 plist_insert_back(c->elements, irn);
1513 DB((rss->dbg, LEVEL_2, " -> %+F (%d)", irn, target));
1515 /* new source = last target */
1519 DB((rss->dbg, LEVEL_2, "\n"));
1524 Computing the maximal antichain: Select an element from each
1525 chain such, such it is parallel with the others.
1527 DBG((rss->dbg, LEVEL_1, "\t\tcomputing set of saturation values (MAX AC)\n"));
1528 DBG((rss->dbg, LEVEL_3, "\t\tstarting with:\n"));
1531 if (firm_dbg_get_mask(rss->dbg) & LEVEL_3)
1532 dump_nodeset(values, "\t\t\t");
1538 We need an explicit array for the values as
1539 we cannot iterate multiple times over the same
1540 set at the same time. :-(((((
1542 int n = nodeset_count(values);
1544 ir_node **val_arr = NEW_ARR_F(ir_node *, n);
1546 foreach_nodeset(values, u_irn)
1547 val_arr[i++] = u_irn;
1552 temp = new_nodeset(10);
1554 /* Select all nodes from current value set, having another node in the set as descendant. */
1555 for (i = 0; i < n; ++i) {
1556 rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1558 for (j = 0; j < n; ++j) {
1562 key.src = val_arr[i];
1563 key.tgt = val_arr[j];
1565 if (pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1566 /* v[j] is descendant of u -> remove u and break */
1567 nodeset_insert(temp, u->irn);
1568 nodeset_remove(values, u->irn);
1570 DBG((rss->dbg, LEVEL_3, "\t\t\tremoving %+F from values, adding it to temp\n", u->irn));
1578 /* Try to insert the chain predecessor of all selected u's */
1579 foreach_nodeset(temp, u_irn) {
1580 rss_irn_t *u = get_rss_irn(rss, u_irn);
1581 chain_t *c = u->chain;
1582 plist_element_t *el = plist_find_value(c->elements, u_irn);
1584 assert(el && "Missing element in chain!");
1586 /* If u has predecessor in chain: insert the predecessor */
1587 if (el == plist_element_get_prev(el)) {
1588 nodeset_insert(values, plist_element_get_value(el));
1589 DBG((rss->dbg, LEVEL_3, "\t\t\tadding %+F to values\n", plist_element_get_value(el)));
1595 } while (nodeset_count(temp) > 0);
1597 DBG((rss->dbg, LEVEL_2, "\t\tfinal set:\n"));
1599 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2) {
1600 dump_nodeset(values, "\t\t\t");
1604 if (rss->opts->dump_flags & RSS_DUMP_MAXAC)
1605 debug_vcg_dump_pkg(rss, values, iteration);
1615 * Computes the best serialization between two nodes of sat_vals.
1617 static serialization_t *compute_best_admissible_serialization(rss_t *rss, nodeset *sat_vals, serialization_t *ser, int num_regs) {
1618 int n = nodeset_count(sat_vals);
1619 int n_idx = ARR_LEN_SAFE(rss->idx_map);
1621 ir_node **val_arr = alloca(n * sizeof(val_arr[0]));
1622 bitset_t *bs_sv = bitset_alloca(n_idx);
1623 bitset_t *bs_vdesc = bitset_alloca(n_idx);
1624 bitset_t *bs_tmp = bitset_alloca(n_idx);
1625 bitset_t *bs_ukilldesc = bitset_alloca(n_idx);
1626 int best_benefit = INT_MAX;
1627 int best_omega2 = INT_MAX;
1628 int best_benefit_omega20 = INT_MAX;
1632 rss_edge_t min_benefit_edge;
1633 rss_edge_t min_omega20_edge;
1634 rss_irn_t *ser_u_omega1, *ser_v_omega1;
1635 rss_irn_t *ser_u_omega20, *ser_v_omega20;
1637 DBG((rss->dbg, LEVEL_1, "\tcomputing admissible serializations:\n"));
1640 We need an explicit array for the values as
1641 we cannot iterate multiple times over the same
1642 set at the same time. :-(((((
1645 foreach_nodeset(sat_vals, irn) {
1647 bitset_set(bs_sv, BLOCK_IDX_MAP(rss, irn));
1651 We build all admissible serializations and remember the best found so far.
1654 if v in pkiller(u): add edge from v to all other pkiller(u)
1655 else: for all vv in pkiller(u): add edge from v to vv if there exists no path from vv to v
1659 A node is unserializable if:
1660 - it has only one killer and this one is Sink
1661 - it kills no other values
1662 In this case there is no serialization which could
1663 reduce the registerpressure
1665 #define IS_UNSERIALIZABLE_NODE(rss_node) \
1668 (plist_count(rss_node->pkiller_list) == 1) && \
1669 is_Sink(rss_node->killer) && \
1670 (rss_node->kill_count == 0) \
1672 be_is_Barrier(rss_node->irn) || \
1673 be_is_Keep(rss_node->irn) \
1676 /* for all u in sat_vals */
1677 for (i = 0; i < n; ++i) {
1678 rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1679 plist_element_t *el;
1681 /* ignore nodes where serialization does not help */
1682 if (IS_UNSERIALIZABLE_NODE(u)) {
1683 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", u->irn));
1687 /* accumulate all descendants of all pkiller(u) */
1688 bitset_clear_all(bs_ukilldesc);
1689 foreach_plist(u->pkiller_list, el) {
1690 ir_node *irn = plist_element_get_value(el);
1691 rss_irn_t *node = get_rss_irn(rss, irn);
1694 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, irn));
1698 for (k = ARR_LEN_SAFE(node->descendants) - 1; k >= 0; --k) {
1699 if (! is_Sink(node->descendants[k]))
1700 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, node->descendants[k]));
1704 /* for all v in sat_vals */
1705 for (j = 0; j < n; ++j) {
1706 ir_node *v_irn = val_arr[j];
1707 rss_irn_t *v = get_rss_irn(rss, v_irn);
1708 unsigned v_height = get_irn_height(rss->h, v_irn);
1709 int omega1, omega2, is_pkiller;
1711 /* v cannot be serialized with itself
1712 * ignore nodes where serialization does not help */
1713 if (i == j || IS_UNSERIALIZABLE_NODE(v)) {
1715 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", v->irn));
1719 /* get descendants of v */
1720 bitset_clear_all(bs_vdesc);
1721 bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v_irn));
1722 for (k = ARR_LEN_SAFE(v->descendants) - 1; k >= 0; --k) {
1723 if (! is_Sink(v->descendants[k]))
1724 bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v->descendants[k]));
1727 /* if v is in pkiller(u) */
1728 is_pkiller = plist_has_value(u->pkiller_list, v_irn);
1730 /* for all vv in pkiller(u) */
1731 foreach_plist(u->pkiller_list, el) {
1732 ir_node *vv_irn = plist_element_get_value(el);
1735 if (is_Sink(vv_irn) || is_cfop(vv_irn))
1739 add_edge = vv_irn != v_irn && skip_Proj(vv_irn) != skip_Proj(v_irn);
1741 add_edge = ! heights_reachable_in_block(rss->h, skip_Proj(vv_irn), skip_Proj(v_irn));
1744 As we add an edge from vv -> v, we have to make sure,
1745 that there exists no path from v to vv.
1749 int vv_height = get_irn_height(rss->h, vv_irn);
1750 int mu1, mu2, critical_path_cost;
1753 mu1 = | descendants(v) cut sat_vals |
1754 the number of saturating values which cannot
1755 be simultaneously alive with u
1757 bitset_copy(bs_tmp, bs_vdesc);
1758 mu1 = bitset_popcnt(bitset_and(bs_tmp, bs_sv));
1761 mu2 = | accum_desc_all_pkiller(u) without descendants(v) |
1764 bitset_copy(bs_tmp, bs_ukilldesc);
1765 mu2 = bitset_popcnt(bitset_andnot(bs_tmp, bs_vdesc));
1771 /* omega1 = mu1 - mu2 */
1777 /* omega2 = increase of critical path */
1778 critical_path_cost =
1779 v_height /* longest path from v to sink */
1780 + rss->max_height - vv_height /* longest path from source to vv */
1784 If critical_path_cost > max_height -> the new edge
1785 would increase the longest critical path by the difference.
1787 omega2 = critical_path_cost > rss->max_height ? critical_path_cost - rss->max_height : 0;
1789 /* this keeps track of the edge with the best benefit */
1790 if (omega1 >= num_regs - n && omega1 < best_benefit) {
1791 min_benefit_edge.src = v_irn;
1792 min_benefit_edge.tgt = vv_irn;
1797 best_benefit = omega1;
1798 ser->new_killer = is_pkiller;
1801 /* this keeps track of the edge with the best omega1 costs where omega2 == 0 */
1802 if (omega2 == 0 && omega1 >= num_regs - n && omega1 < best_benefit_omega20) {
1803 min_omega20_edge.src = v_irn;
1804 min_omega20_edge.tgt = vv_irn;
1809 best_benefit_omega20 = omega1;
1810 ser->new_killer = is_pkiller;
1813 best_omega2 = MIN(best_omega2, omega2);
1815 DBG((rss->dbg, LEVEL_2, "\t\tfound %+F -> %+F (w1 %d, w2 %d)\n",
1816 v_irn, vv_irn, omega1, omega2));
1818 } /* for all vv in pkiller(u) */
1819 } /* for all v in sat_vals */
1820 } /* for all u in sat_vals */
1825 if (best_omega2 == 0) {
1826 ser->u = ser_u_omega20;
1827 ser->v = ser_v_omega20;
1828 ser->edge->src = min_omega20_edge.src;
1829 ser->edge->tgt = min_omega20_edge.tgt;
1830 ser->omega1 = best_benefit_omega20;
1831 ser->omega2 = best_omega2;
1834 ser->u = ser_u_omega1;
1835 ser->v = ser_v_omega1;
1836 ser->edge->src = min_benefit_edge.src;
1837 ser->edge->tgt = min_benefit_edge.tgt;
1838 ser->omega1 = best_benefit;
1839 ser->omega2 = best_omega2;
1844 #undef IS_UNSERIALIZABLE_NODE
1848 * Perform the value serialization heuristic and add all
1849 * computed serialization edges as dependencies to the irg.
1851 static void perform_value_serialization_heuristic(rss_t *rss) {
1852 bitset_t *arch_nonign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1853 bitset_t *abi_ign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1854 int available_regs, iteration, num_live;
1857 pset *ser_set = new_pset(cmp_rss_edges, 20);
1859 /* available_regs = R = |arch_non_ignore_regs cut ~abi_ignore_regs| */
1860 arch_put_non_ignore_regs(rss->arch_env, rss->cls, arch_nonign_bs);
1861 be_abi_put_ignore_regs(rss->abi, rss->cls, abi_ign_bs);
1862 bitset_andnot(arch_nonign_bs, abi_ign_bs);
1863 available_regs = bitset_popcnt(arch_nonign_bs);
1864 //um_live = pset_count(rss->live_block);
1865 //available_regs -= num_live < available_regs ? num_live : 0;
1867 DBG((rss->dbg, LEVEL_1, "\n\t#available regs: %d\n\n", available_regs));
1870 At first we need to compute the disjoint value DAG (DVG = {V, E_dv}).
1871 V = set of all nodes we are currently interested in
1872 E_dv = there is an edge from u to v iff v is a descendant of killer(u), forall u, v in V
1874 dvg.nodes = new_nodeset(plist_count(rss->nodes));
1875 dvg.edges = new_pset(cmp_rss_edges, plist_count(rss->nodes) * 5);
1876 compute_dvg(rss, &dvg);
1879 Then we perform the heuristic serialization algorithm
1880 on the DVG which gives us all necessary serialization
1883 DBG((rss->dbg, LEVEL_1, "\tcomputing maximal antichain:\n"));
1885 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
1886 while (sat_vals && (nodeset_count(sat_vals) > available_regs)) {
1887 serialization_t *ser, best_ser;
1888 rss_edge_t *edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*edge));
1889 ir_node *dep_src, *dep_tgt;
1891 best_ser.edge = edge;
1892 ser = compute_best_admissible_serialization(rss, sat_vals, &best_ser, available_regs);
1894 DBG((rss->dbg, LEVEL_1, "\tcurrent register saturation %d, target %d\n", nodeset_count(sat_vals), available_regs));
1897 DBG((rss->dbg, LEVEL_1, "\tno RS improving serialization found, breaking at iteration %d\n", iteration));
1901 /* Insert the serialization as dependency edge into the irg. */
1902 DBG((rss->dbg, LEVEL_1, "\tserializing %+F -> %+F with edge %+F -> %+F and cost %d, %d\n",
1903 ser->u->irn, ser->v->irn, ser->edge->src, ser->edge->tgt, ser->omega1, ser->omega2));
1905 if (pset_find(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge)))
1906 ir_printf("WARNING: serialization %+F -> %+F computed twice!\n", ser->edge->src, ser->edge->tgt);
1909 pset_insert(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge));
1911 /* update the dvg */
1912 update_dvg(rss, &dvg, ser->v, ser->u);
1913 update_dvg(rss, &dvg, get_rss_irn(rss, ser->edge->src), get_rss_irn(rss, ser->edge->tgt));
1914 del_nodeset(sat_vals);
1916 dep_src = skip_Proj(ser->edge->src);
1917 dep_tgt = ser->edge->tgt;
1918 add_irn_dep(dep_src, dep_tgt);
1920 /* Update descendants, consumer and pkillers of the target */
1921 update_node_info(rss, ser->edge->tgt, ser->edge->src);
1923 /* TODO: try to find a cheaper way for updating height information */
1924 rss->max_height = heights_recompute_block(rss->h, rss->block);
1926 /* Recompute the antichain for next serialization */
1927 DBG((rss->dbg, LEVEL_1, "\tre-computing maximal antichain:\n"));
1928 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
1931 del_nodeset(dvg.nodes);
1932 del_pset(dvg.edges);
1936 * Do initial calculations for a block.
1938 static void process_block(ir_node *block, void *env) {
1941 const ir_edge_t *edge;
1943 phase_init(&rss->ph, "rss block preprocessor", rss->irg, PHASE_DEFAULT_GROWTH, init_rss_irn);
1945 DBG((rss->dbg, LEVEL_1, "preprocessing block %+F\n", block));
1948 /* build an index map for all nodes in the current block */
1950 n = get_irn_n_edges(block);
1951 NEW_ARR_A(int *, rss->idx_map, n);
1952 foreach_out_edge(block, edge) {
1953 ir_node *irn = get_edge_src_irn(edge);
1954 rss->idx_map[i++] = get_irn_idx(irn);
1956 qsort(rss->idx_map, n, sizeof(rss->idx_map[0]), cmp_int);
1957 rss->max_height = heights_recompute_block(rss->h, block);
1959 /* loop over all register classes */
1960 for (i = arch_isa_get_n_reg_class(rss->arch_env->isa) - 1; i >= 0; --i) {
1961 const arch_register_class_t *cls = arch_isa_get_reg_class(rss->arch_env->isa, i);
1964 DBG((rss->dbg, LEVEL_1, "register class %s\n", arch_register_class_name(cls)));
1966 /* Get all live value at end of Block having current register class */
1967 rss->live_block = pset_new_ptr(10);
1968 be_liveness_end_of_block(rss->liveness, rss->arch_env, rss->cls, rss->block, rss->live_block);
1970 /* reset the list of interesting nodes */
1971 plist_clear(rss->nodes);
1972 plist_insert_back(rss->nodes, _sink);
1974 /* collect all nodes having a certain register class */
1975 foreach_out_edge(block, edge) {
1976 ir_node *irn = get_edge_src_irn(edge);
1977 ir_mode *mode = get_irn_mode(irn);
1981 - mode_T nodes (the projs are asked)
1982 - mode_X nodes (control flow nodes are always scheduled last)
1983 - Keeps (they are always immediately scheduled)
1984 - Phi (same as Keep)
1986 if (mode == mode_T || mode == mode_X || is_Phi(irn))
1990 In case of a proj, we skip
1991 - Barrier (they are a Barrier :)
1993 - the Proj itself, as it's scheduled always with it's super node
1996 ir_node *pred = get_Proj_pred(irn);
1997 if (be_is_Barrier(pred) || is_Start(pred))
2001 /* calculate the descendants and consumer for each node in the block */
2002 collect_node_info(rss, skip_Proj(irn));
2004 if (be_is_Keep(irn))
2007 if (! arch_irn_is(rss->arch_env, irn, ignore) && arch_get_irn_reg_class(rss->arch_env, irn, -1) == cls) {
2008 plist_insert_back(rss->nodes, skip_Proj(irn));
2013 /* compute the potential killing set PK(G) */
2014 compute_pkill_set(rss);
2016 /* compute the killing function k* */
2017 compute_killing_function(rss);
2020 Compute the heuristic value serialization and
2021 add the necessary dependencies to the irg.
2023 perform_value_serialization_heuristic(rss);
2025 del_pset(rss->live_block);
2028 phase_free(&rss->ph);
2033 * Register the options.
2035 void rss_register_options(lc_opt_entry_t *grp) {
2036 static int run_once = 0;
2037 lc_opt_entry_t *rss_grp;
2041 rss_grp = lc_opt_get_grp(grp, "rss");
2043 lc_opt_add_table(rss_grp, rss_option_table);
2046 #endif /* WITH_LIBCORE */
2049 * Preprocess the irg for scheduling.
2051 void rss_schedule_preparation(const be_irg_t *birg) {
2054 FIRM_DBG_REGISTER(rss.dbg, "firm.be.sched.rss");
2056 //firm_dbg_set_mask(rss.dbg, LEVEL_1 | LEVEL_2 | LEVEL_3);
2058 init_rss_special_nodes(birg->irg);
2060 rss.irg = birg->irg;
2061 rss.arch_env = birg->main_env->arch_env;
2062 rss.abi = birg->abi;
2063 rss.h = heights_new(birg->irg);
2064 rss.nodes = plist_new();
2065 rss.opts = &rss_options;
2066 rss.liveness = be_liveness(birg->irg);
2067 irg_block_walk_graph(birg->irg, NULL, process_block, &rss);
2068 heights_free(rss.h);
2069 plist_free(rss.nodes);
2070 be_liveness_free(rss.liveness);
2072 if (birg->main_env->options->dump_flags & DUMP_SCHED)
2073 be_dump(rss.irg, "-rss", dump_ir_block_graph);