2 * Implementation of a register saturating list scheduler
3 * as described in: Sid-Ahmed-Ali Touati
4 * Register Saturation in Superscalar and VLIW Codes
6 * @license This file protected by GPL - GNU GENERAL PUBLIC LICENSE.
7 * @author Christian Wuerdig
20 #include "irgraph_t.h"
22 #include "iredges_t.h"
24 #include "irphase_t.h"
30 #include "bipartite.h"
31 #include "hungarian.h"
38 #include "besched_t.h"
41 #include <libcore/lc_opts.h>
42 #include <libcore/lc_opts_enum.h>
43 #endif /* WITH_LIBCORE */
46 #define ARR_LEN_SAFE(arr) ((arr) != NULL ? ARR_LEN((arr)) : 0)
48 #define HASH_RSS_EDGE(edge) ((get_irn_node_nr((edge)->src) << 16) | (get_irn_node_nr((edge)->tgt) & 0xFFFF))
49 #define BSEARCH_IRN_ARR(val, arr) \
50 bsearch(&(val), (arr), ARR_LEN_SAFE((arr)), sizeof((arr)[0]), cmp_irn_idx)
52 #define BLOCK_IDX_MAP(rss, irn) bsearch_for_index(get_irn_idx((irn)), (rss)->idx_map, ARR_LEN_SAFE((rss)->idx_map), 1)
55 typedef struct _rss_opts_t {
59 /* Represents a child with associated costs */
60 typedef struct _child {
65 /* We need edges for several purposes. */
66 typedef struct _rss_edge {
72 /* Represents a connected bipartite component. */
74 nodeset *parents; /**< = S a set of value producers */
75 nodeset *children; /**< = T a set of value consumers */
76 pset *kill_edges; /**< = E a set of edges (t in T, s in S) such as each s in S gets killed by at least one t in T */
77 int nr; /**< a deterministic index for set insertion (used as hash) */
80 /* Represents a disjoint value DAG. */
86 /* Represents a chain of nodes. */
87 typedef struct _chain {
88 plist_t *elements; /**< List of chain elements */
89 int nr; /**< a deterministic index for set insertion (used as hash) */
92 typedef struct _rss_irn {
93 plist_t *consumer_list; /**< List of consumers */
94 ir_node **consumer; /**< Sorted consumer array (needed for faster access) */
96 plist_t *parent_list; /**< List of parents */
97 plist_t *pkiller_list; /**< List of potential killers */
99 plist_t *descendant_list; /**< List of descendants */
100 ir_node **descendants; /**< Sorted descendant array (needed for faster access) */
102 ir_node *killer; /**< The selected unique killer */
103 ir_node *irn; /**< The corresponding firm node to this rss_irn */
105 chain_t *chain; /**< The chain, this node is associated to */
107 unsigned desc_walk; /**< visited flag for collecting descendants */
108 unsigned kill_count; /**< number of nodes killed by this one */
110 unsigned live_out : 1; /**< irn has consumers outside of it's block */
111 unsigned visited : 1; /**< visited flag for bipartite decomposition */
112 unsigned havecons : 1; /**< visited flag collect consumer */
113 unsigned handled : 1; /**< flag indicating whether or not the list structures have been build */
114 unsigned dumped : 1; /**< flag indication whether or not this node was dumped */
117 /* Represents a serialization edge with associated costs. */
118 typedef struct _serialization {
119 rss_irn_t *u; /* the top node */
120 rss_irn_t *v; /* the node about to be serialized after u */
121 rss_edge_t *edge; /* the edge selected for the serialization */
122 int omega1; /* estimated: available regs - RS reduction */
123 int omega2; /* increase in critical path length */
127 typedef struct _rss {
128 phase_t ph; /**< Phase to hold some data */
129 heights_t *h; /**< The current height object */
130 ir_graph *irg; /**< The irg to preprocess */
131 plist_t *nodes; /**< The list of interesting nodes */
132 const arch_env_t *arch_env; /**< The architecture environment */
133 be_abi_irg_t *abi; /**< The abi for this irg */
134 pset *cbc_set; /**< A set of connected bipartite components */
135 ir_node *block; /**< The current block in progress. */
136 int *idx_map; /**< Mapping irn indices to per block indices */
137 unsigned max_height; /**< maximum height in the current block */
138 rss_opts_t *opts; /**< The options */
139 const arch_register_class_t *cls; /**< The current register class */
140 DEBUG_ONLY(firm_dbg_module_t *dbg);
143 #define get_rss_irn(rss, irn) (phase_get_or_set_irn_data(&rss->ph, irn))
146 * We need some special nodes:
147 * a source and a sink for all live-in and live-out values of a block
156 static ir_node *_source = NULL;
157 static ir_node *_sink = NULL;
159 #define is_Source(irn) ((irn) == _source)
160 #define is_Sink(irn) ((irn) == _sink)
164 RSS_DUMP_CBC = 1 << 0,
165 RSS_DUMP_PKG = 1 << 1,
166 RSS_DUMP_KILL = 1 << 2,
167 RSS_DUMP_DVG = 1 << 3,
168 RSS_DUMP_MAXAC = 1 << 4,
169 RSS_DUMP_ALL = (RSS_DUMP_MAXAC << 1) - 1,
172 static rss_opts_t rss_options = {
177 static const lc_opt_enum_int_items_t dump_items[] = {
178 { "none", RSS_DUMP_NONE },
179 { "cbc", RSS_DUMP_CBC },
180 { "pkg", RSS_DUMP_PKG },
181 { "kill", RSS_DUMP_KILL },
182 { "dvg", RSS_DUMP_DVG },
183 { "maxac", RSS_DUMP_MAXAC },
184 { "all", RSS_DUMP_ALL },
188 static lc_opt_enum_int_var_t dump_var = {
189 &rss_options.dump_flags, dump_items
192 static const lc_opt_table_entry_t rss_option_table[] = {
193 LC_OPT_ENT_ENUM_MASK("dump", "dump phases (none, cbc, pkg, kill, dvg, maxac, all)", &dump_var),
196 #endif /* WITH_LIBCORE */
198 /******************************************************************************
200 * | | | | / _| | | (_)
201 * | |__ ___| |_ __ ___ _ __ | |_ _ _ _ __ ___| |_ _ ___ _ __ ___
202 * | '_ \ / _ \ | '_ \ / _ \ '__| | _| | | | '_ \ / __| __| |/ _ \| '_ \/ __|
203 * | | | | __/ | |_) | __/ | | | | |_| | | | | (__| |_| | (_) | | | \__ \
204 * |_| |_|\___|_| .__/ \___|_| |_| \__,_|_| |_|\___|\__|_|\___/|_| |_|___/
207 ******************************************************************************/
210 * Acquire opcodes and create source and sink nodes.
212 static void init_rss_special_nodes(ir_graph *irg) {
213 ir_node *block = get_irg_start_block(irg);
214 int iro_rss_base = get_next_ir_opcodes(iro_rss_last);
215 ir_op *op_rss_Source = new_ir_op(iro_rss_base + iro_rss_Source, "rss_Source", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
216 ir_op *op_rss_Sink = new_ir_op(iro_rss_base + iro_rss_Sink, "rss_Sink", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
217 _source = new_ir_node(NULL, irg, block, op_rss_Source, mode_ANY, 0, NULL);
218 _sink = new_ir_node(NULL, irg, block, op_rss_Sink, mode_ANY, 0, NULL);
221 static int cmp_int(const void *a, const void *b) {
225 return QSORT_CMP(*i1, *i2);
228 static int cmp_child_costs(const void *a, const void *b) {
229 const child_t *c1 = a;
230 const child_t *c2 = b;
232 return QSORT_CMP(c1->cost, c2->cost);
235 static int cmp_irn_idx(const void *a, const void *b) {
236 const ir_node *n1 = *(ir_node **)a;
237 const ir_node *n2 = *(ir_node **)b;
239 return QSORT_CMP(get_irn_idx(n1), get_irn_idx(n2));
242 static int cmp_rss_edges(const void *a, const void *b) {
243 const rss_edge_t *e1 = a;
244 const rss_edge_t *e2 = b;
246 return (e1->src != e2->src) || (e1->tgt != e2->tgt);
249 static int bsearch_for_index(int key, int *arr, size_t len, int force) {
253 while (right >= left) {
254 int idx = (left + right) / 2;
258 else if (key > arr[idx])
265 assert(0 && "Something is wrong, key not found.");
269 static ir_node **build_sorted_array_from_list(plist_t *irn_list, struct obstack *obst) {
272 int len = plist_count(irn_list);
273 ir_node **arr = NEW_ARR_D(ir_node *, obst, len);
275 /* copy the list into the array */
276 foreach_plist(irn_list, el) {
277 arr[i++] = plist_element_get_value(el);
280 /* sort the array by node index */
281 qsort(arr, len, sizeof(arr[0]), cmp_irn_idx);
286 /*****************************************************
289 * __| | ___| |__ _ _ __ _ __ _ _ _ __ __ _
290 * / _` |/ _ \ '_ \| | | |/ _` |/ _` | | '_ \ / _` |
291 * | (_| | __/ |_) | |_| | (_| | (_| | | | | | (_| |
292 * \__,_|\___|_.__/ \__,_|\__, |\__, |_|_| |_|\__, |
296 *****************************************************/
298 static void dump_nodeset(nodeset *ns, const char *prefix) {
300 foreach_nodeset(ns, irn) {
301 ir_fprintf(stderr, "%s%+F\n", prefix, irn);
305 static void build_file_name(rss_t *rss, const char *suffix, size_t suf_len, char *buf, size_t len) {
306 const char *irg_name;
309 irg_name = get_entity_name(get_irg_entity(rss->irg));
310 snprintf(buf, len - suf_len, "%s-%s-block-%ld",
311 irg_name, arch_register_class_name(rss->cls), get_irn_node_nr(rss->block));
315 /* Dumps all collected bipartite components of current irg as vcg. */
316 static void debug_vcg_dump_bipartite(rss_t *rss) {
320 static const char suffix[] = "-RSS-CBC.vcg";
322 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
323 f = fopen(file_name, "w");
325 ir_fprintf(f, "graph: { title: \"connected bipartite component graph of %+F\"\n", rss->irg);
326 fprintf(f, "display_edge_labels: no\n");
327 fprintf(f, "layoutalgorithm: mindepth\n");
328 fprintf(f, "manhattan_edges: yes\n\n");
330 foreach_pset(rss->cbc_set, cbc) {
334 fprintf(f, "graph: { titel: \"cbc %d\" label: \"cbc %d\" status:clustered color:yellow\n", cbc->nr, cbc->nr);
335 foreach_nodeset(cbc->parents, n) {
336 ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
338 foreach_nodeset(cbc->children, n) {
339 ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
341 foreach_pset(cbc->kill_edges, ke) {
342 ir_fprintf(f, "edge: { sourcename: \"n%d_%d\" targetname: \"n%d_%d\" }\n",
343 get_irn_node_nr(ke->src), cbc->nr, get_irn_node_nr(ke->tgt), cbc->nr);
351 /* Dump the computed killing function as vcg. */
352 static void debug_vcg_dump_kill(rss_t *rss) {
356 static const char suffix[] = "-RSS-KILL.vcg";
358 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
359 f = fopen(file_name, "w");
361 ir_fprintf(f, "graph: { title: \"computed kill graph of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
362 fprintf(f, "display_edge_labels: no\n");
363 fprintf(f, "layoutalgorithm: mindepth\n");
364 fprintf(f, "manhattan_edges: yes\n\n");
366 /* first: reset dumped flag of all nodes */
367 foreach_plist(rss->nodes, el) {
368 ir_node *irn = plist_element_get_value(el);
369 rss_irn_t *rirn = get_rss_irn(rss, irn);
373 /* dump all nodes and their killers */
374 foreach_plist(rss->nodes, el) {
375 ir_node *irn = plist_element_get_value(el);
376 rss_irn_t *rirn = get_rss_irn(rss, irn);
377 rss_irn_t *pk_rirn = get_rss_irn(rss, rirn->killer);
379 if (! rirn->dumped) {
380 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
384 if (! pk_rirn->dumped) {
385 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(rirn->killer), rirn->killer);
389 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
390 get_irn_node_nr(rirn->killer), get_irn_node_nr(irn));
397 /* Dumps the potential killing DAG (PKG) as vcg. */
398 static void debug_vcg_dump_pkg(rss_t *rss, nodeset *max_ac, int iteration) {
401 char *suffix = alloca(32);
402 static const char suffix1[] = "-RSS-PKG.vcg";
403 static const char suffix2[] = "-RSS-PKG-MAXAC.vcg";
407 snprintf(suffix, 32, "%s", suffix1);
410 snprintf(suffix, 32, "-%02d%s", iteration, suffix2);
413 build_file_name(rss, suffix, strlen(suffix) + 1, file_name, sizeof(file_name));
414 f = fopen(file_name, "w");
416 ir_fprintf(f, "graph: { title: \"potential killing DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
417 fprintf(f, "display_edge_labels: no\n");
418 fprintf(f, "layoutalgorithm: mindepth\n");
419 fprintf(f, "manhattan_edges: yes\n\n");
421 foreach_plist(rss->nodes, el) {
422 ir_node *irn = plist_element_get_value(el);
423 rss_irn_t *rirn = get_rss_irn(rss, irn);
425 plist_element_t *k_el;
427 /* dump selected saturating values in yellow */
428 if (max_ac && nodeset_find(max_ac, irn))
432 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F c%d\" %s }\n", get_irn_node_nr(irn), irn, rirn->chain->nr, c1);
434 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(irn), irn, c1);
437 foreach_plist(rirn->pkiller_list, k_el) {
438 ir_node *pkiller = plist_element_get_value(k_el);
439 rss_irn_t *pk_rirn = get_rss_irn(rss, pkiller);
442 if (max_ac && nodeset_find(max_ac, pkiller))
445 if (! pk_rirn->dumped) {
447 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F c%d\" %s }\n", get_irn_node_nr(pkiller), pkiller, pk_rirn->chain->nr, c2);
449 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(pkiller), pkiller, c2);
452 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
453 get_irn_node_nr(pkiller), get_irn_node_nr(irn));
460 /* Dumps the disjoint value DAG (DVG) as vcg. */
461 static void debug_vcg_dump_dvg(rss_t *rss, dvg_t *dvg) {
462 static const char suffix[] = "-RSS-DVG.vcg";
468 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
469 f = fopen(file_name, "w");
471 ir_fprintf(f, "graph: { title: \"disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
472 fprintf(f, "display_edge_labels: no\n");
473 fprintf(f, "layoutalgorithm: mindepth\n");
474 fprintf(f, "manhattan_edges: yes\n\n");
477 foreach_nodeset(dvg->nodes, irn) {
478 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
482 foreach_pset(dvg->edges, edge) {
483 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
484 get_irn_node_nr(edge->src), get_irn_node_nr(edge->tgt));
492 /* Dumps the PKG(DVG). */
493 static void debug_vcg_dump_dvg_pkiller(rss_t *rss, dvg_t *dvg) {
494 static const char suffix[] = "-RSS-DVG-PKG.vcg";
499 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
500 f = fopen(file_name, "w");
502 ir_fprintf(f, "graph: { title: \"PKG of disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
503 fprintf(f, "display_edge_labels: no\n");
504 fprintf(f, "layoutalgorithm: mindepth\n");
505 fprintf(f, "manhattan_edges: yes\n\n");
508 foreach_nodeset(dvg->nodes, irn) {
509 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
513 foreach_nodeset(dvg->nodes, irn) {
514 rss_irn_t *node = get_rss_irn(rss, irn);
517 foreach_plist(node->dvg_pkiller_list, el) {
518 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
519 get_irn_node_nr(plist_element_get_value(el)), get_irn_node_nr(irn));
529 * In case there is no rss information for irn, initialize it.
531 static void *init_rss_irn(phase_t *ph, ir_node *irn, void *old) {
532 rss_irn_t *res = old ? old : phase_alloc(ph, sizeof(res[0]));
534 res->descendant_list = plist_obstack_new(phase_obst(ph));
535 res->descendants = NULL;
537 res->consumer_list = plist_obstack_new(phase_obst(ph));
538 res->consumer = NULL;
540 res->pkiller_list = plist_obstack_new(phase_obst(ph));
542 res->parent_list = plist_obstack_new(phase_obst(ph));
560 * Collect all nodes data dependent on current node.
562 static void collect_descendants(rss_t *rss, rss_irn_t *rirn, ir_node *irn, int *got_sink, unsigned cur_desc_walk) {
563 const ir_edge_t *edge;
564 rss_irn_t *cur_node = get_rss_irn(rss, irn);
565 ir_node *block = rss->block;
566 ir_edge_kind_t ekind[2] = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
569 if (cur_node->desc_walk >= cur_desc_walk)
571 cur_node->desc_walk = cur_desc_walk;
573 /* Stop at Barriers and Keeps */
574 if (be_is_Barrier(irn) || be_is_Keep(irn))
577 /* loop over normal and dependency edges */
578 for (i = 0; i < 2; ++i) {
579 foreach_out_edge_kind(irn, edge, ekind[i]) {
580 ir_node *user = get_edge_src_irn(edge);
582 /* skip ignore nodes as they do not really contribute to register pressure */
583 if (arch_irn_is(rss->arch_env, user, ignore))
587 if (get_irn_mode(user) == mode_X && ! *got_sink)
590 if (arch_get_irn_reg_class(rss->arch_env, user, -1) == rss->cls)
591 collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
594 /* check if user lives in block and is not a control flow node */
595 if (get_nodes_block(user) == block) {
596 if (! plist_has_value(rirn->descendant_list, user)) {
597 plist_insert_back(rirn->descendant_list, user);
598 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", user));
600 collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
602 else if (! *got_sink) {
604 /* user lives out of block: add sink as descendant if not already done */
605 plist_insert_back(rirn->descendant_list, _sink);
607 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", _sink));
615 * Handles a single consumer.
617 static void collect_single_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *consumer, int *got_sink) {
618 ir_node *block = rss->block;
620 assert(! is_Proj(consumer) && "Cannot handle Projs");
622 if (get_nodes_block(consumer) == block) {
623 if (! arch_irn_is(rss->arch_env, consumer, ignore)) {
624 plist_insert_back(rss_irn->consumer_list, consumer);
625 DBG((rss->dbg, LEVEL_2, "\t\tconsumer %+F\n", consumer));
629 rss_irn->live_out = 1;
630 DBG((rss->dbg, LEVEL_2, "\t\tlive out %+F", consumer));
632 plist_insert_back(rss_irn->consumer_list, _sink);
634 DB((rss->dbg, LEVEL_2, ", %+F added instead", _sink));
636 DB((rss->dbg, LEVEL_2, "\n"));
641 * Collect all nodes consuming the value(s) produced by current node.
643 static void collect_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *irn, int *got_sink) {
644 const ir_edge_t *edge;
646 ir_edge_kind_t ekind[2] = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
647 rss_irn_t *cur_node = get_rss_irn(rss, irn);
649 if (cur_node->havecons)
651 cur_node->havecons = 1;
653 for (i = 0; i < 2; ++i) {
654 foreach_out_edge_kind(irn, edge, ekind[i]) {
655 ir_node *consumer = get_edge_src_irn(edge);
657 if (is_Proj(consumer)) {
658 if (arch_get_irn_reg_class(rss->arch_env, consumer, -1) == rss->cls)
659 collect_consumer(rss, rss_irn, consumer, got_sink);
662 collect_single_consumer(rss, rss_irn, consumer, got_sink);
668 * Collects all consumer and descendant of a irn.
670 static void collect_node_info(rss_t *rss, ir_node *irn) {
671 static unsigned cur_desc_walk = 0;
672 rss_irn_t *rss_irn = get_rss_irn(rss, irn);
675 assert(! is_Proj(irn) && "Cannot handle Projs.");
677 /* check if node info is already available */
678 if (rss_irn->handled)
679 phase_reinit_single_irn_data(&rss->ph, irn);
681 DBG((rss->dbg, LEVEL_1, "\tcomputing consumers of %+F:\n", irn));
683 /* collect all consumer */
685 collect_consumer(rss, rss_irn, irn, &got_sink);
687 /* build sorted consumer array */
688 rss_irn->consumer = build_sorted_array_from_list(rss_irn->consumer_list, phase_obst(&rss->ph));
690 DBG((rss->dbg, LEVEL_1, "\tcompute descendants of %+F:\n", irn));
692 /* collect descendants */
694 collect_descendants(rss, rss_irn, irn, &got_sink, ++cur_desc_walk);
696 /* build sorted descendant array */
697 rss_irn->descendants = build_sorted_array_from_list(rss_irn->descendant_list, phase_obst(&rss->ph));
699 rss_irn->handled = 1;
703 * Checks if v is a potential killer of u.
704 * v is in pkill(u) iff descendants(v) cut consumer(u) is v
706 * @param rss The rss object
707 * @param v The node to check for killer
708 * @param u The potentially killed value
709 * @return 1 if v is in pkill(u), 0 otherwise
711 static int is_potential_killer(rss_t *rss, rss_irn_t *v, rss_irn_t *u) {
716 assert(is_Sink(v->irn) || ((plist_count(v->descendant_list) > 0 && v->descendants) || 1));
717 assert(is_Sink(u->irn) || ((plist_count(u->consumer_list) > 0 && u->consumer) || 1));
719 /* as we loop over the list: loop over the shorter one */
720 if (plist_count(v->descendant_list) > plist_count(u->consumer_list)) {
721 list = u->consumer_list;
722 arr = v->descendants;
725 list = v->descendant_list;
729 /* for each list element: try to find element in array */
730 foreach_plist(list, el) {
731 ir_node *irn = plist_element_get_value(el);
732 ir_node *match = BSEARCH_IRN_ARR(irn, arr);
734 if (match && match != irn)
742 * Update descendants, consumer and pkiller list for the given irn.
744 static void update_node_info(rss_t *rss, ir_node *irn, ir_node *pk_irn) {
745 rss_irn_t *node = get_rss_irn(rss, irn);
746 rss_irn_t *pkiller = get_rss_irn(rss, pk_irn);
748 /* add consumer and rebuild the consumer array */
749 if (! plist_has_value(node->consumer_list, pk_irn)) {
750 plist_insert_back(node->consumer_list, pk_irn);
751 node->consumer = build_sorted_array_from_list(node->consumer_list, phase_obst(&rss->ph));
754 /* add pk_irn as descendant, add it's descendants to irn's and rebuild array */
755 if (! plist_has_value(node->descendant_list, pk_irn)) {
758 plist_insert_back(node->descendant_list, pk_irn);
760 foreach_plist(pkiller->descendant_list, el) {
761 ir_node *desc = plist_element_get_value(el);
763 if (! plist_has_value(node->descendant_list, desc))
764 plist_insert_back(node->descendant_list, desc);
767 node->descendants = build_sorted_array_from_list(node->descendant_list, phase_obst(&rss->ph));
773 * Compute the potential killing set PK.
775 static void compute_pkill_set(rss_t *rss) {
776 plist_element_t *u_el, *v_el;
778 foreach_plist(rss->nodes, u_el) {
779 ir_node *u_irn = plist_element_get_value(u_el);
780 rss_irn_t *u = get_rss_irn(rss, u_irn);
782 DBG((rss->dbg, LEVEL_1, "\tcomputing potential killers of %+F:\n", u_irn));
784 /* check each consumer if it is a potential killer */
785 foreach_plist(u->consumer_list, v_el) {
786 ir_node *v_irn = plist_element_get_value(v_el);
787 rss_irn_t *v = get_rss_irn(rss, v_irn);
789 /* check, if v is a potential killer of u */
790 if (is_potential_killer(rss, v, u)) {
791 if (! plist_has_value(u->pkiller_list, v_irn))
792 plist_insert_back(u->pkiller_list, v_irn);
794 DBG((rss->dbg, LEVEL_2, "\t\tpotential killer %+F\n", v_irn));
801 if (rss->opts->dump_flags & RSS_DUMP_PKG)
802 debug_vcg_dump_pkg(rss, NULL, 0);
806 * Build set of killing edges (from values to their potential killers)
808 static void build_kill_edges(rss_t *rss, pset *epk) {
809 plist_element_t *el, *k_el;
811 foreach_plist(rss->nodes, el) {
812 ir_node *irn = plist_element_get_value(el);
813 rss_irn_t *rirn = get_rss_irn(rss, irn);
815 DBG((rss->dbg, LEVEL_3, "\t\tbuilding kill edges for %+F:\n", irn));
817 foreach_plist(rirn->pkiller_list, k_el) {
818 ir_node *pkiller = plist_element_get_value(k_el);
819 rss_edge_t *ke = obstack_alloc(phase_obst(&rss->ph), sizeof(*ke));
825 DBG((rss->dbg, LEVEL_3, "\t\t\ttarget %+F\n", pkiller));
827 pset_insert(epk, ke, HASH_RSS_EDGE(ke));
832 /* print the given cbc for debugging purpose */
833 static void debug_print_cbc(firm_dbg_module_t *mod, cbc_t *cbc) {
837 DBG((mod, LEVEL_3, "\t\tS = set of parents:\n"));
838 foreach_nodeset(cbc->parents, n) {
839 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
841 DBG((mod, LEVEL_3, "\t\tT = set of children:\n"));
842 foreach_nodeset(cbc->children, n) {
843 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
845 DBG((mod, LEVEL_3, "\t\tE = Edges from producers to consumers\n"));
846 foreach_pset(cbc->kill_edges, ke) {
847 DBG((mod, LEVEL_3, "\t\t\t%+F -> %+F\n", ke->src, ke->tgt));
852 * Construct the bipartite decomposition.
853 * Sid-Ahmed-Ali Touati, Phd Thesis
854 * Register Pressure in Instruction Level Parallelism, p. 71
856 static void compute_bipartite_decomposition(rss_t *rss) {
857 pset *epk = new_pset(cmp_rss_edges, 10);
862 DBG((rss->dbg, LEVEL_1, "\tcomputing bipartite decomposition:\n"));
864 build_kill_edges(rss, epk);
866 foreach_plist(rss->nodes, el) {
867 ir_node *u_irn = plist_element_get_value(el);
868 rss_irn_t *u = get_rss_irn(rss, u_irn);
873 plist_element_t *el2;
875 rss_edge_t *kedge_root = NULL;
876 ir_node *t_irn, *s_irn;
878 if (u->visited || u_irn == _sink)
881 DBG((rss->dbg, LEVEL_2, "\t\t%+F choosen:\n", u_irn));
883 cbc = obstack_alloc(phase_obst(&rss->ph), sizeof(*cbc));
886 /* initialize S_cb */
887 cbc->parents = new_nodeset(5);
888 nodeset_insert(cbc->parents, u_irn);
889 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (init)\n", u_irn));
892 cbc->kill_edges = new_pset(cmp_rss_edges, 5);
894 /* each parent gets killed by at least one value from children */
896 /* T_cb = PK_successors(u) */
897 cbc->children = new_nodeset(5);
898 foreach_plist(u->pkiller_list, el2) {
899 nodeset_insert(cbc->children, plist_element_get_value(el2));
900 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (init)\n", plist_element_get_value(el2)));
904 Now: insert the parents of all children into the parent set
905 and insert the children of all parents into the children set
906 until the sets don't change any more
908 while (p_change || c_change) {
909 p_change = c_change = 0;
911 /* accumulate parents */
912 foreach_nodeset(cbc->children, t_irn) {
913 foreach_pset(epk, k_edge) {
914 ir_node *val = k_edge->src;
916 if (k_edge->tgt == t_irn && ! nodeset_find(cbc->parents, val)) {
917 nodeset_insert(cbc->parents, val);
919 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents\n", val));
924 /* accumulate children */
925 foreach_nodeset(cbc->parents, s_irn) {
926 foreach_pset(epk, k_edge) {
927 ir_node *val = k_edge->tgt;
929 if (k_edge->src == s_irn && ! nodeset_find(cbc->children, val)) {
930 nodeset_insert(cbc->children, val);
932 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children\n", val));
938 /* mark all parent values as visited */
939 foreach_nodeset(cbc->parents, s_irn) {
940 rss_irn_t *s = get_rss_irn(rss, s_irn);
942 /* assure bipartite property */
943 if (nodeset_find(cbc->children, s_irn)) {
944 nodeset_remove(cbc->children, s_irn);
945 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F removed from to children\n", s_irn));
950 foreach_pset(epk, k_edge) {
951 if (nodeset_find(cbc->parents, k_edge->src) && nodeset_find(cbc->children, k_edge->tgt)) {
952 pset_insert(cbc->kill_edges, k_edge, HASH_RSS_EDGE(k_edge));
954 Link all k_edges which are about to be removed together.
955 Beware: pset_remove kills the iterator.
957 k_edge->next = kedge_root;
962 /* remove all linked k_edges */
963 for (; kedge_root; kedge_root = kedge_root->next) {
964 pset_remove(epk, kedge_root, HASH_RSS_EDGE(kedge_root));
967 /* add the connected bipartite component */
968 if (nodeset_count(cbc->parents) > 0 && nodeset_count(cbc->children) > 0 && pset_count(cbc->kill_edges) > 0)
969 pset_insert(rss->cbc_set, cbc, (unsigned)cbc->nr);
970 DBG((rss->dbg, LEVEL_2, "\tbipartite component %d inserted:\n", cbc->nr));
972 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2)
973 debug_print_cbc(rss->dbg, cbc);
977 if (rss->opts->dump_flags & RSS_DUMP_CBC)
978 debug_vcg_dump_bipartite(rss);
984 * Select the child with the maximum cost.
986 static child_t *select_child_max_cost(rss_t *rss, nodeset *x, nodeset *y, child_t *t, cbc_t *cbc) {
988 float max_cost = -1.0f;
990 DBG((rss->dbg, LEVEL_2, "\t\tcomputing children costs:\n"));
992 foreach_nodeset(cbc->children, child) {
993 rss_irn_t *r_child = get_rss_irn(rss, child);
994 int num_unkilled_parents = 0;
999 /* get the number of unkilled parents */
1000 foreach_pset(cbc->kill_edges, k_edge) {
1001 if (k_edge->tgt == child && nodeset_find(x, k_edge->src))
1002 ++num_unkilled_parents;
1005 cost = (float)num_unkilled_parents;
1007 num_descendants = plist_count(r_child->descendant_list) + nodeset_count(y);
1009 if (num_descendants > 0)
1010 cost /= (float)num_descendants;
1012 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F, #desc %d, cost %.3f\n", child, num_descendants, cost));
1014 if (cost > max_cost) {
1025 * Remove all parents from x which are killed by t_irn.
1027 static void remove_covered_parents(rss_t *rss, nodeset *x, ir_node *t_irn, cbc_t *cbc) {
1028 rss_irn_t *t = get_rss_irn(rss, t_irn);
1031 DBG((rss->dbg, LEVEL_2, "\t\tremoving parents covered by %+F:\n", t_irn));
1033 foreach_pset(cbc->kill_edges, k_edge) {
1034 if (k_edge->tgt == t_irn && nodeset_find(x, k_edge->src)) {
1035 nodeset_remove(x, k_edge->src);
1036 plist_insert_back(t->parent_list, k_edge->src);
1037 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", k_edge->src));
1042 static void update_cumulated_descendent_values(rss_t *rss, nodeset *y, ir_node *t_irn) {
1043 rss_irn_t *t = get_rss_irn(rss, t_irn);
1044 plist_element_t *el;
1046 DBG((rss->dbg, LEVEL_2, "\t\tupdating cumulated descendant value of %+F:\n", t_irn));
1048 foreach_plist(t->descendant_list, el) {
1049 nodeset_insert(y, plist_element_get_value(el));
1050 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", plist_element_get_value(el)));
1055 * Greedy-k: a heuristics for the MMA problem
1057 static void compute_killing_function(rss_t *rss) {
1059 struct obstack obst;
1061 obstack_init(&obst);
1063 rss->cbc_set = pset_new_ptr(5);
1064 compute_bipartite_decomposition(rss);
1066 /* for all bipartite components do: */
1067 foreach_pset(rss->cbc_set, cbc) {
1069 nodeset *x = new_nodeset(10);
1070 nodeset *y = new_nodeset(10);
1071 child_t **sks = NEW_ARR_F(child_t *, 20);
1076 DBG((rss->dbg, LEVEL_1, "\tcomputing SKS for cbc %d:\n", cbc->nr));
1077 DBG((rss->dbg, LEVEL_2, "\t\tinitializing parents X:\n"));
1079 /* X = S_cb (all parents are initially uncovered) */
1080 foreach_nodeset(cbc->parents, p) {
1081 nodeset_insert(x, p);
1082 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", p));
1085 /* while X not empty */
1086 while (nodeset_count(x) > 0) {
1087 child_t *t = obstack_alloc(&obst, sizeof(*t));
1088 memset(t, 0, sizeof(*t));
1090 t = select_child_max_cost(rss, x, y, t, cbc);
1092 if (cur_len >= cur_size) {
1093 ARR_EXTO(child_t *, sks, cur_size * 2);
1097 DBG((rss->dbg, LEVEL_2, "\t\tinsert child %+F (%.3f) into SKS at pos %d\n", t->irn, t->cost, cur_len));
1100 remove_covered_parents(rss, x, t->irn, cbc);
1101 update_cumulated_descendent_values(rss, y, t->irn);
1104 ARR_SHRINKLEN(sks, cur_len);
1106 /* sort SKS in increasing cost order */
1107 qsort(sks, cur_len, sizeof(sks[0]), cmp_child_costs);
1109 DBG((rss->dbg, LEVEL_2, "\tprocessing SKS for cbc %d:\n", cbc->nr));
1111 /* build killing function */
1112 for (i = cur_len - 1; i >= 0; --i) { /* loop over sks in decreasing cost order */
1113 child_t *t = sks[i];
1114 rss_irn_t *rt = get_rss_irn(rss, t->irn);
1115 plist_element_t *p_el;
1117 DBG((rss->dbg, LEVEL_3, "\t\t\tkiller %+F (%.3f):\n", t->irn, t->cost));
1119 /* kill all unkilled parents of t */
1120 foreach_plist(rt->parent_list, p_el) {
1121 ir_node *par = plist_element_get_value(p_el);
1122 rss_irn_t *rpar = get_rss_irn(rss, par);
1124 if (is_Sink(rpar->killer)) {
1125 rpar->killer = t->irn;
1126 DBG((rss->dbg, LEVEL_2, "\t\tkill %+F\n", rpar->irn));
1129 DBG((rss->dbg, LEVEL_3, "\t\t\tkeeping %+F as killer for %+F\n", rpar->killer, rpar->irn));
1139 if (rss->opts->dump_flags & RSS_DUMP_KILL)
1140 debug_vcg_dump_kill(rss);
1142 del_pset(rss->cbc_set);
1143 obstack_free(&obst, NULL);
1147 * Adds the edge src -> tgt to the dvg. Checks if reverse edge is already there (asserts).
1149 static INLINE void add_dvg_edge(rss_t *rss, dvg_t *dvg, ir_node *src, ir_node *tgt, int have_source) {
1150 rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1154 nodeset_insert(dvg->nodes, src);
1156 assert(nodeset_find(dvg->nodes, src) != NULL && "Wrong assumption");
1158 nodeset_insert(dvg->nodes, tgt);
1160 /* add an edge to our killer */
1161 dvg_edge->src = src;
1162 dvg_edge->tgt = tgt;
1163 dvg_edge->next = NULL;
1167 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1169 /* add the edge to the DVG */
1170 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", src, tgt));
1171 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1175 * Computes the disjoint value DAG (DVG).
1176 * BEWARE: It is not made explicitly clear in the Touati paper,
1177 * but the DVG is meant to be build from the KILLING DAG
1179 static void compute_dvg(rss_t *rss, dvg_t *dvg) {
1180 plist_element_t *el;
1182 DBG((rss->dbg, LEVEL_1, "\tcomputing DVG:\n"));
1184 foreach_plist(rss->nodes, el) {
1185 ir_node *u_irn = plist_element_get_value(el);
1186 rss_irn_t *u = get_rss_irn(rss, u_irn);
1187 rss_irn_t *u_killer = get_rss_irn(rss, u->killer);
1190 /* TODO: omit nodes only having sink as pkiller and killing no one */
1192 /* add an edge to killer */
1193 add_dvg_edge(rss, dvg, u_irn, u->killer, 0);
1195 if (is_Sink(u->killer))
1198 /* We add an edge to every killer, from where we could be reached. */
1199 for (i = ARR_LEN_SAFE(u_killer->descendants) - 1; i >= 0; --i) {
1200 add_dvg_edge(rss, dvg, u_irn, u_killer->descendants[i], 1);
1205 foreach_plist(rss->nodes, el2) {
1206 ir_node *v_irn = plist_element_get_value(el2);
1209 There is an edge (u, v) in the DVG iff v is a descendant of the killer(u).
1211 if (BSEARCH_IRN_ARR(v_irn, u_kill->descendants)) {
1212 rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1215 /* insert the user into the DVG and append it to the user list of u */
1216 nodeset_insert(dvg->nodes, v_irn);
1217 if (! plist_has_value(u->dvg_user_list, v_irn))
1218 plist_insert_back(u->dvg_user_list, v_irn);
1220 dvg_edge->src = u_irn;
1221 dvg_edge->tgt = v_irn;
1222 dvg_edge->next = NULL;
1227 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1229 /* add the edge to the DVG */
1230 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", u_irn, v_irn));
1231 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1237 if (rss->opts->dump_flags & RSS_DUMP_DVG)
1238 debug_vcg_dump_dvg(rss, dvg);
1242 * Updates the dvg structure when a serialization edge from src -> tgt is added.
1244 static void update_dvg(rss_t *rss, dvg_t *dvg, rss_irn_t *src, rss_irn_t *tgt) {
1247 add_dvg_edge(rss, dvg, tgt->irn, src->irn, 0);
1249 for (i = ARR_LEN_SAFE(src->descendants) - 1; i >= 0; --i) {
1250 add_dvg_edge(rss, dvg, tgt->irn, src->descendants[i], 1);
1256 * Accumulate all descendants for root into list.
1258 static void accumulate_dvg_descendant_values(rss_t *rss, rss_irn_t *root, plist_t *list) {
1259 if (plist_count(root->dvg_user_list) > 0) {
1260 plist_element_t *el;
1262 foreach_plist(root->dvg_user_list, el) {
1263 ir_node *v_irn = plist_element_get_value(el);
1264 rss_irn_t *v = get_rss_irn(rss, v_irn);
1266 /* add v as descendant */
1267 if (! plist_has_value(list, v_irn)) {
1268 plist_insert_back(list, v_irn);
1269 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG descendant %+F\n", v_irn));
1272 /* accumulate v's descendants */
1273 accumulate_dvg_descendant_values(rss, v, list);
1279 * Builds the list of potential killers for each node
1281 * Needs the descendant list for all user as sorted array.
1283 static void build_dvg_pkiller_list(rss_t *rss, dvg_t *dvg) {
1286 foreach_nodeset(dvg->nodes, irn) {
1287 rss_irn_t *node = get_rss_irn(rss, irn);
1288 plist_element_t *el, *el2;
1290 DBG((rss->dbg, DEBUG_DVG, "\t\tbuilding pkiller list for %+F\n", irn));
1292 /* check each user */
1293 foreach_plist(node->dvg_user_list, el) {
1294 ir_node *u_irn = plist_element_get_value(el);
1296 /* is the current user u_irn not a descendant of any other user -> pkiller */
1297 foreach_plist(node->dvg_user_list, el2) {
1298 ir_node *v_irn = plist_element_get_value(el2);
1299 rss_irn_t *v = get_rss_irn(rss, v_irn);
1302 ! BSEARCH_IRN_ARR(u_irn, v->dvg_desc) &&
1303 ! plist_has_value(node->dvg_pkiller_list, u_irn))
1305 plist_insert_back(node->dvg_pkiller_list, u_irn);
1306 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG pkiller %+F\n", u_irn));
1311 node->dvg_pkiller = build_sorted_array_from_list(node->dvg_pkiller_list, phase_obst(&rss->ph));
1315 if (firm_dbg_get_mask(rss->dbg) & DEBUG_DVG)
1316 debug_vcg_dump_dvg_pkiller(rss, dvg);
1323 * Compute the maximal antichain of the current DVG.
1324 * This is a reimplementation of the MAXIMAL_ANTI_CHAIN function
1325 * from the DDG library 1.1 (DAG.cpp).
1327 static nodeset *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration) {
1328 int n = nodeset_count(dvg->nodes);
1329 int *assignment = alloca(n * sizeof(assignment[0]));
1330 int *assignment_rev = alloca(n * sizeof(assignment_rev[0]));
1331 int *idx_map = alloca(n * sizeof(idx_map[0]));
1332 hungarian_problem_t *bp;
1333 nodeset *values, *temp;
1335 int i, j, cost, cur_chain, res;
1336 rss_edge_t *dvg_edge;
1338 #define MAP_IDX(irn) bsearch_for_index(get_irn_idx(irn), idx_map, n, 1)
1340 if (pset_count(dvg->edges) == 0)
1343 bp = hungarian_new(n, n, 1, HUNGARIAN_MATCH_NORMAL);
1346 At first, we build an index map for the nodes in the DVG,
1347 because we cannot use the irn idx therefore as the resulting
1348 bipartite data structure would have around 1.2GB.
1349 So we limit the size to the number of nodes we have in the DVG
1350 and build a sorted index map for their irn indices.
1353 foreach_nodeset(dvg->nodes, u_irn) {
1354 idx_map[i++] = get_irn_idx(u_irn);
1356 qsort(idx_map, n, sizeof(idx_map[0]), cmp_int);
1358 foreach_pset(dvg->edges, dvg_edge) {
1359 int idx_u = MAP_IDX(dvg_edge->src);
1360 int idx_v = MAP_IDX(dvg_edge->tgt);
1362 /* add the entry to the bipartite data structure */
1363 hungarian_add(bp, idx_u, idx_v, 1);
1364 DBG((rss->dbg, LEVEL_3, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1365 idx_u, dvg_edge->src, idx_v, dvg_edge->tgt));
1369 Add a bipartite entry for each pair of nodes (u, v), where exists a
1370 path in the DVG from u to v, ie. connect all descendants(v) to v.
1371 desc_dvg(v) = accumulated descendants of all z in dvg_user_list(v)
1373 foreach_nodeset(dvg->nodes, u_irn) {
1374 rss_irn_t *u = get_rss_irn(rss, u_irn);
1375 int idx_u_irn = MAP_IDX(u_irn);
1377 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG descendants of %+F:\n", u_irn));
1379 //plist_clear(u->dvg_desc_list);
1380 //accumulate_dvg_descendant_values(rss, u, u->dvg_desc_list);
1383 FIXME: The array is build on the phase obstack and we cannot free the data.
1384 So the array get as many times allocated as this function is called.
1387 /* build the sorted array for faster searches */
1388 //u->dvg_desc = build_sorted_array_from_list(u->dvg_desc_list, phase_obst(&rss->ph));
1390 DBG((rss->dbg, DEBUG_MAX_AC, "\t\tadding bipartite entries of %+F:\n", u_irn));
1392 /* add the bipartite entries for all descendants */
1393 for (i = ARR_LEN_SAFE(u->dvg_desc) - 1; i >= 0; --i) {
1394 rss_irn_t *desc = get_rss_irn(rss, u->dvg_desc[i]);
1395 int idx_desc = MAP_IDX(u->dvg_desc[i]);
1397 /* add the entry to the bipartite data structure */
1398 hungarian_add(bp, idx_u_irn, idx_desc, 1);
1399 DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1400 idx_u_irn, u_irn, idx_desc, u->dvg_desc[i]));
1407 /* We want maximum cardinality matching */
1408 hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);
1411 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG pkiller:\n"));
1412 /* beware: the following function needs the dvg_desc array */
1413 build_dvg_pkiller_list(rss, dvg);
1416 DBG((rss->dbg, LEVEL_2, "\t\tcomputing bipartite matching\n"));
1418 The maximum cardinality bipartite matching gives us the minimal
1419 chain partition, which corresponds to the maximum anti chains.
1421 res = hungarian_solve(bp, assignment, &cost, 1);
1422 assert(res == 0 && "Bipartite matching failed!");
1425 memset(assignment_rev, -1, n * sizeof(assignment_rev[0]));
1427 /* build the reverse assignment, ie. foreach i -> j, add j -> i */
1428 for (i = 0; i < n; ++i) {
1429 if (assignment[i] >= 0) {
1430 int j = assignment[i];
1431 assignment_rev[j] = i;
1435 DBG((rss->dbg, LEVEL_2, "\t\tgot assignment with cost %d\n", cost));
1436 DBG((rss->dbg, LEVEL_3, "\t\t\tassignment --- reverse assignment\n", cost));
1437 for (i = 0; i < n; ++i) {
1438 DBG((rss->dbg, LEVEL_3, "\t\t\t%3d -> %3d %3d -> %3d\n", i, assignment[i], i, assignment_rev[i]));
1441 values = new_nodeset(10);
1443 /* Construction of the minimal chain partition */
1444 for (j = 0; j < n; ++j) {
1445 /* check nodes, which did not occur as target */
1446 if (assignment_rev[j] == -1) {
1447 int xj = idx_map[j];
1448 ir_node *xj_irn = get_idx_irn(rss->irg, xj);
1449 rss_irn_t *xj_rss = get_rss_irn(rss, xj_irn);
1450 chain_t *c = obstack_alloc(phase_obst(&rss->ph), sizeof(*c));
1453 /* there was no source for j -> we have a source of a new chain */
1454 nodeset_insert(values, xj_irn);
1456 c->elements = plist_obstack_new(phase_obst(&rss->ph));
1457 c->nr = cur_chain++;
1458 plist_insert_back(c->elements, xj_irn);
1462 DBG((rss->dbg, LEVEL_2, "\t\tstarting chain %d:\n", c->nr));
1463 DBG((rss->dbg, LEVEL_2, "\t\t\t%+F (%d)", xj_irn, j));
1465 /* follow chain, having j as source */
1467 while (assignment[source] >= 0) {
1468 int target = assignment[source];
1469 int irn_idx = idx_map[target];
1470 ir_node *irn = get_idx_irn(rss->irg, irn_idx);
1471 rss_irn_t *node = get_rss_irn(rss, irn);
1473 plist_insert_back(c->elements, irn);
1476 DB((rss->dbg, LEVEL_2, " -> %+F (%d)", irn, target));
1478 /* new source = last target */
1482 DB((rss->dbg, LEVEL_2, "\n"));
1487 Computing the maximal antichain: Select an element from each
1488 chain such, such it is parallel with the others.
1490 DBG((rss->dbg, LEVEL_1, "\t\tcomputing set of saturation values (MAX AC)\n"));
1491 DBG((rss->dbg, LEVEL_3, "\t\tstarting with:\n"));
1494 if (firm_dbg_get_mask(rss->dbg) & LEVEL_3)
1495 dump_nodeset(values, "\t\t\t");
1501 We need an explicit array for the values as
1502 we cannot iterate multiple times over the same
1503 set at the same time. :-(((((
1505 int n = nodeset_count(values);
1507 ir_node **val_arr = NEW_ARR_F(ir_node *, n);
1509 foreach_nodeset(values, u_irn)
1510 val_arr[i++] = u_irn;
1515 temp = new_nodeset(10);
1517 /* Select all nodes from current value set, having another node in the set as descendant. */
1518 for (i = 0; i < n; ++i) {
1519 rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1521 for (j = 0; j < n; ++j) {
1525 key.src = val_arr[i];
1526 key.tgt = val_arr[j];
1528 if (pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1529 /* v[j] is descendant of u -> remove u and break */
1530 nodeset_insert(temp, u->irn);
1531 nodeset_remove(values, u->irn);
1533 DBG((rss->dbg, LEVEL_3, "\t\t\tremoving %+F from values, adding it to temp\n", u->irn));
1541 /* Try to insert the chain predecessor of all selected u's */
1542 foreach_nodeset(temp, u_irn) {
1543 rss_irn_t *u = get_rss_irn(rss, u_irn);
1544 chain_t *c = u->chain;
1545 plist_element_t *el = plist_find_value(c->elements, u_irn);
1547 assert(el && "Missing element in chain!");
1549 /* If u has predecessor in chain: insert the predecessor */
1550 if (el == plist_element_get_prev(el)) {
1551 nodeset_insert(values, plist_element_get_value(el));
1552 DBG((rss->dbg, LEVEL_3, "\t\t\tadding %+F to values\n", plist_element_get_value(el)));
1558 } while (nodeset_count(temp) > 0);
1560 DBG((rss->dbg, LEVEL_2, "\t\tfinal set:\n"));
1562 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2) {
1563 dump_nodeset(values, "\t\t\t");
1567 if (rss->opts->dump_flags & RSS_DUMP_MAXAC)
1568 debug_vcg_dump_pkg(rss, values, iteration);
1578 * Computes the best serialization between two nodes of sat_vals.
1580 static serialization_t *compute_best_admissible_serialization(rss_t *rss, nodeset *sat_vals, serialization_t *ser, int num_regs) {
1581 int n = nodeset_count(sat_vals);
1582 int n_idx = ARR_LEN_SAFE(rss->idx_map);
1584 ir_node **val_arr = alloca(n * sizeof(val_arr[0]));
1585 bitset_t *bs_sv = bitset_alloca(n_idx);
1586 bitset_t *bs_vdesc = bitset_alloca(n_idx);
1587 bitset_t *bs_tmp = bitset_alloca(n_idx);
1588 bitset_t *bs_ukilldesc = bitset_alloca(n_idx);
1589 int best_benefit = INT_MAX;
1590 int best_omega2 = INT_MAX;
1591 int best_benefit_omega20 = INT_MAX;
1595 rss_edge_t min_benefit_edge;
1596 rss_edge_t min_omega20_edge;
1597 rss_irn_t *ser_u_omega1, *ser_v_omega1;
1598 rss_irn_t *ser_u_omega20, *ser_v_omega20;
1600 DBG((rss->dbg, LEVEL_1, "\tcomputing admissible serializations:\n"));
1603 We need an explicit array for the values as
1604 we cannot iterate multiple times over the same
1605 set at the same time. :-(((((
1608 foreach_nodeset(sat_vals, irn) {
1610 bitset_set(bs_sv, BLOCK_IDX_MAP(rss, irn));
1614 We build all admissible serializations and remember the best found so far.
1617 if v in pkiller(u): add edge from v to all other pkiller(u)
1618 else: for all vv in pkiller(u): add edge from v to vv if there exists no path from vv to v
1622 A node is unserializable if:
1623 - it has only one killer and this one is Sink
1624 - it kills no other values
1625 In this case there is no serialization which could
1626 reduce the registerpressure
1628 #define IS_UNSERIALIZABLE_NODE(rss_node) \
1631 (plist_count(rss_node->pkiller_list) == 1) && \
1632 is_Sink(rss_node->killer) && \
1633 (rss_node->kill_count == 0) \
1635 be_is_Barrier(rss_node->irn) || \
1636 be_is_Keep(rss_node->irn) \
1639 /* for all u in sat_vals */
1640 for (i = 0; i < n; ++i) {
1641 rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1642 plist_element_t *el;
1644 /* ignore nodes where serialization does not help */
1645 if (IS_UNSERIALIZABLE_NODE(u)) {
1646 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", u->irn));
1650 /* accumulate all descendants of all pkiller(u) */
1651 bitset_clear_all(bs_ukilldesc);
1652 foreach_plist(u->pkiller_list, el) {
1653 ir_node *irn = plist_element_get_value(el);
1654 rss_irn_t *node = get_rss_irn(rss, irn);
1657 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, irn));
1661 for (k = ARR_LEN_SAFE(node->descendants) - 1; k >= 0; --k) {
1662 if (! is_Sink(node->descendants[k]))
1663 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, node->descendants[k]));
1667 /* for all v in sat_vals */
1668 for (j = 0; j < n; ++j) {
1669 ir_node *v_irn = val_arr[j];
1670 rss_irn_t *v = get_rss_irn(rss, v_irn);
1671 unsigned v_height = get_irn_height(rss->h, v_irn);
1672 int omega1, omega2, is_pkiller;
1674 /* v cannot be serialized with itself
1675 * ignore nodes where serialization does not help */
1676 if (i == j || IS_UNSERIALIZABLE_NODE(v)) {
1678 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", v->irn));
1682 /* get descendants of v */
1683 bitset_clear_all(bs_vdesc);
1684 bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v_irn));
1685 for (k = ARR_LEN_SAFE(v->descendants) - 1; k >= 0; --k) {
1686 if (! is_Sink(v->descendants[k]))
1687 bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v->descendants[k]));
1690 /* if v is in pkiller(u) */
1691 is_pkiller = plist_has_value(u->pkiller_list, v_irn);
1693 /* for all vv in pkiller(u) */
1694 foreach_plist(u->pkiller_list, el) {
1695 ir_node *vv_irn = plist_element_get_value(el);
1698 if (is_Sink(vv_irn) || is_cfop(vv_irn))
1702 add_edge = vv_irn != v_irn && skip_Proj(vv_irn) != skip_Proj(v_irn);
1704 add_edge = ! heights_reachable_in_block(rss->h, skip_Proj(vv_irn), skip_Proj(v_irn));
1707 As we add an edge from vv -> v, we have to make sure,
1708 that there exists no path from v to vv.
1712 int vv_height = get_irn_height(rss->h, vv_irn);
1713 int mu1, mu2, critical_path_cost;
1716 mu1 = | descendants(v) cut sat_vals |
1717 the number of saturating values which cannot
1718 be simultaneously alive with u
1720 bitset_copy(bs_tmp, bs_vdesc);
1721 mu1 = bitset_popcnt(bitset_and(bs_tmp, bs_sv));
1724 mu2 = | accum_desc_all_pkiller(u) without descendants(v) |
1727 bitset_copy(bs_tmp, bs_ukilldesc);
1728 mu2 = bitset_popcnt(bitset_andnot(bs_tmp, bs_vdesc));
1734 /* omega1 = mu1 - mu2 */
1740 /* omega2 = increase of critical path */
1741 critical_path_cost =
1742 v_height /* longest path from v to sink */
1743 + rss->max_height - vv_height /* longest path from source to vv */
1747 If critical_path_cost > max_height -> the new edge
1748 would increase the longest critical path by the difference.
1750 omega2 = critical_path_cost > rss->max_height ? critical_path_cost - rss->max_height : 0;
1752 /* this keeps track of the edge with the best benefit */
1753 if (omega1 >= num_regs - n && omega1 < best_benefit) {
1754 min_benefit_edge.src = v_irn;
1755 min_benefit_edge.tgt = vv_irn;
1760 best_benefit = omega1;
1761 ser->new_killer = is_pkiller;
1764 /* this keeps track of the edge with the best omega1 costs where omega2 == 0 */
1765 if (omega2 == 0 && omega1 >= num_regs - n && omega1 < best_benefit_omega20) {
1766 min_omega20_edge.src = v_irn;
1767 min_omega20_edge.tgt = vv_irn;
1772 best_benefit_omega20 = omega1;
1773 ser->new_killer = is_pkiller;
1776 best_omega2 = MIN(best_omega2, omega2);
1778 DBG((rss->dbg, LEVEL_2, "\t\tfound %+F -> %+F (w1 %d, w2 %d)\n",
1779 v_irn, vv_irn, omega1, omega2));
1781 } /* for all vv in pkiller(u) */
1782 } /* for all v in sat_vals */
1783 } /* for all u in sat_vals */
1788 if (best_omega2 == 0) {
1789 ser->u = ser_u_omega20;
1790 ser->v = ser_v_omega20;
1791 ser->edge->src = min_omega20_edge.src;
1792 ser->edge->tgt = min_omega20_edge.tgt;
1793 ser->omega1 = best_benefit_omega20;
1794 ser->omega2 = best_omega2;
1797 ser->u = ser_u_omega1;
1798 ser->v = ser_v_omega1;
1799 ser->edge->src = min_benefit_edge.src;
1800 ser->edge->tgt = min_benefit_edge.tgt;
1801 ser->omega1 = best_benefit;
1802 ser->omega2 = best_omega2;
1807 #undef IS_UNSERIALIZABLE_NODE
1811 * Perform the value serialization heuristic and add all
1812 * computed serialization edges as dependencies to the irg.
1814 static void perform_value_serialization_heuristic(rss_t *rss) {
1815 bitset_t *arch_nonign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1816 bitset_t *abi_ign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1817 int available_regs, iteration;
1820 pset *ser_set = new_pset(cmp_rss_edges, 20);
1822 /* available_regs = R = |arch_non_ignore_regs cut ~abi_ignore_regs| */
1823 arch_put_non_ignore_regs(rss->arch_env, rss->cls, arch_nonign_bs);
1824 be_abi_put_ignore_regs(rss->abi, rss->cls, abi_ign_bs);
1825 bitset_andnot(arch_nonign_bs, abi_ign_bs);
1826 available_regs = bitset_popcnt(arch_nonign_bs) - 1;
1828 DBG((rss->dbg, LEVEL_1, "\n\t#available regs: %d\n\n", available_regs));
1831 At first we need to compute the disjoint value DAG (DVG = {V, E_dv}).
1832 V = set of all nodes we are currently interested in
1833 E_dv = there is an edge from u to v iff v is a descendant of killer(u), forall u, v in V
1835 dvg.nodes = new_nodeset(plist_count(rss->nodes));
1836 dvg.edges = new_pset(cmp_rss_edges, plist_count(rss->nodes) * 5);
1837 compute_dvg(rss, &dvg);
1840 Then we perform the heuristic serialization algorithm
1841 on the DVG which gives us all necessary serialization
1844 DBG((rss->dbg, LEVEL_1, "\tcomputing maximal antichain:\n"));
1846 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
1847 while (sat_vals && (nodeset_count(sat_vals) > available_regs)) {
1848 serialization_t *ser, best_ser;
1849 rss_edge_t *edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*edge));
1850 ir_node *dep_src, *dep_tgt;
1852 best_ser.edge = edge;
1853 ser = compute_best_admissible_serialization(rss, sat_vals, &best_ser, available_regs);
1855 DBG((rss->dbg, LEVEL_1, "\tcurrent register saturation %d, target %d\n", nodeset_count(sat_vals), available_regs));
1858 DBG((rss->dbg, LEVEL_1, "\tno RS improving serialization found, breaking at iteration %d\n", iteration));
1862 /* Insert the serialization as dependency edge into the irg. */
1863 DBG((rss->dbg, LEVEL_1, "\tserializing %+F -> %+F with edge %+F -> %+F and cost %d, %d\n",
1864 ser->u->irn, ser->v->irn, ser->edge->src, ser->edge->tgt, ser->omega1, ser->omega2));
1866 if (pset_find(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge)))
1867 ir_printf("WARNING: serialization %+F -> %+F computed twice!\n", ser->edge->src, ser->edge->tgt);
1870 pset_insert(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge));
1872 /* update the dvg */
1873 update_dvg(rss, &dvg, ser->v, ser->u);
1874 update_dvg(rss, &dvg, get_rss_irn(rss, ser->edge->src), get_rss_irn(rss, ser->edge->tgt));
1875 del_nodeset(sat_vals);
1877 dep_src = skip_Proj(ser->edge->src);
1878 dep_tgt = ser->edge->tgt;
1879 add_irn_dep(dep_src, dep_tgt);
1881 /* Update descendants, consumer and pkillers of the target */
1882 update_node_info(rss, ser->edge->tgt, ser->edge->src);
1884 /* TODO: try to find a cheaper way for updating height information */
1885 rss->max_height = heights_recompute_block(rss->h, rss->block);
1887 /* Recompute the antichain for next serialization */
1888 DBG((rss->dbg, LEVEL_1, "\tre-computing maximal antichain:\n"));
1889 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
1892 del_nodeset(dvg.nodes);
1893 del_pset(dvg.edges);
1897 * Do initial calculations for a block.
1899 static void process_block(ir_node *block, void *env) {
1902 const ir_edge_t *edge;
1904 phase_init(&rss->ph, "rss block preprocessor", rss->irg, PHASE_DEFAULT_GROWTH, init_rss_irn);
1906 DBG((rss->dbg, LEVEL_1, "preprocessing block %+F\n", block));
1909 /* build an index map for all nodes in the current block */
1911 n = get_irn_n_edges(block);
1912 NEW_ARR_A(int *, rss->idx_map, n);
1913 foreach_out_edge(block, edge) {
1914 ir_node *irn = get_edge_src_irn(edge);
1915 rss->idx_map[i++] = get_irn_idx(irn);
1917 qsort(rss->idx_map, n, sizeof(rss->idx_map[0]), cmp_int);
1918 rss->max_height = heights_recompute_block(rss->h, block);
1920 /* loop over all register classes */
1921 for (i = arch_isa_get_n_reg_class(rss->arch_env->isa) - 1; i >= 0; --i) {
1922 const arch_register_class_t *cls = arch_isa_get_reg_class(rss->arch_env->isa, i);
1925 DBG((rss->dbg, LEVEL_1, "register class %s\n", arch_register_class_name(cls)));
1927 /* reset the list of interesting nodes */
1928 plist_clear(rss->nodes);
1929 plist_insert_back(rss->nodes, _sink);
1931 /* collect all nodes having a certain register class */
1932 foreach_out_edge(block, edge) {
1933 ir_node *irn = get_edge_src_irn(edge);
1937 - mode_T nodes (the projs are asked)
1938 - mode_X nodes (control flow nodes are always scheduled last)
1939 - Keeps (they are always immediately scheduled)
1940 - Phi (same as Keep)
1942 if (get_irn_mode(irn) == mode_T || be_is_Keep(irn) || is_Phi(irn))
1946 In case of a proj, we skip
1947 - Barrier (they are a Barrier :)
1949 - the Proj itself, as it's scheduled always with it's super node
1952 ir_node *pred = get_Proj_pred(irn);
1953 if (be_is_Barrier(pred) || is_Start(pred))
1957 if (! arch_irn_is(rss->arch_env, irn, ignore) && arch_get_irn_reg_class(rss->arch_env, irn, -1) == cls) {
1958 plist_insert_back(rss->nodes, skip_Proj(irn));
1960 /* calculate the descendants and consumer for each node in the block */
1961 collect_node_info(rss, skip_Proj(irn));
1965 /* compute the potential killing set PK(G) */
1966 compute_pkill_set(rss);
1968 /* compute the killing function k* */
1969 compute_killing_function(rss);
1972 Compute the heuristic value serialization and
1973 add the necessary dependencies to the irg.
1975 perform_value_serialization_heuristic(rss);
1978 phase_free(&rss->ph);
1983 * Register the options.
1985 void rss_register_options(lc_opt_entry_t *grp) {
1986 static int run_once = 0;
1987 lc_opt_entry_t *rss_grp;
1991 rss_grp = lc_opt_get_grp(grp, "rss");
1993 lc_opt_add_table(rss_grp, rss_option_table);
1996 #endif /* WITH_LIBCORE */
1999 * Preprocess the irg for scheduling.
2001 void rss_schedule_preparation(const be_irg_t *birg) {
2004 FIRM_DBG_REGISTER(rss.dbg, "firm.be.sched.rss");
2006 //firm_dbg_set_mask(rss.dbg, LEVEL_1 | LEVEL_2 | LEVEL_3);
2008 init_rss_special_nodes(birg->irg);
2010 rss.irg = birg->irg;
2011 rss.arch_env = birg->main_env->arch_env;
2012 rss.abi = birg->abi;
2013 rss.h = heights_new(birg->irg);
2014 rss.nodes = plist_new();
2015 rss.opts = &rss_options;
2016 irg_block_walk_graph(birg->irg, NULL, process_block, &rss);
2017 heights_free(rss.h);
2018 plist_free(rss.nodes);
2020 if (birg->main_env->options->dump_flags & DUMP_SCHED)
2021 be_dump(rss.irg, "-rss", dump_ir_block_graph);