2 * Implementation of a register saturating list scheduler
3 * as described in: Sid-Ahmed-Ali Touati
4 * Register Saturation in Superscalar and VLIW Codes
6 * @license This file protected by GPL - GNU GENERAL PUBLIC LICENSE.
7 * @author Christian Wuerdig
20 #include "irgraph_t.h"
22 #include "iredges_t.h"
24 #include "irphase_t.h"
30 #include "bipartite.h"
31 #include "hungarian.h"
39 #include "besched_t.h"
41 #include <libcore/lc_opts.h>
42 #include <libcore/lc_opts_enum.h>
45 #define ARR_LEN_SAFE(arr) ((arr) != NULL ? ARR_LEN((arr)) : 0)
47 #define HASH_RSS_EDGE(edge) ((get_irn_node_nr((edge)->src) << 16) | (get_irn_node_nr((edge)->tgt) & 0xFFFF))
48 #define BSEARCH_IRN_ARR(val, arr) \
49 bsearch(&(val), (arr), ARR_LEN_SAFE((arr)), sizeof((arr)[0]), cmp_irn_idx)
51 #define BLOCK_IDX_MAP(rss, irn) bsearch_for_index(get_irn_idx((irn)), (rss)->idx_map, ARR_LEN_SAFE((rss)->idx_map), 1)
54 typedef struct _rss_opts_t {
58 /* Represents a child with associated costs */
59 typedef struct _child {
64 /* We need edges for several purposes. */
65 typedef struct _rss_edge {
71 /* Represents a connected bipartite component. */
73 nodeset *parents; /**< = S a set of value producers */
74 nodeset *children; /**< = T a set of value consumers */
75 pset *kill_edges; /**< = E a set of edges (t in T, s in S) such as each s in S gets killed by at least one t in T */
76 int nr; /**< a deterministic index for set insertion (used as hash) */
79 /* Represents a disjoint value DAG. */
85 /* Represents a chain of nodes. */
86 typedef struct _chain {
87 plist_t *elements; /**< List of chain elements */
88 int nr; /**< a deterministic index for set insertion (used as hash) */
91 typedef struct _rss_irn {
92 plist_t *consumer_list; /**< List of consumers */
93 ir_node **consumer; /**< Sorted consumer array (needed for faster access) */
95 plist_t *parent_list; /**< List of parents */
96 plist_t *pkiller_list; /**< List of potential killers */
98 plist_t *descendant_list; /**< List of descendants */
99 ir_node **descendants; /**< Sorted descendant array (needed for faster access) */
101 ir_node *killer; /**< The selected unique killer */
102 ir_node *irn; /**< The corresponding firm node to this rss_irn */
104 chain_t *chain; /**< The chain, this node is associated to */
106 unsigned desc_walk; /**< visited flag for collecting descendants */
107 unsigned kill_count; /**< number of nodes killed by this one */
109 unsigned live_out : 1; /**< irn has consumers outside of it's block */
110 unsigned visited : 1; /**< visited flag for bipartite decomposition */
111 unsigned havecons : 1; /**< visited flag collect consumer */
112 unsigned handled : 1; /**< flag indicating whether or not the list structures have been build */
113 unsigned dumped : 1; /**< flag indication whether or not this node was dumped */
116 /* Represents a serialization edge with associated costs. */
117 typedef struct _serialization {
118 rss_irn_t *u; /* the top node */
119 rss_irn_t *v; /* the node about to be serialized after u */
120 rss_edge_t *edge; /* the edge selected for the serialization */
121 int omega1; /* estimated: available regs - RS reduction */
122 int omega2; /* increase in critical path length */
126 typedef struct _rss {
127 phase_t ph; /**< Phase to hold some data */
128 heights_t *h; /**< The current height object */
129 ir_graph *irg; /**< The irg to preprocess */
130 plist_t *nodes; /**< The list of interesting nodes */
131 const arch_env_t *arch_env; /**< The architecture environment */
132 be_abi_irg_t *abi; /**< The abi for this irg */
133 pset *cbc_set; /**< A set of connected bipartite components */
134 ir_node *block; /**< The current block in progress. */
135 int *idx_map; /**< Mapping irn indices to per block indices */
136 unsigned max_height; /**< maximum height in the current block */
137 rss_opts_t *opts; /**< The options */
138 be_lv_t *liveness; /**< The liveness information for this irg */
139 pset *live_block; /**< Values alive at end of block */
140 const arch_register_class_t *cls; /**< The current register class */
141 DEBUG_ONLY(firm_dbg_module_t *dbg);
144 #define get_rss_irn(rss, irn) (phase_get_or_set_irn_data(&rss->ph, irn))
147 * We need some special nodes:
148 * a source and a sink for all live-in and live-out values of a block
156 /** The opcode of the rss_Source node once allocated. */
157 static ir_op *op_rss_Source;
158 /** The opcode of the rss_Sink node once allocated. */
159 static ir_op *op_rss_Sink;
161 /** The rss_Source node of the current graph. */
162 static ir_node *_source = NULL;
163 /** The rss_Sink node of the current graph. */
164 static ir_node *_sink = NULL;
166 #define is_Source(irn) ((irn) == _source)
167 #define is_Sink(irn) ((irn) == _sink)
171 RSS_DUMP_CBC = 1 << 0,
172 RSS_DUMP_PKG = 1 << 1,
173 RSS_DUMP_KILL = 1 << 2,
174 RSS_DUMP_DVG = 1 << 3,
175 RSS_DUMP_MAXAC = 1 << 4,
176 RSS_DUMP_ALL = (RSS_DUMP_MAXAC << 1) - 1,
179 static rss_opts_t rss_options = {
183 static const lc_opt_enum_int_items_t dump_items[] = {
184 { "none", RSS_DUMP_NONE },
185 { "cbc", RSS_DUMP_CBC },
186 { "pkg", RSS_DUMP_PKG },
187 { "kill", RSS_DUMP_KILL },
188 { "dvg", RSS_DUMP_DVG },
189 { "maxac", RSS_DUMP_MAXAC },
190 { "all", RSS_DUMP_ALL },
194 static lc_opt_enum_int_var_t dump_var = {
195 &rss_options.dump_flags, dump_items
198 static const lc_opt_table_entry_t rss_option_table[] = {
199 LC_OPT_ENT_ENUM_MASK("dump", "dump phases", &dump_var),
203 /******************************************************************************
205 * | | | | / _| | | (_)
206 * | |__ ___| |_ __ ___ _ __ | |_ _ _ _ __ ___| |_ _ ___ _ __ ___
207 * | '_ \ / _ \ | '_ \ / _ \ '__| | _| | | | '_ \ / __| __| |/ _ \| '_ \/ __|
208 * | | | | __/ | |_) | __/ | | | | |_| | | | | (__| |_| | (_) | | | \__ \
209 * |_| |_|\___|_| .__/ \___|_| |_| \__,_|_| |_|\___|\__|_|\___/|_| |_|___/
212 ******************************************************************************/
215 * Acquire opcodes if needed and create source and sink nodes.
217 static void init_rss_special_nodes(ir_graph *irg) {
220 if (op_rss_Source == NULL) {
221 int iro_rss_base = get_next_ir_opcodes(iro_rss_last);
222 op_rss_Source = new_ir_op(iro_rss_base + iro_rss_Source, "rss_Source", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
223 op_rss_Sink = new_ir_op(iro_rss_base + iro_rss_Sink, "rss_Sink", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
225 block = get_irg_start_block(irg);
226 _source = new_ir_node(NULL, irg, block, op_rss_Source, mode_ANY, 0, NULL);
227 _sink = new_ir_node(NULL, irg, block, op_rss_Sink, mode_ANY, 0, NULL);
230 static int cmp_int(const void *a, const void *b) {
234 return QSORT_CMP(*i1, *i2);
237 static int cmp_child_costs(const void *a, const void *b) {
238 const child_t *c1 = a;
239 const child_t *c2 = b;
241 return QSORT_CMP(c1->cost, c2->cost);
244 static int cmp_irn_idx(const void *a, const void *b) {
245 const ir_node *n1 = *(ir_node **)a;
246 const ir_node *n2 = *(ir_node **)b;
248 return QSORT_CMP(get_irn_idx(n1), get_irn_idx(n2));
251 static int cmp_rss_edges(const void *a, const void *b) {
252 const rss_edge_t *e1 = a;
253 const rss_edge_t *e2 = b;
255 return (e1->src != e2->src) || (e1->tgt != e2->tgt);
258 static int bsearch_for_index(int key, int *arr, size_t len, int force) {
262 while (right >= left) {
263 int idx = (left + right) / 2;
267 else if (key > arr[idx])
274 assert(0 && "Something is wrong, key not found.");
278 static ir_node **build_sorted_array_from_list(plist_t *irn_list, struct obstack *obst) {
281 int len = plist_count(irn_list);
282 ir_node **arr = NEW_ARR_D(ir_node *, obst, len);
284 /* copy the list into the array */
285 foreach_plist(irn_list, el) {
286 arr[i++] = plist_element_get_value(el);
289 /* sort the array by node index */
290 qsort(arr, len, sizeof(arr[0]), cmp_irn_idx);
295 /*****************************************************
298 * __| | ___| |__ _ _ __ _ __ _ _ _ __ __ _
299 * / _` |/ _ \ '_ \| | | |/ _` |/ _` | | '_ \ / _` |
300 * | (_| | __/ |_) | |_| | (_| | (_| | | | | | (_| |
301 * \__,_|\___|_.__/ \__,_|\__, |\__, |_|_| |_|\__, |
305 *****************************************************/
308 static void dump_nodeset(nodeset *ns, const char *prefix) {
310 foreach_nodeset(ns, irn) {
311 ir_fprintf(stderr, "%s%+F\n", prefix, irn);
316 static void build_file_name(rss_t *rss, const char *suffix, size_t suf_len, char *buf, size_t len) {
317 const char *irg_name;
320 irg_name = get_entity_name(get_irg_entity(rss->irg));
321 snprintf(buf, len - suf_len, "%s-%s-block-%ld",
322 irg_name, arch_register_class_name(rss->cls), get_irn_node_nr(rss->block));
326 /* Dumps all collected bipartite components of current irg as vcg. */
327 static void debug_vcg_dump_bipartite(rss_t *rss) {
331 static const char suffix[] = "-RSS-CBC.vcg";
333 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
334 f = fopen(file_name, "w");
336 ir_fprintf(f, "graph: { title: \"connected bipartite component graph of %+F\"\n", rss->irg);
337 fprintf(f, "display_edge_labels: no\n");
338 fprintf(f, "layoutalgorithm: mindepth\n");
339 fprintf(f, "manhattan_edges: yes\n\n");
341 foreach_pset(rss->cbc_set, cbc) {
345 fprintf(f, "graph: { titel: \"cbc %d\" label: \"cbc %d\" status:clustered color:yellow\n", cbc->nr, cbc->nr);
346 foreach_nodeset(cbc->parents, n) {
347 ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
349 foreach_nodeset(cbc->children, n) {
350 ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
352 foreach_pset(cbc->kill_edges, ke) {
353 ir_fprintf(f, "edge: { sourcename: \"n%d_%d\" targetname: \"n%d_%d\" }\n",
354 get_irn_node_nr(ke->src), cbc->nr, get_irn_node_nr(ke->tgt), cbc->nr);
362 /* Dump the computed killing function as vcg. */
363 static void debug_vcg_dump_kill(rss_t *rss) {
367 static const char suffix[] = "-RSS-KILL.vcg";
369 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
370 f = fopen(file_name, "w");
372 ir_fprintf(f, "graph: { title: \"computed kill graph of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
373 fprintf(f, "display_edge_labels: no\n");
374 fprintf(f, "layoutalgorithm: mindepth\n");
375 fprintf(f, "manhattan_edges: yes\n\n");
378 /* reset dump_flag */
380 foreach_phase_irn(&rss->ph, irn) {
381 rss_irn_t *node = get_rss_irn(rss, irn);
386 /* dump all nodes and their killers */
387 foreach_plist(rss->nodes, el) {
388 ir_node *irn = plist_element_get_value(el);
389 rss_irn_t *rirn = get_rss_irn(rss, irn);
390 rss_irn_t *pk_rirn = get_rss_irn(rss, rirn->killer);
392 if (! rirn->dumped) {
393 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
397 if (! pk_rirn->dumped) {
398 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(rirn->killer), rirn->killer);
402 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
403 get_irn_node_nr(rirn->killer), get_irn_node_nr(irn));
410 /* Dumps the potential killing DAG (PKG) as vcg. */
411 static void debug_vcg_dump_pkg(rss_t *rss, nodeset *max_ac, int iteration) {
414 char *suffix = alloca(32);
415 static const char suffix1[] = "-RSS-PKG.vcg";
416 static const char suffix2[] = "-RSS-PKG-MAXAC.vcg";
420 snprintf(suffix, 32, "%s", suffix1);
423 snprintf(suffix, 32, "-%02d%s", iteration, suffix2);
426 build_file_name(rss, suffix, strlen(suffix) + 1, file_name, sizeof(file_name));
427 f = fopen(file_name, "w");
429 ir_fprintf(f, "graph: { title: \"potential killing DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
430 fprintf(f, "display_edge_labels: no\n");
431 fprintf(f, "layoutalgorithm: mindepth\n");
432 fprintf(f, "manhattan_edges: yes\n\n");
435 /* reset dump_flag */
437 foreach_phase_irn(&rss->ph, irn) {
438 rss_irn_t *node = get_rss_irn(rss, irn);
443 foreach_plist(rss->nodes, el) {
444 ir_node *irn = plist_element_get_value(el);
445 rss_irn_t *rirn = get_rss_irn(rss, irn);
447 plist_element_t *k_el;
449 /* dump selected saturating values in yellow */
450 if (max_ac && nodeset_find(max_ac, irn))
453 if (! rirn->dumped) {
455 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F c%d\" %s }\n", get_irn_node_nr(irn), irn, rirn->chain->nr, c1);
457 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(irn), irn, c1);
461 foreach_plist(rirn->pkiller_list, k_el) {
462 ir_node *pkiller = plist_element_get_value(k_el);
463 rss_irn_t *pk_rirn = get_rss_irn(rss, pkiller);
466 if (max_ac && nodeset_find(max_ac, pkiller))
469 if (! pk_rirn->dumped) {
471 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F c%d\" %s }\n", get_irn_node_nr(pkiller), pkiller, pk_rirn->chain->nr, c2);
473 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(pkiller), pkiller, c2);
476 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
477 get_irn_node_nr(pkiller), get_irn_node_nr(irn));
484 /* Dumps the disjoint value DAG (DVG) as vcg. */
485 static void debug_vcg_dump_dvg(rss_t *rss, dvg_t *dvg) {
486 static const char suffix[] = "-RSS-DVG.vcg";
492 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
493 f = fopen(file_name, "w");
495 ir_fprintf(f, "graph: { title: \"disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
496 fprintf(f, "display_edge_labels: no\n");
497 fprintf(f, "layoutalgorithm: mindepth\n");
498 fprintf(f, "manhattan_edges: yes\n\n");
501 foreach_nodeset(dvg->nodes, irn) {
502 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
506 foreach_pset(dvg->edges, edge) {
507 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
508 get_irn_node_nr(edge->src), get_irn_node_nr(edge->tgt));
516 /* Dumps the PKG(DVG). */
517 static void debug_vcg_dump_dvg_pkiller(rss_t *rss, dvg_t *dvg) {
518 static const char suffix[] = "-RSS-DVG-PKG.vcg";
523 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
524 f = fopen(file_name, "w");
526 ir_fprintf(f, "graph: { title: \"PKG of disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
527 fprintf(f, "display_edge_labels: no\n");
528 fprintf(f, "layoutalgorithm: mindepth\n");
529 fprintf(f, "manhattan_edges: yes\n\n");
532 foreach_nodeset(dvg->nodes, irn) {
533 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
537 foreach_nodeset(dvg->nodes, irn) {
538 rss_irn_t *node = get_rss_irn(rss, irn);
541 foreach_plist(node->dvg_pkiller_list, el) {
542 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
543 get_irn_node_nr(plist_element_get_value(el)), get_irn_node_nr(irn));
553 * In case there is no rss information for irn, initialize it.
555 static void *init_rss_irn(phase_t *ph, ir_node *irn, void *old) {
556 rss_irn_t *res = old ? old : phase_alloc(ph, sizeof(res[0]));
558 res->descendant_list = plist_obstack_new(phase_obst(ph));
559 res->descendants = NULL;
561 res->consumer_list = plist_obstack_new(phase_obst(ph));
562 res->consumer = NULL;
564 res->pkiller_list = plist_obstack_new(phase_obst(ph));
566 res->parent_list = plist_obstack_new(phase_obst(ph));
584 * Collect all nodes data dependent on current node.
586 static void collect_descendants(rss_t *rss, rss_irn_t *rirn, ir_node *irn, int *got_sink, unsigned cur_desc_walk) {
587 const ir_edge_t *edge;
588 rss_irn_t *cur_node = get_rss_irn(rss, irn);
589 ir_node *block = rss->block;
590 ir_edge_kind_t ekind[2] = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
593 if (cur_node->desc_walk >= cur_desc_walk)
595 cur_node->desc_walk = cur_desc_walk;
597 /* Stop at Barriers */
598 if (be_is_Barrier(irn))
601 /* loop over normal and dependency edges */
602 for (i = 0; i < 2; ++i) {
603 foreach_out_edge_kind(irn, edge, ekind[i]) {
604 ir_node *user = get_edge_src_irn(edge);
606 /* skip ignore nodes as they do not really contribute to register pressure */
607 if (arch_irn_is(rss->arch_env, user, ignore))
611 (a) mode_X means Jump -> out_edge
612 (b) Phi as user of a normal node -> out-edge
614 if (get_irn_mode(user) == mode_X || is_Phi(user)) {
623 //if (arch_get_irn_reg_class(rss->arch_env, user, -1) == rss->cls)
624 collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
627 /* check if user lives in block and is not a control flow node */
628 if (get_nodes_block(user) == block) {
629 if (! plist_has_value(rirn->descendant_list, user)) {
630 plist_insert_back(rirn->descendant_list, user);
631 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", user));
633 collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
635 else if (! *got_sink) {
637 /* user lives out of block: add sink as descendant if not already done */
638 plist_insert_back(rirn->descendant_list, _sink);
640 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", _sink));
648 * Handles a single consumer.
650 static void collect_single_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *consumer, int *got_sink) {
651 ir_node *block = rss->block;
653 assert(! is_Proj(consumer) && "Cannot handle Projs");
655 if (! is_Phi(consumer) && ! is_Block(consumer) && get_nodes_block(consumer) == block) {
656 if (! arch_irn_is(rss->arch_env, consumer, ignore) && ! plist_has_value(rss_irn->consumer_list, consumer)) {
657 plist_insert_back(rss_irn->consumer_list, consumer);
658 DBG((rss->dbg, LEVEL_2, "\t\tconsumer %+F\n", consumer));
662 rss_irn->live_out = 1;
663 DBG((rss->dbg, LEVEL_2, "\t\tlive out %+F", consumer));
665 plist_insert_back(rss_irn->consumer_list, _sink);
667 DB((rss->dbg, LEVEL_2, ", %+F added instead", _sink));
669 DB((rss->dbg, LEVEL_2, "\n"));
674 * Collect all nodes consuming the value(s) produced by current node.
676 static void collect_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *irn, int *got_sink) {
677 const ir_edge_t *edge;
679 ir_edge_kind_t ekind[2] = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
680 rss_irn_t *cur_node = get_rss_irn(rss, irn);
682 if (cur_node->havecons)
684 cur_node->havecons = 1;
686 for (i = 0; i < 2; ++i) {
687 foreach_out_edge_kind(irn, edge, ekind[i]) {
688 ir_node *consumer = get_edge_src_irn(edge);
690 if (is_Proj(consumer)) {
691 //if (arch_get_irn_reg_class(rss->arch_env, consumer, -1) == rss->cls)
692 collect_consumer(rss, rss_irn, consumer, got_sink);
695 collect_single_consumer(rss, rss_irn, consumer, got_sink);
701 * Collects all consumer and descendant of a irn.
703 static void collect_node_info(rss_t *rss, ir_node *irn) {
704 static unsigned cur_desc_walk = 0;
705 rss_irn_t *rss_irn = get_rss_irn(rss, irn);
708 assert(! is_Proj(irn) && "Cannot handle Projs.");
710 /* check if node info is already available */
711 if (rss_irn->handled)
713 //phase_reinit_single_irn_data(&rss->ph, irn);
715 DBG((rss->dbg, LEVEL_1, "\tcomputing consumers of %+F:\n", irn));
717 /* collect all consumer */
719 collect_consumer(rss, rss_irn, irn, &got_sink);
721 /* build sorted consumer array */
722 rss_irn->consumer = build_sorted_array_from_list(rss_irn->consumer_list, phase_obst(&rss->ph));
724 DBG((rss->dbg, LEVEL_1, "\tcompute descendants of %+F:\n", irn));
726 /* collect descendants */
728 collect_descendants(rss, rss_irn, irn, &got_sink, ++cur_desc_walk);
730 /* build sorted descendant array */
731 rss_irn->descendants = build_sorted_array_from_list(rss_irn->descendant_list, phase_obst(&rss->ph));
733 rss_irn->handled = 1;
737 * Checks if v is a potential killer of u.
738 * v is in pkill(u) iff descendants(v) cut consumer(u) is v
740 * @param rss The rss object
741 * @param v The node to check for killer
742 * @param u The potentially killed value
743 * @return 1 if v is in pkill(u), 0 otherwise
745 static int is_potential_killer(rss_t *rss, rss_irn_t *v, rss_irn_t *u) {
750 assert(is_Sink(v->irn) || ((plist_count(v->descendant_list) > 0 && v->descendants) || 1));
751 assert(is_Sink(u->irn) || ((plist_count(u->consumer_list) > 0 && u->consumer) || 1));
753 /* as we loop over the list: loop over the shorter one */
754 if (plist_count(v->descendant_list) > plist_count(u->consumer_list)) {
755 list = u->consumer_list;
756 arr = v->descendants;
759 list = v->descendant_list;
763 /* for each list element: try to find element in array */
764 foreach_plist(list, el) {
765 ir_node *irn = plist_element_get_value(el);
766 ir_node *match = BSEARCH_IRN_ARR(irn, arr);
768 if (match && match != irn)
776 * Update descendants, consumer and pkiller list for the given irn.
778 static void update_node_info(rss_t *rss, ir_node *irn, ir_node *pk_irn) {
779 rss_irn_t *node = get_rss_irn(rss, irn);
780 rss_irn_t *pkiller = get_rss_irn(rss, pk_irn);
782 /* add consumer and rebuild the consumer array */
783 if (! plist_has_value(node->consumer_list, pk_irn)) {
784 plist_insert_back(node->consumer_list, pk_irn);
785 node->consumer = build_sorted_array_from_list(node->consumer_list, phase_obst(&rss->ph));
788 /* add pk_irn as descendant, add it's descendants to irn's and rebuild array */
789 if (! plist_has_value(node->descendant_list, pk_irn)) {
792 plist_insert_back(node->descendant_list, pk_irn);
794 foreach_plist(pkiller->descendant_list, el) {
795 ir_node *desc = plist_element_get_value(el);
797 if (! plist_has_value(node->descendant_list, desc))
798 plist_insert_back(node->descendant_list, desc);
801 node->descendants = build_sorted_array_from_list(node->descendant_list, phase_obst(&rss->ph));
807 * Compute the potential killing set PK.
809 static void compute_pkill_set(rss_t *rss) {
810 plist_element_t *u_el, *v_el;
812 foreach_plist(rss->nodes, u_el) {
813 ir_node *u_irn = plist_element_get_value(u_el);
814 rss_irn_t *u = get_rss_irn(rss, u_irn);
816 DBG((rss->dbg, LEVEL_1, "\tcomputing potential killers of %+F:\n", u_irn));
818 /* check each consumer if it is a potential killer */
819 foreach_plist(u->consumer_list, v_el) {
820 ir_node *v_irn = plist_element_get_value(v_el);
821 rss_irn_t *v = get_rss_irn(rss, v_irn);
823 /* check, if v is a potential killer of u */
824 if (is_potential_killer(rss, v, u)) {
825 if (! plist_has_value(u->pkiller_list, v_irn))
826 plist_insert_back(u->pkiller_list, v_irn);
828 DBG((rss->dbg, LEVEL_2, "\t\tpotential killer %+F\n", v_irn));
835 if (rss->opts->dump_flags & RSS_DUMP_PKG)
836 debug_vcg_dump_pkg(rss, NULL, 0);
840 * Build set of killing edges (from values to their potential killers)
842 static void build_kill_edges(rss_t *rss, pset *epk) {
843 plist_element_t *el, *k_el;
845 foreach_plist(rss->nodes, el) {
846 ir_node *irn = plist_element_get_value(el);
847 rss_irn_t *rirn = get_rss_irn(rss, irn);
849 DBG((rss->dbg, LEVEL_3, "\t\tbuilding kill edges for %+F:\n", irn));
851 foreach_plist(rirn->pkiller_list, k_el) {
852 ir_node *pkiller = plist_element_get_value(k_el);
853 rss_edge_t *ke = obstack_alloc(phase_obst(&rss->ph), sizeof(*ke));
859 DBG((rss->dbg, LEVEL_3, "\t\t\ttarget %+F\n", pkiller));
861 pset_insert(epk, ke, HASH_RSS_EDGE(ke));
867 /* print the given cbc for debugging purpose */
868 static void debug_print_cbc(firm_dbg_module_t *mod, cbc_t *cbc) {
872 DBG((mod, LEVEL_3, "\t\tS = set of parents:\n"));
873 foreach_nodeset(cbc->parents, n) {
874 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
876 DBG((mod, LEVEL_3, "\t\tT = set of children:\n"));
877 foreach_nodeset(cbc->children, n) {
878 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
880 DBG((mod, LEVEL_3, "\t\tE = Edges from producers to consumers\n"));
881 foreach_pset(cbc->kill_edges, ke) {
882 DBG((mod, LEVEL_3, "\t\t\t%+F -> %+F\n", ke->src, ke->tgt));
888 * Construct the bipartite decomposition.
889 * Sid-Ahmed-Ali Touati, Phd Thesis
890 * Register Pressure in Instruction Level Parallelism, p. 71
892 static void compute_bipartite_decomposition(rss_t *rss) {
893 pset *epk = new_pset(cmp_rss_edges, 10);
898 DBG((rss->dbg, LEVEL_1, "\tcomputing bipartite decomposition:\n"));
900 build_kill_edges(rss, epk);
902 foreach_plist(rss->nodes, el) {
903 ir_node *u_irn = plist_element_get_value(el);
904 rss_irn_t *u = get_rss_irn(rss, u_irn);
910 plist_element_t *el2;
912 rss_edge_t *kedge_root = NULL;
913 ir_node *t_irn, *s_irn;
915 if (u->visited || u_irn == _sink)
918 DBG((rss->dbg, LEVEL_2, "\t\t%+F choosen:\n", u_irn));
920 cbc = obstack_alloc(phase_obst(&rss->ph), sizeof(*cbc));
923 /* initialize S_cb */
924 cbc->parents = new_nodeset(5);
925 nodeset_insert(cbc->parents, u_irn);
926 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (init)\n", u_irn));
929 cbc->kill_edges = new_pset(cmp_rss_edges, 5);
931 /* each parent gets killed by at least one value from children */
933 /* T_cb = PK_successors(u) */
934 cbc->children = new_nodeset(5);
935 foreach_plist(u->pkiller_list, el2) {
936 nodeset_insert(cbc->children, plist_element_get_value(el2));
937 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (init)\n", plist_element_get_value(el2)));
941 Now: insert the parents of all children into the parent set
942 and insert the children of all parents into the children set
943 until the sets don't change any more
945 while (p_change || c_change) {
946 p_change = c_change = 0;
948 /* accumulate parents */
949 foreach_nodeset(cbc->children, t_irn) {
950 foreach_pset(epk, k_edge) {
951 ir_node *val = k_edge->src;
953 if (k_edge->tgt == t_irn && ! nodeset_find(cbc->parents, val)) {
954 nodeset_insert(cbc->parents, val);
956 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (killed by %+F)\n", val, t_irn));
961 /* accumulate children */
962 foreach_nodeset(cbc->parents, s_irn) {
963 foreach_pset(epk, k_edge) {
964 ir_node *val = k_edge->tgt;
966 if (k_edge->src == s_irn && ! nodeset_find(cbc->children, val)) {
967 nodeset_insert(cbc->children, val);
969 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (kills %+F)\n", val, s_irn));
975 /* mark all parent values as visited */
976 foreach_nodeset(cbc->parents, s_irn) {
977 rss_irn_t *s = get_rss_irn(rss, s_irn);
979 /* assure bipartite property */
981 if (nodeset_find(cbc->children, s_irn)) {
982 nodeset_remove(cbc->children, s_irn);
983 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F removed from to children\n", s_irn));
989 foreach_pset(epk, k_edge) {
990 if (nodeset_find(cbc->parents, k_edge->src) && nodeset_find(cbc->children, k_edge->tgt)) {
991 pset_insert(cbc->kill_edges, k_edge, HASH_RSS_EDGE(k_edge));
993 Link all k_edges which are about to be removed together.
994 Beware: pset_remove kills the iterator.
996 k_edge->next = kedge_root;
1001 /* remove all linked k_edges */
1002 for (; kedge_root; kedge_root = kedge_root->next) {
1003 pset_remove(epk, kedge_root, HASH_RSS_EDGE(kedge_root));
1006 /* verify the cbc */
1007 foreach_nodeset(cbc->parents, s_irn) {
1010 foreach_pset(cbc->kill_edges, k_edge) {
1011 if (k_edge->src == s_irn) {
1013 pset_break(cbc->kill_edges);
1020 ir_fprintf(stderr, "Warning: parent %+F is not killed in current cbc\n", s_irn);
1023 assert(vrfy_ok && "Verification of CBC failed");
1025 /* add the connected bipartite component */
1026 if (nodeset_count(cbc->parents) > 0 && nodeset_count(cbc->children) > 0 && pset_count(cbc->kill_edges) > 0)
1027 pset_insert(rss->cbc_set, cbc, (unsigned)cbc->nr);
1028 DBG((rss->dbg, LEVEL_2, "\tbipartite component %d inserted:\n", cbc->nr));
1030 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2)
1031 debug_print_cbc(rss->dbg, cbc);
1035 if (rss->opts->dump_flags & RSS_DUMP_CBC)
1036 debug_vcg_dump_bipartite(rss);
1042 * Select the child with the maximum cost.
1044 static child_t *select_child_max_cost(rss_t *rss, nodeset *x, nodeset *y, child_t *t, cbc_t *cbc) {
1046 float max_cost = -1.0f;
1048 DBG((rss->dbg, LEVEL_2, "\t\tcomputing children costs:\n"));
1050 foreach_nodeset(cbc->children, child) {
1051 rss_irn_t *r_child = get_rss_irn(rss, child);
1052 int num_unkilled_parents = 0;
1053 int num_descendants;
1057 /* get the number of unkilled parents */
1058 foreach_pset(cbc->kill_edges, k_edge) {
1059 if (k_edge->tgt == child && nodeset_find(x, k_edge->src))
1060 ++num_unkilled_parents;
1063 cost = (float)num_unkilled_parents;
1065 num_descendants = plist_count(r_child->descendant_list) + nodeset_count(y);
1067 if (num_descendants > 0)
1068 cost /= (float)num_descendants;
1070 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F, #desc %d, cost %.3f\n", child, num_descendants, cost));
1072 if (cost > max_cost) {
1083 * Remove all parents from x which are killed by t_irn.
1085 static void remove_covered_parents(rss_t *rss, nodeset *x, ir_node *t_irn, cbc_t *cbc) {
1086 rss_irn_t *t = get_rss_irn(rss, t_irn);
1089 DBG((rss->dbg, LEVEL_2, "\t\tremoving parents covered by %+F:\n", t_irn));
1091 foreach_pset(cbc->kill_edges, k_edge) {
1092 if (k_edge->tgt == t_irn && nodeset_find(x, k_edge->src)) {
1093 nodeset_remove(x, k_edge->src);
1094 plist_insert_back(t->parent_list, k_edge->src);
1095 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", k_edge->src));
1100 static void update_cumulated_descendent_values(rss_t *rss, nodeset *y, ir_node *t_irn) {
1101 rss_irn_t *t = get_rss_irn(rss, t_irn);
1102 plist_element_t *el;
1104 DBG((rss->dbg, LEVEL_2, "\t\tupdating cumulated descendant value of %+F:\n", t_irn));
1106 foreach_plist(t->descendant_list, el) {
1107 nodeset_insert(y, plist_element_get_value(el));
1108 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", plist_element_get_value(el)));
1113 * Greedy-k: a heuristics for the MMA problem
1115 static void compute_killing_function(rss_t *rss) {
1117 struct obstack obst;
1119 obstack_init(&obst);
1121 rss->cbc_set = pset_new_ptr(5);
1122 compute_bipartite_decomposition(rss);
1124 /* for all bipartite components do: */
1125 foreach_pset(rss->cbc_set, cbc) {
1127 nodeset *x = new_nodeset(10);
1128 nodeset *y = new_nodeset(10);
1129 child_t **sks = NEW_ARR_F(child_t *, 20);
1134 DBG((rss->dbg, LEVEL_1, "\tcomputing SKS for cbc %d:\n", cbc->nr));
1135 DBG((rss->dbg, LEVEL_2, "\t\tinitializing parents X:\n"));
1137 /* X = S_cb (all parents are initially uncovered) */
1138 foreach_nodeset(cbc->parents, p) {
1139 nodeset_insert(x, p);
1140 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", p));
1143 /* while X not empty */
1144 while (nodeset_count(x) > 0) {
1145 child_t *t = obstack_alloc(&obst, sizeof(*t));
1146 memset(t, 0, sizeof(*t));
1148 t = select_child_max_cost(rss, x, y, t, cbc);
1150 if (cur_len >= cur_size) {
1151 ARR_EXTO(child_t *, sks, cur_size * 2);
1155 DBG((rss->dbg, LEVEL_2, "\t\tinsert child %+F (%.3f) into SKS at pos %d\n", t->irn, t->cost, cur_len));
1158 remove_covered_parents(rss, x, t->irn, cbc);
1159 update_cumulated_descendent_values(rss, y, t->irn);
1162 ARR_SHRINKLEN(sks, cur_len);
1164 /* sort SKS in increasing cost order */
1165 qsort(sks, cur_len, sizeof(sks[0]), cmp_child_costs);
1167 DBG((rss->dbg, LEVEL_2, "\tprocessing SKS for cbc %d:\n", cbc->nr));
1169 /* build killing function */
1170 for (i = cur_len - 1; i >= 0; --i) { /* loop over sks in decreasing cost order */
1171 child_t *t = sks[i];
1172 rss_irn_t *rt = get_rss_irn(rss, t->irn);
1173 plist_element_t *p_el;
1175 DBG((rss->dbg, LEVEL_3, "\t\t\tkiller %+F (%.3f):\n", t->irn, t->cost));
1177 /* kill all unkilled parents of t */
1178 foreach_plist(rt->parent_list, p_el) {
1179 ir_node *par = plist_element_get_value(p_el);
1180 rss_irn_t *rpar = get_rss_irn(rss, par);
1182 if (is_Sink(rpar->killer)) {
1183 rpar->killer = t->irn;
1184 DBG((rss->dbg, LEVEL_2, "\t\tkill %+F\n", rpar->irn));
1187 DBG((rss->dbg, LEVEL_3, "\t\t\tkeeping %+F as killer for %+F\n", rpar->killer, rpar->irn));
1197 if (rss->opts->dump_flags & RSS_DUMP_KILL)
1198 debug_vcg_dump_kill(rss);
1200 del_pset(rss->cbc_set);
1201 obstack_free(&obst, NULL);
1205 * Adds the edge src -> tgt to the dvg. Checks if reverse edge is already there (asserts).
1207 static INLINE void add_dvg_edge(rss_t *rss, dvg_t *dvg, ir_node *src, ir_node *tgt, int have_source) {
1208 rss_edge_t *dvg_edge;
1212 nodeset_insert(dvg->nodes, src);
1214 assert(nodeset_find(dvg->nodes, src) != NULL && "Wrong assumption");
1216 nodeset_insert(dvg->nodes, tgt);
1220 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1224 if (NULL != pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1225 /* add the edge to the DVG */
1226 dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1228 dvg_edge->src = src;
1229 dvg_edge->tgt = tgt;
1230 dvg_edge->next = NULL;
1232 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", src, tgt));
1233 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1238 * Computes the disjoint value DAG (DVG).
1239 * BEWARE: It is not made explicitly clear in the Touati paper,
1240 * but the DVG is meant to be build from the KILLING DAG
1242 static void compute_dvg(rss_t *rss, dvg_t *dvg) {
1243 plist_element_t *el;
1245 DBG((rss->dbg, LEVEL_1, "\tcomputing DVG:\n"));
1247 foreach_plist(rss->nodes, el) {
1248 ir_node *u_irn = plist_element_get_value(el);
1249 rss_irn_t *u = get_rss_irn(rss, u_irn);
1250 rss_irn_t *u_killer = get_rss_irn(rss, u->killer);
1253 /* TODO: omit nodes only having sink as pkiller and killing no one */
1255 /* add an edge to killer */
1256 add_dvg_edge(rss, dvg, u_irn, u->killer, 0);
1258 if (is_Sink(u->killer))
1261 /* We add an edge to every killer, from where we could be reached. */
1262 for (i = ARR_LEN_SAFE(u_killer->descendants) - 1; i >= 0; --i) {
1263 add_dvg_edge(rss, dvg, u_irn, u_killer->descendants[i], 1);
1268 foreach_plist(rss->nodes, el2) {
1269 ir_node *v_irn = plist_element_get_value(el2);
1272 There is an edge (u, v) in the DVG iff v is a descendant of the killer(u).
1274 if (BSEARCH_IRN_ARR(v_irn, u_kill->descendants)) {
1275 rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1278 /* insert the user into the DVG and append it to the user list of u */
1279 nodeset_insert(dvg->nodes, v_irn);
1280 if (! plist_has_value(u->dvg_user_list, v_irn))
1281 plist_insert_back(u->dvg_user_list, v_irn);
1283 dvg_edge->src = u_irn;
1284 dvg_edge->tgt = v_irn;
1285 dvg_edge->next = NULL;
1290 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1292 /* add the edge to the DVG */
1293 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", u_irn, v_irn));
1294 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1300 if (rss->opts->dump_flags & RSS_DUMP_DVG)
1301 debug_vcg_dump_dvg(rss, dvg);
1305 * Updates the dvg structure when a serialization edge from src -> tgt is added.
1307 static void update_dvg(rss_t *rss, dvg_t *dvg, rss_irn_t *src, rss_irn_t *tgt) {
1310 rss_edge_t **arr = alloca(pset_count(dvg->edges) * sizeof(arr[0]));
1313 Add an edge from serialization target to serialization src:
1314 src cannot be alive with target
1316 add_dvg_edge(rss, dvg, tgt->irn, src->irn, 0);
1318 /* Add edges to src's descendants as well, they also getting serialized. */
1319 for (i = ARR_LEN_SAFE(src->descendants) - 1; i >= 0; --i) {
1320 add_dvg_edge(rss, dvg, tgt->irn, src->descendants[i], 1);
1323 /* We also have to add edges from targets predecessors in dvg */
1325 /* We cannot insert elements into set over which we iterate ... */
1326 foreach_pset(dvg->edges, edge) {
1327 if (edge->tgt == tgt->irn) {
1332 for (i = 0; i < idx; ++i) {
1334 add_dvg_edge(rss, dvg, edge->src, src->irn, 1);
1335 for (j = ARR_LEN_SAFE(src->descendants) - 1; j >= 0; --j) {
1336 add_dvg_edge(rss, dvg, edge->src, src->descendants[j], 1);
1343 * Accumulate all descendants for root into list.
1345 static void accumulate_dvg_descendant_values(rss_t *rss, rss_irn_t *root, plist_t *list) {
1346 if (plist_count(root->dvg_user_list) > 0) {
1347 plist_element_t *el;
1349 foreach_plist(root->dvg_user_list, el) {
1350 ir_node *v_irn = plist_element_get_value(el);
1351 rss_irn_t *v = get_rss_irn(rss, v_irn);
1353 /* add v as descendant */
1354 if (! plist_has_value(list, v_irn)) {
1355 plist_insert_back(list, v_irn);
1356 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG descendant %+F\n", v_irn));
1359 /* accumulate v's descendants */
1360 accumulate_dvg_descendant_values(rss, v, list);
1366 * Builds the list of potential killers for each node
1368 * Needs the descendant list for all user as sorted array.
1370 static void build_dvg_pkiller_list(rss_t *rss, dvg_t *dvg) {
1373 foreach_nodeset(dvg->nodes, irn) {
1374 rss_irn_t *node = get_rss_irn(rss, irn);
1375 plist_element_t *el, *el2;
1377 DBG((rss->dbg, DEBUG_DVG, "\t\tbuilding pkiller list for %+F\n", irn));
1379 /* check each user */
1380 foreach_plist(node->dvg_user_list, el) {
1381 ir_node *u_irn = plist_element_get_value(el);
1383 /* is the current user u_irn not a descendant of any other user -> pkiller */
1384 foreach_plist(node->dvg_user_list, el2) {
1385 ir_node *v_irn = plist_element_get_value(el2);
1386 rss_irn_t *v = get_rss_irn(rss, v_irn);
1389 ! BSEARCH_IRN_ARR(u_irn, v->dvg_desc) &&
1390 ! plist_has_value(node->dvg_pkiller_list, u_irn))
1392 plist_insert_back(node->dvg_pkiller_list, u_irn);
1393 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG pkiller %+F\n", u_irn));
1398 node->dvg_pkiller = build_sorted_array_from_list(node->dvg_pkiller_list, phase_obst(&rss->ph));
1402 if (firm_dbg_get_mask(rss->dbg) & DEBUG_DVG)
1403 debug_vcg_dump_dvg_pkiller(rss, dvg);
1410 * Compute the maximal antichain of the current DVG.
1411 * This is a reimplementation of the MAXIMAL_ANTI_CHAIN function
1412 * from the DDG library 1.1 (DAG.cpp).
1414 static nodeset *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration) {
1415 int n = nodeset_count(dvg->nodes);
1416 int *assignment = alloca(n * sizeof(assignment[0]));
1417 int *assignment_rev = alloca(n * sizeof(assignment_rev[0]));
1418 int *idx_map = alloca(n * sizeof(idx_map[0]));
1419 hungarian_problem_t *bp;
1420 nodeset *values, *temp;
1422 int i, j, cost, cur_chain, res;
1423 rss_edge_t *dvg_edge;
1425 #define MAP_IDX(irn) bsearch_for_index(get_irn_idx(irn), idx_map, n, 1)
1427 if (pset_count(dvg->edges) == 0)
1430 bp = hungarian_new(n, n, 1, HUNGARIAN_MATCH_NORMAL);
1433 At first, we build an index map for the nodes in the DVG,
1434 because we cannot use the irn idx therefore as the resulting
1435 bipartite data structure would have around 1.2GB.
1436 So we limit the size to the number of nodes we have in the DVG
1437 and build a sorted index map for their irn indices.
1440 foreach_nodeset(dvg->nodes, u_irn) {
1441 idx_map[i++] = get_irn_idx(u_irn);
1443 qsort(idx_map, n, sizeof(idx_map[0]), cmp_int);
1445 foreach_pset(dvg->edges, dvg_edge) {
1446 int idx_u = MAP_IDX(dvg_edge->src);
1447 int idx_v = MAP_IDX(dvg_edge->tgt);
1449 /* add the entry to the bipartite data structure */
1450 hungarian_add(bp, idx_u, idx_v, 1);
1451 DBG((rss->dbg, LEVEL_3, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1452 idx_u, dvg_edge->src, idx_v, dvg_edge->tgt));
1456 Add a bipartite entry for each pair of nodes (u, v), where exists a
1457 path in the DVG from u to v, ie. connect all descendants(v) to v.
1458 desc_dvg(v) = accumulated descendants of all z in dvg_user_list(v)
1460 foreach_nodeset(dvg->nodes, u_irn) {
1461 rss_irn_t *u = get_rss_irn(rss, u_irn);
1462 int idx_u_irn = MAP_IDX(u_irn);
1464 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG descendants of %+F:\n", u_irn));
1466 //plist_clear(u->dvg_desc_list);
1467 //accumulate_dvg_descendant_values(rss, u, u->dvg_desc_list);
1470 FIXME: The array is build on the phase obstack and we cannot free the data.
1471 So the array get as many times allocated as this function is called.
1474 /* build the sorted array for faster searches */
1475 //u->dvg_desc = build_sorted_array_from_list(u->dvg_desc_list, phase_obst(&rss->ph));
1477 DBG((rss->dbg, DEBUG_MAX_AC, "\t\tadding bipartite entries of %+F:\n", u_irn));
1479 /* add the bipartite entries for all descendants */
1480 for (i = ARR_LEN_SAFE(u->dvg_desc) - 1; i >= 0; --i) {
1481 rss_irn_t *desc = get_rss_irn(rss, u->dvg_desc[i]);
1482 int idx_desc = MAP_IDX(u->dvg_desc[i]);
1484 /* add the entry to the bipartite data structure */
1485 hungarian_add(bp, idx_u_irn, idx_desc, 1);
1486 DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1487 idx_u_irn, u_irn, idx_desc, u->dvg_desc[i]));
1494 /* We want maximum cardinality matching */
1495 hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);
1498 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG pkiller:\n"));
1499 /* beware: the following function needs the dvg_desc array */
1500 build_dvg_pkiller_list(rss, dvg);
1503 DBG((rss->dbg, LEVEL_2, "\t\tcomputing bipartite matching\n"));
1505 The maximum cardinality bipartite matching gives us the minimal
1506 chain partition, which corresponds to the maximum anti chains.
1508 res = hungarian_solve(bp, assignment, &cost, 1);
1509 assert(res == 0 && "Bipartite matching failed!");
1512 memset(assignment_rev, -1, n * sizeof(assignment_rev[0]));
1514 /* build the reverse assignment, ie. foreach i -> j, add j -> i */
1515 for (i = 0; i < n; ++i) {
1516 if (assignment[i] >= 0) {
1517 int j = assignment[i];
1518 assignment_rev[j] = i;
1522 DBG((rss->dbg, LEVEL_2, "\t\tgot assignment with cost %d\n", cost));
1523 DBG((rss->dbg, LEVEL_3, "\t\t\tassignment --- reverse assignment\n", cost));
1524 for (i = 0; i < n; ++i) {
1525 DBG((rss->dbg, LEVEL_3, "\t\t\t%3d -> %3d %3d -> %3d\n", i, assignment[i], i, assignment_rev[i]));
1528 values = new_nodeset(10);
1530 /* Construction of the minimal chain partition */
1531 for (j = 0; j < n; ++j) {
1532 /* check nodes, which did not occur as target */
1533 if (assignment_rev[j] == -1) {
1534 int xj = idx_map[j];
1535 ir_node *xj_irn = get_idx_irn(rss->irg, xj);
1536 rss_irn_t *xj_rss = get_rss_irn(rss, xj_irn);
1537 chain_t *c = obstack_alloc(phase_obst(&rss->ph), sizeof(*c));
1540 /* there was no source for j -> we have a source of a new chain */
1541 nodeset_insert(values, xj_irn);
1543 c->elements = plist_obstack_new(phase_obst(&rss->ph));
1544 c->nr = cur_chain++;
1545 plist_insert_back(c->elements, xj_irn);
1549 DBG((rss->dbg, LEVEL_2, "\t\tstarting chain %d:\n", c->nr));
1550 DBG((rss->dbg, LEVEL_2, "\t\t\t%+F (%d)", xj_irn, j));
1552 /* follow chain, having j as source */
1554 while (assignment[source] >= 0) {
1555 int target = assignment[source];
1556 int irn_idx = idx_map[target];
1557 ir_node *irn = get_idx_irn(rss->irg, irn_idx);
1558 rss_irn_t *node = get_rss_irn(rss, irn);
1560 plist_insert_back(c->elements, irn);
1563 DB((rss->dbg, LEVEL_2, " -> %+F (%d)", irn, target));
1565 /* new source = last target */
1569 DB((rss->dbg, LEVEL_2, "\n"));
1574 Computing the maximal antichain: Select an element from each
1575 chain such, such it is parallel with the others.
1577 DBG((rss->dbg, LEVEL_1, "\t\tcomputing set of saturation values (MAX AC)\n"));
1578 DBG((rss->dbg, LEVEL_3, "\t\tstarting with:\n"));
1581 if (firm_dbg_get_mask(rss->dbg) & LEVEL_3)
1582 dump_nodeset(values, "\t\t\t");
1588 We need an explicit array for the values as
1589 we cannot iterate multiple times over the same
1590 set at the same time. :-(((((
1592 int n = nodeset_count(values);
1594 ir_node **val_arr = NEW_ARR_F(ir_node *, n);
1596 foreach_nodeset(values, u_irn)
1597 val_arr[i++] = u_irn;
1602 temp = new_nodeset(10);
1604 /* Select all nodes from current value set, having another node in the set as descendant. */
1605 for (i = 0; i < n; ++i) {
1606 rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1608 for (j = 0; j < n; ++j) {
1612 key.src = val_arr[i];
1613 key.tgt = val_arr[j];
1615 if (pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1616 /* v[j] is descendant of u -> remove u and break */
1617 nodeset_insert(temp, u->irn);
1618 nodeset_remove(values, u->irn);
1620 DBG((rss->dbg, LEVEL_3, "\t\t\tremoving %+F from values, adding it to temp\n", u->irn));
1628 /* Try to insert the chain predecessor of all selected u's */
1629 foreach_nodeset(temp, u_irn) {
1630 rss_irn_t *u = get_rss_irn(rss, u_irn);
1631 chain_t *c = u->chain;
1632 plist_element_t *el = plist_find_value(c->elements, u_irn);
1634 assert(el && "Missing element in chain!");
1636 /* If u has predecessor in chain: insert the predecessor */
1637 if (el == plist_element_get_prev(el)) {
1638 nodeset_insert(values, plist_element_get_value(el));
1639 DBG((rss->dbg, LEVEL_3, "\t\t\tadding %+F to values\n", plist_element_get_value(el)));
1645 } while (nodeset_count(temp) > 0);
1647 DBG((rss->dbg, LEVEL_2, "\t\tfinal set:\n"));
1649 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2) {
1650 dump_nodeset(values, "\t\t\t");
1654 if (rss->opts->dump_flags & RSS_DUMP_MAXAC)
1655 debug_vcg_dump_pkg(rss, values, iteration);
1665 * Computes the best serialization between two nodes of sat_vals.
1667 static serialization_t *compute_best_admissible_serialization(rss_t *rss, nodeset *sat_vals, serialization_t *ser, int num_regs) {
1668 int n = nodeset_count(sat_vals);
1669 int n_idx = ARR_LEN_SAFE(rss->idx_map);
1671 ir_node **val_arr = alloca(n * sizeof(val_arr[0]));
1672 bitset_t *bs_sv = bitset_alloca(n_idx);
1673 bitset_t *bs_vdesc = bitset_alloca(n_idx);
1674 bitset_t *bs_tmp = bitset_alloca(n_idx);
1675 bitset_t *bs_ukilldesc = bitset_alloca(n_idx);
1676 int best_benefit = INT_MAX;
1677 int best_omega2 = INT_MAX;
1678 int best_benefit_omega20 = INT_MAX;
1682 rss_edge_t min_benefit_edge;
1683 rss_edge_t min_omega20_edge;
1684 rss_irn_t *ser_u_omega1 = NULL, *ser_v_omega1 = NULL;
1685 rss_irn_t *ser_u_omega20 = NULL, *ser_v_omega20 = NULL;
1687 DBG((rss->dbg, LEVEL_1, "\tcomputing admissible serializations:\n"));
1690 We need an explicit array for the values as
1691 we cannot iterate multiple times over the same
1692 set at the same time. :-(((((
1695 foreach_nodeset(sat_vals, irn) {
1697 bitset_set(bs_sv, BLOCK_IDX_MAP(rss, irn));
1701 We build all admissible serializations and remember the best found so far.
1704 if v in pkiller(u): add edge from v to all other pkiller(u)
1705 else: for all vv in pkiller(u): add edge from v to vv if there exists no path from vv to v
1709 A node is unserializable if:
1710 - it has only one killer and this one is Sink
1711 - it kills no other values
1712 In this case there is no serialization which could
1713 reduce the registerpressure
1715 #define IS_UNSERIALIZABLE_NODE(rss_node) \
1718 (plist_count(rss_node->pkiller_list) == 1) && \
1719 is_Sink(rss_node->killer) && \
1720 (rss_node->kill_count == 0) \
1722 be_is_Barrier(rss_node->irn) || \
1723 be_is_Keep(rss_node->irn) \
1726 /* for all u in sat_vals */
1727 for (i = 0; i < n; ++i) {
1728 rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1729 plist_element_t *el;
1731 /* ignore nodes where serialization does not help */
1732 if (IS_UNSERIALIZABLE_NODE(u)) {
1733 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", u->irn));
1737 /* accumulate all descendants of all pkiller(u) */
1738 bitset_clear_all(bs_ukilldesc);
1739 foreach_plist(u->pkiller_list, el) {
1740 ir_node *irn = plist_element_get_value(el);
1741 rss_irn_t *node = get_rss_irn(rss, irn);
1744 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, irn));
1748 for (k = ARR_LEN_SAFE(node->descendants) - 1; k >= 0; --k) {
1749 if (! is_Sink(node->descendants[k]))
1750 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, node->descendants[k]));
1754 /* for all v in sat_vals */
1755 for (j = 0; j < n; ++j) {
1756 ir_node *v_irn = val_arr[j];
1757 rss_irn_t *v = get_rss_irn(rss, v_irn);
1758 unsigned v_height = get_irn_height(rss->h, v_irn);
1759 int omega1, omega2, is_pkiller;
1761 /* v cannot be serialized with itself
1762 * ignore nodes where serialization does not help */
1763 if (i == j || IS_UNSERIALIZABLE_NODE(v)) {
1765 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", v->irn));
1769 /* get descendants of v */
1770 bitset_clear_all(bs_vdesc);
1771 bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v_irn));
1772 for (k = ARR_LEN_SAFE(v->descendants) - 1; k >= 0; --k) {
1773 if (! is_Sink(v->descendants[k]))
1774 bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v->descendants[k]));
1777 /* if v is in pkiller(u) */
1778 is_pkiller = plist_has_value(u->pkiller_list, v_irn);
1780 /* for all vv in pkiller(u) */
1781 foreach_plist(u->pkiller_list, el) {
1782 ir_node *vv_irn = plist_element_get_value(el);
1785 if (is_Sink(vv_irn) || is_cfop(vv_irn))
1789 add_edge = vv_irn != v_irn && skip_Proj(vv_irn) != skip_Proj(v_irn);
1791 add_edge = ! heights_reachable_in_block(rss->h, skip_Proj(vv_irn), skip_Proj(v_irn));
1794 As we add an edge from vv -> v, we have to make sure,
1795 that there exists no path from v to vv.
1799 int vv_height = get_irn_height(rss->h, vv_irn);
1800 int mu1, mu2, critical_path_cost;
1803 mu1 = | descendants(v) cut sat_vals |
1804 the number of saturating values which cannot
1805 be simultaneously alive with u
1807 bitset_copy(bs_tmp, bs_vdesc);
1808 mu1 = bitset_popcnt(bitset_and(bs_tmp, bs_sv));
1811 mu2 = | accum_desc_all_pkiller(u) without descendants(v) |
1814 bitset_copy(bs_tmp, bs_ukilldesc);
1815 mu2 = bitset_popcnt(bitset_andnot(bs_tmp, bs_vdesc));
1821 /* omega1 = mu1 - mu2 */
1827 /* omega2 = increase of critical path */
1828 critical_path_cost =
1829 v_height /* longest path from v to sink */
1830 + rss->max_height - vv_height /* longest path from source to vv */
1834 If critical_path_cost > max_height -> the new edge
1835 would increase the longest critical path by the difference.
1837 omega2 = critical_path_cost > rss->max_height ? critical_path_cost - rss->max_height : 0;
1839 /* this keeps track of the edge with the best benefit */
1840 if (omega1 >= num_regs - n && omega1 < best_benefit) {
1841 min_benefit_edge.src = v_irn;
1842 min_benefit_edge.tgt = vv_irn;
1847 best_benefit = omega1;
1848 ser->new_killer = is_pkiller;
1851 /* this keeps track of the edge with the best omega1 costs where omega2 == 0 */
1852 if (omega2 == 0 && omega1 >= num_regs - n && omega1 < best_benefit_omega20) {
1853 min_omega20_edge.src = v_irn;
1854 min_omega20_edge.tgt = vv_irn;
1859 best_benefit_omega20 = omega1;
1860 ser->new_killer = is_pkiller;
1863 best_omega2 = MIN(best_omega2, omega2);
1865 DBG((rss->dbg, LEVEL_2, "\t\tfound %+F -> %+F (w1 %d, w2 %d)\n",
1866 v_irn, vv_irn, omega1, omega2));
1868 } /* for all vv in pkiller(u) */
1869 } /* for all v in sat_vals */
1870 } /* for all u in sat_vals */
1875 if (best_omega2 == 0) {
1876 ser->u = ser_u_omega20;
1877 ser->v = ser_v_omega20;
1878 ser->edge->src = min_omega20_edge.src;
1879 ser->edge->tgt = min_omega20_edge.tgt;
1880 ser->omega1 = best_benefit_omega20;
1881 ser->omega2 = best_omega2;
1884 ser->u = ser_u_omega1;
1885 ser->v = ser_v_omega1;
1886 ser->edge->src = min_benefit_edge.src;
1887 ser->edge->tgt = min_benefit_edge.tgt;
1888 ser->omega1 = best_benefit;
1889 ser->omega2 = best_omega2;
1894 #undef IS_UNSERIALIZABLE_NODE
1898 * Perform the value serialization heuristic and add all
1899 * computed serialization edges as dependencies to the irg.
1901 static void perform_value_serialization_heuristic(rss_t *rss) {
1902 bitset_t *arch_nonign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1903 bitset_t *abi_ign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1904 int available_regs, iteration;
1907 pset *ser_set = new_pset(cmp_rss_edges, 20);
1909 /* available_regs = R = |arch_non_ignore_regs cut ~abi_ignore_regs| */
1910 arch_put_non_ignore_regs(rss->arch_env, rss->cls, arch_nonign_bs);
1911 be_abi_put_ignore_regs(rss->abi, rss->cls, abi_ign_bs);
1912 bitset_andnot(arch_nonign_bs, abi_ign_bs);
1913 available_regs = bitset_popcnt(arch_nonign_bs);
1914 //num_live = pset_count(rss->live_block);
1915 //available_regs -= num_live < available_regs ? num_live : 0;
1917 DBG((rss->dbg, LEVEL_1, "\n\t#available regs: %d\n\n", available_regs));
1920 At first we need to compute the disjoint value DAG (DVG = {V, E_dv}).
1921 V = set of all nodes we are currently interested in
1922 E_dv = there is an edge from u to v iff v is a descendant of killer(u), forall u, v in V
1924 dvg.nodes = new_nodeset(plist_count(rss->nodes));
1925 dvg.edges = new_pset(cmp_rss_edges, plist_count(rss->nodes) * 5);
1926 compute_dvg(rss, &dvg);
1929 Then we perform the heuristic serialization algorithm
1930 on the DVG which gives us all necessary serialization
1933 DBG((rss->dbg, LEVEL_1, "\tcomputing maximal antichain:\n"));
1935 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
1936 while (sat_vals && (nodeset_count(sat_vals) > available_regs)) {
1937 serialization_t *ser, best_ser;
1938 rss_edge_t *edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*edge));
1939 ir_node *dep_src, *dep_tgt;
1941 best_ser.edge = edge;
1942 ser = compute_best_admissible_serialization(rss, sat_vals, &best_ser, available_regs);
1944 DBG((rss->dbg, LEVEL_1, "\tcurrent register saturation %d, target %d\n", nodeset_count(sat_vals), available_regs));
1947 DBG((rss->dbg, LEVEL_1, "\tno RS improving serialization found, breaking at iteration %d\n", iteration));
1951 /* Insert the serialization as dependency edge into the irg. */
1952 DBG((rss->dbg, LEVEL_1, "\tserializing %+F -> %+F with edge %+F -> %+F and cost %d, %d\n",
1953 ser->u->irn, ser->v->irn, ser->edge->src, ser->edge->tgt, ser->omega1, ser->omega2));
1955 if (pset_find(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge)))
1956 ir_printf("WARNING: serialization %+F -> %+F computed twice!\n", ser->edge->src, ser->edge->tgt);
1959 pset_insert(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge));
1961 /* update the dvg */
1962 update_dvg(rss, &dvg, ser->v, ser->u);
1963 update_dvg(rss, &dvg, get_rss_irn(rss, ser->edge->src), get_rss_irn(rss, ser->edge->tgt));
1964 del_nodeset(sat_vals);
1966 dep_src = skip_Proj(ser->edge->src);
1967 dep_tgt = ser->edge->tgt;
1968 add_irn_dep(dep_src, dep_tgt);
1970 /* Update descendants, consumer and pkillers of the target */
1971 update_node_info(rss, ser->edge->tgt, ser->edge->src);
1973 /* TODO: try to find a cheaper way for updating height information */
1974 rss->max_height = heights_recompute_block(rss->h, rss->block);
1976 /* Recompute the antichain for next serialization */
1977 DBG((rss->dbg, LEVEL_1, "\tre-computing maximal antichain:\n"));
1978 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
1981 del_nodeset(dvg.nodes);
1982 del_pset(dvg.edges);
1986 * Do initial calculations for a block.
1988 static void process_block(ir_node *block, void *env) {
1991 const ir_edge_t *edge;
1993 phase_init(&rss->ph, "rss block preprocessor", rss->irg, PHASE_DEFAULT_GROWTH, init_rss_irn);
1995 DBG((rss->dbg, LEVEL_1, "preprocessing block %+F\n", block));
1998 /* build an index map for all nodes in the current block */
2000 n = get_irn_n_edges(block);
2001 NEW_ARR_A(int *, rss->idx_map, n);
2002 foreach_out_edge(block, edge) {
2003 ir_node *irn = get_edge_src_irn(edge);
2004 rss->idx_map[i++] = get_irn_idx(irn);
2006 qsort(rss->idx_map, n, sizeof(rss->idx_map[0]), cmp_int);
2007 rss->max_height = heights_recompute_block(rss->h, block);
2009 /* loop over all register classes */
2010 for (i = arch_isa_get_n_reg_class(rss->arch_env->isa) - 1; i >= 0; --i) {
2011 const arch_register_class_t *cls = arch_isa_get_reg_class(rss->arch_env->isa, i);
2014 DBG((rss->dbg, LEVEL_1, "register class %s\n", arch_register_class_name(cls)));
2016 /* Get all live value at end of Block having current register class */
2017 rss->live_block = pset_new_ptr(10);
2018 be_liveness_end_of_block(rss->liveness, rss->arch_env, rss->cls, rss->block, rss->live_block);
2020 /* reset the list of interesting nodes */
2021 plist_clear(rss->nodes);
2022 plist_insert_back(rss->nodes, _sink);
2024 /* collect all nodes having a certain register class */
2025 foreach_out_edge(block, edge) {
2026 ir_node *irn = get_edge_src_irn(edge);
2027 ir_mode *mode = get_irn_mode(irn);
2031 - mode_T nodes (the projs are asked)
2032 - mode_X nodes (control flow nodes are always scheduled last)
2033 - Keeps (they are always immediately scheduled)
2034 - Phi (same as Keep)
2036 if (mode == mode_T || mode == mode_X || is_Phi(irn))
2040 In case of a proj, we skip
2041 - Barrier (they are a Barrier :)
2043 - the Proj itself, as it's scheduled always with it's super node
2046 ir_node *pred = get_Proj_pred(irn);
2047 if (be_is_Barrier(pred) || is_Start(pred))
2051 /* calculate the descendants and consumer for each node in the block */
2052 collect_node_info(rss, skip_Proj(irn));
2054 if (be_is_Keep(irn))
2057 if (! arch_irn_is(rss->arch_env, irn, ignore) && arch_get_irn_reg_class(rss->arch_env, irn, -1) == cls) {
2058 plist_insert_back(rss->nodes, skip_Proj(irn));
2063 /* compute the potential killing set PK(G) */
2064 compute_pkill_set(rss);
2066 /* compute the killing function k* */
2067 compute_killing_function(rss);
2070 Compute the heuristic value serialization and
2071 add the necessary dependencies to the irg.
2073 perform_value_serialization_heuristic(rss);
2075 del_pset(rss->live_block);
2078 phase_free(&rss->ph);
2082 * Register the options.
2084 void be_init_schedrss(void) {
2085 lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
2086 lc_opt_entry_t *sched_grp = lc_opt_get_grp(be_grp, "sched");
2087 lc_opt_entry_t *rss_grp = lc_opt_get_grp(sched_grp, "rss");
2089 lc_opt_add_table(rss_grp, rss_option_table);
2092 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_schedrss);
2095 * Preprocess the irg for scheduling.
2097 void rss_schedule_preparation(const be_irg_t *birg) {
2100 FIRM_DBG_REGISTER(rss.dbg, "firm.be.sched.rss");
2102 //firm_dbg_set_mask(rss.dbg, LEVEL_1 | LEVEL_2 | LEVEL_3);
2104 init_rss_special_nodes(birg->irg);
2106 rss.irg = birg->irg;
2107 rss.arch_env = birg->main_env->arch_env;
2108 rss.abi = birg->abi;
2109 rss.h = heights_new(birg->irg);
2110 rss.nodes = plist_new();
2111 rss.opts = &rss_options;
2112 rss.liveness = be_liveness(birg->irg);
2113 irg_block_walk_graph(birg->irg, NULL, process_block, &rss);
2114 heights_free(rss.h);
2115 plist_free(rss.nodes);
2116 be_liveness_free(rss.liveness);
2118 if (birg->main_env->options->dump_flags & DUMP_SCHED)
2119 be_dump(rss.irg, "-rss", dump_ir_block_graph);