2 * Implementation of a register saturating list scheduler
3 * as described in: Sid-Ahmed-Ali Touati
4 * Register Saturation in Superscalar and VLIW Codes
6 * @license This file protected by GPL - GNU GENERAL PUBLIC LICENSE.
7 * @author Christian Wuerdig
20 #include "irgraph_t.h"
22 #include "iredges_t.h"
24 #include "irphase_t.h"
29 #include "bipartite.h"
30 #include "hungarian.h"
37 #include "besched_t.h"
38 #include "beschedmris.h"
40 #define DEBUG_NODEINFO 1 << 0
41 #define DEBUG_PKILL 1 << 1
42 #define DEBUG_BIPARTITE 1 << 2
43 #define DEBUG_SKS 1 << 3
44 #define DEBUG_DVG 1 << 4
45 #define DEBUG_SER_HEUR 1 << 5
46 #define DEBUG_MAX_AC 1 << 6
48 #define HASH_RSS_EDGE(edge) ((get_irn_node_nr((edge)->src) << 16) | (get_irn_node_nr((edge)->tgt) & 0xFFFF))
49 #define BSEARCH_IRN_ARR(val, arr) \
50 bsearch(&(val), (arr), ARR_LEN((arr)), sizeof((arr)[0]), cmp_irn_idx)
52 #define BLOCK_IDX_MAP(rss, irn) bsearch_for_index(get_irn_idx((irn)), (rss)->idx_map, ARR_LEN((rss)->idx_map), 1)
54 /* Represents a child with associated costs */
55 typedef struct _child {
60 /* We need edges for several purposes. */
61 typedef struct _rss_edge {
67 /* Represents a connected bipartite component. */
69 nodeset *parents; /**< = S a set of value producers */
70 nodeset *children; /**< = T a set of value consumers */
71 pset *kill_edges; /**< = E a set of edges (t in T, s in S) such as each s in S gets killed by at least one t in T */
72 int nr; /**< a deterministic index for set insertion (used as hash) */
75 /* Represents a serialization edge with associated costs. */
76 typedef struct _serialization {
82 /* Represents a disjoint value DAG. */
88 /* Represents a chain of nodes. */
89 typedef struct _chain {
90 plist_t *elements; /**< List of chain elements */
91 int nr; /**< a deterministic index for set insertion (used as hash) */
94 typedef struct _rss_irn {
95 plist_t *consumer_list; /**< List of consumers */
96 ir_node **consumer; /**< Sorted consumer array (needed for faster access) */
98 plist_t *parent_list; /**< List of parents */
99 ir_node **parents; /**< Sorted parent array (needed for faster access) */
101 plist_t *descendant_list; /**< List of descendants */
102 ir_node **descendants; /**< Sorted descendant array (needed for faster access) */
104 plist_t *pkiller_list; /**< List of potential killers */
105 ir_node **pkillers; /**< Sorted pkiller array (needed for faster access) */
107 plist_t *dvg_desc_list; /**< List of all descendants in the DVG */
108 ir_node **dvg_desc; /**< Sorted dvg descendant array (needed for faster access) */
110 plist_t *dvg_pkiller_list; /**< List of potential killers in the DVG */
111 ir_node **dvg_pkiller; /**< Sorted dvg pkiller array (needed for faster access) */
113 plist_t *kill_value_list; /**< List of values getting potentially killed by this node */
114 plist_t *dvg_user_list; /**< List of users in the disjoint value DAG DVG */
116 ir_node *killer; /**< The selected unique killer */
117 ir_node *irn; /**< The corresponding firm node to this rss_irn */
119 chain_t *chain; /**< The chain, this node is associated to */
121 unsigned live_out : 1; /**< irn has consumers outside of it's block */
122 unsigned visited : 1; /**< visited flag for bipartite decomposition */
123 unsigned handled : 1; /**< flag indicating whether or not the list structures have been build */
124 unsigned dumped : 1; /**< flag indication whether or not this node was dumped */
127 typedef struct _rss {
128 phase_t ph; /**< Phase to hold some data */
129 heights_t *h; /**< The current height object */
130 ir_graph *irg; /**< The irg to preprocess */
131 plist_t *nodes; /**< The list of interesting nodes */
132 const arch_env_t *arch_env; /**< The architecture environment */
133 be_abi_irg_t *abi; /**< The abi for this irg */
134 pset *cbc_set; /**< A set of connected bipartite components */
135 ir_node *block; /**< The current block in progress. */
136 int *idx_map; /**< Mapping irn indices to per block indices */
137 unsigned max_height; /**< maximum height in the current block */
138 const arch_register_class_t *cls; /**< The current register class */
139 DEBUG_ONLY(firm_dbg_module_t *dbg);
142 #define get_rss_irn(rss, irn) (phase_get_or_set_irn_data(&rss->ph, irn))
145 * We need some special nodes:
146 * a source and a sink for all live-in and live-out values of a block
155 static ir_node *_source = NULL;
156 static ir_node *_sink = NULL;
158 #define is_Source(irn) ((irn) == _source)
159 #define is_Sink(irn) ((irn) == _sink)
162 * Acquire opcodes and create source and sink nodes.
164 static void init_rss_special_nodes(ir_graph *irg) {
165 ir_node *block = get_irg_start_block(irg);
166 int iro_rss_base = get_next_ir_opcodes(iro_rss_last);
167 ir_op *op_rss_Source = new_ir_op(iro_rss_base + iro_rss_Source, "rss_Source", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
168 ir_op *op_rss_Sink = new_ir_op(iro_rss_base + iro_rss_Sink, "rss_Sink", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
169 _source = new_ir_node(NULL, irg, block, op_rss_Source, mode_ANY, 0, NULL);
170 _sink = new_ir_node(NULL, irg, block, op_rss_Sink, mode_ANY, 0, NULL);
173 static int cmp_int(const void *a, const void *b) {
177 return QSORT_CMP(*i1, *i2);
180 static int cmp_child_costs(const void *a, const void *b) {
181 const child_t *c1 = a;
182 const child_t *c2 = b;
184 return QSORT_CMP(c1->cost, c2->cost);
187 static int cmp_irn_idx(const void *a, const void *b) {
188 const ir_node *n1 = *(ir_node **)a;
189 const ir_node *n2 = *(ir_node **)b;
191 return QSORT_CMP(get_irn_idx(n1), get_irn_idx(n2));
194 static int cmp_rss_edges(const void *a, const void *b) {
195 const rss_edge_t *e1 = a;
196 const rss_edge_t *e2 = b;
198 return (e1->src != e2->src) || (e1->tgt != e2->tgt);
201 static int bsearch_for_index(int key, int *arr, size_t len, int force) {
205 while (right >= left) {
206 int idx = (left + right) / 2;
210 else if (key > arr[idx])
217 assert(0 && "Something is wrong, key not found.");
221 static void dump_nodeset(nodeset *ns, const char *prefix) {
223 foreach_nodeset(ns, irn) {
224 ir_printf("%s%+F\n", prefix, irn);
228 static ir_node **build_sorted_array_from_list(plist_t *irn_list, struct obstack *obst) {
231 int len = plist_count(irn_list);
232 ir_node **arr = NEW_ARR_D(ir_node *, obst, len);
234 /* copy the list into the array */
235 foreach_plist(irn_list, el) {
236 arr[i++] = plist_element_get_value(el);
239 /* sort the array by node index */
240 qsort(arr, len, sizeof(arr[0]), cmp_irn_idx);
245 static void build_file_name(rss_t *rss, const char *suffix, size_t suf_len, char *buf, size_t len) {
246 const char *irg_name;
249 irg_name = get_entity_name(get_irg_entity(rss->irg));
250 snprintf(buf, len - suf_len, "%s-%s-block-%d",
251 irg_name, arch_register_class_name(rss->cls), get_irn_node_nr(rss->block));
255 /* Dumps all collected bipartite components of current irg as vcg. */
256 static void debug_vcg_dump_bipartite(rss_t *rss) {
260 static const char suffix[] = "-RSS-CBC.vcg";
262 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
263 f = fopen(file_name, "w");
265 ir_fprintf(f, "graph: { title: \"connected bipartite component graph of %+F\"\n", rss->irg);
266 fprintf(f, "display_edge_labels: no\n");
267 fprintf(f, "layoutalgorithm: mindepth\n");
268 fprintf(f, "manhattan_edges: yes\n\n");
270 foreach_pset(rss->cbc_set, cbc) {
274 fprintf(f, "graph: { titel: \"cbc %d\" label: \"cbc %d\" status:clustered color:yellow\n", cbc->nr, cbc->nr);
275 foreach_nodeset(cbc->parents, n) {
276 ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
278 foreach_nodeset(cbc->children, n) {
279 ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
281 foreach_pset(cbc->kill_edges, ke) {
282 ir_fprintf(f, "edge: { sourcename: \"n%d_%d\" targetname: \"n%d_%d\" }\n",
283 get_irn_node_nr(ke->src), cbc->nr, get_irn_node_nr(ke->tgt), cbc->nr);
291 /* Dump the computed killing function as vcg. */
292 static void debug_vcg_dump_kill(rss_t *rss) {
295 static const char suffix[] = "-RSS-KILL.vcg";
298 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
299 f = fopen(file_name, "w");
301 ir_fprintf(f, "graph: { title: \"computed kill graph of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
302 fprintf(f, "display_edge_labels: no\n");
303 fprintf(f, "layoutalgorithm: mindepth\n");
304 fprintf(f, "manhattan_edges: yes\n\n");
306 /* first: reset dumped flag of all nodes */
307 foreach_plist(rss->nodes, el) {
308 ir_node *irn = plist_element_get_value(el);
309 rss_irn_t *rirn = get_rss_irn(rss, irn);
313 /* dump all nodes and their killers */
314 foreach_plist(rss->nodes, el) {
315 ir_node *irn = plist_element_get_value(el);
316 rss_irn_t *rirn = get_rss_irn(rss, irn);
317 rss_irn_t *pk_rirn = get_rss_irn(rss, rirn->killer);
319 if (! rirn->dumped) {
320 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
324 if (! pk_rirn->dumped) {
325 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(rirn->killer), rirn->killer);
329 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
330 get_irn_node_nr(rirn->killer), get_irn_node_nr(irn));
337 /* Dumps the potential killing DAG (PKG) as vcg. */
338 static void debug_vcg_dump_pkg(rss_t *rss) {
341 static const char suffix[] = "-RSS-PKG.vcg";
344 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
345 f = fopen(file_name, "w");
347 ir_fprintf(f, "graph: { title: \"potential killing DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
348 fprintf(f, "display_edge_labels: no\n");
349 fprintf(f, "layoutalgorithm: mindepth\n");
350 fprintf(f, "manhattan_edges: yes\n\n");
352 foreach_plist(rss->nodes, el) {
353 ir_node *irn = plist_element_get_value(el);
354 rss_irn_t *rirn = get_rss_irn(rss, irn);
355 plist_element_t *k_el;
357 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
360 foreach_plist(rirn->pkiller_list, k_el) {
361 ir_node *pkiller = plist_element_get_value(k_el);
362 rss_irn_t *pk_rirn = get_rss_irn(rss, pkiller);
364 if (! pk_rirn->dumped) {
365 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(pkiller), pkiller);
368 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
369 get_irn_node_nr(pkiller), get_irn_node_nr(irn));
376 /* Dumps the disjoint value DAG (DVG) as vcg. */
377 static void debug_vcg_dump_dvg(rss_t *rss, dvg_t *dvg) {
378 static const char suffix[] = "-RSS-DVG.vcg";
384 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
385 f = fopen(file_name, "w");
387 ir_fprintf(f, "graph: { title: \"disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
388 fprintf(f, "display_edge_labels: no\n");
389 fprintf(f, "layoutalgorithm: mindepth\n");
390 fprintf(f, "manhattan_edges: yes\n\n");
393 foreach_nodeset(dvg->nodes, irn) {
394 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
398 foreach_pset(dvg->edges, edge) {
399 rss_irn_t *src = get_rss_irn(rss, edge->src);
400 rss_irn_t *tgt = get_rss_irn(rss, edge->tgt);
402 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
403 get_irn_node_nr(edge->src), get_irn_node_nr(edge->tgt));
410 /* Dumps the PKG(DVG). */
411 static void debug_vcg_dump_dvg_pkiller(rss_t *rss, dvg_t *dvg) {
412 static const char suffix[] = "-RSS-DVG-PKG.vcg";
417 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
418 f = fopen(file_name, "w");
420 ir_fprintf(f, "graph: { title: \"PKG of disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
421 fprintf(f, "display_edge_labels: no\n");
422 fprintf(f, "layoutalgorithm: mindepth\n");
423 fprintf(f, "manhattan_edges: yes\n\n");
426 foreach_nodeset(dvg->nodes, irn) {
427 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
431 foreach_nodeset(dvg->nodes, irn) {
432 rss_irn_t *node = get_rss_irn(rss, irn);
435 foreach_plist(node->dvg_pkiller_list, el) {
436 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
437 get_irn_node_nr(plist_element_get_value(el)), get_irn_node_nr(irn));
446 * In case there is no rss information for irn, initialize it.
448 static void *init_rss_irn(phase_t *ph, ir_node *irn, void *old) {
449 rss_irn_t *res = phase_alloc(ph, sizeof(res[0]));
451 res->descendant_list = plist_obstack_new(phase_obst(ph));
452 res->descendants = NULL;
454 res->consumer_list = plist_obstack_new(phase_obst(ph));
455 res->consumer = NULL;
457 res->pkiller_list = plist_obstack_new(phase_obst(ph));
458 res->pkillers = NULL;
460 res->parent_list = plist_obstack_new(phase_obst(ph));
463 res->dvg_desc_list = plist_obstack_new(phase_obst(ph));
464 res->dvg_desc = NULL;
466 res->kill_value_list = plist_obstack_new(phase_obst(ph));
467 res->dvg_user_list = plist_obstack_new(phase_obst(ph));
468 res->dvg_pkiller_list = plist_obstack_new(phase_obst(ph));
483 * Collect all nodes data dependent on current node.
485 static void collect_descendants(rss_t *rss, rss_irn_t *rirn, ir_node *irn, int *got_sink) {
486 const ir_edge_t *edge;
487 ir_node *block = rss->block;
489 foreach_out_edge(irn, edge) {
490 ir_node *user = get_edge_src_irn(edge);
492 /* skip ignore nodes as they do not really contribute to register presssure */
493 if (arch_irn_is(rss->arch_env, user, ignore))
496 /* check if user lives in block and is not a control flow node */
497 if (get_nodes_block(user) == block && get_irn_mode(user) != mode_X) {
498 /* skip mode_T nodes */
499 if (get_irn_mode(user) != mode_T && ! plist_has_value(rirn->descendant_list, user)) {
500 plist_insert_back(rirn->descendant_list, user);
501 DBG((rss->dbg, DEBUG_NODEINFO, "\t\tdescendant %+F\n", user));
503 collect_descendants(rss, rirn, user, got_sink);
505 else if (! *got_sink) {
506 /* user lives out of block: add sink as descendant if not already done */
507 plist_insert_back(rirn->descendant_list, _sink);
509 DBG((rss->dbg, DEBUG_NODEINFO, "\t\tdescendant %+F\n", _sink));
515 * Handles a single consumer.
517 static int collect_single_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *consumer, int got_sink) {
518 ir_node *block = rss->block;
520 if (get_nodes_block(consumer) == block) {
521 /* the consumer of a mode_T node are it's projs */
522 if (get_irn_mode(consumer) == mode_T) {
523 const ir_edge_t *cons_edge;
525 DBG((rss->dbg, DEBUG_NODEINFO, "\t\tmode_T consumer %+F skipped\n", consumer));
526 foreach_out_edge(consumer, cons_edge) {
527 ir_node *cons_proj = get_edge_src_irn(cons_edge);
529 assert(get_nodes_block(cons_proj) == block && "Proj in wrong block!");
531 /* skip ignore nodes, as they do not really contribute to register pressure */
532 if (arch_irn_is(rss->arch_env, cons_proj, ignore))
535 plist_insert_back(rss_irn->consumer_list, cons_proj);
536 DBG((rss->dbg, DEBUG_NODEINFO, "\t\t\treal consumer %+F\n", cons_proj));
539 else if (! arch_irn_is(rss->arch_env, consumer, ignore)) {
540 plist_insert_back(rss_irn->consumer_list, consumer);
541 DBG((rss->dbg, DEBUG_NODEINFO, "\t\tconsumer %+F\n", consumer));
545 rss_irn->live_out = 1;
546 DBG((rss->dbg, DEBUG_NODEINFO, "\t\tlive out %+F", consumer));
548 plist_insert_back(rss_irn->consumer_list, _sink);
550 DB((rss->dbg, DEBUG_NODEINFO, ", %+F added instead", _sink));
552 DB((rss->dbg, DEBUG_NODEINFO, "\n"));
559 * Collect all nodes consuming the value(s) produced by current node.
561 static void collect_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *irn) {
562 const ir_edge_t *edge;
565 foreach_out_edge(irn, edge) {
566 ir_node *consumer = get_edge_src_irn(edge);
567 got_sink = collect_single_consumer(rss, rss_irn, consumer, got_sink);
573 * We need to build the consumer and descendant list for _source.
575 static void collect_node_info_source(rss_t *rss) {
576 const ir_edge_t *edge;
577 rss_irn_t *rirn = get_rss_irn(rss, _source);
578 ir_node *block = rss->block;
583 foreach_out_edge(block, edge) {
584 ir_node *irn = get_edge_src_irn(edge);
587 for (i = get_irn_arity(n) - 1; i >= 0; --i) {
593 static void reset_node_info(rss_irn_t *rss_irn) {
594 /* Beware: array data resides on phase obstack, so it gets removed automatically */
596 plist_clear(rss_irn->consumer_list);
597 rss_irn->consumer = NULL;
599 plist_clear(rss_irn->parent_list);
600 rss_irn->parents = NULL;
602 plist_clear(rss_irn->descendant_list);
603 rss_irn->descendants = NULL;
605 plist_clear(rss_irn->pkiller_list);
606 rss_irn->pkillers = NULL;
608 plist_clear(rss_irn->kill_value_list);
610 rss_irn->killer = NULL;
611 rss_irn->live_out = 0;
612 rss_irn->visited = 0;
613 rss_irn->handled = 0;
618 * Collects all consumer and descendant of a irn.
620 static void collect_node_info(rss_t *rss, ir_node *irn) {
621 rss_irn_t *rss_irn = get_rss_irn(rss, irn);
624 assert(get_irn_mode(irn) != mode_T && "Cannot handle mode_T nodes.");
626 /* check if node info is already available */
627 if (rss_irn->handled)
630 DBG((rss->dbg, DEBUG_NODEINFO, "\tcomputing consumers of %+F:\n", irn));
632 /* collect all consumer */
634 collect_consumer(rss, rss_irn, irn);
636 /* build sorted consumer array */
637 rss_irn->consumer = build_sorted_array_from_list(rss_irn->consumer_list, phase_obst(&rss->ph));
639 DBG((rss->dbg, DEBUG_NODEINFO, "\tcompute descendants of %+F:\n", irn));
641 /* collect descendants */
643 collect_descendants(rss, rss_irn, irn, &got_sink);
645 /* build sorted descendant array */
646 rss_irn->descendants = build_sorted_array_from_list(rss_irn->descendant_list, phase_obst(&rss->ph));
648 rss_irn->handled = 1;
652 * Checks if v is a potential killer of u.
653 * v is in pkill(u) iff descendants(v) cut consumer(u) is v
655 * @param rss The rss object
656 * @param v The node to check for killer
657 * @param u The potentially killed value
658 * @return 1 if v is in pkill(u), 0 otherwise
660 static int is_potential_killer(rss_t *rss, rss_irn_t *v, rss_irn_t *u) {
665 assert(is_Sink(v->irn) || ((plist_count(v->descendant_list) > 0 && v->descendants) || 1));
666 assert(is_Sink(u->irn) || ((plist_count(u->consumer_list) > 0 && u->consumer) || 1));
668 /* as we loop over the list: loop over the shorter one */
669 if (plist_count(v->descendant_list) > plist_count(u->consumer_list)) {
670 list = u->consumer_list;
671 arr = v->descendants;
674 list = v->descendant_list;
678 /* for each list element: try to find element in array */
679 foreach_plist(list, el) {
680 ir_node *irn = plist_element_get_value(el);
681 ir_node *match = BSEARCH_IRN_ARR(irn, arr);
683 if (match && match != irn)
691 * Compute the potential killing set PK.
693 static void compute_pkill_set(rss_t *rss) {
694 plist_element_t *u_el, *v_el;
696 foreach_plist(rss->nodes, u_el) {
697 ir_node *u_irn = plist_element_get_value(u_el);
698 rss_irn_t *u = get_rss_irn(rss, u_irn);
700 DBG((rss->dbg, DEBUG_PKILL, "\tcomputing potential killers of %+F:\n", u_irn));
702 /* check each consumer if it is a potential killer */
703 foreach_plist(u->consumer_list, v_el) {
704 ir_node *v_irn = plist_element_get_value(v_el);
705 rss_irn_t *v = get_rss_irn(rss, v_irn);
707 /* check, if v is a potential killer of u */
708 if (is_potential_killer(rss, v, u)) {
709 if (! plist_has_value(u->pkiller_list, v_irn))
710 plist_insert_back(u->pkiller_list, v_irn);
711 if (! plist_has_value(v->kill_value_list, u_irn))
712 plist_insert_back(v->kill_value_list, u_irn);
713 DBG((rss->dbg, DEBUG_PKILL, "\t\tpotential killer %+F\n", v_irn));
721 if (firm_dbg_get_mask(rss->dbg) & DEBUG_PKILL)
722 debug_vcg_dump_pkg(rss);
727 * Build set of killing edges (from values to their potential killers)
729 static void build_kill_edges(rss_t *rss, pset *epk) {
730 plist_element_t *el, *k_el;
732 foreach_plist(rss->nodes, el) {
733 ir_node *irn = plist_element_get_value(el);
734 rss_irn_t *rirn = get_rss_irn(rss, irn);
736 foreach_plist(rirn->pkiller_list, k_el) {
737 ir_node *pkiller = plist_element_get_value(k_el);
738 rss_edge_t *ke = obstack_alloc(phase_obst(&rss->ph), sizeof(*ke));
744 pset_insert(epk, ke, HASH_RSS_EDGE(ke));
749 /* print the given cbc for debugging purpose */
750 static void debug_print_cbc(firm_dbg_module_t *mod, cbc_t *cbc) {
754 DBG((mod, DEBUG_BIPARTITE, "\t\tS = set of parents:\n"));
755 foreach_nodeset(cbc->parents, n) {
756 DBG((mod, DEBUG_BIPARTITE, "\t\t\t%+F\n", n));
758 DBG((mod, DEBUG_BIPARTITE, "\t\tT = set of children:\n"));
759 foreach_nodeset(cbc->children, n) {
760 DBG((mod, DEBUG_BIPARTITE, "\t\t\t%+F\n", n));
762 DBG((mod, DEBUG_BIPARTITE, "\t\tE = Edges from producers to consumers\n"));
763 foreach_pset(cbc->kill_edges, ke) {
764 DBG((mod, DEBUG_BIPARTITE, "\t\t\t%+F -> %+F\n", ke->src, ke->tgt));
769 * Construct the bipartite decomposition.
770 * Sid-Ahmed-Ali Touati, Phd Thesis
771 * Register Pressure in Instruction Level Parallelism, p. 71
773 static void compute_bipartite_decomposition(rss_t *rss) {
774 pset *epk = new_pset(cmp_rss_edges, 10);
779 DBG((rss->dbg, DEBUG_BIPARTITE, "\tcomputing bipartite decomposition:\n"));
781 build_kill_edges(rss, epk);
783 foreach_plist(rss->nodes, el) {
784 ir_node *u_irn = plist_element_get_value(el);
785 rss_irn_t *u = get_rss_irn(rss, u_irn);
790 plist_element_t *el2;
792 rss_edge_t *kedge_root = NULL;
793 ir_node *t_irn, *s_irn;
795 if (u->visited || u_irn == _sink)
798 DBG((rss->dbg, DEBUG_BIPARTITE, "\t\t%+F choosen:\n", u_irn));
800 cbc = obstack_alloc(phase_obst(&rss->ph), sizeof(*cbc));
803 /* initialize S_cb */
804 cbc->parents = new_nodeset(5);
805 nodeset_insert(cbc->parents, u_irn);
806 DBG((rss->dbg, DEBUG_BIPARTITE, "\t\t\t%+F added to parents (init)\n", u_irn));
809 cbc->kill_edges = new_pset(cmp_rss_edges, 5);
811 /* each parent gets killed by at least one value from children */
813 /* T_cb = PK_successors(u) */
814 cbc->children = new_nodeset(5);
815 foreach_plist(u->pkiller_list, el2) {
816 nodeset_insert(cbc->children, plist_element_get_value(el2));
817 DBG((rss->dbg, DEBUG_BIPARTITE, "\t\t\t%+F added to children (init)\n", plist_element_get_value(el2)));
821 Now: insert the parents of all children into the parent set
822 and insert the children of all parents into the children set
823 until the sets don't change any more
825 while (p_change || c_change) {
826 p_change = c_change = 0;
828 /* accumulate parents */
829 foreach_nodeset(cbc->children, t_irn) {
830 rss_irn_t *t = get_rss_irn(rss, t_irn);
831 plist_element_t *kvl_el;
833 foreach_plist(t->kill_value_list, kvl_el) {
834 ir_node *val = plist_element_get_value(kvl_el);
836 if (! nodeset_find(cbc->parents, val)) {
837 nodeset_insert(cbc->parents, val);
839 DBG((rss->dbg, DEBUG_BIPARTITE, "\t\t\t%+F added to parents\n", val));
844 /* accumulate children */
845 foreach_nodeset(cbc->parents, s_irn) {
846 rss_irn_t *s = get_rss_irn(rss, s_irn);
847 plist_element_t *pkl_el;
849 foreach_plist(s->pkiller_list, pkl_el) {
850 ir_node *val = plist_element_get_value(pkl_el);
852 if (! nodeset_find(cbc->children, val)) {
853 nodeset_insert(cbc->children, val);
855 DBG((rss->dbg, DEBUG_BIPARTITE, "\t\t\t%+F added to children\n", val));
861 /* mark all parent values as visited */
862 foreach_nodeset(cbc->parents, s_irn) {
863 rss_irn_t *s = get_rss_irn(rss, s_irn);
865 /* assure bipartite property */
866 if (nodeset_find(cbc->children, s_irn)) {
867 nodeset_remove(cbc->children, s_irn);
868 DBG((rss->dbg, DEBUG_BIPARTITE, "\t\t\t%+F removed from to children\n", s_irn));
873 foreach_pset(epk, k_edge) {
874 if (nodeset_find(cbc->parents, k_edge->src) && nodeset_find(cbc->children, k_edge->tgt)) {
875 pset_insert(cbc->kill_edges, k_edge, HASH_RSS_EDGE(k_edge));
877 Link all k_edges which are about to be removed together.
878 Beware: pset_remove kills the iterator.
880 k_edge->next = kedge_root;
885 /* remove all linked k_edges */
886 for (; kedge_root; kedge_root = kedge_root->next) {
887 pset_remove(epk, kedge_root, HASH_RSS_EDGE(kedge_root));
890 /* add the connected bipartite component */
891 pset_insert(rss->cbc_set, cbc, (unsigned)cbc->nr);
892 DBG((rss->dbg, DEBUG_BIPARTITE, "\tbipartite component %d inserted:\n", cbc->nr));
893 DEBUG_ONLY(debug_print_cbc(rss->dbg, cbc));
896 if (firm_dbg_get_mask(rss->dbg) & DEBUG_BIPARTITE)
897 debug_vcg_dump_bipartite(rss);
903 * Select the child with the maximum cost.
905 static child_t *select_child_max_cost(rss_t *rss, nodeset *x, nodeset *y, child_t *t, cbc_t *cbc) {
907 float max_cost = -1.0f;
909 DBG((rss->dbg, DEBUG_SKS, "\t\tcomputing children costs:\n"));
911 foreach_nodeset(cbc->children, child) {
912 rss_irn_t *r_child = get_rss_irn(rss, child);
913 int num_unkilled_parents = 0;
918 /* get the number of unkilled parents */
919 foreach_pset(cbc->kill_edges, k_edge) {
920 if (k_edge->tgt == child && nodeset_find(x, k_edge->src))
921 ++num_unkilled_parents;
924 cost = (float)num_unkilled_parents;
926 num_descendants = plist_count(r_child->descendant_list) + nodeset_count(y);
928 if (num_descendants > 0)
929 cost /= (float)num_descendants;
931 DBG((rss->dbg, DEBUG_SKS, "\t\t\t%+F, #desc %d, cost %.3f\n", child, num_descendants, cost));
933 if (cost > max_cost) {
944 * Remove all parents from x which are killed by t_irn.
946 static void remove_covered_parents(rss_t *rss, nodeset *x, ir_node *t_irn, cbc_t *cbc) {
947 rss_irn_t *t = get_rss_irn(rss, t_irn);
950 DBG((rss->dbg, DEBUG_SKS, "\t\tremoving parents covered by %+F:\n", t_irn));
952 foreach_pset(cbc->kill_edges, k_edge) {
953 if (k_edge->tgt == t_irn && nodeset_find(x, k_edge->src)) {
954 nodeset_remove(x, k_edge->src);
955 plist_insert_back(t->parent_list, k_edge->src);
956 DBG((rss->dbg, DEBUG_SKS, "\t\t\t%+F\n", k_edge->src));
961 static void update_cumulated_descendent_values(rss_t *rss, nodeset *y, ir_node *t_irn) {
962 rss_irn_t *t = get_rss_irn(rss, t_irn);
965 DBG((rss->dbg, DEBUG_SKS, "\t\tupdating cumulated descendant value of %+F:\n", t_irn));
967 foreach_plist(t->descendant_list, el) {
968 nodeset_insert(y, plist_element_get_value(el));
969 DBG((rss->dbg, DEBUG_SKS, "\t\t\t%+F\n", plist_element_get_value(el)));
974 * Greedy-k: a heuristics for the MMA problem
976 static void compute_killing_function(rss_t *rss) {
982 rss->cbc_set = pset_new_ptr(5);
983 compute_bipartite_decomposition(rss);
985 /* for all bipartite components do: */
986 foreach_pset(rss->cbc_set, cbc) {
988 nodeset *x = new_nodeset(10);
989 nodeset *y = new_nodeset(10);
990 child_t **sks = NEW_ARR_F(child_t *, 20);
995 DBG((rss->dbg, DEBUG_SKS, "\tcomputing SKS for cbc %d:\n", cbc->nr));
996 DBG((rss->dbg, DEBUG_SKS, "\t\tinitializing parents X:\n"));
998 /* X = S_cb (all parents are initially uncovered) */
999 foreach_nodeset(cbc->parents, p) {
1000 nodeset_insert(x, p);
1001 DBG((rss->dbg, DEBUG_SKS, "\t\t\t%+F\n", p));
1004 /* while X not empty */
1005 while (nodeset_count(x) > 0) {
1006 child_t *t = obstack_alloc(&obst, sizeof(*t));
1007 memset(t, 0, sizeof(*t));
1009 t = select_child_max_cost(rss, x, y, t, cbc);
1011 if (cur_len >= cur_size) {
1012 ARR_EXTO(child_t *, sks, cur_size * 2);
1016 DBG((rss->dbg, DEBUG_SKS, "\t\tinsert child %+F (%.3f) into SKS at pos %d\n", t->irn, t->cost, cur_len));
1019 remove_covered_parents(rss, x, t->irn, cbc);
1020 update_cumulated_descendent_values(rss, y, t->irn);
1023 ARR_SHRINKLEN(sks, cur_len);
1025 /* sort SKS in increasing cost order */
1026 qsort(sks, cur_len, sizeof(sks[0]), cmp_child_costs);
1028 DBG((rss->dbg, DEBUG_SKS, "\tprocessing SKS for cbc %d:\n", cbc->nr));
1030 /* build killing function */
1031 for (i = cur_len - 1; i >= 0; --i) { /* loop over sks in decreasing cost order */
1032 child_t *t = sks[i];
1033 rss_irn_t *rt = get_rss_irn(rss, t->irn);
1034 plist_element_t *p_el;
1036 DBG((rss->dbg, DEBUG_SKS, "\t\t\tkiller %+F (%.3f):\n", t->irn, t->cost));
1038 /* kill all unkilled parents of t */
1039 foreach_plist(rt->parent_list, p_el) {
1040 ir_node *par = plist_element_get_value(p_el);
1041 rss_irn_t *rpar = get_rss_irn(rss, par);
1043 if (is_Sink(rpar->killer)) {
1044 rpar->killer = t->irn;
1045 DBG((rss->dbg, DEBUG_SKS, "\t\tkill %+F\n", rpar->irn));
1048 DBG((rss->dbg, DEBUG_SKS, "\t\t\tkeeping %+F as killer for %+F\n", rpar->killer, rpar->irn));
1059 if (firm_dbg_get_mask(rss->dbg) & DEBUG_SKS)
1060 debug_vcg_dump_kill(rss);
1063 del_pset(rss->cbc_set);
1064 obstack_free(&obst, NULL);
1068 * Computes the disjoint value DAG (DVG).
1069 * BEWARE: It is not made explicitly clear in the Touati paper,
1070 * but the DVG is meant to be build from the KILLING DAG
1072 static void compute_dvg(rss_t *rss, dvg_t *dvg) {
1073 plist_element_t *el, *el2;
1075 DBG((rss->dbg, DEBUG_DVG, "\tcomputing DVG:\n"));
1077 foreach_plist(rss->nodes, el) {
1078 ir_node *u_irn = plist_element_get_value(el);
1079 rss_irn_t *u = get_rss_irn(rss, u_irn);
1080 ir_node *old_killer = NULL;
1081 ir_node *cur_killer = u->killer;
1083 nodeset_insert(dvg->nodes, u_irn);
1085 /* We add an edge to every killer, from where we could be reached. */
1086 while (cur_killer != old_killer) { /* sink kills itself */
1087 rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1088 rss_irn_t *c_killer = get_rss_irn(rss, cur_killer);
1091 nodeset_insert(dvg->nodes, cur_killer);
1093 /* add an edge to our killer */
1094 dvg_edge->src = u_irn;
1095 dvg_edge->tgt = cur_killer;
1096 dvg_edge->next = NULL;
1098 key.src = cur_killer;
1100 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1102 /* add the edge to the DVG */
1103 DBG((rss->dbg, DEBUG_DVG, "\t\tadd edge %+F -> %+F\n", u_irn, cur_killer));
1104 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1106 /* descent to the next killer */
1107 old_killer = cur_killer;
1108 cur_killer = c_killer->killer;
1113 foreach_plist(rss->nodes, el2) {
1114 ir_node *v_irn = plist_element_get_value(el2);
1117 There is an edge (u, v) in the DVG iff v is a descendant of the killer(u).
1119 if (BSEARCH_IRN_ARR(v_irn, u_kill->descendants)) {
1120 rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1123 /* insert the user into the DVG and append it to the user list of u */
1124 nodeset_insert(dvg->nodes, v_irn);
1125 if (! plist_has_value(u->dvg_user_list, v_irn))
1126 plist_insert_back(u->dvg_user_list, v_irn);
1128 dvg_edge->src = u_irn;
1129 dvg_edge->tgt = v_irn;
1130 dvg_edge->next = NULL;
1135 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1137 /* add the edge to the DVG */
1138 DBG((rss->dbg, DEBUG_DVG, "\t\tadd edge %+F -> %+F\n", u_irn, v_irn));
1139 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1146 if (firm_dbg_get_mask(rss->dbg) & DEBUG_DVG)
1147 debug_vcg_dump_dvg(rss, dvg);
1152 * Accumulate all descendants for root into list.
1154 static void accumulate_dvg_descendant_values(rss_t *rss, rss_irn_t *root, plist_t *list) {
1155 if (plist_count(root->dvg_user_list) > 0) {
1156 plist_element_t *el;
1158 foreach_plist(root->dvg_user_list, el) {
1159 ir_node *v_irn = plist_element_get_value(el);
1160 rss_irn_t *v = get_rss_irn(rss, v_irn);
1162 /* add v as descendant */
1163 if (! plist_has_value(list, v_irn)) {
1164 plist_insert_back(list, v_irn);
1165 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG descendant %+F\n", v_irn));
1168 /* accumulate v's descendants */
1169 accumulate_dvg_descendant_values(rss, v, list);
1175 * Builds the list of potential killers for each node
1177 * Needs the descendant list for all user as sorted array.
1179 static void build_dvg_pkiller_list(rss_t *rss, dvg_t *dvg) {
1182 foreach_nodeset(dvg->nodes, irn) {
1183 rss_irn_t *node = get_rss_irn(rss, irn);
1184 plist_element_t *el, *el2;
1186 DBG((rss->dbg, DEBUG_DVG, "\t\tbuilding pkiller list for %+F\n", irn));
1188 /* check each user */
1189 foreach_plist(node->dvg_user_list, el) {
1190 ir_node *u_irn = plist_element_get_value(el);
1192 /* is the current user u_irn not a descendant of any other user -> pkiller */
1193 foreach_plist(node->dvg_user_list, el2) {
1194 ir_node *v_irn = plist_element_get_value(el2);
1195 rss_irn_t *v = get_rss_irn(rss, v_irn);
1198 ! BSEARCH_IRN_ARR(u_irn, v->dvg_desc) &&
1199 ! plist_has_value(node->dvg_pkiller_list, u_irn))
1201 plist_insert_back(node->dvg_pkiller_list, u_irn);
1202 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG pkiller %+F\n", u_irn));
1207 node->dvg_pkiller = build_sorted_array_from_list(node->dvg_pkiller_list, phase_obst(&rss->ph));
1211 if (firm_dbg_get_mask(rss->dbg) & DEBUG_DVG)
1212 debug_vcg_dump_dvg_pkiller(rss, dvg);
1217 * Compute the maximal antichain of the current DVG.
1218 * This is a reimplementation of the MAXIMAL_ANTI_CHAIN function
1219 * from the DDG library 1.1 (DAG.cpp).
1221 static nodeset *compute_maximal_antichain(rss_t *rss, dvg_t *dvg) {
1222 int n = nodeset_count(dvg->nodes);
1223 int *assignment = alloca(n * sizeof(assignment[0]));
1224 int *assignment_rev = alloca(n * sizeof(assignment_rev[0]));
1225 int *idx_map = alloca(n * sizeof(idx_map[0]));
1226 hungarian_problem_t *bp;
1227 nodeset *values, *temp;
1229 int i, j, cost, cur_chain;
1230 rss_edge_t *dvg_edge;
1232 #define MAP_IDX(irn) bsearch_for_index(get_irn_idx(irn), idx_map, n, 1)
1234 if (pset_count(dvg->edges) == 0)
1237 bp = hungarian_new(n, n, 1, HUNGARIAN_MATCH_NORMAL);
1240 At first, we build an index map for the nodes in the DVG,
1241 because we cannot use the irn idx therefore as the resulting
1242 bipartite data structure would have around 1.2GB.
1243 So we limit the size to the number of nodes we have in the DVG
1244 and build a sorted index map for their irn indices.
1247 foreach_nodeset(dvg->nodes, u_irn) {
1248 idx_map[i++] = get_irn_idx(u_irn);
1250 qsort(idx_map, n, sizeof(idx_map[0]), cmp_int);
1252 foreach_pset(dvg->edges, dvg_edge) {
1253 int idx_u = MAP_IDX(dvg_edge->src);
1254 int idx_v = MAP_IDX(dvg_edge->tgt);
1256 /* add the entry to the bipartite data structure */
1257 hungarian_add(bp, idx_u, idx_v, 1);
1258 DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1259 idx_u, dvg_edge->src, idx_v, dvg_edge->tgt));
1263 Add a bipartite entry for each pair of nodes (u, v), where exists a
1264 path in the DVG from u to v, ie. connect all descendants(v) to v.
1265 desc_dvg(v) = accumulated descendants of all z in dvg_user_list(v)
1267 foreach_nodeset(dvg->nodes, u_irn) {
1268 rss_irn_t *u = get_rss_irn(rss, u_irn);
1269 int idx_u_irn = MAP_IDX(u_irn);
1271 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG descendants of %+F:\n", u_irn));
1273 //plist_clear(u->dvg_desc_list);
1274 //accumulate_dvg_descendant_values(rss, u, u->dvg_desc_list);
1277 FIXME: The array is build on the phase obstack and we cannot free the data.
1278 So the array get as many times allocated as this function is called.
1281 /* build the sorted array for faster searches */
1282 //u->dvg_desc = build_sorted_array_from_list(u->dvg_desc_list, phase_obst(&rss->ph));
1284 DBG((rss->dbg, DEBUG_MAX_AC, "\t\tadding bipartite entries of %+F:\n", u_irn));
1286 /* add the bipartite entries for all descendants */
1287 for (i = ARR_LEN(u->dvg_desc) - 1; i >= 0; --i) {
1288 rss_irn_t *desc = get_rss_irn(rss, u->dvg_desc[i]);
1289 int idx_desc = MAP_IDX(u->dvg_desc[i]);
1291 /* add the entry to the bipartite data structure */
1292 hungarian_add(bp, idx_u_irn, idx_desc, 1);
1293 DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1294 idx_u_irn, u_irn, idx_desc, u->dvg_desc[i]));
1301 /* We want maximum cardinality matching */
1302 hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);
1304 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG pkiller:\n"));
1305 /* beware: the following function needs the dvg_desc array */
1306 build_dvg_pkiller_list(rss, dvg);
1308 DBG((rss->dbg, DEBUG_MAX_AC, "\t\tcomputing bipartite matching\n"));
1310 The maximum cardinality bipartite matching gives us the minimal
1311 chain partition, which corresponds to the maximum anti chains.
1313 cost = hungarian_solve(bp, assignment);
1314 assert(cost >= 0 && "Bipartite matching failed!");
1317 memset(assignment_rev, -1, n * sizeof(assignment_rev[0]));
1319 /* build the reverse assignment, ie. foreach i -> j, add j -> i */
1320 for (i = 0; i < n; ++i) {
1321 if (assignment[i] >= 0) {
1322 int j = assignment[i];
1323 assignment_rev[j] = i;
1327 DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tgot assignment with cost %d\n", cost));
1328 DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tassignment --- reverse assignment\n", cost));
1329 for (i = 0; i < n; ++i) {
1330 DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\t%3d -> %3d %3d -> %3d\n", i, assignment[i], i, assignment_rev[i]));
1334 values = new_nodeset(10);
1336 /* Construction of the minimal chain partition */
1337 for (j = 0; j < n; ++j) {
1338 /* check nodes, which did not occur as target */
1339 if (assignment_rev[j] == -1) {
1340 int xj = idx_map[j];
1341 ir_node *xj_irn = get_idx_irn(rss->irg, xj);
1342 rss_irn_t *xj_rss = get_rss_irn(rss, xj_irn);
1343 chain_t *c = obstack_alloc(phase_obst(&rss->ph), sizeof(*c));
1346 /* there was no source for j -> we have a source of a new chain */
1347 nodeset_insert(values, xj_irn);
1349 c->elements = plist_obstack_new(phase_obst(&rss->ph));
1350 c->nr = cur_chain++;
1351 plist_insert_back(c->elements, xj_irn);
1355 DBG((rss->dbg, DEBUG_MAX_AC, "\t\tstarting chain %d:\n", c->nr));
1356 DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\t%+F (%d)", xj_irn, j));
1358 /* follow chain, having j as source */
1360 while (assignment[source] >= 0) {
1361 int target = assignment[source];
1362 int irn_idx = idx_map[target];
1363 ir_node *irn = get_idx_irn(rss->irg, irn_idx);
1364 rss_irn_t *node = get_rss_irn(rss, irn);
1366 plist_insert_back(c->elements, irn);
1369 DB((rss->dbg, DEBUG_MAX_AC, " -> %+F (%d)", irn, target));
1371 /* new source = last target */
1375 DB((rss->dbg, DEBUG_MAX_AC, "\n"));
1380 Computing the maximal antichain: Select an element from each
1381 chain such, such it is parallel with the others.
1383 DBG((rss->dbg, DEBUG_MAX_AC, "\t\tcomputing set of saturation values (MAX AC)\n"));
1384 DBG((rss->dbg, DEBUG_MAX_AC, "\t\tstarting with:\n"));
1385 dump_nodeset(values, "\t\t\t");
1389 We need an explicit array for the values as
1390 we cannot iterate multiple times over the same
1391 set at the same time. :-(((((
1393 int n = nodeset_count(values);
1395 ir_node **val_arr = NEW_ARR_F(ir_node *, n);
1397 foreach_nodeset(values, u_irn)
1398 val_arr[i++] = u_irn;
1403 temp = new_nodeset(10);
1405 /* Select all nodes from current value set, having another node in the set as descendant. */
1406 for (i = 0; i < n; ++i) {
1407 rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1410 for (j = 0; j < n; ++j) {
1415 // BSEARCH_IRN_ARR(val_arr[j], u->dvg_desc))
1416 /* v[j] is descendant of u -> remove u and break */
1417 nodeset_insert(temp, u->irn);
1418 nodeset_remove(values, u->irn);
1420 DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tremoving %+F from values, adding it to temp\n", u->irn));
1427 /* Try to insert the chain predecessor of all selected u's */
1428 foreach_nodeset(temp, u_irn) {
1429 rss_irn_t *u = get_rss_irn(rss, u_irn);
1430 chain_t *c = u->chain;
1431 plist_element_t *el = plist_find_value(c->elements, u_irn);
1433 assert(el && "Missing element in chain!");
1435 /* If u has predecessor in chain: insert the predecessor */
1436 if (el = plist_element_get_prev(el)) {
1437 nodeset_insert(values, plist_element_get_value(el));
1438 DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tadding %+F to values\n", plist_element_get_value(el)));
1444 } while (nodeset_count(temp) > 0);
1446 DBG((rss->dbg, DEBUG_MAX_AC, "\t\tfinal set:\n"));
1447 dump_nodeset(values, "\t\t\t");
1456 static serialization_t *compute_best_admissible_serialization(rss_t *rss, nodeset *sat_vals, serialization_t *ser, int num_regs) {
1457 int n = nodeset_count(sat_vals);
1458 int n_idx = ARR_LEN(rss->idx_map);
1460 ir_node **val_arr = alloca(n * sizeof(val_arr[0]));
1461 bitset_t *bs_sv = bitset_alloca(n_idx);
1462 bitset_t *bs_vdesc = bitset_alloca(n_idx);
1463 bitset_t *bs_tmp = bitset_alloca(n_idx);
1464 bitset_t *bs_ukilldesc = bitset_alloca(n_idx);
1465 unsigned best_benefit = UINT_MAX;
1466 unsigned best_omega2 = UINT_MAX;
1467 unsigned best_benefit_omega20 = UINT_MAX;
1468 int has_positive_omega1 = 0;
1471 rss_edge_t min_benefit_edge;
1472 rss_edge_t min_omega20_edge;
1475 We need an explicit array for the values as
1476 we cannot iterate multiple times over the same
1477 set at the same time. :-(((((
1480 foreach_nodeset(sat_vals, irn) {
1482 bitset_set(bs_sv, BLOCK_IDX_MAP(rss, irn));
1486 We build all admissible serializations and remember the best found so far.
1489 if v in pkiller(u): add edge to v from all other pkiller(u)
1490 else: for all uu in pkiller(u): add edge to v if there exists no path from v to uu
1494 /* for all u in sat_vals */
1495 for (i = 0; i < n; ++i) {
1496 rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1497 int u_height = get_irn_height(rss->h, val_arr[i]);
1498 plist_element_t *el;
1500 /* accumulate all descendants of all pkiller(u) */
1501 bitset_clear_all(bs_ukilldesc);
1502 foreach_plist(u->dvg_pkiller_list, el) {
1503 ir_node *irn = plist_element_get_value(el);
1504 rss_irn_t *node = get_rss_irn(rss, irn);
1507 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, irn));
1511 for (k = ARR_LEN(node->dvg_desc) - 1; k >= 0; --k) {
1512 if (! is_Sink(node->dvg_desc[k]))
1513 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, node->dvg_desc[k]));
1517 /* for all v in sat_vals */
1518 for (j = 0; j < n; ++j) {
1519 ir_node *v_irn = val_arr[j];
1520 rss_irn_t *v = get_rss_irn(rss, v_irn);
1521 unsigned v_height = get_irn_height(rss->h, v_irn);
1522 unsigned omega1, omega2, is_pkiller;
1527 /* get descendants of v */
1528 bitset_clear_all(bs_vdesc);
1529 for (k = ARR_LEN(v->dvg_desc) - 1; k >= 0; --k) {
1530 if (! is_Sink(v->dvg_desc[k]))
1531 bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v->dvg_desc[k]));
1534 /* if v is in pkiller(u) */
1535 is_pkiller = BSEARCH_IRN_ARR(val_arr[j], u->dvg_pkiller) != NULL ? 1 : 0;
1537 /* for all vv in pkiller(u) */
1538 for (k = ARR_LEN(u->dvg_pkiller) - 1; k >= 0; --k) {
1539 ir_node *vv_irn = u->dvg_pkiller[k];
1542 if (is_Sink(vv_irn))
1545 add_edge = is_pkiller ? k != j : ! heights_reachable_in_block(rss->h, v_irn, vv_irn);
1548 As we add an edge from vv -> v, we have to make sure,
1549 that there exists no path from v to vv.
1553 unsigned vv_height = get_irn_height(rss->h, vv_irn);
1554 unsigned mu1, mu2, critical_path_cost;
1557 mu1 = | descendants(v) cut sat_vals |
1558 the number of saturating values which cannot
1559 be simultaneously alive with u
1561 bitset_copy(bs_tmp, bs_vdesc);
1562 mu1 = bitset_popcnt(bitset_and(bs_tmp, bs_sv));
1565 mu2 = | accum_desc_all_pkiller(u) without descendants(v) |
1568 bitset_copy(bs_tmp, bs_ukilldesc);
1569 mu2 = bitset_popcnt(bitset_andnot(bs_tmp, bs_vdesc));
1577 /* omega1 = mu1 - mu2 */
1581 has_positive_omega1 = 1;
1583 /* omega2 = increase of critical path */
1584 critical_path_cost =
1585 v_height /* longest path from v to sink */
1586 + rss->max_height - vv_height /* longest path from source to vv */
1590 If critical_path_cost > max_height -> the new edge
1591 would increase the longest critical path by the difference.
1593 omega2 = critical_path_cost > rss->max_height ? critical_path_cost - rss->max_height : 0;
1595 /* this keeps track of the edge with the best benefit */
1596 if (num_regs - omega1 < best_benefit) {
1597 min_benefit_edge.src = vv_irn;
1598 min_benefit_edge.tgt = v_irn;
1600 best_benefit = num_regs - omega1;
1603 /* this keeps track of the edge with the best omega1 costs where omega2 == 0 */
1604 if (omega2 == 0 && (num_regs - omega1 < best_benefit_omega20)) {
1605 min_omega20_edge.src = vv_irn;
1606 min_omega20_edge.tgt = v_irn;
1608 best_benefit_omega20 = num_regs - omega1;
1611 best_omega2 = MIN(best_omega2, omega2);
1613 } /* for all vv in pkiller(u) */
1614 } /* for all v in sat_vals */
1615 } /* for all u in sat_vals */
1617 if (! has_positive_omega1)
1620 if (best_omega2 == 0) {
1621 ser->edge->src = min_omega20_edge.src;
1622 ser->edge->tgt = min_omega20_edge.tgt;
1623 ser->omega1 = best_benefit_omega20;
1624 ser->omega2 = best_omega2;
1627 ser->edge->src = min_benefit_edge.src;
1628 ser->edge->tgt = min_benefit_edge.tgt;
1629 ser->omega1 = best_benefit;
1630 ser->omega2 = best_omega2;
1637 * Perform the value serialization heuristic and add all
1638 * computed serialization edges as dependencies to the irg.
1640 static void perform_value_serialization_heuristic(rss_t *rss) {
1641 bitset_t *arch_nonign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1642 bitset_t *abi_ign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1647 /* available_regs = R = |arch_non_ignore_regs cut ~abi_ignore_regs| */
1648 arch_put_non_ignore_regs(rss->arch_env, rss->cls, arch_nonign_bs);
1649 be_abi_put_ignore_regs(rss->abi, rss->cls, abi_ign_bs);
1650 bitset_andnot(arch_nonign_bs, abi_ign_bs);
1651 available_regs = bitset_popcnt(arch_nonign_bs);
1653 DBG((rss->dbg, DEBUG_SER_HEUR, "\n\t#available regs: %d\n\n", available_regs));
1656 At first we need to compute the disjoint value DAG (DVG = {V, E_dv}).
1657 V = set of all nodes we are currently interested in
1658 E_dv = there is an edge from u to v iff v is a descendant of killer(u), forall u, v in V
1660 dvg.nodes = new_nodeset(plist_count(rss->nodes));
1661 dvg.edges = new_pset(cmp_rss_edges, 20);
1662 compute_dvg(rss, &dvg);
1665 Then we perform the heuristic serialization algorithm
1666 on the DVG which gives us all necessary serialization
1669 DBG((rss->dbg, DEBUG_MAX_AC, "\tcomputing maximal antichain:\n"));
1670 sat_vals = compute_maximal_antichain(rss, &dvg);
1671 while (sat_vals && (nodeset_count(sat_vals) > available_regs)) {
1672 serialization_t *ser, best_ser;
1676 best_ser.edge = &edge;
1677 ser = compute_best_admissible_serialization(rss, sat_vals, &best_ser, available_regs);
1678 tgt = get_rss_irn(rss, ser->edge->tgt);
1680 DBG((rss->dbg, DEBUG_SER_HEUR, "\tcurrent register saturation %d, target %d\n", nodeset_count(sat_vals), available_regs));
1682 /* BEWARE: Update dvg_user_list when inserting a serialization edge !!! */
1683 plist_insert_back(tgt->dvg_user_list, ser->edge->src);
1684 pset_insert(dvg.edges, ser->edge, HASH_RSS_EDGE(ser->edge));
1685 del_nodeset(sat_vals);
1687 /* TODO: Might be better to update the dvg descendants here as well, instead of recalculating them */
1689 /* Insert the serialization as dependency edge into the irg. */
1690 DBG((rss->dbg, DEBUG_SER_HEUR, "\tinserting serialization %+F -> %+F with cost %d, %d\n",
1691 ser->edge->src, ser->edge->tgt, ser->omega1, ser->omega2));
1692 add_irn_dep(ser->edge->src, ser->edge->tgt);
1694 /* TODO: try to find a cheaper way for updating height information */
1695 rss->max_height = heights_recompute_block(rss->h, rss->block);
1697 /* Recompute the antichain for next serialization */
1698 DBG((rss->dbg, DEBUG_MAX_AC, "\tre-computing maximal antichain:\n"));
1699 sat_vals = compute_maximal_antichain(rss, &dvg);
1702 del_nodeset(dvg.nodes);
1703 del_pset(dvg.edges);
1707 * Do initial calculations for a block.
1709 static void process_block(ir_node *block, void *env) {
1712 const ir_edge_t *edge;
1714 phase_init(&rss->ph, "rss block preprocessor", rss->irg, PHASE_DEFAULT_GROWTH, init_rss_irn);
1716 DBG((rss->dbg, LEVEL_1, "preprocessing block %+F\n", block));
1719 /* build an index map for all nodes in the current block */
1721 n = get_irn_n_edges(block);
1722 NEW_ARR_A(int *, rss->idx_map, n);
1723 foreach_out_edge(block, edge) {
1724 ir_node *irn = get_edge_src_irn(edge);
1725 rss->idx_map[i++] = get_irn_idx(irn);
1727 qsort(rss->idx_map, n, sizeof(rss->idx_map[0]), cmp_int);
1728 rss->max_height = heights_recompute_block(rss->h, block);
1730 /* loop over all register classes */
1731 for (i = arch_isa_get_n_reg_class(rss->arch_env->isa) - 1; i >= 0; --i) {
1732 const arch_register_class_t *cls = arch_isa_get_reg_class(rss->arch_env->isa, i);
1735 DBG((rss->dbg, LEVEL_1, "register class %s\n", arch_register_class_name(cls)));
1737 /* reset the list of interesting nodes */
1738 plist_clear(rss->nodes);
1739 plist_insert_back(rss->nodes, _sink);
1741 /* collect all nodes having a certain register class */
1742 foreach_out_edge(block, edge) {
1743 ir_node *irn = get_edge_src_irn(edge);
1745 if (get_irn_mode(irn) == mode_T)
1748 if (! arch_irn_is(rss->arch_env, irn, ignore) && arch_get_irn_reg_class(rss->arch_env, irn, -1) == cls) {
1749 plist_insert_back(rss->nodes, irn);
1751 /* calculate the descendants and consumer for each node in the block */
1752 collect_node_info(rss, irn);
1756 /* compute the potential killing set PK(G) */
1757 compute_pkill_set(rss);
1759 /* compute the killing function k* */
1760 compute_killing_function(rss);
1763 Compute the heuristic value serialization and
1764 add the necessary dependencies to the irg.
1766 perform_value_serialization_heuristic(rss);
1769 phase_free(&rss->ph);
1773 * Preprocess the irg for scheduling.
1775 void rss_schedule_preparation(const be_irg_t *birg) {
1778 FIRM_DBG_REGISTER(rss.dbg, "firm.be.sched.rss");
1780 firm_dbg_set_mask(rss.dbg, 255);
1782 init_rss_special_nodes(birg->irg);
1784 rss.irg = birg->irg;
1785 rss.arch_env = birg->main_env->arch_env;
1786 rss.abi = birg->abi;
1787 rss.h = heights_new(birg->irg);
1788 rss.nodes = plist_new();
1789 irg_block_walk_graph(birg->irg, NULL, process_block, &rss);
1790 heights_free(rss.h);
1791 plist_free(rss.nodes);