2 * Implementation of a register saturating list scheduler
3 * as described in: Sid-Ahmed-Ali Touati
4 * Register Saturation in Superscalar and VLIW Codes
6 * @license This file protected by GPL - GNU GENERAL PUBLIC LICENSE.
7 * @author Christian Wuerdig
20 #include "irgraph_t.h"
22 #include "iredges_t.h"
24 #include "irphase_t.h"
29 #include "bipartite.h"
30 #include "hungarian.h"
37 #include "besched_t.h"
39 #define ARR_LEN_SAFE(arr) ((arr) != NULL ? ARR_LEN((arr)) : 0)
41 #define HASH_RSS_EDGE(edge) ((get_irn_node_nr((edge)->src) << 16) | (get_irn_node_nr((edge)->tgt) & 0xFFFF))
42 #define BSEARCH_IRN_ARR(val, arr) \
43 bsearch(&(val), (arr), ARR_LEN_SAFE((arr)), sizeof((arr)[0]), cmp_irn_idx)
45 #define BLOCK_IDX_MAP(rss, irn) bsearch_for_index(get_irn_idx((irn)), (rss)->idx_map, ARR_LEN_SAFE((rss)->idx_map), 1)
47 /* Represents a child with associated costs */
48 typedef struct _child {
53 /* We need edges for several purposes. */
54 typedef struct _rss_edge {
60 /* Represents a connected bipartite component. */
62 nodeset *parents; /**< = S a set of value producers */
63 nodeset *children; /**< = T a set of value consumers */
64 pset *kill_edges; /**< = E a set of edges (t in T, s in S) such as each s in S gets killed by at least one t in T */
65 int nr; /**< a deterministic index for set insertion (used as hash) */
68 /* Represents a disjoint value DAG. */
74 /* Represents a chain of nodes. */
75 typedef struct _chain {
76 plist_t *elements; /**< List of chain elements */
77 int nr; /**< a deterministic index for set insertion (used as hash) */
80 typedef struct _rss_irn {
81 plist_t *consumer_list; /**< List of consumers */
82 ir_node **consumer; /**< Sorted consumer array (needed for faster access) */
84 plist_t *parent_list; /**< List of parents */
85 ir_node **parents; /**< Sorted parent array (needed for faster access) */
87 plist_t *descendant_list; /**< List of descendants */
88 ir_node **descendants; /**< Sorted descendant array (needed for faster access) */
90 plist_t *pkiller_list; /**< List of potential killers */
91 ir_node **pkillers; /**< Sorted pkiller array (needed for faster access) */
94 plist_t *dvg_desc_list; /**< List of all descendants in the DVG */
95 ir_node **dvg_desc; /**< Sorted dvg descendant array (needed for faster access) */
97 plist_t *dvg_pkiller_list; /**< List of potential killers in the DVG */
98 ir_node **dvg_pkiller; /**< Sorted dvg pkiller array (needed for faster access) */
100 plist_t *dvg_user_list; /**< List of users in the disjoint value DAG DVG */
102 plist_t *kill_value_list; /**< List of values getting potentially killed by this node */
104 ir_node *killer; /**< The selected unique killer */
105 ir_node *irn; /**< The corresponding firm node to this rss_irn */
107 chain_t *chain; /**< The chain, this node is associated to */
109 unsigned live_out : 1; /**< irn has consumers outside of it's block */
110 unsigned visited : 1; /**< visited flag for bipartite decomposition */
111 unsigned handled : 1; /**< flag indicating whether or not the list structures have been build */
112 unsigned dumped : 1; /**< flag indication whether or not this node was dumped */
115 /* Represents a serialization edge with associated costs. */
116 typedef struct _serialization {
117 rss_irn_t *u; /* the top node */
118 rss_irn_t *v; /* the node about to be serialized after u */
119 rss_edge_t *edge; /* the edge selected for the serialization */
120 int omega1; /* estimated: available regs - RS reduction */
121 int omega2; /* increase in critical path length */
124 typedef struct _rss {
125 phase_t ph; /**< Phase to hold some data */
126 heights_t *h; /**< The current height object */
127 ir_graph *irg; /**< The irg to preprocess */
128 plist_t *nodes; /**< The list of interesting nodes */
129 const arch_env_t *arch_env; /**< The architecture environment */
130 be_abi_irg_t *abi; /**< The abi for this irg */
131 pset *cbc_set; /**< A set of connected bipartite components */
132 ir_node *block; /**< The current block in progress. */
133 int *idx_map; /**< Mapping irn indices to per block indices */
134 unsigned max_height; /**< maximum height in the current block */
135 const arch_register_class_t *cls; /**< The current register class */
136 DEBUG_ONLY(firm_dbg_module_t *dbg);
139 #define get_rss_irn(rss, irn) (phase_get_or_set_irn_data(&rss->ph, irn))
142 * We need some special nodes:
143 * a source and a sink for all live-in and live-out values of a block
152 static ir_node *_source = NULL;
153 static ir_node *_sink = NULL;
155 #define is_Source(irn) ((irn) == _source)
156 #define is_Sink(irn) ((irn) == _sink)
158 /******************************************************************************
160 * | | | | / _| | | (_)
161 * | |__ ___| |_ __ ___ _ __ | |_ _ _ _ __ ___| |_ _ ___ _ __ ___
162 * | '_ \ / _ \ | '_ \ / _ \ '__| | _| | | | '_ \ / __| __| |/ _ \| '_ \/ __|
163 * | | | | __/ | |_) | __/ | | | | |_| | | | | (__| |_| | (_) | | | \__ \
164 * |_| |_|\___|_| .__/ \___|_| |_| \__,_|_| |_|\___|\__|_|\___/|_| |_|___/
167 ******************************************************************************/
170 * Acquire opcodes and create source and sink nodes.
172 static void init_rss_special_nodes(ir_graph *irg) {
173 ir_node *block = get_irg_start_block(irg);
174 int iro_rss_base = get_next_ir_opcodes(iro_rss_last);
175 ir_op *op_rss_Source = new_ir_op(iro_rss_base + iro_rss_Source, "rss_Source", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
176 ir_op *op_rss_Sink = new_ir_op(iro_rss_base + iro_rss_Sink, "rss_Sink", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
177 _source = new_ir_node(NULL, irg, block, op_rss_Source, mode_ANY, 0, NULL);
178 _sink = new_ir_node(NULL, irg, block, op_rss_Sink, mode_ANY, 0, NULL);
181 static int cmp_int(const void *a, const void *b) {
185 return QSORT_CMP(*i1, *i2);
188 static int cmp_child_costs(const void *a, const void *b) {
189 const child_t *c1 = a;
190 const child_t *c2 = b;
192 return QSORT_CMP(c1->cost, c2->cost);
195 static int cmp_irn_idx(const void *a, const void *b) {
196 const ir_node *n1 = *(ir_node **)a;
197 const ir_node *n2 = *(ir_node **)b;
199 return QSORT_CMP(get_irn_idx(n1), get_irn_idx(n2));
202 static int cmp_rss_edges(const void *a, const void *b) {
203 const rss_edge_t *e1 = a;
204 const rss_edge_t *e2 = b;
206 return (e1->src != e2->src) || (e1->tgt != e2->tgt);
209 static int bsearch_for_index(int key, int *arr, size_t len, int force) {
213 while (right >= left) {
214 int idx = (left + right) / 2;
218 else if (key > arr[idx])
225 assert(0 && "Something is wrong, key not found.");
229 static ir_node **build_sorted_array_from_list(plist_t *irn_list, struct obstack *obst) {
232 int len = plist_count(irn_list);
233 ir_node **arr = NEW_ARR_D(ir_node *, obst, len);
235 /* copy the list into the array */
236 foreach_plist(irn_list, el) {
237 arr[i++] = plist_element_get_value(el);
240 /* sort the array by node index */
241 qsort(arr, len, sizeof(arr[0]), cmp_irn_idx);
246 /*****************************************************
249 * __| | ___| |__ _ _ __ _ __ _ _ _ __ __ _
250 * / _` |/ _ \ '_ \| | | |/ _` |/ _` | | '_ \ / _` |
251 * | (_| | __/ |_) | |_| | (_| | (_| | | | | | (_| |
252 * \__,_|\___|_.__/ \__,_|\__, |\__, |_|_| |_|\__, |
256 *****************************************************/
258 static void dump_nodeset(nodeset *ns, const char *prefix) {
260 foreach_nodeset(ns, irn) {
261 ir_printf("%s%+F\n", prefix, irn);
265 static void build_file_name(rss_t *rss, const char *suffix, size_t suf_len, char *buf, size_t len) {
266 const char *irg_name;
269 irg_name = get_entity_name(get_irg_entity(rss->irg));
270 snprintf(buf, len - suf_len, "%s-%s-block-%d",
271 irg_name, arch_register_class_name(rss->cls), get_irn_node_nr(rss->block));
275 /* Dumps all collected bipartite components of current irg as vcg. */
276 static void debug_vcg_dump_bipartite(rss_t *rss) {
280 static const char suffix[] = "-RSS-CBC.vcg";
282 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
283 f = fopen(file_name, "w");
285 ir_fprintf(f, "graph: { title: \"connected bipartite component graph of %+F\"\n", rss->irg);
286 fprintf(f, "display_edge_labels: no\n");
287 fprintf(f, "layoutalgorithm: mindepth\n");
288 fprintf(f, "manhattan_edges: yes\n\n");
290 foreach_pset(rss->cbc_set, cbc) {
294 fprintf(f, "graph: { titel: \"cbc %d\" label: \"cbc %d\" status:clustered color:yellow\n", cbc->nr, cbc->nr);
295 foreach_nodeset(cbc->parents, n) {
296 ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
298 foreach_nodeset(cbc->children, n) {
299 ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
301 foreach_pset(cbc->kill_edges, ke) {
302 ir_fprintf(f, "edge: { sourcename: \"n%d_%d\" targetname: \"n%d_%d\" }\n",
303 get_irn_node_nr(ke->src), cbc->nr, get_irn_node_nr(ke->tgt), cbc->nr);
311 /* Dump the computed killing function as vcg. */
312 static void debug_vcg_dump_kill(rss_t *rss) {
316 static const char suffix[] = "-RSS-KILL.vcg";
318 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
319 f = fopen(file_name, "w");
321 ir_fprintf(f, "graph: { title: \"computed kill graph of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
322 fprintf(f, "display_edge_labels: no\n");
323 fprintf(f, "layoutalgorithm: mindepth\n");
324 fprintf(f, "manhattan_edges: yes\n\n");
326 /* first: reset dumped flag of all nodes */
327 foreach_plist(rss->nodes, el) {
328 ir_node *irn = plist_element_get_value(el);
329 rss_irn_t *rirn = get_rss_irn(rss, irn);
333 /* dump all nodes and their killers */
334 foreach_plist(rss->nodes, el) {
335 ir_node *irn = plist_element_get_value(el);
336 rss_irn_t *rirn = get_rss_irn(rss, irn);
337 rss_irn_t *pk_rirn = get_rss_irn(rss, rirn->killer);
339 if (! rirn->dumped) {
340 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
344 if (! pk_rirn->dumped) {
345 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(rirn->killer), rirn->killer);
349 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
350 get_irn_node_nr(rirn->killer), get_irn_node_nr(irn));
357 /* Dumps the potential killing DAG (PKG) as vcg. */
358 static void debug_vcg_dump_pkg(rss_t *rss, nodeset *max_ac, int iteration) {
361 char *suffix = alloca(32);
362 static const char suffix1[] = "-RSS-PKG.vcg";
363 static const char suffix2[] = "-RSS-PKG-MAXAC.vcg";
367 snprintf(suffix, 32, "%s", suffix1);
370 snprintf(suffix, 32, "-%0.2d%s", iteration, suffix2);
373 build_file_name(rss, suffix, strlen(suffix) + 1, file_name, sizeof(file_name));
374 f = fopen(file_name, "w");
376 ir_fprintf(f, "graph: { title: \"potential killing DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
377 fprintf(f, "display_edge_labels: no\n");
378 fprintf(f, "layoutalgorithm: mindepth\n");
379 fprintf(f, "manhattan_edges: yes\n\n");
381 foreach_plist(rss->nodes, el) {
382 ir_node *irn = plist_element_get_value(el);
383 rss_irn_t *rirn = get_rss_irn(rss, irn);
385 plist_element_t *k_el;
387 /* dump selected saturating values in yellow */
388 if (max_ac && nodeset_find(max_ac, irn))
391 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(irn), irn, c1);
394 foreach_plist(rirn->pkiller_list, k_el) {
395 ir_node *pkiller = plist_element_get_value(k_el);
396 rss_irn_t *pk_rirn = get_rss_irn(rss, pkiller);
399 if (max_ac && nodeset_find(max_ac, pkiller))
402 if (! pk_rirn->dumped) {
403 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(pkiller), pkiller, c2);
406 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
407 get_irn_node_nr(pkiller), get_irn_node_nr(irn));
414 /* Dumps the disjoint value DAG (DVG) as vcg. */
415 static void debug_vcg_dump_dvg(rss_t *rss, dvg_t *dvg) {
416 static const char suffix[] = "-RSS-DVG.vcg";
422 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
423 f = fopen(file_name, "w");
425 ir_fprintf(f, "graph: { title: \"disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
426 fprintf(f, "display_edge_labels: no\n");
427 fprintf(f, "layoutalgorithm: mindepth\n");
428 fprintf(f, "manhattan_edges: yes\n\n");
431 foreach_nodeset(dvg->nodes, irn) {
432 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
436 foreach_pset(dvg->edges, edge) {
437 rss_irn_t *src = get_rss_irn(rss, edge->src);
438 rss_irn_t *tgt = get_rss_irn(rss, edge->tgt);
440 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
441 get_irn_node_nr(edge->src), get_irn_node_nr(edge->tgt));
449 /* Dumps the PKG(DVG). */
450 static void debug_vcg_dump_dvg_pkiller(rss_t *rss, dvg_t *dvg) {
451 static const char suffix[] = "-RSS-DVG-PKG.vcg";
456 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
457 f = fopen(file_name, "w");
459 ir_fprintf(f, "graph: { title: \"PKG of disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
460 fprintf(f, "display_edge_labels: no\n");
461 fprintf(f, "layoutalgorithm: mindepth\n");
462 fprintf(f, "manhattan_edges: yes\n\n");
465 foreach_nodeset(dvg->nodes, irn) {
466 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
470 foreach_nodeset(dvg->nodes, irn) {
471 rss_irn_t *node = get_rss_irn(rss, irn);
474 foreach_plist(node->dvg_pkiller_list, el) {
475 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
476 get_irn_node_nr(plist_element_get_value(el)), get_irn_node_nr(irn));
486 * In case there is no rss information for irn, initialize it.
488 static void *init_rss_irn(phase_t *ph, ir_node *irn, void *old) {
489 rss_irn_t *res = old ? old : phase_alloc(ph, sizeof(res[0]));
491 res->descendant_list = plist_obstack_new(phase_obst(ph));
492 res->descendants = NULL;
494 res->consumer_list = plist_obstack_new(phase_obst(ph));
495 res->consumer = NULL;
497 res->pkiller_list = plist_obstack_new(phase_obst(ph));
498 res->pkillers = NULL;
500 res->parent_list = plist_obstack_new(phase_obst(ph));
503 //res->dvg_desc_list = plist_obstack_new(phase_obst(ph));
504 //res->dvg_desc = NULL;
506 res->kill_value_list = plist_obstack_new(phase_obst(ph));
507 //res->dvg_user_list = plist_obstack_new(phase_obst(ph));
508 //res->dvg_pkiller_list = plist_obstack_new(phase_obst(ph));
523 * Collect all nodes data dependent on current node.
525 static void collect_descendants(rss_t *rss, rss_irn_t *rirn, ir_node *irn, int *got_sink) {
526 const ir_edge_t *edge;
527 ir_node *block = rss->block;
528 ir_edge_kind_t ekind[2] = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
531 /* loop over normal and dependency edges */
532 for (i = 0; i < 2; ++i) {
533 foreach_out_edge_kind(irn, edge, ekind[i]) {
534 ir_node *user = get_edge_src_irn(edge);
536 /* skip ignore nodes as they do not really contribute to register pressure */
537 if (arch_irn_is(rss->arch_env, user, ignore))
540 /* check if user lives in block and is not a control flow node */
541 if (get_nodes_block(user) == block && get_irn_mode(user) != mode_X) {
542 /* skip mode_T nodes */
543 if (get_irn_mode(user) != mode_T && ! plist_has_value(rirn->descendant_list, user)) {
544 plist_insert_back(rirn->descendant_list, user);
545 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", user));
547 collect_descendants(rss, rirn, user, got_sink);
549 else if (! *got_sink) {
550 /* user lives out of block: add sink as descendant if not already done */
551 plist_insert_back(rirn->descendant_list, _sink);
553 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", _sink));
560 * Handles a single consumer.
562 static int collect_single_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *consumer, int got_sink) {
563 ir_node *block = rss->block;
564 ir_edge_kind_t ekind[2] = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
567 if (get_nodes_block(consumer) == block) {
568 /* the consumer of a mode_T node are it's projs */
569 if (get_irn_mode(consumer) == mode_T) {
570 const ir_edge_t *cons_edge;
572 DBG((rss->dbg, LEVEL_2, "\t\tmode_T consumer %+F skipped\n", consumer));
574 for (i = 0; i < 2; ++i) {
575 foreach_out_edge_kind(consumer, cons_edge, ekind[i]) {
576 ir_node *cons_proj = get_edge_src_irn(cons_edge);
578 assert(get_nodes_block(cons_proj) == block && "Proj in wrong block!");
580 /* skip ignore nodes, as they do not really contribute to register pressure */
581 if (arch_irn_is(rss->arch_env, cons_proj, ignore))
584 plist_insert_back(rss_irn->consumer_list, cons_proj);
585 DBG((rss->dbg, LEVEL_3, "\t\t\treal consumer %+F\n", cons_proj));
589 else if (! arch_irn_is(rss->arch_env, consumer, ignore)) {
590 plist_insert_back(rss_irn->consumer_list, consumer);
591 DBG((rss->dbg, LEVEL_2, "\t\tconsumer %+F\n", consumer));
595 rss_irn->live_out = 1;
596 DBG((rss->dbg, LEVEL_2, "\t\tlive out %+F", consumer));
598 plist_insert_back(rss_irn->consumer_list, _sink);
600 DB((rss->dbg, LEVEL_2, ", %+F added instead", _sink));
602 DB((rss->dbg, LEVEL_2, "\n"));
609 * Collect all nodes consuming the value(s) produced by current node.
611 static void collect_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *irn) {
612 const ir_edge_t *edge;
615 ir_edge_kind_t ekind[2] = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
617 for (i = 0; i < 2; ++i) {
618 foreach_out_edge_kind(irn, edge, ekind[i]) {
619 ir_node *consumer = get_edge_src_irn(edge);
620 got_sink = collect_single_consumer(rss, rss_irn, consumer, got_sink);
627 * We need to build the consumer and descendant list for _source.
629 static void collect_node_info_source(rss_t *rss) {
630 const ir_edge_t *edge;
631 rss_irn_t *rirn = get_rss_irn(rss, _source);
632 ir_node *block = rss->block;
637 foreach_out_edge(block, edge) {
638 ir_node *irn = get_edge_src_irn(edge);
641 for (i = get_irn_arity(n) - 1; i >= 0; --i) {
647 static void reset_node_info(rss_irn_t *rss_irn) {
648 /* Beware: array data resides on phase obstack, so it gets removed automatically */
650 plist_clear(rss_irn->consumer_list);
651 rss_irn->consumer = NULL;
653 plist_clear(rss_irn->parent_list);
654 rss_irn->parents = NULL;
656 plist_clear(rss_irn->descendant_list);
657 rss_irn->descendants = NULL;
659 plist_clear(rss_irn->pkiller_list);
660 rss_irn->pkillers = NULL;
662 plist_clear(rss_irn->kill_value_list);
664 rss_irn->killer = NULL;
665 rss_irn->live_out = 0;
666 rss_irn->visited = 0;
667 rss_irn->handled = 0;
672 * Collects all consumer and descendant of a irn.
674 static void collect_node_info(rss_t *rss, ir_node *irn) {
675 rss_irn_t *rss_irn = get_rss_irn(rss, irn);
678 assert(get_irn_mode(irn) != mode_T && "Cannot handle mode_T nodes.");
680 /* check if node info is already available */
681 if (rss_irn->handled)
682 phase_reinit_single_irn_data(&rss->ph, irn);
684 DBG((rss->dbg, LEVEL_1, "\tcomputing consumers of %+F:\n", irn));
686 /* collect all consumer */
687 collect_consumer(rss, rss_irn, irn);
689 /* build sorted consumer array */
690 rss_irn->consumer = build_sorted_array_from_list(rss_irn->consumer_list, phase_obst(&rss->ph));
692 DBG((rss->dbg, LEVEL_1, "\tcompute descendants of %+F:\n", irn));
694 /* collect descendants */
696 collect_descendants(rss, rss_irn, irn, &got_sink);
698 /* build sorted descendant array */
699 rss_irn->descendants = build_sorted_array_from_list(rss_irn->descendant_list, phase_obst(&rss->ph));
701 rss_irn->handled = 1;
705 * Checks if v is a potential killer of u.
706 * v is in pkill(u) iff descendants(v) cut consumer(u) is v
708 * @param rss The rss object
709 * @param v The node to check for killer
710 * @param u The potentially killed value
711 * @return 1 if v is in pkill(u), 0 otherwise
713 static int is_potential_killer(rss_t *rss, rss_irn_t *v, rss_irn_t *u) {
718 assert(is_Sink(v->irn) || ((plist_count(v->descendant_list) > 0 && v->descendants) || 1));
719 assert(is_Sink(u->irn) || ((plist_count(u->consumer_list) > 0 && u->consumer) || 1));
721 /* as we loop over the list: loop over the shorter one */
722 if (plist_count(v->descendant_list) > plist_count(u->consumer_list)) {
723 list = u->consumer_list;
724 arr = v->descendants;
727 list = v->descendant_list;
731 /* for each list element: try to find element in array */
732 foreach_plist(list, el) {
733 ir_node *irn = plist_element_get_value(el);
734 ir_node *match = BSEARCH_IRN_ARR(irn, arr);
736 if (match && match != irn)
744 * Update descendants, consumer and pkiller list for the given irn.
746 static void update_node_info(rss_t *rss, ir_node *irn, ir_node *pk_irn) {
747 rss_irn_t *node = get_rss_irn(rss, irn);
748 rss_irn_t *pkiller = get_rss_irn(rss, pk_irn);
750 /* add consumer and rebuild the consumer array */
751 if (! plist_has_value(node->consumer_list, pk_irn)) {
752 plist_insert_back(node->consumer_list, pk_irn);
753 node->consumer = build_sorted_array_from_list(node->consumer_list, phase_obst(&rss->ph));
756 /* add pk_irn as descendant, add it's descendants to irn's and rebuild array */
757 if (! plist_has_value(node->descendant_list, pk_irn)) {
760 plist_insert_back(node->descendant_list, pk_irn);
762 foreach_plist(pkiller->descendant_list, el) {
763 ir_node *desc = plist_element_get_value(el);
765 if (! plist_has_value(node->descendant_list, desc))
766 plist_insert_back(node->descendant_list, desc);
769 node->descendants = build_sorted_array_from_list(node->descendant_list, phase_obst(&rss->ph));
773 /* check, if pkiller is a potential killer of irn */
774 if (is_potential_killer(rss, pkiller, node)) {
775 if (! plist_has_value(node->pkiller_list, pk_irn))
776 plist_insert_back(node->pkiller_list, pk_irn);
777 if (! plist_has_value(pkiller->kill_value_list, irn))
778 plist_insert_back(pkiller->kill_value_list, irn);
779 DBG((rss->dbg, LEVEL_1, "\tpotential killer %+F added to %+F\n", pk_irn, irn));
785 * Compute the potential killing set PK.
787 static void compute_pkill_set(rss_t *rss) {
788 plist_element_t *u_el, *v_el;
790 foreach_plist(rss->nodes, u_el) {
791 ir_node *u_irn = plist_element_get_value(u_el);
792 rss_irn_t *u = get_rss_irn(rss, u_irn);
794 DBG((rss->dbg, LEVEL_1, "\tcomputing potential killers of %+F:\n", u_irn));
796 /* check each consumer if it is a potential killer */
797 foreach_plist(u->consumer_list, v_el) {
798 ir_node *v_irn = plist_element_get_value(v_el);
799 rss_irn_t *v = get_rss_irn(rss, v_irn);
801 /* check, if v is a potential killer of u */
802 if (is_potential_killer(rss, v, u)) {
803 if (! plist_has_value(u->pkiller_list, v_irn))
804 plist_insert_back(u->pkiller_list, v_irn);
805 if (! plist_has_value(v->kill_value_list, u_irn))
806 plist_insert_back(v->kill_value_list, u_irn);
807 DBG((rss->dbg, LEVEL_2, "\t\tpotential killer %+F\n", v_irn));
815 if (firm_dbg_get_mask(rss->dbg))
816 debug_vcg_dump_pkg(rss, NULL, 0);
821 * Build set of killing edges (from values to their potential killers)
823 static void build_kill_edges(rss_t *rss, pset *epk) {
824 plist_element_t *el, *k_el;
826 foreach_plist(rss->nodes, el) {
827 ir_node *irn = plist_element_get_value(el);
828 rss_irn_t *rirn = get_rss_irn(rss, irn);
830 DBG((rss->dbg, LEVEL_3, "\t\tbuilding kill edges for %+F:\n", irn));
832 foreach_plist(rirn->pkiller_list, k_el) {
833 ir_node *pkiller = plist_element_get_value(k_el);
834 rss_edge_t *ke = obstack_alloc(phase_obst(&rss->ph), sizeof(*ke));
840 DBG((rss->dbg, LEVEL_3, "\t\t\ttarget %+F\n", pkiller));
842 pset_insert(epk, ke, HASH_RSS_EDGE(ke));
847 /* print the given cbc for debugging purpose */
848 static void debug_print_cbc(firm_dbg_module_t *mod, cbc_t *cbc) {
852 DBG((mod, LEVEL_3, "\t\tS = set of parents:\n"));
853 foreach_nodeset(cbc->parents, n) {
854 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
856 DBG((mod, LEVEL_3, "\t\tT = set of children:\n"));
857 foreach_nodeset(cbc->children, n) {
858 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
860 DBG((mod, LEVEL_3, "\t\tE = Edges from producers to consumers\n"));
861 foreach_pset(cbc->kill_edges, ke) {
862 DBG((mod, LEVEL_3, "\t\t\t%+F -> %+F\n", ke->src, ke->tgt));
867 * Construct the bipartite decomposition.
868 * Sid-Ahmed-Ali Touati, Phd Thesis
869 * Register Pressure in Instruction Level Parallelism, p. 71
871 static void compute_bipartite_decomposition(rss_t *rss) {
872 pset *epk = new_pset(cmp_rss_edges, 10);
877 DBG((rss->dbg, LEVEL_1, "\tcomputing bipartite decomposition:\n"));
879 build_kill_edges(rss, epk);
881 foreach_plist(rss->nodes, el) {
882 ir_node *u_irn = plist_element_get_value(el);
883 rss_irn_t *u = get_rss_irn(rss, u_irn);
888 plist_element_t *el2;
890 rss_edge_t *kedge_root = NULL;
891 ir_node *t_irn, *s_irn;
893 if (u->visited || u_irn == _sink)
896 DBG((rss->dbg, LEVEL_2, "\t\t%+F choosen:\n", u_irn));
898 cbc = obstack_alloc(phase_obst(&rss->ph), sizeof(*cbc));
901 /* initialize S_cb */
902 cbc->parents = new_nodeset(5);
903 nodeset_insert(cbc->parents, u_irn);
904 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (init)\n", u_irn));
907 cbc->kill_edges = new_pset(cmp_rss_edges, 5);
909 /* each parent gets killed by at least one value from children */
911 /* T_cb = PK_successors(u) */
912 cbc->children = new_nodeset(5);
913 foreach_plist(u->pkiller_list, el2) {
914 nodeset_insert(cbc->children, plist_element_get_value(el2));
915 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (init)\n", plist_element_get_value(el2)));
919 Now: insert the parents of all children into the parent set
920 and insert the children of all parents into the children set
921 until the sets don't change any more
923 while (p_change || c_change) {
924 p_change = c_change = 0;
926 /* accumulate parents */
927 foreach_nodeset(cbc->children, t_irn) {
928 rss_irn_t *t = get_rss_irn(rss, t_irn);
929 plist_element_t *kvl_el;
931 foreach_pset(epk, k_edge) {
932 ir_node *val = k_edge->src;
934 if (k_edge->src == t_irn && ! nodeset_find(cbc->parents, k_edge->src)) {
935 nodeset_insert(cbc->parents, val);
937 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents\n", val));
942 /* accumulate children */
943 foreach_nodeset(cbc->parents, s_irn) {
944 rss_irn_t *s = get_rss_irn(rss, s_irn);
945 plist_element_t *pkl_el;
947 foreach_plist(s->pkiller_list, pkl_el) {
948 ir_node *val = plist_element_get_value(pkl_el);
950 if (! nodeset_find(cbc->children, val)) {
951 nodeset_insert(cbc->children, val);
953 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children\n", val));
959 /* mark all parent values as visited */
960 foreach_nodeset(cbc->parents, s_irn) {
961 rss_irn_t *s = get_rss_irn(rss, s_irn);
963 /* assure bipartite property */
964 if (nodeset_find(cbc->children, s_irn)) {
965 nodeset_remove(cbc->children, s_irn);
966 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F removed from to children\n", s_irn));
971 foreach_pset(epk, k_edge) {
972 if (nodeset_find(cbc->parents, k_edge->src) && nodeset_find(cbc->children, k_edge->tgt)) {
973 pset_insert(cbc->kill_edges, k_edge, HASH_RSS_EDGE(k_edge));
975 Link all k_edges which are about to be removed together.
976 Beware: pset_remove kills the iterator.
978 k_edge->next = kedge_root;
983 /* remove all linked k_edges */
984 for (; kedge_root; kedge_root = kedge_root->next) {
985 pset_remove(epk, kedge_root, HASH_RSS_EDGE(kedge_root));
988 /* add the connected bipartite component */
989 pset_insert(rss->cbc_set, cbc, (unsigned)cbc->nr);
990 DBG((rss->dbg, LEVEL_1, "\tbipartite component %d inserted:\n", cbc->nr));
991 DEBUG_ONLY(debug_print_cbc(rss->dbg, cbc));
994 if (firm_dbg_get_mask(rss->dbg))
995 debug_vcg_dump_bipartite(rss);
1001 * Select the child with the maximum cost.
1003 static child_t *select_child_max_cost(rss_t *rss, nodeset *x, nodeset *y, child_t *t, cbc_t *cbc) {
1005 float max_cost = -1.0f;
1007 DBG((rss->dbg, LEVEL_2, "\t\tcomputing children costs:\n"));
1009 foreach_nodeset(cbc->children, child) {
1010 rss_irn_t *r_child = get_rss_irn(rss, child);
1011 int num_unkilled_parents = 0;
1012 int num_descendants;
1016 /* get the number of unkilled parents */
1017 foreach_pset(cbc->kill_edges, k_edge) {
1018 if (k_edge->tgt == child && nodeset_find(x, k_edge->src))
1019 ++num_unkilled_parents;
1022 cost = (float)num_unkilled_parents;
1024 num_descendants = plist_count(r_child->descendant_list) + nodeset_count(y);
1026 if (num_descendants > 0)
1027 cost /= (float)num_descendants;
1029 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F, #desc %d, cost %.3f\n", child, num_descendants, cost));
1031 if (cost > max_cost) {
1042 * Remove all parents from x which are killed by t_irn.
1044 static void remove_covered_parents(rss_t *rss, nodeset *x, ir_node *t_irn, cbc_t *cbc) {
1045 rss_irn_t *t = get_rss_irn(rss, t_irn);
1048 DBG((rss->dbg, LEVEL_2, "\t\tremoving parents covered by %+F:\n", t_irn));
1050 foreach_pset(cbc->kill_edges, k_edge) {
1051 if (k_edge->tgt == t_irn && nodeset_find(x, k_edge->src)) {
1052 nodeset_remove(x, k_edge->src);
1053 plist_insert_back(t->parent_list, k_edge->src);
1054 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", k_edge->src));
1059 static void update_cumulated_descendent_values(rss_t *rss, nodeset *y, ir_node *t_irn) {
1060 rss_irn_t *t = get_rss_irn(rss, t_irn);
1061 plist_element_t *el;
1063 DBG((rss->dbg, LEVEL_2, "\t\tupdating cumulated descendant value of %+F:\n", t_irn));
1065 foreach_plist(t->descendant_list, el) {
1066 nodeset_insert(y, plist_element_get_value(el));
1067 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", plist_element_get_value(el)));
1072 * Greedy-k: a heuristics for the MMA problem
1074 static void compute_killing_function(rss_t *rss) {
1076 struct obstack obst;
1078 obstack_init(&obst);
1080 rss->cbc_set = pset_new_ptr(5);
1081 compute_bipartite_decomposition(rss);
1083 /* for all bipartite components do: */
1084 foreach_pset(rss->cbc_set, cbc) {
1086 nodeset *x = new_nodeset(10);
1087 nodeset *y = new_nodeset(10);
1088 child_t **sks = NEW_ARR_F(child_t *, 20);
1093 DBG((rss->dbg, LEVEL_1, "\tcomputing SKS for cbc %d:\n", cbc->nr));
1094 DBG((rss->dbg, LEVEL_2, "\t\tinitializing parents X:\n"));
1096 /* X = S_cb (all parents are initially uncovered) */
1097 foreach_nodeset(cbc->parents, p) {
1098 nodeset_insert(x, p);
1099 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", p));
1102 /* while X not empty */
1103 while (nodeset_count(x) > 0) {
1104 child_t *t = obstack_alloc(&obst, sizeof(*t));
1105 memset(t, 0, sizeof(*t));
1107 t = select_child_max_cost(rss, x, y, t, cbc);
1109 if (cur_len >= cur_size) {
1110 ARR_EXTO(child_t *, sks, cur_size * 2);
1114 DBG((rss->dbg, LEVEL_2, "\t\tinsert child %+F (%.3f) into SKS at pos %d\n", t->irn, t->cost, cur_len));
1117 remove_covered_parents(rss, x, t->irn, cbc);
1118 update_cumulated_descendent_values(rss, y, t->irn);
1121 ARR_SHRINKLEN(sks, cur_len);
1123 /* sort SKS in increasing cost order */
1124 qsort(sks, cur_len, sizeof(sks[0]), cmp_child_costs);
1126 DBG((rss->dbg, LEVEL_2, "\tprocessing SKS for cbc %d:\n", cbc->nr));
1128 /* build killing function */
1129 for (i = cur_len - 1; i >= 0; --i) { /* loop over sks in decreasing cost order */
1130 child_t *t = sks[i];
1131 rss_irn_t *rt = get_rss_irn(rss, t->irn);
1132 plist_element_t *p_el;
1134 DBG((rss->dbg, LEVEL_3, "\t\t\tkiller %+F (%.3f):\n", t->irn, t->cost));
1136 /* kill all unkilled parents of t */
1137 foreach_plist(rt->parent_list, p_el) {
1138 ir_node *par = plist_element_get_value(p_el);
1139 rss_irn_t *rpar = get_rss_irn(rss, par);
1141 if (is_Sink(rpar->killer)) {
1142 rpar->killer = t->irn;
1143 DBG((rss->dbg, LEVEL_2, "\t\tkill %+F\n", rpar->irn));
1146 DBG((rss->dbg, LEVEL_3, "\t\t\tkeeping %+F as killer for %+F\n", rpar->killer, rpar->irn));
1157 if (firm_dbg_get_mask(rss->dbg))
1158 debug_vcg_dump_kill(rss);
1161 del_pset(rss->cbc_set);
1162 obstack_free(&obst, NULL);
1166 * Adds the edge src -> tgt to the dvg. Checks if reverse edge is already there (asserts).
1168 static INLINE void add_dvg_edge(rss_t *rss, dvg_t *dvg, ir_node *src, ir_node *tgt, int have_source) {
1169 rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1173 nodeset_insert(dvg->nodes, src);
1175 nodeset_insert(dvg->nodes, tgt);
1177 /* add an edge to our killer */
1178 dvg_edge->src = src;
1179 dvg_edge->tgt = tgt;
1180 dvg_edge->next = NULL;
1184 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1186 /* add the edge to the DVG */
1187 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", src, tgt));
1188 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1192 * Computes the disjoint value DAG (DVG).
1193 * BEWARE: It is not made explicitly clear in the Touati paper,
1194 * but the DVG is meant to be build from the KILLING DAG
1196 static void compute_dvg(rss_t *rss, dvg_t *dvg) {
1197 plist_element_t *el;
1199 DBG((rss->dbg, LEVEL_1, "\tcomputing DVG:\n"));
1201 foreach_plist(rss->nodes, el) {
1202 ir_node *u_irn = plist_element_get_value(el);
1203 rss_irn_t *u = get_rss_irn(rss, u_irn);
1204 rss_irn_t *u_killer = get_rss_irn(rss, u->killer);
1207 /* TODO: omit nodes only having sink as pkiller and killing no one */
1209 /* add an edge to killer */
1210 add_dvg_edge(rss, dvg, u_irn, u->killer, 0);
1212 if (is_Sink(u->killer))
1215 /* We add an edge to every killer, from where we could be reached. */
1216 for (i = ARR_LEN_SAFE(u_killer->descendants) - 1; i >= 0; --i) {
1217 add_dvg_edge(rss, dvg, u_irn, u_killer->descendants[i], 1);
1222 foreach_plist(rss->nodes, el2) {
1223 ir_node *v_irn = plist_element_get_value(el2);
1226 There is an edge (u, v) in the DVG iff v is a descendant of the killer(u).
1228 if (BSEARCH_IRN_ARR(v_irn, u_kill->descendants)) {
1229 rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1232 /* insert the user into the DVG and append it to the user list of u */
1233 nodeset_insert(dvg->nodes, v_irn);
1234 if (! plist_has_value(u->dvg_user_list, v_irn))
1235 plist_insert_back(u->dvg_user_list, v_irn);
1237 dvg_edge->src = u_irn;
1238 dvg_edge->tgt = v_irn;
1239 dvg_edge->next = NULL;
1244 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1246 /* add the edge to the DVG */
1247 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", u_irn, v_irn));
1248 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1255 if (firm_dbg_get_mask(rss->dbg))
1256 debug_vcg_dump_dvg(rss, dvg);
1261 * Updates the dvg structure when a serialization edge from src -> tgt is added.
1263 static void update_dvg(rss_t *rss, dvg_t *dvg, rss_irn_t *src, rss_irn_t *tgt) {
1266 add_dvg_edge(rss, dvg, tgt->irn, src->irn, 1);
1268 for (i = ARR_LEN_SAFE(src->descendants) - 1; i >= 0; --i) {
1269 add_dvg_edge(rss, dvg, tgt->irn, src->descendants[i], 1);
1275 * Accumulate all descendants for root into list.
1277 static void accumulate_dvg_descendant_values(rss_t *rss, rss_irn_t *root, plist_t *list) {
1278 if (plist_count(root->dvg_user_list) > 0) {
1279 plist_element_t *el;
1281 foreach_plist(root->dvg_user_list, el) {
1282 ir_node *v_irn = plist_element_get_value(el);
1283 rss_irn_t *v = get_rss_irn(rss, v_irn);
1285 /* add v as descendant */
1286 if (! plist_has_value(list, v_irn)) {
1287 plist_insert_back(list, v_irn);
1288 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG descendant %+F\n", v_irn));
1291 /* accumulate v's descendants */
1292 accumulate_dvg_descendant_values(rss, v, list);
1298 * Builds the list of potential killers for each node
1300 * Needs the descendant list for all user as sorted array.
1302 static void build_dvg_pkiller_list(rss_t *rss, dvg_t *dvg) {
1305 foreach_nodeset(dvg->nodes, irn) {
1306 rss_irn_t *node = get_rss_irn(rss, irn);
1307 plist_element_t *el, *el2;
1309 DBG((rss->dbg, DEBUG_DVG, "\t\tbuilding pkiller list for %+F\n", irn));
1311 /* check each user */
1312 foreach_plist(node->dvg_user_list, el) {
1313 ir_node *u_irn = plist_element_get_value(el);
1315 /* is the current user u_irn not a descendant of any other user -> pkiller */
1316 foreach_plist(node->dvg_user_list, el2) {
1317 ir_node *v_irn = plist_element_get_value(el2);
1318 rss_irn_t *v = get_rss_irn(rss, v_irn);
1321 ! BSEARCH_IRN_ARR(u_irn, v->dvg_desc) &&
1322 ! plist_has_value(node->dvg_pkiller_list, u_irn))
1324 plist_insert_back(node->dvg_pkiller_list, u_irn);
1325 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG pkiller %+F\n", u_irn));
1330 node->dvg_pkiller = build_sorted_array_from_list(node->dvg_pkiller_list, phase_obst(&rss->ph));
1334 if (firm_dbg_get_mask(rss->dbg) & DEBUG_DVG)
1335 debug_vcg_dump_dvg_pkiller(rss, dvg);
1342 * Compute the maximal antichain of the current DVG.
1343 * This is a reimplementation of the MAXIMAL_ANTI_CHAIN function
1344 * from the DDG library 1.1 (DAG.cpp).
1346 static nodeset *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration) {
1347 int n = nodeset_count(dvg->nodes);
1348 int *assignment = alloca(n * sizeof(assignment[0]));
1349 int *assignment_rev = alloca(n * sizeof(assignment_rev[0]));
1350 int *idx_map = alloca(n * sizeof(idx_map[0]));
1351 hungarian_problem_t *bp;
1352 nodeset *values, *temp;
1354 int i, j, cost, cur_chain, res;
1355 rss_edge_t *dvg_edge;
1357 #define MAP_IDX(irn) bsearch_for_index(get_irn_idx(irn), idx_map, n, 1)
1359 if (pset_count(dvg->edges) == 0)
1362 bp = hungarian_new(n, n, 1, HUNGARIAN_MATCH_NORMAL);
1365 At first, we build an index map for the nodes in the DVG,
1366 because we cannot use the irn idx therefore as the resulting
1367 bipartite data structure would have around 1.2GB.
1368 So we limit the size to the number of nodes we have in the DVG
1369 and build a sorted index map for their irn indices.
1372 foreach_nodeset(dvg->nodes, u_irn) {
1373 idx_map[i++] = get_irn_idx(u_irn);
1375 qsort(idx_map, n, sizeof(idx_map[0]), cmp_int);
1377 foreach_pset(dvg->edges, dvg_edge) {
1378 int idx_u = MAP_IDX(dvg_edge->src);
1379 int idx_v = MAP_IDX(dvg_edge->tgt);
1381 /* add the entry to the bipartite data structure */
1382 hungarian_add(bp, idx_u, idx_v, 1);
1383 DBG((rss->dbg, LEVEL_3, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1384 idx_u, dvg_edge->src, idx_v, dvg_edge->tgt));
1388 Add a bipartite entry for each pair of nodes (u, v), where exists a
1389 path in the DVG from u to v, ie. connect all descendants(v) to v.
1390 desc_dvg(v) = accumulated descendants of all z in dvg_user_list(v)
1392 foreach_nodeset(dvg->nodes, u_irn) {
1393 rss_irn_t *u = get_rss_irn(rss, u_irn);
1394 int idx_u_irn = MAP_IDX(u_irn);
1396 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG descendants of %+F:\n", u_irn));
1398 //plist_clear(u->dvg_desc_list);
1399 //accumulate_dvg_descendant_values(rss, u, u->dvg_desc_list);
1402 FIXME: The array is build on the phase obstack and we cannot free the data.
1403 So the array get as many times allocated as this function is called.
1406 /* build the sorted array for faster searches */
1407 //u->dvg_desc = build_sorted_array_from_list(u->dvg_desc_list, phase_obst(&rss->ph));
1409 DBG((rss->dbg, DEBUG_MAX_AC, "\t\tadding bipartite entries of %+F:\n", u_irn));
1411 /* add the bipartite entries for all descendants */
1412 for (i = ARR_LEN_SAFE(u->dvg_desc) - 1; i >= 0; --i) {
1413 rss_irn_t *desc = get_rss_irn(rss, u->dvg_desc[i]);
1414 int idx_desc = MAP_IDX(u->dvg_desc[i]);
1416 /* add the entry to the bipartite data structure */
1417 hungarian_add(bp, idx_u_irn, idx_desc, 1);
1418 DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1419 idx_u_irn, u_irn, idx_desc, u->dvg_desc[i]));
1426 /* We want maximum cardinality matching */
1427 hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);
1430 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG pkiller:\n"));
1431 /* beware: the following function needs the dvg_desc array */
1432 build_dvg_pkiller_list(rss, dvg);
1435 DBG((rss->dbg, LEVEL_2, "\t\tcomputing bipartite matching\n"));
1437 The maximum cardinality bipartite matching gives us the minimal
1438 chain partition, which corresponds to the maximum anti chains.
1440 res = hungarian_solve(bp, assignment, &cost);
1441 assert(res == 0 && "Bipartite matching failed!");
1444 memset(assignment_rev, -1, n * sizeof(assignment_rev[0]));
1446 /* build the reverse assignment, ie. foreach i -> j, add j -> i */
1447 for (i = 0; i < n; ++i) {
1448 if (assignment[i] >= 0) {
1449 int j = assignment[i];
1450 assignment_rev[j] = i;
1454 DBG((rss->dbg, LEVEL_2, "\t\tgot assignment with cost %d\n", cost));
1455 DBG((rss->dbg, LEVEL_3, "\t\t\tassignment --- reverse assignment\n", cost));
1456 for (i = 0; i < n; ++i) {
1457 DBG((rss->dbg, LEVEL_3, "\t\t\t%3d -> %3d %3d -> %3d\n", i, assignment[i], i, assignment_rev[i]));
1460 values = new_nodeset(10);
1462 /* Construction of the minimal chain partition */
1463 for (j = 0; j < n; ++j) {
1464 /* check nodes, which did not occur as target */
1465 if (assignment_rev[j] == -1) {
1466 int xj = idx_map[j];
1467 ir_node *xj_irn = get_idx_irn(rss->irg, xj);
1468 rss_irn_t *xj_rss = get_rss_irn(rss, xj_irn);
1469 chain_t *c = obstack_alloc(phase_obst(&rss->ph), sizeof(*c));
1472 /* there was no source for j -> we have a source of a new chain */
1473 nodeset_insert(values, xj_irn);
1475 c->elements = plist_obstack_new(phase_obst(&rss->ph));
1476 c->nr = cur_chain++;
1477 plist_insert_back(c->elements, xj_irn);
1481 DBG((rss->dbg, LEVEL_2, "\t\tstarting chain %d:\n", c->nr));
1482 DBG((rss->dbg, LEVEL_2, "\t\t\t%+F (%d)", xj_irn, j));
1484 /* follow chain, having j as source */
1486 while (assignment[source] >= 0) {
1487 int target = assignment[source];
1488 int irn_idx = idx_map[target];
1489 ir_node *irn = get_idx_irn(rss->irg, irn_idx);
1490 rss_irn_t *node = get_rss_irn(rss, irn);
1492 plist_insert_back(c->elements, irn);
1495 DB((rss->dbg, LEVEL_2, " -> %+F (%d)", irn, target));
1497 /* new source = last target */
1501 DB((rss->dbg, LEVEL_2, "\n"));
1506 Computing the maximal antichain: Select an element from each
1507 chain such, such it is parallel with the others.
1509 DBG((rss->dbg, LEVEL_1, "\t\tcomputing set of saturation values (MAX AC)\n"));
1510 DBG((rss->dbg, LEVEL_2, "\t\tstarting with:\n"));
1511 dump_nodeset(values, "\t\t\t");
1515 We need an explicit array for the values as
1516 we cannot iterate multiple times over the same
1517 set at the same time. :-(((((
1519 int n = nodeset_count(values);
1521 ir_node **val_arr = NEW_ARR_F(ir_node *, n);
1523 foreach_nodeset(values, u_irn)
1524 val_arr[i++] = u_irn;
1529 temp = new_nodeset(10);
1531 /* Select all nodes from current value set, having another node in the set as descendant. */
1532 for (i = 0; i < n; ++i) {
1533 rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1535 for (j = 0; j < n; ++j) {
1539 key.src = val_arr[i];
1540 key.tgt = val_arr[j];
1542 if (pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1543 /* v[j] is descendant of u -> remove u and break */
1544 nodeset_insert(temp, u->irn);
1545 nodeset_remove(values, u->irn);
1547 DBG((rss->dbg, LEVEL_3, "\t\t\tremoving %+F from values, adding it to temp\n", u->irn));
1555 /* Try to insert the chain predecessor of all selected u's */
1556 foreach_nodeset(temp, u_irn) {
1557 rss_irn_t *u = get_rss_irn(rss, u_irn);
1558 chain_t *c = u->chain;
1559 plist_element_t *el = plist_find_value(c->elements, u_irn);
1561 assert(el && "Missing element in chain!");
1563 /* If u has predecessor in chain: insert the predecessor */
1564 if (el = plist_element_get_prev(el)) {
1565 nodeset_insert(values, plist_element_get_value(el));
1566 DBG((rss->dbg, LEVEL_3, "\t\t\tadding %+F to values\n", plist_element_get_value(el)));
1572 } while (nodeset_count(temp) > 0);
1574 DBG((rss->dbg, LEVEL_2, "\t\tfinal set:\n"));
1576 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2) {
1577 dump_nodeset(values, "\t\t\t");
1578 debug_vcg_dump_pkg(rss, values, iteration);
1590 * Computes the best serialization between two nodes of sat_vals.
1592 static serialization_t *compute_best_admissible_serialization(rss_t *rss, nodeset *sat_vals, serialization_t *ser, int num_regs) {
1593 int n = nodeset_count(sat_vals);
1594 int n_idx = ARR_LEN_SAFE(rss->idx_map);
1596 ir_node **val_arr = alloca(n * sizeof(val_arr[0]));
1597 bitset_t *bs_sv = bitset_alloca(n_idx);
1598 bitset_t *bs_vdesc = bitset_alloca(n_idx);
1599 bitset_t *bs_tmp = bitset_alloca(n_idx);
1600 bitset_t *bs_ukilldesc = bitset_alloca(n_idx);
1601 unsigned best_benefit = UINT_MAX;
1602 unsigned best_omega2 = UINT_MAX;
1603 unsigned best_benefit_omega20 = UINT_MAX;
1604 int has_positive_omega1 = 0;
1607 rss_edge_t min_benefit_edge;
1608 rss_edge_t min_omega20_edge;
1609 rss_irn_t *ser_u_omega1, *ser_v_omega1;
1610 rss_irn_t *ser_u_omega20, *ser_v_omega20;
1612 DBG((rss->dbg, LEVEL_1, "\tcomputing admissible serializations:\n"));
1615 We need an explicit array for the values as
1616 we cannot iterate multiple times over the same
1617 set at the same time. :-(((((
1620 foreach_nodeset(sat_vals, irn) {
1622 bitset_set(bs_sv, BLOCK_IDX_MAP(rss, irn));
1626 We build all admissible serializations and remember the best found so far.
1629 if v in pkiller(u): add edge from v to all other pkiller(u)
1630 else: for all vv in pkiller(u): add edge from v to vv if there exists no path from vv to v
1634 A node is unserializable if:
1635 - it has only one killer and this one is Sink
1636 - it kills no other values
1637 In this case there is no serialization which could
1638 reduce the registerpressure
1640 #define IS_UNSERIALIZABLE_NODE(rss_node) \
1641 ((plist_count(rss_node->pkiller_list) == 1) && \
1642 is_Sink(rss_node->killer) && \
1643 (plist_count(rss_node->kill_value_list) == 0))
1645 /* for all u in sat_vals */
1646 for (i = 0; i < n; ++i) {
1647 rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1648 int u_height = get_irn_height(rss->h, val_arr[i]);
1649 plist_element_t *el;
1651 /* ignore nodes where serialization does not help */
1652 if (IS_UNSERIALIZABLE_NODE(u))
1655 /* accumulate all descendants of all pkiller(u) */
1656 bitset_clear_all(bs_ukilldesc);
1657 foreach_plist(u->pkiller_list, el) {
1658 ir_node *irn = plist_element_get_value(el);
1659 rss_irn_t *node = get_rss_irn(rss, irn);
1662 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, irn));
1666 for (k = ARR_LEN_SAFE(node->descendants) - 1; k >= 0; --k) {
1667 if (! is_Sink(node->descendants[k]))
1668 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, node->descendants[k]));
1672 /* for all v in sat_vals */
1673 for (j = 0; j < n; ++j) {
1674 ir_node *v_irn = val_arr[j];
1675 rss_irn_t *v = get_rss_irn(rss, v_irn);
1676 unsigned v_height = get_irn_height(rss->h, v_irn);
1677 unsigned omega1, omega2, is_pkiller;
1679 /* v cannot be serialized with itself
1680 * ignore nodes where serialization does not help */
1681 if (i == j || IS_UNSERIALIZABLE_NODE(v))
1684 /* get descendants of v */
1685 bitset_clear_all(bs_vdesc);
1686 bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v_irn));
1687 for (k = ARR_LEN_SAFE(v->descendants) - 1; k >= 0; --k) {
1688 if (! is_Sink(v->descendants[k]))
1689 bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v->descendants[k]));
1692 /* if v is in pkiller(u) */
1693 is_pkiller = plist_has_value(u->pkiller_list, val_arr[j]);
1695 /* for all vv in pkiller(u) */
1696 foreach_plist(u->pkiller_list, el) {
1697 ir_node *vv_irn = plist_element_get_value(el);
1700 if (is_Sink(vv_irn))
1704 add_edge = vv_irn != v_irn && skip_Proj(vv_irn) != skip_Proj(v_irn);
1706 add_edge = ! heights_reachable_in_block(rss->h, skip_Proj(vv_irn), skip_Proj(v_irn));
1709 As we add an edge from vv -> v, we have to make sure,
1710 that there exists no path from v to vv.
1714 unsigned vv_height = get_irn_height(rss->h, vv_irn);
1715 unsigned mu1, mu2, critical_path_cost;
1718 mu1 = | descendants(v) cut sat_vals |
1719 the number of saturating values which cannot
1720 be simultaneously alive with u
1722 bitset_copy(bs_tmp, bs_vdesc);
1723 mu1 = bitset_popcnt(bitset_and(bs_tmp, bs_sv));
1726 mu2 = | accum_desc_all_pkiller(u) without descendants(v) |
1729 bitset_copy(bs_tmp, bs_ukilldesc);
1730 mu2 = bitset_popcnt(bitset_andnot(bs_tmp, bs_vdesc));
1736 //assert(mu1 >= mu2);
1738 /* omega1 = mu1 - mu2 */
1739 omega1 = mu2 >= mu2 ? mu1 - mu2 : 0;
1742 has_positive_omega1 = 1;
1744 /* omega2 = increase of critical path */
1745 critical_path_cost =
1746 v_height /* longest path from v to sink */
1747 + rss->max_height - vv_height /* longest path from source to vv */
1751 If critical_path_cost > max_height -> the new edge
1752 would increase the longest critical path by the difference.
1754 omega2 = critical_path_cost > rss->max_height ? critical_path_cost - rss->max_height : 0;
1756 /* this keeps track of the edge with the best benefit */
1757 if (num_regs - omega1 < best_benefit) {
1758 min_benefit_edge.src = v_irn;
1759 min_benefit_edge.tgt = vv_irn;
1764 best_benefit = num_regs - omega1;
1767 /* this keeps track of the edge with the best omega1 costs where omega2 == 0 */
1768 if (omega2 == 0 && (num_regs - omega1 < best_benefit_omega20)) {
1769 min_omega20_edge.src = v_irn;
1770 min_omega20_edge.tgt = vv_irn;
1775 best_benefit_omega20 = num_regs - omega1;
1778 best_omega2 = MIN(best_omega2, omega2);
1780 DBG((rss->dbg, LEVEL_2, "\t\tfound %+F -> %+F (w1 %d, w2 %d)\n",
1781 v_irn, vv_irn, omega1, omega2));
1783 } /* for all vv in pkiller(u) */
1784 } /* for all v in sat_vals */
1785 } /* for all u in sat_vals */
1787 if (! has_positive_omega1)
1790 if (best_omega2 == 0) {
1791 ser->u = ser_u_omega20;
1792 ser->v = ser_v_omega20;
1793 ser->edge->src = min_omega20_edge.src;
1794 ser->edge->tgt = min_omega20_edge.tgt;
1795 ser->omega1 = best_benefit_omega20;
1796 ser->omega2 = best_omega2;
1799 ser->u = ser_u_omega1;
1800 ser->v = ser_v_omega1;
1801 ser->edge->src = min_benefit_edge.src;
1802 ser->edge->tgt = min_benefit_edge.tgt;
1803 ser->omega1 = best_benefit;
1804 ser->omega2 = best_omega2;
1809 #undef IS_UNSERIALIZABLE_NODE
1813 * Perform the value serialization heuristic and add all
1814 * computed serialization edges as dependencies to the irg.
1816 static void perform_value_serialization_heuristic(rss_t *rss) {
1817 bitset_t *arch_nonign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1818 bitset_t *abi_ign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1819 int available_regs, iteration;
1822 pset *ser_set = new_pset(cmp_rss_edges, 20);
1824 /* available_regs = R = |arch_non_ignore_regs cut ~abi_ignore_regs| */
1825 arch_put_non_ignore_regs(rss->arch_env, rss->cls, arch_nonign_bs);
1826 be_abi_put_ignore_regs(rss->abi, rss->cls, abi_ign_bs);
1827 bitset_andnot(arch_nonign_bs, abi_ign_bs);
1828 available_regs = bitset_popcnt(arch_nonign_bs);
1830 DBG((rss->dbg, LEVEL_1, "\n\t#available regs: %d\n\n", available_regs));
1833 At first we need to compute the disjoint value DAG (DVG = {V, E_dv}).
1834 V = set of all nodes we are currently interested in
1835 E_dv = there is an edge from u to v iff v is a descendant of killer(u), forall u, v in V
1837 dvg.nodes = new_nodeset(plist_count(rss->nodes));
1838 dvg.edges = new_pset(cmp_rss_edges, plist_count(rss->nodes) * 5);
1839 compute_dvg(rss, &dvg);
1842 Then we perform the heuristic serialization algorithm
1843 on the DVG which gives us all necessary serialization
1846 DBG((rss->dbg, LEVEL_1, "\tcomputing maximal antichain:\n"));
1848 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
1849 while (sat_vals && (nodeset_count(sat_vals) > available_regs)) {
1850 serialization_t *ser, best_ser;
1851 rss_edge_t *edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*edge));
1852 rss_irn_t *tgt, *src;
1853 ir_node *dep_src, *dep_tgt;
1855 best_ser.edge = edge;
1856 ser = compute_best_admissible_serialization(rss, sat_vals, &best_ser, available_regs);
1858 DBG((rss->dbg, LEVEL_1, "\tcurrent register saturation %d, target %d\n", nodeset_count(sat_vals), available_regs));
1861 DBG((rss->dbg, LEVEL_1, "\tno RS improving serialization found, breaking at iteration %d\n", iteration));
1865 /* Insert the serialization as dependency edge into the irg. */
1866 DBG((rss->dbg, LEVEL_1, "\tserializing %+F -> %+F with edge %+F -> %+F and cost %d, %d\n",
1867 ser->u->irn, ser->v->irn, ser->edge->src, ser->edge->tgt, ser->omega1, ser->omega2));
1869 if (pset_find(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge)))
1870 ir_printf("WARNING: serialization %+F -> %+F computed twice!\n", ser->edge->src, ser->edge->tgt);
1873 pset_insert(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge));
1875 /* update the dvg */
1876 update_dvg(rss, &dvg, ser->v, ser->u);
1877 update_dvg(rss, &dvg, get_rss_irn(rss, ser->edge->src), get_rss_irn(rss, ser->edge->tgt));
1878 del_nodeset(sat_vals);
1880 dep_src = skip_Proj(ser->edge->src);
1881 dep_tgt = ser->edge->tgt;
1882 add_irn_dep(dep_src, dep_tgt);
1884 /* Update descendants, consumer and pkillers of the target */
1885 update_node_info(rss, ser->edge->tgt, ser->edge->src);
1887 /* TODO: try to find a cheaper way for updating height information */
1888 rss->max_height = heights_recompute_block(rss->h, rss->block);
1891 src = get_rss_irn(rss, ser->edge->src);
1892 tgt = get_rss_irn(rss, ser->edge->tgt);
1893 src->killer = tgt->irn;
1894 del_nodeset(dvg.nodes);
1895 del_pset(dvg.edges);
1896 dvg.nodes = new_nodeset(plist_count(rss->nodes));
1897 dvg.edges = new_pset(cmp_rss_edges, plist_count(rss->nodes) * 5);
1898 compute_dvg(rss, &dvg);
1901 /* Recompute the antichain for next serialization */
1902 DBG((rss->dbg, LEVEL_1, "\tre-computing maximal antichain:\n"));
1903 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
1906 del_nodeset(dvg.nodes);
1907 del_pset(dvg.edges);
1911 * Do initial calculations for a block.
1913 static void process_block(ir_node *block, void *env) {
1916 const ir_edge_t *edge;
1918 phase_init(&rss->ph, "rss block preprocessor", rss->irg, PHASE_DEFAULT_GROWTH, init_rss_irn);
1920 DBG((rss->dbg, LEVEL_1, "preprocessing block %+F\n", block));
1923 /* build an index map for all nodes in the current block */
1925 n = get_irn_n_edges(block);
1926 NEW_ARR_A(int *, rss->idx_map, n);
1927 foreach_out_edge(block, edge) {
1928 ir_node *irn = get_edge_src_irn(edge);
1929 rss->idx_map[i++] = get_irn_idx(irn);
1931 qsort(rss->idx_map, n, sizeof(rss->idx_map[0]), cmp_int);
1932 rss->max_height = heights_recompute_block(rss->h, block);
1934 /* loop over all register classes */
1935 for (i = arch_isa_get_n_reg_class(rss->arch_env->isa) - 1; i >= 0; --i) {
1936 const arch_register_class_t *cls = arch_isa_get_reg_class(rss->arch_env->isa, i);
1939 DBG((rss->dbg, LEVEL_1, "register class %s\n", arch_register_class_name(cls)));
1941 /* reset the list of interesting nodes */
1942 plist_clear(rss->nodes);
1943 plist_insert_back(rss->nodes, _sink);
1945 /* collect all nodes having a certain register class */
1946 foreach_out_edge(block, edge) {
1947 ir_node *irn = get_edge_src_irn(edge);
1949 if (get_irn_mode(irn) == mode_T || be_is_Keep(irn) || be_is_Barrier(skip_Proj(irn)))
1952 if (! arch_irn_is(rss->arch_env, irn, ignore) && arch_get_irn_reg_class(rss->arch_env, irn, -1) == cls) {
1953 plist_insert_back(rss->nodes, irn);
1955 /* calculate the descendants and consumer for each node in the block */
1956 collect_node_info(rss, irn);
1960 /* compute the potential killing set PK(G) */
1961 compute_pkill_set(rss);
1963 /* compute the killing function k* */
1964 compute_killing_function(rss);
1967 Compute the heuristic value serialization and
1968 add the necessary dependencies to the irg.
1970 perform_value_serialization_heuristic(rss);
1973 phase_free(&rss->ph);
1977 * Preprocess the irg for scheduling.
1979 void rss_schedule_preparation(const be_irg_t *birg) {
1982 FIRM_DBG_REGISTER(rss.dbg, "firm.be.sched.rss");
1984 firm_dbg_set_mask(rss.dbg, LEVEL_1 | LEVEL_2 | LEVEL_3);
1986 init_rss_special_nodes(birg->irg);
1988 rss.irg = birg->irg;
1989 rss.arch_env = birg->main_env->arch_env;
1990 rss.abi = birg->abi;
1991 rss.h = heights_new(birg->irg);
1992 rss.nodes = plist_new();
1993 irg_block_walk_graph(birg->irg, NULL, process_block, &rss);
1994 heights_free(rss.h);
1995 plist_free(rss.nodes);