2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Implementation of a register saturating list scheduler.
23 * @author Christian Wuerdig
27 * Implementation of a register saturating list scheduler
28 * as described in: Sid-Ahmed-Ali Touati
29 * Register Saturation in Superscalar and VLIW Codes
40 #include "irgraph_t.h"
42 #include "iredges_t.h"
44 #include "irphase_t.h"
50 #include "bipartite.h"
51 #include "hungarian.h"
59 #include "besched_t.h"
62 #include <libcore/lc_opts.h>
63 #include <libcore/lc_opts_enum.h>
66 #define ARR_LEN_SAFE(arr) ((arr) != NULL ? ARR_LEN((arr)) : 0)
68 #define HASH_RSS_EDGE(edge) ((get_irn_node_nr((edge)->src) << 16) | (get_irn_node_nr((edge)->tgt) & 0xFFFF))
69 #define BSEARCH_IRN_ARR(val, arr) \
70 bsearch(&(val), (arr), ARR_LEN_SAFE((arr)), sizeof((arr)[0]), cmp_irn_idx)
72 #define BLOCK_IDX_MAP(rss, irn) bsearch_for_index(get_irn_idx((irn)), (rss)->idx_map, ARR_LEN_SAFE((rss)->idx_map), 1)
75 typedef struct _rss_opts_t {
79 /* Represents a child with associated costs */
80 typedef struct _child {
85 /* We need edges for several purposes. */
86 typedef struct _rss_edge {
92 /* Represents a connected bipartite component. */
94 ir_nodeset_t parents; /**< = S a set of value producers */
95 ir_nodeset_t children; /**< = T a set of value consumers */
96 pset *kill_edges; /**< = E a set of edges (t in T, s in S) such as each s in S gets killed by at least one t in T */
97 int nr; /**< a deterministic index for set insertion (used as hash) */
100 /* Represents a disjoint value DAG. */
101 typedef struct _dvg {
106 /* Represents a chain of nodes. */
107 typedef struct _chain {
108 plist_t *elements; /**< List of chain elements */
109 int nr; /**< a deterministic index for set insertion (used as hash) */
112 typedef struct _rss_irn {
113 plist_t *consumer_list; /**< List of consumers */
114 const ir_node **consumer; /**< Sorted consumer array (needed for faster access) */
116 plist_t *parent_list; /**< List of parents */
117 plist_t *pkiller_list; /**< List of potential killers */
119 plist_t *descendant_list; /**< List of descendants */
120 const ir_node **descendants; /**< Sorted descendant array (needed for faster access) */
122 const ir_node *killer; /**< The selected unique killer */
123 const ir_node *irn; /**< The corresponding firm node to this rss_irn */
125 chain_t *chain; /**< The chain, this node is associated to */
127 unsigned desc_walk; /**< visited flag for collecting descendants */
128 unsigned kill_count; /**< number of nodes killed by this one */
130 unsigned live_out : 1; /**< irn has consumers outside of it's block */
131 unsigned visited : 1; /**< visited flag for bipartite decomposition */
132 unsigned havecons : 1; /**< visited flag collect consumer */
133 unsigned handled : 1; /**< flag indicating whether or not the list structures have been build */
134 unsigned dumped : 1; /**< flag indication whether or not this node was dumped */
137 /* Represents a serialization edge with associated costs. */
138 typedef struct _serialization {
139 rss_irn_t *u; /* the top node */
140 rss_irn_t *v; /* the node about to be serialized after u */
141 rss_edge_t *edge; /* the edge selected for the serialization */
142 int omega1; /* estimated: available regs - RS reduction */
143 int omega2; /* increase in critical path length */
147 typedef struct _rss {
148 ir_phase ph; /**< Phase to hold some data */
149 heights_t *h; /**< The current height object */
150 ir_graph *irg; /**< The irg to preprocess */
151 plist_t *nodes; /**< The list of interesting nodes */
152 const arch_env_t *arch_env; /**< The architecture environment */
153 be_abi_irg_t *abi; /**< The abi for this irg */
154 pset *cbc_set; /**< A set of connected bipartite components */
155 ir_node *block; /**< The current block in progress. */
156 int *idx_map; /**< Mapping irn indices to per block indices */
157 unsigned max_height; /**< maximum height in the current block */
158 rss_opts_t *opts; /**< The options */
159 be_lv_t *liveness; /**< The liveness information for this irg */
160 ir_nodeset_t live_block; /**< Values alive at end of block */
161 const arch_register_class_t *cls; /**< The current register class */
162 DEBUG_ONLY(firm_dbg_module_t *dbg);
165 #define get_rss_irn(rss, irn) (phase_get_or_set_irn_data(&rss->ph, irn))
168 * We need some special nodes:
169 * a source and a sink for all live-in and live-out values of a block
177 /** The opcode of the rss_Source node once allocated. */
178 static ir_op *op_rss_Source;
179 /** The opcode of the rss_Sink node once allocated. */
180 static ir_op *op_rss_Sink;
182 /** The rss_Source node of the current graph. */
183 static ir_node *_source = NULL;
184 /** The rss_Sink node of the current graph. */
185 static ir_node *_sink = NULL;
187 #define is_Source(irn) ((irn) == _source)
188 #define is_Sink(irn) ((irn) == _sink)
192 RSS_DUMP_CBC = 1 << 0,
193 RSS_DUMP_PKG = 1 << 1,
194 RSS_DUMP_KILL = 1 << 2,
195 RSS_DUMP_DVG = 1 << 3,
196 RSS_DUMP_MAXAC = 1 << 4,
197 RSS_DUMP_ALL = (RSS_DUMP_MAXAC << 1) - 1,
200 static rss_opts_t rss_options = {
204 static const lc_opt_enum_int_items_t dump_items[] = {
205 { "none", RSS_DUMP_NONE },
206 { "cbc", RSS_DUMP_CBC },
207 { "pkg", RSS_DUMP_PKG },
208 { "kill", RSS_DUMP_KILL },
209 { "dvg", RSS_DUMP_DVG },
210 { "maxac", RSS_DUMP_MAXAC },
211 { "all", RSS_DUMP_ALL },
215 static lc_opt_enum_int_var_t dump_var = {
216 &rss_options.dump_flags, dump_items
219 static const lc_opt_table_entry_t rss_option_table[] = {
220 LC_OPT_ENT_ENUM_MASK("dump", "dump phases", &dump_var),
224 /******************************************************************************
226 * | | | | / _| | | (_)
227 * | |__ ___| |_ __ ___ _ __ | |_ _ _ _ __ ___| |_ _ ___ _ __ ___
228 * | '_ \ / _ \ | '_ \ / _ \ '__| | _| | | | '_ \ / __| __| |/ _ \| '_ \/ __|
229 * | | | | __/ | |_) | __/ | | | | |_| | | | | (__| |_| | (_) | | | \__ \
230 * |_| |_|\___|_| .__/ \___|_| |_| \__,_|_| |_|\___|\__|_|\___/|_| |_|___/
233 ******************************************************************************/
236 * Acquire opcodes if needed and create source and sink nodes.
238 static void init_rss_special_nodes(ir_graph *irg) {
241 if (op_rss_Source == NULL) {
242 int iro_rss_base = get_next_ir_opcodes(iro_rss_last);
243 op_rss_Source = new_ir_op(iro_rss_base + iro_rss_Source, "rss_Source", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
244 op_rss_Sink = new_ir_op(iro_rss_base + iro_rss_Sink, "rss_Sink", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
246 block = get_irg_start_block(irg);
247 _source = new_ir_node(NULL, irg, block, op_rss_Source, mode_ANY, 0, NULL);
248 _sink = new_ir_node(NULL, irg, block, op_rss_Sink, mode_ANY, 0, NULL);
251 static int cmp_int(const void *a, const void *b) {
255 return QSORT_CMP(*i1, *i2);
258 static int cmp_child_costs(const void *a, const void *b) {
259 const child_t *c1 = a;
260 const child_t *c2 = b;
262 return QSORT_CMP(c1->cost, c2->cost);
265 static int cmp_irn_idx(const void *a, const void *b) {
266 const ir_node *n1 = *(ir_node **)a;
267 const ir_node *n2 = *(ir_node **)b;
269 return QSORT_CMP(get_irn_idx(n1), get_irn_idx(n2));
272 static int cmp_rss_edges(const void *a, const void *b) {
273 const rss_edge_t *e1 = a;
274 const rss_edge_t *e2 = b;
276 return (e1->src != e2->src) || (e1->tgt != e2->tgt);
279 static int bsearch_for_index(int key, int *arr, size_t len, int force) {
283 while (right >= left) {
284 int idx = (left + right) / 2;
288 else if (key > arr[idx])
295 assert(0 && "Something is wrong, key not found.");
299 static const ir_node **build_sorted_array_from_list(plist_t *irn_list, struct obstack *obst) {
302 int len = plist_count(irn_list);
303 const ir_node **arr = (const ir_node**)NEW_ARR_D(ir_node*, obst, len);
305 /* copy the list into the array */
306 foreach_plist(irn_list, el) {
307 arr[i++] = plist_element_get_value(el);
310 /* sort the array by node index */
311 /* HACK cast for MSVC */
312 qsort((void*)arr, len, sizeof(arr[0]), cmp_irn_idx);
317 /*****************************************************
320 * __| | ___| |__ _ _ __ _ __ _ _ _ __ __ _
321 * / _` |/ _ \ '_ \| | | |/ _` |/ _` | | '_ \ / _` |
322 * | (_| | __/ |_) | |_| | (_| | (_| | | | | | (_| |
323 * \__,_|\___|_.__/ \__,_|\__, |\__, |_|_| |_|\__, |
327 *****************************************************/
330 static void dump_nodeset(ir_nodeset_t *ns, const char *prefix) {
331 ir_nodeset_iterator_t iter;
334 ir_nodeset_iterator_init(&iter, ns);
335 while( (irn = ir_nodeset_iterator_next(&iter)) != NULL ) {
336 ir_fprintf(stderr, "%s%+F\n", prefix, irn);
341 static void build_file_name(rss_t *rss, const char *suffix, size_t suf_len, char *buf, size_t len) {
342 const char *irg_name;
345 irg_name = get_entity_name(get_irg_entity(rss->irg));
346 snprintf(buf, len - suf_len, "%s-%s-block-%ld",
347 irg_name, arch_register_class_name(rss->cls), get_irn_node_nr(rss->block));
351 /* Dumps all collected bipartite components of current irg as vcg. */
352 static void debug_vcg_dump_bipartite(rss_t *rss) {
356 static const char suffix[] = "-RSS-CBC.vcg";
358 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
359 f = fopen(file_name, "w");
361 ir_fprintf(f, "graph: { title: \"connected bipartite component graph of %+F\"\n", rss->irg);
362 fprintf(f, "display_edge_labels: no\n");
363 fprintf(f, "layoutalgorithm: mindepth\n");
364 fprintf(f, "manhattan_edges: yes\n\n");
366 foreach_pset(rss->cbc_set, cbc) {
367 ir_nodeset_iterator_t iter;
371 fprintf(f, "graph: { titel: \"cbc %d\" label: \"cbc %d\" status:clustered color:yellow\n", cbc->nr, cbc->nr);
372 foreach_ir_nodeset(&cbc->parents, n, iter) {
373 ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
375 foreach_ir_nodeset(&cbc->children, n, iter) {
376 ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
378 foreach_pset(cbc->kill_edges, ke) {
379 ir_fprintf(f, "edge: { sourcename: \"n%d_%d\" targetname: \"n%d_%d\" }\n",
380 get_irn_node_nr(ke->src), cbc->nr, get_irn_node_nr(ke->tgt), cbc->nr);
388 /* Dump the computed killing function as vcg. */
389 static void debug_vcg_dump_kill(rss_t *rss) {
393 static const char suffix[] = "-RSS-KILL.vcg";
395 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
396 f = fopen(file_name, "w");
398 ir_fprintf(f, "graph: { title: \"computed kill graph of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
399 fprintf(f, "display_edge_labels: no\n");
400 fprintf(f, "layoutalgorithm: mindepth\n");
401 fprintf(f, "manhattan_edges: yes\n\n");
404 /* reset dump_flag */
406 foreach_phase_irn(&rss->ph, irn) {
407 rss_irn_t *node = get_rss_irn(rss, irn);
412 /* dump all nodes and their killers */
413 foreach_plist(rss->nodes, el) {
414 ir_node *irn = plist_element_get_value(el);
415 rss_irn_t *rirn = get_rss_irn(rss, irn);
416 rss_irn_t *pk_rirn = get_rss_irn(rss, rirn->killer);
418 if (! rirn->dumped) {
419 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
423 if (! pk_rirn->dumped) {
424 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(rirn->killer), rirn->killer);
428 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
429 get_irn_node_nr(rirn->killer), get_irn_node_nr(irn));
436 /* Dumps the potential killing DAG (PKG) as vcg. */
437 static void debug_vcg_dump_pkg(rss_t *rss, ir_nodeset_t *max_ac, int iteration) {
440 char *suffix = alloca(32);
441 static const char suffix1[] = "-RSS-PKG.vcg";
442 static const char suffix2[] = "-RSS-PKG-MAXAC.vcg";
446 snprintf(suffix, 32, "%s", suffix1);
449 snprintf(suffix, 32, "-%02d%s", iteration, suffix2);
452 build_file_name(rss, suffix, strlen(suffix) + 1, file_name, sizeof(file_name));
453 f = fopen(file_name, "w");
455 ir_fprintf(f, "graph: { title: \"potential killing DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
456 fprintf(f, "display_edge_labels: no\n");
457 fprintf(f, "layoutalgorithm: mindepth\n");
458 fprintf(f, "manhattan_edges: yes\n\n");
461 /* reset dump_flag */
463 foreach_phase_irn(&rss->ph, irn) {
464 rss_irn_t *node = get_rss_irn(rss, irn);
469 foreach_plist(rss->nodes, el) {
470 ir_node *irn = plist_element_get_value(el);
471 rss_irn_t *rirn = get_rss_irn(rss, irn);
473 plist_element_t *k_el;
475 /* dump selected saturating values in yellow */
476 if (max_ac && ir_nodeset_contains(max_ac, irn))
479 if (! rirn->dumped) {
481 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F c%d\" %s }\n", get_irn_node_nr(irn), irn, rirn->chain->nr, c1);
483 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(irn), irn, c1);
487 foreach_plist(rirn->pkiller_list, k_el) {
488 ir_node *pkiller = plist_element_get_value(k_el);
489 rss_irn_t *pk_rirn = get_rss_irn(rss, pkiller);
492 if (max_ac && ir_nodeset_contains(max_ac, pkiller))
495 if (! pk_rirn->dumped) {
497 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F c%d\" %s }\n", get_irn_node_nr(pkiller), pkiller, pk_rirn->chain->nr, c2);
499 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(pkiller), pkiller, c2);
502 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
503 get_irn_node_nr(pkiller), get_irn_node_nr(irn));
510 /* Dumps the disjoint value DAG (DVG) as vcg. */
511 static void debug_vcg_dump_dvg(rss_t *rss, dvg_t *dvg) {
512 static const char suffix[] = "-RSS-DVG.vcg";
515 ir_nodeset_iterator_t iter;
519 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
520 f = fopen(file_name, "w");
522 ir_fprintf(f, "graph: { title: \"disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
523 fprintf(f, "display_edge_labels: no\n");
524 fprintf(f, "layoutalgorithm: mindepth\n");
525 fprintf(f, "manhattan_edges: yes\n\n");
528 foreach_ir_nodeset(&dvg->nodes, irn, iter) {
529 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
533 foreach_pset(dvg->edges, edge) {
534 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
535 get_irn_node_nr(edge->src), get_irn_node_nr(edge->tgt));
543 /* Dumps the PKG(DVG). */
544 static void debug_vcg_dump_dvg_pkiller(rss_t *rss, dvg_t *dvg) {
545 static const char suffix[] = "-RSS-DVG-PKG.vcg";
548 ir_nodeset_iterator_t iter;
551 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
552 f = fopen(file_name, "w");
554 ir_fprintf(f, "graph: { title: \"PKG of disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
555 fprintf(f, "display_edge_labels: no\n");
556 fprintf(f, "layoutalgorithm: mindepth\n");
557 fprintf(f, "manhattan_edges: yes\n\n");
560 foreach_ir_nodeset(&dvg->nodes, irn, iter) {
561 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
565 foreach_ir_nodeset(&dvg->nodes, irn, iter) {
566 rss_irn_t *node = get_rss_irn(rss, irn);
569 foreach_plist(node->dvg_pkiller_list, el) {
570 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
571 get_irn_node_nr(plist_element_get_value(el)), get_irn_node_nr(irn));
581 * In case there is no rss information for irn, initialize it.
583 static void *init_rss_irn(ir_phase *ph, const ir_node *irn, void *old) {
584 rss_irn_t *res = old ? old : phase_alloc(ph, sizeof(res[0]));
586 res->descendant_list = plist_obstack_new(phase_obst(ph));
587 res->descendants = NULL;
589 res->consumer_list = plist_obstack_new(phase_obst(ph));
590 res->consumer = NULL;
592 res->pkiller_list = plist_obstack_new(phase_obst(ph));
594 res->parent_list = plist_obstack_new(phase_obst(ph));
612 * Collect all nodes data dependent on current node.
614 static void collect_descendants(rss_t *rss, rss_irn_t *rirn, ir_node *irn, int *got_sink, unsigned cur_desc_walk) {
615 const ir_edge_t *edge;
616 rss_irn_t *cur_node = get_rss_irn(rss, irn);
617 ir_node *block = rss->block;
618 ir_edge_kind_t ekind[2] = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
621 if (cur_node->desc_walk >= cur_desc_walk)
623 cur_node->desc_walk = cur_desc_walk;
625 /* Stop at Barriers */
626 if (be_is_Barrier(irn))
629 /* loop over normal and dependency edges */
630 for (i = 0; i < 2; ++i) {
631 foreach_out_edge_kind(irn, edge, ekind[i]) {
632 ir_node *user = get_edge_src_irn(edge);
634 /* skip ignore nodes as they do not really contribute to register pressure */
635 if (arch_irn_is(rss->arch_env, user, ignore))
639 (a) mode_X means Jump -> out_edge
640 (b) Phi as user of a normal node -> out-edge
642 if (get_irn_mode(user) == mode_X || is_Phi(user)) {
651 //if (arch_get_irn_reg_class(rss->arch_env, user, -1) == rss->cls)
652 collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
655 /* check if user lives in block and is not a control flow node */
656 if (get_nodes_block(user) == block) {
657 if (! plist_has_value(rirn->descendant_list, user)) {
658 plist_insert_back(rirn->descendant_list, user);
659 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", user));
661 collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
663 else if (! *got_sink) {
665 /* user lives out of block: add sink as descendant if not already done */
666 plist_insert_back(rirn->descendant_list, _sink);
668 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", _sink));
676 * Handles a single consumer.
678 static void collect_single_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *consumer, int *got_sink) {
679 ir_node *block = rss->block;
681 assert(! is_Proj(consumer) && "Cannot handle Projs");
683 if (! is_Phi(consumer) && ! is_Block(consumer) && get_nodes_block(consumer) == block) {
684 if (! arch_irn_is(rss->arch_env, consumer, ignore) && ! plist_has_value(rss_irn->consumer_list, consumer)) {
685 plist_insert_back(rss_irn->consumer_list, consumer);
686 DBG((rss->dbg, LEVEL_2, "\t\tconsumer %+F\n", consumer));
690 rss_irn->live_out = 1;
691 DBG((rss->dbg, LEVEL_2, "\t\tlive out %+F", consumer));
693 plist_insert_back(rss_irn->consumer_list, _sink);
695 DB((rss->dbg, LEVEL_2, ", %+F added instead", _sink));
697 DB((rss->dbg, LEVEL_2, "\n"));
702 * Collect all nodes consuming the value(s) produced by current node.
704 static void collect_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *irn, int *got_sink) {
705 const ir_edge_t *edge;
707 ir_edge_kind_t ekind[2] = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
708 rss_irn_t *cur_node = get_rss_irn(rss, irn);
710 if (cur_node->havecons)
712 cur_node->havecons = 1;
714 for (i = 0; i < 2; ++i) {
715 foreach_out_edge_kind(irn, edge, ekind[i]) {
716 ir_node *consumer = get_edge_src_irn(edge);
718 if (is_Proj(consumer)) {
719 //if (arch_get_irn_reg_class(rss->arch_env, consumer, -1) == rss->cls)
720 collect_consumer(rss, rss_irn, consumer, got_sink);
723 collect_single_consumer(rss, rss_irn, consumer, got_sink);
729 * Collects all consumer and descendant of a irn.
731 static void collect_node_info(rss_t *rss, ir_node *irn) {
732 static unsigned cur_desc_walk = 0;
733 rss_irn_t *rss_irn = get_rss_irn(rss, irn);
736 assert(! is_Proj(irn) && "Cannot handle Projs.");
738 /* check if node info is already available */
739 if (rss_irn->handled)
741 //phase_reinit_single_irn_data(&rss->ph, irn);
743 DBG((rss->dbg, LEVEL_1, "\tcomputing consumers of %+F:\n", irn));
745 /* collect all consumer */
747 collect_consumer(rss, rss_irn, irn, &got_sink);
749 /* build sorted consumer array */
750 rss_irn->consumer = build_sorted_array_from_list(rss_irn->consumer_list, phase_obst(&rss->ph));
752 DBG((rss->dbg, LEVEL_1, "\tcompute descendants of %+F:\n", irn));
754 /* collect descendants */
756 collect_descendants(rss, rss_irn, irn, &got_sink, ++cur_desc_walk);
758 /* build sorted descendant array */
759 rss_irn->descendants = build_sorted_array_from_list(rss_irn->descendant_list, phase_obst(&rss->ph));
761 rss_irn->handled = 1;
765 * Checks if v is a potential killer of u.
766 * v is in pkill(u) iff descendants(v) cut consumer(u) is v
768 * @param rss The rss object
769 * @param v The node to check for killer
770 * @param u The potentially killed value
771 * @return 1 if v is in pkill(u), 0 otherwise
773 static int is_potential_killer(rss_t *rss, rss_irn_t *v, rss_irn_t *u) {
779 assert(is_Sink(v->irn) || ((plist_count(v->descendant_list) > 0 && v->descendants) || 1));
780 assert(is_Sink(u->irn) || ((plist_count(u->consumer_list) > 0 && u->consumer) || 1));
782 /* as we loop over the list: loop over the shorter one */
783 if (plist_count(v->descendant_list) > plist_count(u->consumer_list)) {
784 list = u->consumer_list;
785 arr = v->descendants;
788 list = v->descendant_list;
792 /* for each list element: try to find element in array */
793 foreach_plist(list, el) {
794 ir_node *irn = plist_element_get_value(el);
795 ir_node *match = BSEARCH_IRN_ARR(irn, arr);
797 if (match && match != irn)
805 * Update descendants, consumer and pkiller list for the given irn.
807 static void update_node_info(rss_t *rss, ir_node *irn, ir_node *pk_irn) {
808 rss_irn_t *node = get_rss_irn(rss, irn);
809 rss_irn_t *pkiller = get_rss_irn(rss, pk_irn);
811 /* add consumer and rebuild the consumer array */
812 if (! plist_has_value(node->consumer_list, pk_irn)) {
813 plist_insert_back(node->consumer_list, pk_irn);
814 node->consumer = build_sorted_array_from_list(node->consumer_list, phase_obst(&rss->ph));
817 /* add pk_irn as descendant, add it's descendants to irn's and rebuild array */
818 if (! plist_has_value(node->descendant_list, pk_irn)) {
821 plist_insert_back(node->descendant_list, pk_irn);
823 foreach_plist(pkiller->descendant_list, el) {
824 ir_node *desc = plist_element_get_value(el);
826 if (! plist_has_value(node->descendant_list, desc))
827 plist_insert_back(node->descendant_list, desc);
830 node->descendants = build_sorted_array_from_list(node->descendant_list, phase_obst(&rss->ph));
836 * Compute the potential killing set PK.
838 static void compute_pkill_set(rss_t *rss) {
839 plist_element_t *u_el, *v_el;
841 foreach_plist(rss->nodes, u_el) {
842 ir_node *u_irn = plist_element_get_value(u_el);
843 rss_irn_t *u = get_rss_irn(rss, u_irn);
845 DBG((rss->dbg, LEVEL_1, "\tcomputing potential killers of %+F:\n", u_irn));
847 /* check each consumer if it is a potential killer */
848 foreach_plist(u->consumer_list, v_el) {
849 ir_node *v_irn = plist_element_get_value(v_el);
850 rss_irn_t *v = get_rss_irn(rss, v_irn);
852 /* check, if v is a potential killer of u */
853 if (is_potential_killer(rss, v, u)) {
854 if (! plist_has_value(u->pkiller_list, v_irn))
855 plist_insert_back(u->pkiller_list, v_irn);
857 DBG((rss->dbg, LEVEL_2, "\t\tpotential killer %+F\n", v_irn));
864 if (rss->opts->dump_flags & RSS_DUMP_PKG)
865 debug_vcg_dump_pkg(rss, NULL, 0);
869 * Build set of killing edges (from values to their potential killers)
871 static void build_kill_edges(rss_t *rss, pset *epk) {
872 plist_element_t *el, *k_el;
874 foreach_plist(rss->nodes, el) {
875 ir_node *irn = plist_element_get_value(el);
876 rss_irn_t *rirn = get_rss_irn(rss, irn);
878 DBG((rss->dbg, LEVEL_3, "\t\tbuilding kill edges for %+F:\n", irn));
880 foreach_plist(rirn->pkiller_list, k_el) {
881 ir_node *pkiller = plist_element_get_value(k_el);
882 rss_edge_t *ke = obstack_alloc(phase_obst(&rss->ph), sizeof(*ke));
888 DBG((rss->dbg, LEVEL_3, "\t\t\ttarget %+F\n", pkiller));
890 pset_insert(epk, ke, HASH_RSS_EDGE(ke));
896 /* print the given cbc for debugging purpose */
897 static void debug_print_cbc(firm_dbg_module_t *mod, cbc_t *cbc) {
898 ir_nodeset_iterator_t iter;
902 DBG((mod, LEVEL_3, "\t\tS = set of parents:\n"));
903 foreach_ir_nodeset(&cbc->parents, n, iter) {
904 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
906 DBG((mod, LEVEL_3, "\t\tT = set of children:\n"));
907 foreach_ir_nodeset(&cbc->children, n, iter) {
908 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
910 DBG((mod, LEVEL_3, "\t\tE = Edges from producers to consumers\n"));
911 foreach_pset(cbc->kill_edges, ke) {
912 DBG((mod, LEVEL_3, "\t\t\t%+F -> %+F\n", ke->src, ke->tgt));
918 * Construct the bipartite decomposition.
919 * Sid-Ahmed-Ali Touati, Phd Thesis
920 * Register Pressure in Instruction Level Parallelism, p. 71
922 static void compute_bipartite_decomposition(rss_t *rss) {
923 pset *epk = new_pset(cmp_rss_edges, 10);
928 DBG((rss->dbg, LEVEL_1, "\tcomputing bipartite decomposition:\n"));
930 build_kill_edges(rss, epk);
932 foreach_plist(rss->nodes, el) {
933 ir_node *u_irn = plist_element_get_value(el);
934 rss_irn_t *u = get_rss_irn(rss, u_irn);
940 plist_element_t *el2;
942 rss_edge_t *kedge_root = NULL;
943 ir_node *t_irn, *s_irn;
944 ir_nodeset_iterator_t iter;
946 if (u->visited || u_irn == _sink)
949 DBG((rss->dbg, LEVEL_2, "\t\t%+F choosen:\n", u_irn));
951 cbc = obstack_alloc(phase_obst(&rss->ph), sizeof(*cbc));
954 /* initialize S_cb */
955 ir_nodeset_init_size(&cbc->parents, 5);
956 ir_nodeset_insert(&cbc->parents, u_irn);
957 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (init)\n", u_irn));
960 cbc->kill_edges = new_pset(cmp_rss_edges, 5);
962 /* each parent gets killed by at least one value from children */
964 /* T_cb = PK_successors(u) */
965 ir_nodeset_init_size(&cbc->children, 5);
966 foreach_plist(u->pkiller_list, el2) {
967 ir_nodeset_insert(&cbc->children, plist_element_get_value(el2));
968 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (init)\n", plist_element_get_value(el2)));
972 Now: insert the parents of all children into the parent set
973 and insert the children of all parents into the children set
974 until the sets don't change any more
976 while (p_change || c_change) {
977 ir_nodeset_iterator_t iter;
978 p_change = c_change = 0;
980 /* accumulate parents */
981 foreach_ir_nodeset(&cbc->children, t_irn, iter) {
982 foreach_pset(epk, k_edge) {
983 ir_node *val = k_edge->src;
985 if (k_edge->tgt == t_irn && ! ir_nodeset_contains(&cbc->parents, val)) {
986 ir_nodeset_insert(&cbc->parents, val);
988 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (killed by %+F)\n", val, t_irn));
993 /* accumulate children */
994 foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
995 foreach_pset(epk, k_edge) {
996 ir_node *val = k_edge->tgt;
998 if (k_edge->src == s_irn && ! ir_nodeset_contains(&cbc->children, val)) {
999 ir_nodeset_insert(&cbc->children, val);
1001 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (kills %+F)\n", val, s_irn));
1007 /* mark all parent values as visited */
1008 foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
1009 rss_irn_t *s = get_rss_irn(rss, s_irn);
1011 /* assure bipartite property */
1013 if (ir_nodeset_contains(&cbc->children, s_irn)) {
1014 ir_nodeset_remove(&cbc->children, s_irn);
1015 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F removed from to children\n", s_irn));
1021 foreach_pset(epk, k_edge) {
1022 if (ir_nodeset_contains(&cbc->parents, k_edge->src) &&
1023 ir_nodeset_contains(&cbc->children, k_edge->tgt)) {
1024 pset_insert(cbc->kill_edges, k_edge, HASH_RSS_EDGE(k_edge));
1026 Link all k_edges which are about to be removed together.
1027 Beware: pset_remove kills the iterator.
1029 k_edge->next = kedge_root;
1030 kedge_root = k_edge;
1034 /* remove all linked k_edges */
1035 for (; kedge_root; kedge_root = kedge_root->next) {
1036 pset_remove(epk, kedge_root, HASH_RSS_EDGE(kedge_root));
1039 /* verify the cbc */
1040 foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
1043 foreach_pset(cbc->kill_edges, k_edge) {
1044 if (k_edge->src == s_irn) {
1046 pset_break(cbc->kill_edges);
1053 ir_fprintf(stderr, "Warning: parent %+F is not killed in current cbc\n", s_irn);
1056 assert(vrfy_ok && "Verification of CBC failed");
1058 /* add the connected bipartite component */
1059 if (ir_nodeset_size(&cbc->parents) > 0 &&
1060 ir_nodeset_size(&cbc->children) > 0 &&
1061 pset_count(cbc->kill_edges) > 0)
1062 pset_insert(rss->cbc_set, cbc, (unsigned)cbc->nr);
1063 DBG((rss->dbg, LEVEL_2, "\tbipartite component %d inserted:\n", cbc->nr));
1065 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2)
1066 debug_print_cbc(rss->dbg, cbc);
1070 if (rss->opts->dump_flags & RSS_DUMP_CBC)
1071 debug_vcg_dump_bipartite(rss);
1077 * Select the child with the maximum cost.
1079 static child_t *select_child_max_cost(rss_t *rss, ir_nodeset_t *x, ir_nodeset_t *y, child_t *t, cbc_t *cbc) {
1081 ir_nodeset_iterator_t iter;
1082 float max_cost = -1.0f;
1084 DBG((rss->dbg, LEVEL_2, "\t\tcomputing children costs:\n"));
1086 foreach_ir_nodeset(&cbc->children, child, iter) {
1087 rss_irn_t *r_child = get_rss_irn(rss, child);
1088 int num_unkilled_parents = 0;
1089 int num_descendants;
1093 /* get the number of unkilled parents */
1094 foreach_pset(cbc->kill_edges, k_edge) {
1095 if (k_edge->tgt == child && ir_nodeset_contains(x, k_edge->src))
1096 ++num_unkilled_parents;
1099 cost = (float)num_unkilled_parents;
1101 num_descendants = plist_count(r_child->descendant_list) + ir_nodeset_size(y);
1103 if (num_descendants > 0)
1104 cost /= (float)num_descendants;
1106 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F, #desc %d, cost %.3f\n", child, num_descendants, cost));
1108 if (cost > max_cost) {
1119 * Remove all parents from x which are killed by t_irn.
1121 static void remove_covered_parents(rss_t *rss, ir_nodeset_t *x, ir_node *t_irn, cbc_t *cbc) {
1122 rss_irn_t *t = get_rss_irn(rss, t_irn);
1125 DBG((rss->dbg, LEVEL_2, "\t\tremoving parents covered by %+F:\n", t_irn));
1127 foreach_pset(cbc->kill_edges, k_edge) {
1128 if (k_edge->tgt == t_irn && ir_nodeset_contains(x, k_edge->src)) {
1129 ir_nodeset_remove(x, k_edge->src);
1130 plist_insert_back(t->parent_list, k_edge->src);
1131 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", k_edge->src));
1136 static void update_cumulated_descendent_values(rss_t *rss, ir_nodeset_t *y, ir_node *t_irn) {
1137 rss_irn_t *t = get_rss_irn(rss, t_irn);
1138 plist_element_t *el;
1140 DBG((rss->dbg, LEVEL_2, "\t\tupdating cumulated descendant value of %+F:\n", t_irn));
1142 foreach_plist(t->descendant_list, el) {
1143 ir_nodeset_insert(y, plist_element_get_value(el));
1144 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", plist_element_get_value(el)));
1149 * Greedy-k: a heuristics for the MMA problem
1151 static void compute_killing_function(rss_t *rss) {
1153 struct obstack obst;
1155 obstack_init(&obst);
1157 rss->cbc_set = pset_new_ptr(5);
1158 compute_bipartite_decomposition(rss);
1160 /* for all bipartite components do: */
1161 foreach_pset(rss->cbc_set, cbc) {
1165 ir_nodeset_iterator_t iter;
1166 child_t **sks = NEW_ARR_F(child_t *, 20);
1171 ir_nodeset_init_size(&x, 10);
1172 ir_nodeset_init_size(&y, 10);
1174 DBG((rss->dbg, LEVEL_1, "\tcomputing SKS for cbc %d:\n", cbc->nr));
1175 DBG((rss->dbg, LEVEL_2, "\t\tinitializing parents X:\n"));
1177 /* X = S_cb (all parents are initially uncovered) */
1178 foreach_ir_nodeset(&cbc->parents, p, iter) {
1179 ir_nodeset_insert(&x, p);
1180 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", p));
1183 /* while X not empty */
1184 while (ir_nodeset_size(&x) > 0) {
1185 child_t *t = obstack_alloc(&obst, sizeof(*t));
1186 memset(t, 0, sizeof(*t));
1188 t = select_child_max_cost(rss, &x, &y, t, cbc);
1190 if (cur_len >= cur_size) {
1191 ARR_EXTO(child_t *, sks, cur_size * 2);
1195 DBG((rss->dbg, LEVEL_2, "\t\tinsert child %+F (%.3f) into SKS at pos %d\n", t->irn, t->cost, cur_len));
1198 remove_covered_parents(rss, &x, t->irn, cbc);
1199 update_cumulated_descendent_values(rss, &y, t->irn);
1202 ARR_SHRINKLEN(sks, cur_len);
1204 /* sort SKS in increasing cost order */
1205 qsort(sks, cur_len, sizeof(sks[0]), cmp_child_costs);
1207 DBG((rss->dbg, LEVEL_2, "\tprocessing SKS for cbc %d:\n", cbc->nr));
1209 /* build killing function */
1210 for (i = cur_len - 1; i >= 0; --i) { /* loop over sks in decreasing cost order */
1211 child_t *t = sks[i];
1212 rss_irn_t *rt = get_rss_irn(rss, t->irn);
1213 plist_element_t *p_el;
1215 DBG((rss->dbg, LEVEL_3, "\t\t\tkiller %+F (%.3f):\n", t->irn, t->cost));
1217 /* kill all unkilled parents of t */
1218 foreach_plist(rt->parent_list, p_el) {
1219 ir_node *par = plist_element_get_value(p_el);
1220 rss_irn_t *rpar = get_rss_irn(rss, par);
1222 if (is_Sink(rpar->killer)) {
1223 rpar->killer = t->irn;
1224 DBG((rss->dbg, LEVEL_2, "\t\tkill %+F\n", rpar->irn));
1227 DBG((rss->dbg, LEVEL_3, "\t\t\tkeeping %+F as killer for %+F\n", rpar->killer, rpar->irn));
1232 ir_nodeset_destroy(&x);
1233 ir_nodeset_destroy(&y);
1237 if (rss->opts->dump_flags & RSS_DUMP_KILL)
1238 debug_vcg_dump_kill(rss);
1240 del_pset(rss->cbc_set);
1241 obstack_free(&obst, NULL);
1245 * Adds the edge src -> tgt to the dvg. Checks if reverse edge is already there (asserts).
1247 static INLINE void add_dvg_edge(rss_t *rss, dvg_t *dvg, const ir_node *src, const ir_node *tgt, int have_source) {
1248 rss_edge_t *dvg_edge;
1252 ir_nodeset_insert(&dvg->nodes, (ir_node *) src);
1254 assert(ir_nodeset_contains(&dvg->nodes, src) && "Wrong assumption");
1256 ir_nodeset_insert(&dvg->nodes, (ir_node *) tgt);
1258 key.src = (ir_node *) tgt;
1259 key.tgt = (ir_node *) src;
1260 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1262 key.src = (ir_node *) src;
1263 key.tgt = (ir_node *) tgt;
1264 if (NULL != pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1265 /* add the edge to the DVG */
1266 dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1268 dvg_edge->src = (ir_node *) src;
1269 dvg_edge->tgt = (ir_node *) tgt;
1270 dvg_edge->next = NULL;
1272 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", src, tgt));
1273 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1278 * Computes the disjoint value DAG (DVG).
1279 * BEWARE: It is not made explicitly clear in the Touati paper,
1280 * but the DVG is meant to be build from the KILLING DAG
1282 static void compute_dvg(rss_t *rss, dvg_t *dvg) {
1283 plist_element_t *el;
1285 DBG((rss->dbg, LEVEL_1, "\tcomputing DVG:\n"));
1287 foreach_plist(rss->nodes, el) {
1288 ir_node *u_irn = plist_element_get_value(el);
1289 rss_irn_t *u = get_rss_irn(rss, u_irn);
1290 rss_irn_t *u_killer = get_rss_irn(rss, u->killer);
1293 /* TODO: omit nodes only having sink as pkiller and killing no one */
1295 /* add an edge to killer */
1296 add_dvg_edge(rss, dvg, u_irn, u->killer, 0);
1298 if (is_Sink(u->killer))
1301 /* We add an edge to every killer, from where we could be reached. */
1302 for (i = ARR_LEN_SAFE(u_killer->descendants) - 1; i >= 0; --i) {
1303 add_dvg_edge(rss, dvg, u_irn, u_killer->descendants[i], 1);
1308 foreach_plist(rss->nodes, el2) {
1309 ir_node *v_irn = plist_element_get_value(el2);
1312 There is an edge (u, v) in the DVG iff v is a descendant of the killer(u).
1314 if (BSEARCH_IRN_ARR(v_irn, u_kill->descendants)) {
1315 rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1318 /* insert the user into the DVG and append it to the user list of u */
1319 ir_nodeset_insert(&dvg->nodes, v_irn);
1320 if (! plist_has_value(u->dvg_user_list, v_irn))
1321 plist_insert_back(u->dvg_user_list, v_irn);
1323 dvg_edge->src = u_irn;
1324 dvg_edge->tgt = v_irn;
1325 dvg_edge->next = NULL;
1330 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1332 /* add the edge to the DVG */
1333 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", u_irn, v_irn));
1334 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1340 if (rss->opts->dump_flags & RSS_DUMP_DVG)
1341 debug_vcg_dump_dvg(rss, dvg);
1345 * Updates the dvg structure when a serialization edge from src -> tgt is added.
1347 static void update_dvg(rss_t *rss, dvg_t *dvg, rss_irn_t *src, rss_irn_t *tgt) {
1350 rss_edge_t **arr = alloca(pset_count(dvg->edges) * sizeof(arr[0]));
1353 Add an edge from serialization target to serialization src:
1354 src cannot be alive with target
1356 add_dvg_edge(rss, dvg, tgt->irn, src->irn, 0);
1358 /* Add edges to src's descendants as well, they also getting serialized. */
1359 for (i = ARR_LEN_SAFE(src->descendants) - 1; i >= 0; --i) {
1360 add_dvg_edge(rss, dvg, tgt->irn, src->descendants[i], 1);
1363 /* We also have to add edges from targets predecessors in dvg */
1365 /* We cannot insert elements into set over which we iterate ... */
1366 foreach_pset(dvg->edges, edge) {
1367 if (edge->tgt == tgt->irn) {
1372 for (i = 0; i < idx; ++i) {
1374 add_dvg_edge(rss, dvg, edge->src, src->irn, 1);
1375 for (j = ARR_LEN_SAFE(src->descendants) - 1; j >= 0; --j) {
1376 add_dvg_edge(rss, dvg, edge->src, src->descendants[j], 1);
1383 * Accumulate all descendants for root into list.
1385 static void accumulate_dvg_descendant_values(rss_t *rss, rss_irn_t *root, plist_t *list) {
1386 if (plist_count(root->dvg_user_list) > 0) {
1387 plist_element_t *el;
1389 foreach_plist(root->dvg_user_list, el) {
1390 ir_node *v_irn = plist_element_get_value(el);
1391 rss_irn_t *v = get_rss_irn(rss, v_irn);
1393 /* add v as descendant */
1394 if (! plist_has_value(list, v_irn)) {
1395 plist_insert_back(list, v_irn);
1396 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG descendant %+F\n", v_irn));
1399 /* accumulate v's descendants */
1400 accumulate_dvg_descendant_values(rss, v, list);
1406 * Builds the list of potential killers for each node
1408 * Needs the descendant list for all user as sorted array.
1410 static void build_dvg_pkiller_list(rss_t *rss, dvg_t *dvg) {
1411 ir_nodeset_iterator_t iter;
1414 foreach_nodeset(&dvg->nodes, irn, iter) {
1415 rss_irn_t *node = get_rss_irn(rss, irn);
1416 plist_element_t *el, *el2;
1418 DBG((rss->dbg, DEBUG_DVG, "\t\tbuilding pkiller list for %+F\n", irn));
1420 /* check each user */
1421 foreach_plist(node->dvg_user_list, el) {
1422 ir_node *u_irn = plist_element_get_value(el);
1424 /* is the current user u_irn not a descendant of any other user -> pkiller */
1425 foreach_plist(node->dvg_user_list, el2) {
1426 ir_node *v_irn = plist_element_get_value(el2);
1427 rss_irn_t *v = get_rss_irn(rss, v_irn);
1430 ! BSEARCH_IRN_ARR(u_irn, v->dvg_desc) &&
1431 ! plist_has_value(node->dvg_pkiller_list, u_irn))
1433 plist_insert_back(node->dvg_pkiller_list, u_irn);
1434 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG pkiller %+F\n", u_irn));
1439 node->dvg_pkiller = build_sorted_array_from_list(node->dvg_pkiller_list, phase_obst(&rss->ph));
1443 if (firm_dbg_get_mask(rss->dbg) & DEBUG_DVG)
1444 debug_vcg_dump_dvg_pkiller(rss, dvg);
1451 * Compute the maximal antichain of the current DVG.
1452 * This is a reimplementation of the MAXIMAL_ANTI_CHAIN function
1453 * from the DDG library 1.1 (DAG.cpp).
1455 static ir_nodeset_t *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration) {
1456 int n = ir_nodeset_size(&dvg->nodes);
1457 int *assignment = alloca(n * sizeof(assignment[0]));
1458 int *assignment_rev = alloca(n * sizeof(assignment_rev[0]));
1459 int *idx_map = alloca(n * sizeof(idx_map[0]));
1460 hungarian_problem_t *bp;
1461 ir_nodeset_t *values, *temp;
1462 ir_nodeset_iterator_t iter;
1464 int i, j, cost, cur_chain, res;
1465 rss_edge_t *dvg_edge;
1467 #define MAP_IDX(irn) bsearch_for_index(get_irn_idx(irn), idx_map, n, 1)
1469 if (pset_count(dvg->edges) == 0)
1472 bp = hungarian_new(n, n, 1, HUNGARIAN_MATCH_NORMAL);
1475 At first, we build an index map for the nodes in the DVG,
1476 because we cannot use the irn idx therefore as the resulting
1477 bipartite data structure would have around 1.2GB.
1478 So we limit the size to the number of nodes we have in the DVG
1479 and build a sorted index map for their irn indices.
1482 foreach_ir_nodeset(&dvg->nodes, u_irn, iter) {
1483 idx_map[i++] = get_irn_idx(u_irn);
1485 qsort(idx_map, n, sizeof(idx_map[0]), cmp_int);
1487 foreach_pset(dvg->edges, dvg_edge) {
1488 int idx_u = MAP_IDX(dvg_edge->src);
1489 int idx_v = MAP_IDX(dvg_edge->tgt);
1491 /* add the entry to the bipartite data structure */
1492 hungarian_add(bp, idx_u, idx_v, 1);
1493 DBG((rss->dbg, LEVEL_3, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1494 idx_u, dvg_edge->src, idx_v, dvg_edge->tgt));
1498 Add a bipartite entry for each pair of nodes (u, v), where exists a
1499 path in the DVG from u to v, ie. connect all descendants(v) to v.
1500 desc_dvg(v) = accumulated descendants of all z in dvg_user_list(v)
1502 foreach_ir_nodeset(&dvg->nodes, u_irn, iter) {
1503 rss_irn_t *u = get_rss_irn(rss, u_irn);
1504 int idx_u_irn = MAP_IDX(u_irn);
1506 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG descendants of %+F:\n", u_irn));
1508 //plist_clear(u->dvg_desc_list);
1509 //accumulate_dvg_descendant_values(rss, u, u->dvg_desc_list);
1512 FIXME: The array is build on the phase obstack and we cannot free the data.
1513 So the array get as many times allocated as this function is called.
1516 /* build the sorted array for faster searches */
1517 //u->dvg_desc = build_sorted_array_from_list(u->dvg_desc_list, phase_obst(&rss->ph));
1519 DBG((rss->dbg, DEBUG_MAX_AC, "\t\tadding bipartite entries of %+F:\n", u_irn));
1521 /* add the bipartite entries for all descendants */
1522 for (i = ARR_LEN_SAFE(u->dvg_desc) - 1; i >= 0; --i) {
1523 rss_irn_t *desc = get_rss_irn(rss, u->dvg_desc[i]);
1524 int idx_desc = MAP_IDX(u->dvg_desc[i]);
1526 /* add the entry to the bipartite data structure */
1527 hungarian_add(bp, idx_u_irn, idx_desc, 1);
1528 DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1529 idx_u_irn, u_irn, idx_desc, u->dvg_desc[i]));
1536 /* We want maximum cardinality matching */
1537 hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);
1540 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG pkiller:\n"));
1541 /* beware: the following function needs the dvg_desc array */
1542 build_dvg_pkiller_list(rss, dvg);
1545 DBG((rss->dbg, LEVEL_2, "\t\tcomputing bipartite matching\n"));
1547 The maximum cardinality bipartite matching gives us the minimal
1548 chain partition, which corresponds to the maximum anti chains.
1550 res = hungarian_solve(bp, assignment, &cost, 1);
1551 assert(res == 0 && "Bipartite matching failed!");
1554 memset(assignment_rev, -1, n * sizeof(assignment_rev[0]));
1556 /* build the reverse assignment, ie. foreach i -> j, add j -> i */
1557 for (i = 0; i < n; ++i) {
1558 if (assignment[i] >= 0) {
1559 int j = assignment[i];
1560 assignment_rev[j] = i;
1564 DBG((rss->dbg, LEVEL_2, "\t\tgot assignment with cost %d\n", cost));
1565 DBG((rss->dbg, LEVEL_3, "\t\t\tassignment --- reverse assignment\n", cost));
1566 for (i = 0; i < n; ++i) {
1567 DBG((rss->dbg, LEVEL_3, "\t\t\t%3d -> %3d %3d -> %3d\n", i, assignment[i], i, assignment_rev[i]));
1570 values = xmalloc(sizeof(values[0]));
1571 ir_nodeset_init_size(values, 10);
1573 /* Construction of the minimal chain partition */
1574 for (j = 0; j < n; ++j) {
1575 /* check nodes, which did not occur as target */
1576 if (assignment_rev[j] == -1) {
1577 int xj = idx_map[j];
1578 ir_node *xj_irn = get_idx_irn(rss->irg, xj);
1579 rss_irn_t *xj_rss = get_rss_irn(rss, xj_irn);
1580 chain_t *c = obstack_alloc(phase_obst(&rss->ph), sizeof(*c));
1583 /* there was no source for j -> we have a source of a new chain */
1584 ir_nodeset_insert(values, xj_irn);
1586 c->elements = plist_obstack_new(phase_obst(&rss->ph));
1587 c->nr = cur_chain++;
1588 plist_insert_back(c->elements, xj_irn);
1592 DBG((rss->dbg, LEVEL_2, "\t\tstarting chain %d:\n", c->nr));
1593 DBG((rss->dbg, LEVEL_2, "\t\t\t%+F (%d)", xj_irn, j));
1595 /* follow chain, having j as source */
1597 while (assignment[source] >= 0) {
1598 int target = assignment[source];
1599 int irn_idx = idx_map[target];
1600 ir_node *irn = get_idx_irn(rss->irg, irn_idx);
1601 rss_irn_t *node = get_rss_irn(rss, irn);
1603 plist_insert_back(c->elements, irn);
1606 DB((rss->dbg, LEVEL_2, " -> %+F (%d)", irn, target));
1608 /* new source = last target */
1612 DB((rss->dbg, LEVEL_2, "\n"));
1617 Computing the maximal antichain: Select an element from each
1618 chain such, such it is parallel with the others.
1620 DBG((rss->dbg, LEVEL_1, "\t\tcomputing set of saturation values (MAX AC)\n"));
1621 DBG((rss->dbg, LEVEL_3, "\t\tstarting with:\n"));
1624 if (firm_dbg_get_mask(rss->dbg) & LEVEL_3)
1625 dump_nodeset(values, "\t\t\t");
1631 We need an explicit array for the values as
1632 we cannot iterate multiple times over the same
1633 set at the same time. :-(((((
1634 TODO Matze: now we can, so rewrite this...
1636 int n = ir_nodeset_size(values);
1638 ir_node **val_arr = NEW_ARR_F(ir_node *, n);
1640 foreach_ir_nodeset(values, u_irn, iter)
1641 val_arr[i++] = u_irn;
1644 ir_nodeset_destroy(temp);
1648 temp = xmalloc(sizeof(temp[0]));
1649 ir_nodeset_init_size(temp, 10);
1651 /* Select all nodes from current value set, having another node in the set as descendant. */
1652 for (i = 0; i < n; ++i) {
1653 rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1655 for (j = 0; j < n; ++j) {
1659 key.src = val_arr[i];
1660 key.tgt = val_arr[j];
1662 if (pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1663 /* v[j] is descendant of u -> remove u and break */
1664 ir_nodeset_insert(temp, (ir_node *) u->irn);
1665 ir_nodeset_remove(values, u->irn);
1667 DBG((rss->dbg, LEVEL_3, "\t\t\tremoving %+F from values, adding it to temp\n", u->irn));
1675 /* Try to insert the chain predecessor of all selected u's */
1676 foreach_ir_nodeset(temp, u_irn, iter) {
1677 rss_irn_t *u = get_rss_irn(rss, u_irn);
1678 chain_t *c = u->chain;
1679 plist_element_t *el = plist_find_value(c->elements, u_irn);
1681 assert(el && "Missing element in chain!");
1683 /* If u has predecessor in chain: insert the predecessor */
1684 if (el == plist_element_get_prev(el)) {
1685 ir_nodeset_insert(values, plist_element_get_value(el));
1686 DBG((rss->dbg, LEVEL_3, "\t\t\tadding %+F to values\n", plist_element_get_value(el)));
1692 } while (ir_nodeset_size(temp) > 0);
1694 DBG((rss->dbg, LEVEL_2, "\t\tfinal set:\n"));
1696 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2) {
1697 dump_nodeset(values, "\t\t\t");
1701 if (rss->opts->dump_flags & RSS_DUMP_MAXAC)
1702 debug_vcg_dump_pkg(rss, values, iteration);
1705 ir_nodeset_destroy(temp);
1715 * Computes the best serialization between two nodes of sat_vals.
1717 static serialization_t *compute_best_admissible_serialization(rss_t *rss, ir_nodeset_t *sat_vals, serialization_t *ser, int num_regs) {
1718 int n = ir_nodeset_size(sat_vals);
1719 int n_idx = ARR_LEN_SAFE(rss->idx_map);
1721 ir_node **val_arr = alloca(n * sizeof(val_arr[0]));
1722 bitset_t *bs_sv = bitset_alloca(n_idx);
1723 bitset_t *bs_vdesc = bitset_alloca(n_idx);
1724 bitset_t *bs_tmp = bitset_alloca(n_idx);
1725 bitset_t *bs_ukilldesc = bitset_alloca(n_idx);
1726 int best_benefit = INT_MAX;
1727 int best_omega2 = INT_MAX;
1728 int best_benefit_omega20 = INT_MAX;
1732 ir_nodeset_iterator_t iter;
1733 rss_edge_t min_benefit_edge = {NULL, NULL, NULL};
1734 rss_edge_t min_omega20_edge = {NULL, NULL, NULL};
1735 rss_irn_t *ser_u_omega1 = NULL, *ser_v_omega1 = NULL;
1736 rss_irn_t *ser_u_omega20 = NULL, *ser_v_omega20 = NULL;
1738 DBG((rss->dbg, LEVEL_1, "\tcomputing admissible serializations:\n"));
1741 We need an explicit array for the values as
1742 we cannot iterate multiple times over the same
1743 set at the same time. :-(((((
1746 foreach_ir_nodeset(sat_vals, irn, iter) {
1748 bitset_set(bs_sv, BLOCK_IDX_MAP(rss, irn));
1752 We build all admissible serializations and remember the best found so far.
1755 if v in pkiller(u): add edge from v to all other pkiller(u)
1756 else: for all vv in pkiller(u): add edge from v to vv if there exists no path from vv to v
1760 A node is unserializable if:
1761 - it has only one killer and this one is Sink
1762 - it kills no other values
1763 In this case there is no serialization which could
1764 reduce the registerpressure
1766 #define IS_UNSERIALIZABLE_NODE(rss_node) \
1769 (plist_count(rss_node->pkiller_list) == 1) && \
1770 is_Sink(rss_node->killer) && \
1771 (rss_node->kill_count == 0) \
1773 be_is_Barrier(rss_node->irn) || \
1774 be_is_Keep(rss_node->irn) \
1777 /* for all u in sat_vals */
1778 for (i = 0; i < n; ++i) {
1779 rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1780 plist_element_t *el;
1782 /* ignore nodes where serialization does not help */
1783 if (IS_UNSERIALIZABLE_NODE(u)) {
1784 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", u->irn));
1788 /* accumulate all descendants of all pkiller(u) */
1789 bitset_clear_all(bs_ukilldesc);
1790 foreach_plist(u->pkiller_list, el) {
1791 ir_node *irn = plist_element_get_value(el);
1792 rss_irn_t *node = get_rss_irn(rss, irn);
1795 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, irn));
1799 for (k = ARR_LEN_SAFE(node->descendants) - 1; k >= 0; --k) {
1800 if (! is_Sink(node->descendants[k]))
1801 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, node->descendants[k]));
1805 /* for all v in sat_vals */
1806 for (j = 0; j < n; ++j) {
1807 ir_node *v_irn = val_arr[j];
1808 rss_irn_t *v = get_rss_irn(rss, v_irn);
1809 unsigned v_height = get_irn_height(rss->h, v_irn);
1810 int omega1, omega2, is_pkiller;
1812 /* v cannot be serialized with itself
1813 * ignore nodes where serialization does not help */
1814 if (i == j || IS_UNSERIALIZABLE_NODE(v)) {
1815 #ifdef DEBUG_libfirm
1817 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", v->irn));
1822 /* get descendants of v */
1823 bitset_clear_all(bs_vdesc);
1824 bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v_irn));
1825 for (k = ARR_LEN_SAFE(v->descendants) - 1; k >= 0; --k) {
1826 if (! is_Sink(v->descendants[k]))
1827 bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v->descendants[k]));
1830 /* if v is in pkiller(u) */
1831 is_pkiller = plist_has_value(u->pkiller_list, v_irn);
1833 /* for all vv in pkiller(u) */
1834 foreach_plist(u->pkiller_list, el) {
1835 ir_node *vv_irn = plist_element_get_value(el);
1838 if (is_Sink(vv_irn) || is_cfop(vv_irn))
1842 add_edge = vv_irn != v_irn && skip_Proj(vv_irn) != skip_Proj(v_irn);
1844 add_edge = ! heights_reachable_in_block(rss->h, skip_Proj(vv_irn), skip_Proj(v_irn));
1847 As we add an edge from vv -> v, we have to make sure,
1848 that there exists no path from v to vv.
1852 unsigned vv_height = get_irn_height(rss->h, vv_irn);
1853 unsigned critical_path_cost;
1857 mu1 = | descendants(v) cut sat_vals |
1858 the number of saturating values which cannot
1859 be simultaneously alive with u
1861 bitset_copy(bs_tmp, bs_vdesc);
1862 mu1 = bitset_popcnt(bitset_and(bs_tmp, bs_sv));
1865 mu2 = | accum_desc_all_pkiller(u) without descendants(v) |
1868 bitset_copy(bs_tmp, bs_ukilldesc);
1869 mu2 = bitset_popcnt(bitset_andnot(bs_tmp, bs_vdesc));
1875 /* omega1 = mu1 - mu2 */
1881 /* omega2 = increase of critical path */
1882 critical_path_cost =
1883 v_height /* longest path from v to sink */
1884 + rss->max_height - vv_height /* longest path from source to vv */
1888 If critical_path_cost > max_height -> the new edge
1889 would increase the longest critical path by the difference.
1891 omega2 = critical_path_cost > rss->max_height ? critical_path_cost - rss->max_height : 0;
1893 /* this keeps track of the edge with the best benefit */
1894 if (omega1 >= num_regs - n && omega1 < best_benefit) {
1895 min_benefit_edge.src = v_irn;
1896 min_benefit_edge.tgt = vv_irn;
1901 best_benefit = omega1;
1902 ser->new_killer = is_pkiller;
1905 /* this keeps track of the edge with the best omega1 costs where omega2 == 0 */
1906 if (omega2 == 0 && omega1 >= num_regs - n && omega1 < best_benefit_omega20) {
1907 min_omega20_edge.src = v_irn;
1908 min_omega20_edge.tgt = vv_irn;
1913 best_benefit_omega20 = omega1;
1914 ser->new_killer = is_pkiller;
1917 best_omega2 = MIN(best_omega2, omega2);
1919 DBG((rss->dbg, LEVEL_2, "\t\tfound %+F -> %+F (w1 %d, w2 %d)\n",
1920 v_irn, vv_irn, omega1, omega2));
1922 } /* for all vv in pkiller(u) */
1923 } /* for all v in sat_vals */
1924 } /* for all u in sat_vals */
1929 if (best_omega2 == 0) {
1930 ser->u = ser_u_omega20;
1931 ser->v = ser_v_omega20;
1932 ser->edge->src = min_omega20_edge.src;
1933 ser->edge->tgt = min_omega20_edge.tgt;
1934 ser->omega1 = best_benefit_omega20;
1935 ser->omega2 = best_omega2;
1938 ser->u = ser_u_omega1;
1939 ser->v = ser_v_omega1;
1940 ser->edge->src = min_benefit_edge.src;
1941 ser->edge->tgt = min_benefit_edge.tgt;
1942 ser->omega1 = best_benefit;
1943 ser->omega2 = best_omega2;
1948 #undef IS_UNSERIALIZABLE_NODE
1952 * Perform the value serialization heuristic and add all
1953 * computed serialization edges as dependencies to the irg.
1955 static void perform_value_serialization_heuristic(rss_t *rss) {
1956 bitset_t *arch_nonign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1957 bitset_t *abi_ign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1958 unsigned available_regs, iteration;
1960 ir_nodeset_t *sat_vals;
1961 pset *ser_set = new_pset(cmp_rss_edges, 20);
1963 /* available_regs = R = |arch_non_ignore_regs cut ~abi_ignore_regs| */
1964 arch_put_non_ignore_regs(rss->arch_env, rss->cls, arch_nonign_bs);
1965 be_abi_put_ignore_regs(rss->abi, rss->cls, abi_ign_bs);
1966 bitset_andnot(arch_nonign_bs, abi_ign_bs);
1967 available_regs = bitset_popcnt(arch_nonign_bs);
1968 //num_live = pset_count(rss->live_block);
1969 //available_regs -= num_live < available_regs ? num_live : 0;
1971 DBG((rss->dbg, LEVEL_1, "\n\t#available regs: %d\n\n", available_regs));
1974 At first we need to compute the disjoint value DAG (DVG = {V, E_dv}).
1975 V = set of all nodes we are currently interested in
1976 E_dv = there is an edge from u to v iff v is a descendant of killer(u), forall u, v in V
1978 ir_nodeset_init_size(&dvg.nodes, plist_count(rss->nodes));
1979 dvg.edges = new_pset(cmp_rss_edges, plist_count(rss->nodes) * 5);
1980 compute_dvg(rss, &dvg);
1983 Then we perform the heuristic serialization algorithm
1984 on the DVG which gives us all necessary serialization
1987 DBG((rss->dbg, LEVEL_1, "\tcomputing maximal antichain:\n"));
1989 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
1990 while (sat_vals && (ir_nodeset_size(sat_vals) > available_regs)) {
1991 serialization_t *ser, best_ser;
1992 rss_edge_t *edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*edge));
1993 ir_node *dep_src, *dep_tgt;
1995 best_ser.edge = edge;
1996 ser = compute_best_admissible_serialization(rss, sat_vals, &best_ser, available_regs);
1998 DBG((rss->dbg, LEVEL_1, "\tcurrent register saturation %d, target %d\n", ir_nodeset_size(sat_vals), available_regs));
2001 DBG((rss->dbg, LEVEL_1, "\tno RS improving serialization found, breaking at iteration %d\n", iteration));
2005 /* Insert the serialization as dependency edge into the irg. */
2006 DBG((rss->dbg, LEVEL_1, "\tserializing %+F -> %+F with edge %+F -> %+F and cost %d, %d\n",
2007 ser->u->irn, ser->v->irn, ser->edge->src, ser->edge->tgt, ser->omega1, ser->omega2));
2009 if (pset_find(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge)))
2010 ir_printf("WARNING: serialization %+F -> %+F computed twice!\n", ser->edge->src, ser->edge->tgt);
2013 pset_insert(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge));
2015 /* update the dvg */
2016 update_dvg(rss, &dvg, ser->v, ser->u);
2017 update_dvg(rss, &dvg, get_rss_irn(rss, ser->edge->src), get_rss_irn(rss, ser->edge->tgt));
2018 if(sat_vals != NULL) {
2019 ir_nodeset_destroy(sat_vals);
2023 dep_src = skip_Proj(ser->edge->src);
2024 dep_tgt = ser->edge->tgt;
2025 add_irn_dep(dep_src, dep_tgt);
2027 /* Update descendants, consumer and pkillers of the target */
2028 update_node_info(rss, ser->edge->tgt, ser->edge->src);
2030 /* TODO: try to find a cheaper way for updating height information */
2031 rss->max_height = heights_recompute_block(rss->h, rss->block);
2033 /* Recompute the antichain for next serialization */
2034 DBG((rss->dbg, LEVEL_1, "\tre-computing maximal antichain:\n"));
2035 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
2038 ir_nodeset_destroy(&dvg.nodes);
2039 del_pset(dvg.edges);
2043 * Do initial calculations for a block.
2045 static void process_block(ir_node *block, void *env) {
2048 const ir_edge_t *edge;
2050 phase_init(&rss->ph, "rss block preprocessor", rss->irg, PHASE_DEFAULT_GROWTH, init_rss_irn, NULL);
2052 DBG((rss->dbg, LEVEL_1, "preprocessing block %+F\n", block));
2055 /* build an index map for all nodes in the current block */
2057 n = get_irn_n_edges(block);
2058 NEW_ARR_A(int *, rss->idx_map, n);
2059 foreach_out_edge(block, edge) {
2060 ir_node *irn = get_edge_src_irn(edge);
2061 rss->idx_map[i++] = get_irn_idx(irn);
2063 qsort(rss->idx_map, n, sizeof(rss->idx_map[0]), cmp_int);
2064 rss->max_height = heights_recompute_block(rss->h, block);
2066 /* loop over all register classes */
2067 for (i = arch_isa_get_n_reg_class(rss->arch_env->isa) - 1; i >= 0; --i) {
2068 const arch_register_class_t *cls = arch_isa_get_reg_class(rss->arch_env->isa, i);
2071 DBG((rss->dbg, LEVEL_1, "register class %s\n", arch_register_class_name(cls)));
2073 /* Get all live value at end of Block having current register class */
2074 ir_nodeset_init(&rss->live_block);
2075 be_liveness_end_of_block(rss->liveness, rss->arch_env, rss->cls, rss->block, &rss->live_block);
2077 /* reset the list of interesting nodes */
2078 plist_clear(rss->nodes);
2079 plist_insert_back(rss->nodes, _sink);
2081 /* collect all nodes having a certain register class */
2082 foreach_out_edge(block, edge) {
2083 ir_node *irn = get_edge_src_irn(edge);
2084 ir_mode *mode = get_irn_mode(irn);
2088 - mode_T nodes (the projs are asked)
2089 - mode_X nodes (control flow nodes are always scheduled last)
2090 - Keeps (they are always immediately scheduled)
2091 - Phi (same as Keep)
2093 if (mode == mode_T || mode == mode_X || is_Phi(irn))
2097 In case of a proj, we skip
2098 - Barrier (they are a Barrier :)
2100 - the Proj itself, as it's scheduled always with it's super node
2103 ir_node *pred = get_Proj_pred(irn);
2104 if (be_is_Barrier(pred) || is_Start(pred))
2108 /* calculate the descendants and consumer for each node in the block */
2109 collect_node_info(rss, skip_Proj(irn));
2111 if (be_is_Keep(irn))
2114 if (! arch_irn_is(rss->arch_env, irn, ignore) && arch_get_irn_reg_class(rss->arch_env, irn, -1) == cls) {
2115 plist_insert_back(rss->nodes, skip_Proj(irn));
2120 /* compute the potential killing set PK(G) */
2121 compute_pkill_set(rss);
2123 /* compute the killing function k* */
2124 compute_killing_function(rss);
2127 Compute the heuristic value serialization and
2128 add the necessary dependencies to the irg.
2130 perform_value_serialization_heuristic(rss);
2132 ir_nodeset_destroy(&rss->live_block);
2135 phase_free(&rss->ph);
2139 * Register the options.
2141 void be_init_schedrss(void) {
2142 lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
2143 lc_opt_entry_t *sched_grp = lc_opt_get_grp(be_grp, "sched");
2144 lc_opt_entry_t *rss_grp = lc_opt_get_grp(sched_grp, "rss");
2146 lc_opt_add_table(rss_grp, rss_option_table);
2149 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_schedrss);
2152 * Preprocess the irg for scheduling.
2154 void rss_schedule_preparation(be_irg_t *birg) {
2155 ir_graph *irg = be_get_birg_irg(birg);
2158 FIRM_DBG_REGISTER(rss.dbg, "firm.be.sched.rss");
2160 //firm_dbg_set_mask(rss.dbg, LEVEL_1 | LEVEL_2 | LEVEL_3);
2162 init_rss_special_nodes(irg);
2165 rss.arch_env = be_get_birg_arch_env(birg);
2166 rss.abi = birg->abi;
2167 rss.h = heights_new(irg);
2168 rss.nodes = plist_new();
2169 rss.opts = &rss_options;
2170 rss.liveness = be_liveness(birg);
2171 be_liveness_assure_sets(rss.liveness);
2172 irg_block_walk_graph(irg, NULL, process_block, &rss);
2173 heights_free(rss.h);
2174 plist_free(rss.nodes);
2175 be_liveness_free(rss.liveness);
2177 if (birg->main_env->options->dump_flags & DUMP_SCHED)
2178 be_dump(rss.irg, "-rss", dump_ir_block_graph);