2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Implementation of a register saturating list scheduler.
23 * @author Christian Wuerdig
27 * Implementation of a register saturating list scheduler
28 * as described in: Sid-Ahmed-Ali Touati
29 * Register Saturation in Superscalar and VLIW Codes
40 #include "irgraph_t.h"
42 #include "iredges_t.h"
44 #include "irphase_t.h"
50 #include "bipartite.h"
51 #include "hungarian.h"
60 #include "besched_t.h"
64 #include "lc_opts_enum.h"
67 #define ARR_LEN_SAFE(arr) ((arr) != NULL ? ARR_LEN((arr)) : 0)
69 #define HASH_RSS_EDGE(edge) ((get_irn_node_nr((edge)->src) << 16) | (get_irn_node_nr((edge)->tgt) & 0xFFFF))
70 #define BSEARCH_IRN_ARR(val, arr) \
71 bsearch(&(val), (arr), ARR_LEN_SAFE((arr)), sizeof((arr)[0]), cmp_irn_idx)
73 #define BLOCK_IDX_MAP(rss, irn) bsearch_for_index(get_irn_idx((irn)), (rss)->idx_map, ARR_LEN_SAFE((rss)->idx_map), 1)
76 typedef struct _rss_opts_t {
80 /* Represents a child with associated costs */
81 typedef struct _child {
86 /* We need edges for several purposes. */
87 typedef struct _rss_edge {
93 /* Represents a connected bipartite component. */
95 ir_nodeset_t parents; /**< = S a set of value producers */
96 ir_nodeset_t children; /**< = T a set of value consumers */
97 pset *kill_edges; /**< = E a set of edges (t in T, s in S) such as each s in S gets killed by at least one t in T */
98 int nr; /**< a deterministic index for set insertion (used as hash) */
101 /* Represents a disjoint value DAG. */
102 typedef struct _dvg {
107 /* Represents a chain of nodes. */
108 typedef struct _chain {
109 plist_t *elements; /**< List of chain elements */
110 int nr; /**< a deterministic index for set insertion (used as hash) */
113 typedef struct _rss_irn {
114 plist_t *consumer_list; /**< List of consumers */
115 const ir_node **consumer; /**< Sorted consumer array (needed for faster access) */
117 plist_t *parent_list; /**< List of parents */
118 plist_t *pkiller_list; /**< List of potential killers */
120 plist_t *descendant_list; /**< List of descendants */
121 const ir_node **descendants; /**< Sorted descendant array (needed for faster access) */
123 const ir_node *killer; /**< The selected unique killer */
124 const ir_node *irn; /**< The corresponding firm node to this rss_irn */
126 chain_t *chain; /**< The chain, this node is associated to */
128 unsigned desc_walk; /**< visited flag for collecting descendants */
129 unsigned kill_count; /**< number of nodes killed by this one */
131 unsigned live_out : 1; /**< irn has consumers outside of it's block */
132 unsigned visited : 1; /**< visited flag for bipartite decomposition */
133 unsigned havecons : 1; /**< visited flag collect consumer */
134 unsigned handled : 1; /**< flag indicating whether or not the list structures have been build */
135 unsigned dumped : 1; /**< flag indication whether or not this node was dumped */
138 /* Represents a serialization edge with associated costs. */
139 typedef struct _serialization {
140 rss_irn_t *u; /* the top node */
141 rss_irn_t *v; /* the node about to be serialized after u */
142 rss_edge_t *edge; /* the edge selected for the serialization */
143 int omega1; /* estimated: available regs - RS reduction */
144 int omega2; /* increase in critical path length */
148 typedef struct _rss {
149 ir_phase ph; /**< Phase to hold some data */
150 heights_t *h; /**< The current height object */
151 ir_graph *irg; /**< The irg to preprocess */
152 plist_t *nodes; /**< The list of interesting nodes */
153 const arch_env_t *arch_env; /**< The architecture environment */
154 be_abi_irg_t *abi; /**< The abi for this irg */
155 pset *cbc_set; /**< A set of connected bipartite components */
156 ir_node *block; /**< The current block in progress. */
157 int *idx_map; /**< Mapping irn indices to per block indices */
158 unsigned max_height; /**< maximum height in the current block */
159 rss_opts_t *opts; /**< The options */
160 be_lv_t *liveness; /**< The liveness information for this irg */
161 ir_nodeset_t live_block; /**< Values alive at end of block */
162 const arch_register_class_t *cls; /**< The current register class */
163 DEBUG_ONLY(firm_dbg_module_t *dbg);
166 #define get_rss_irn(rss, irn) (phase_get_or_set_irn_data(&rss->ph, irn))
169 * We need some special nodes:
170 * a source and a sink for all live-in and live-out values of a block
178 /** The opcode of the rss_Source node once allocated. */
179 static ir_op *op_rss_Source;
180 /** The opcode of the rss_Sink node once allocated. */
181 static ir_op *op_rss_Sink;
183 /** The rss_Source node of the current graph. */
184 static ir_node *_source = NULL;
185 /** The rss_Sink node of the current graph. */
186 static ir_node *_sink = NULL;
188 #define is_Source(irn) ((irn) == _source)
189 #define is_Sink(irn) ((irn) == _sink)
193 RSS_DUMP_CBC = 1 << 0,
194 RSS_DUMP_PKG = 1 << 1,
195 RSS_DUMP_KILL = 1 << 2,
196 RSS_DUMP_DVG = 1 << 3,
197 RSS_DUMP_MAXAC = 1 << 4,
198 RSS_DUMP_ALL = (RSS_DUMP_MAXAC << 1) - 1,
201 static rss_opts_t rss_options = {
205 static const lc_opt_enum_int_items_t dump_items[] = {
206 { "none", RSS_DUMP_NONE },
207 { "cbc", RSS_DUMP_CBC },
208 { "pkg", RSS_DUMP_PKG },
209 { "kill", RSS_DUMP_KILL },
210 { "dvg", RSS_DUMP_DVG },
211 { "maxac", RSS_DUMP_MAXAC },
212 { "all", RSS_DUMP_ALL },
216 static lc_opt_enum_int_var_t dump_var = {
217 &rss_options.dump_flags, dump_items
220 static const lc_opt_table_entry_t rss_option_table[] = {
221 LC_OPT_ENT_ENUM_MASK("dump", "dump phases", &dump_var),
225 /******************************************************************************
227 * | | | | / _| | | (_)
228 * | |__ ___| |_ __ ___ _ __ | |_ _ _ _ __ ___| |_ _ ___ _ __ ___
229 * | '_ \ / _ \ | '_ \ / _ \ '__| | _| | | | '_ \ / __| __| |/ _ \| '_ \/ __|
230 * | | | | __/ | |_) | __/ | | | | |_| | | | | (__| |_| | (_) | | | \__ \
231 * |_| |_|\___|_| .__/ \___|_| |_| \__,_|_| |_|\___|\__|_|\___/|_| |_|___/
234 ******************************************************************************/
237 * Acquire opcodes if needed and create source and sink nodes.
239 static void init_rss_special_nodes(ir_graph *irg) {
242 if (op_rss_Source == NULL) {
243 int iro_rss_base = get_next_ir_opcodes(iro_rss_last);
244 op_rss_Source = new_ir_op(iro_rss_base + iro_rss_Source, "rss_Source", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
245 op_rss_Sink = new_ir_op(iro_rss_base + iro_rss_Sink, "rss_Sink", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
247 block = get_irg_start_block(irg);
248 _source = new_ir_node(NULL, irg, block, op_rss_Source, mode_ANY, 0, NULL);
249 _sink = new_ir_node(NULL, irg, block, op_rss_Sink, mode_ANY, 0, NULL);
252 static int cmp_int(const void *a, const void *b) {
256 return QSORT_CMP(*i1, *i2);
259 static int cmp_child_costs(const void *a, const void *b) {
260 const child_t *c1 = a;
261 const child_t *c2 = b;
263 return QSORT_CMP(c1->cost, c2->cost);
266 static int cmp_irn_idx(const void *a, const void *b) {
267 const ir_node *n1 = *(ir_node **)a;
268 const ir_node *n2 = *(ir_node **)b;
270 return QSORT_CMP(get_irn_idx(n1), get_irn_idx(n2));
273 static int cmp_rss_edges(const void *a, const void *b) {
274 const rss_edge_t *e1 = a;
275 const rss_edge_t *e2 = b;
277 return (e1->src != e2->src) || (e1->tgt != e2->tgt);
280 static int bsearch_for_index(int key, int *arr, size_t len, int force) {
284 while (right >= left) {
285 int idx = (left + right) / 2;
289 else if (key > arr[idx])
296 assert(0 && "Something is wrong, key not found.");
300 static const ir_node **build_sorted_array_from_list(plist_t *irn_list, struct obstack *obst) {
303 int len = plist_count(irn_list);
304 const ir_node **arr = (const ir_node**)NEW_ARR_D(ir_node*, obst, len);
306 /* copy the list into the array */
307 foreach_plist(irn_list, el) {
308 arr[i++] = plist_element_get_value(el);
311 /* sort the array by node index */
312 /* HACK cast for MSVC */
313 qsort((void*)arr, len, sizeof(arr[0]), cmp_irn_idx);
318 /*****************************************************
321 * __| | ___| |__ _ _ __ _ __ _ _ _ __ __ _
322 * / _` |/ _ \ '_ \| | | |/ _` |/ _` | | '_ \ / _` |
323 * | (_| | __/ |_) | |_| | (_| | (_| | | | | | (_| |
324 * \__,_|\___|_.__/ \__,_|\__, |\__, |_|_| |_|\__, |
328 *****************************************************/
331 static void dump_nodeset(ir_nodeset_t *ns, const char *prefix) {
332 ir_nodeset_iterator_t iter;
335 ir_nodeset_iterator_init(&iter, ns);
336 while( (irn = ir_nodeset_iterator_next(&iter)) != NULL ) {
337 ir_fprintf(stderr, "%s%+F\n", prefix, irn);
342 static void build_file_name(rss_t *rss, const char *suffix, size_t suf_len, char *buf, size_t len) {
343 const char *irg_name;
346 irg_name = get_entity_name(get_irg_entity(rss->irg));
347 snprintf(buf, len - suf_len, "%s-%s-block-%ld",
348 irg_name, arch_register_class_name(rss->cls), get_irn_node_nr(rss->block));
352 /* Dumps all collected bipartite components of current irg as vcg. */
353 static void debug_vcg_dump_bipartite(rss_t *rss) {
357 static const char suffix[] = "-RSS-CBC.vcg";
359 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
360 f = fopen(file_name, "w");
362 ir_fprintf(f, "graph: { title: \"connected bipartite component graph of %+F\"\n", rss->irg);
363 fprintf(f, "display_edge_labels: no\n");
364 fprintf(f, "layoutalgorithm: mindepth\n");
365 fprintf(f, "manhattan_edges: yes\n\n");
367 foreach_pset(rss->cbc_set, cbc) {
368 ir_nodeset_iterator_t iter;
372 fprintf(f, "graph: { titel: \"cbc %d\" label: \"cbc %d\" status:clustered color:yellow\n", cbc->nr, cbc->nr);
373 foreach_ir_nodeset(&cbc->parents, n, iter) {
374 ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
376 foreach_ir_nodeset(&cbc->children, n, iter) {
377 ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
379 foreach_pset(cbc->kill_edges, ke) {
380 ir_fprintf(f, "edge: { sourcename: \"n%d_%d\" targetname: \"n%d_%d\" }\n",
381 get_irn_node_nr(ke->src), cbc->nr, get_irn_node_nr(ke->tgt), cbc->nr);
389 /* Dump the computed killing function as vcg. */
390 static void debug_vcg_dump_kill(rss_t *rss) {
394 static const char suffix[] = "-RSS-KILL.vcg";
396 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
397 f = fopen(file_name, "w");
399 ir_fprintf(f, "graph: { title: \"computed kill graph of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
400 fprintf(f, "display_edge_labels: no\n");
401 fprintf(f, "layoutalgorithm: mindepth\n");
402 fprintf(f, "manhattan_edges: yes\n\n");
405 /* reset dump_flag */
407 foreach_phase_irn(&rss->ph, irn) {
408 rss_irn_t *node = get_rss_irn(rss, irn);
413 /* dump all nodes and their killers */
414 foreach_plist(rss->nodes, el) {
415 ir_node *irn = plist_element_get_value(el);
416 rss_irn_t *rirn = get_rss_irn(rss, irn);
417 rss_irn_t *pk_rirn = get_rss_irn(rss, rirn->killer);
419 if (! rirn->dumped) {
420 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
424 if (! pk_rirn->dumped) {
425 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(rirn->killer), rirn->killer);
429 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
430 get_irn_node_nr(rirn->killer), get_irn_node_nr(irn));
437 /* Dumps the potential killing DAG (PKG) as vcg. */
438 static void debug_vcg_dump_pkg(rss_t *rss, ir_nodeset_t *max_ac, int iteration) {
441 char *suffix = alloca(32);
442 static const char suffix1[] = "-RSS-PKG.vcg";
443 static const char suffix2[] = "-RSS-PKG-MAXAC.vcg";
447 snprintf(suffix, 32, "%s", suffix1);
450 snprintf(suffix, 32, "-%02d%s", iteration, suffix2);
453 build_file_name(rss, suffix, strlen(suffix) + 1, file_name, sizeof(file_name));
454 f = fopen(file_name, "w");
456 ir_fprintf(f, "graph: { title: \"potential killing DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
457 fprintf(f, "display_edge_labels: no\n");
458 fprintf(f, "layoutalgorithm: mindepth\n");
459 fprintf(f, "manhattan_edges: yes\n\n");
462 /* reset dump_flag */
464 foreach_phase_irn(&rss->ph, irn) {
465 rss_irn_t *node = get_rss_irn(rss, irn);
470 foreach_plist(rss->nodes, el) {
471 ir_node *irn = plist_element_get_value(el);
472 rss_irn_t *rirn = get_rss_irn(rss, irn);
474 plist_element_t *k_el;
476 /* dump selected saturating values in yellow */
477 if (max_ac && ir_nodeset_contains(max_ac, irn))
480 if (! rirn->dumped) {
482 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F c%d\" %s }\n", get_irn_node_nr(irn), irn, rirn->chain->nr, c1);
484 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(irn), irn, c1);
488 foreach_plist(rirn->pkiller_list, k_el) {
489 ir_node *pkiller = plist_element_get_value(k_el);
490 rss_irn_t *pk_rirn = get_rss_irn(rss, pkiller);
493 if (max_ac && ir_nodeset_contains(max_ac, pkiller))
496 if (! pk_rirn->dumped) {
498 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F c%d\" %s }\n", get_irn_node_nr(pkiller), pkiller, pk_rirn->chain->nr, c2);
500 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(pkiller), pkiller, c2);
503 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
504 get_irn_node_nr(pkiller), get_irn_node_nr(irn));
511 /* Dumps the disjoint value DAG (DVG) as vcg. */
512 static void debug_vcg_dump_dvg(rss_t *rss, dvg_t *dvg) {
513 static const char suffix[] = "-RSS-DVG.vcg";
516 ir_nodeset_iterator_t iter;
520 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
521 f = fopen(file_name, "w");
523 ir_fprintf(f, "graph: { title: \"disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
524 fprintf(f, "display_edge_labels: no\n");
525 fprintf(f, "layoutalgorithm: mindepth\n");
526 fprintf(f, "manhattan_edges: yes\n\n");
529 foreach_ir_nodeset(&dvg->nodes, irn, iter) {
530 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
534 foreach_pset(dvg->edges, edge) {
535 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
536 get_irn_node_nr(edge->src), get_irn_node_nr(edge->tgt));
544 /* Dumps the PKG(DVG). */
545 static void debug_vcg_dump_dvg_pkiller(rss_t *rss, dvg_t *dvg) {
546 static const char suffix[] = "-RSS-DVG-PKG.vcg";
549 ir_nodeset_iterator_t iter;
552 build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
553 f = fopen(file_name, "w");
555 ir_fprintf(f, "graph: { title: \"PKG of disjoint value DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block));
556 fprintf(f, "display_edge_labels: no\n");
557 fprintf(f, "layoutalgorithm: mindepth\n");
558 fprintf(f, "manhattan_edges: yes\n\n");
561 foreach_ir_nodeset(&dvg->nodes, irn, iter) {
562 ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
566 foreach_ir_nodeset(&dvg->nodes, irn, iter) {
567 rss_irn_t *node = get_rss_irn(rss, irn);
570 foreach_plist(node->dvg_pkiller_list, el) {
571 ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
572 get_irn_node_nr(plist_element_get_value(el)), get_irn_node_nr(irn));
582 * In case there is no rss information for irn, initialize it.
584 static void *init_rss_irn(ir_phase *ph, const ir_node *irn, void *old) {
585 rss_irn_t *res = old ? old : phase_alloc(ph, sizeof(res[0]));
587 res->descendant_list = plist_obstack_new(phase_obst(ph));
588 res->descendants = NULL;
590 res->consumer_list = plist_obstack_new(phase_obst(ph));
591 res->consumer = NULL;
593 res->pkiller_list = plist_obstack_new(phase_obst(ph));
595 res->parent_list = plist_obstack_new(phase_obst(ph));
613 * Collect all nodes data dependent on current node.
615 static void collect_descendants(rss_t *rss, rss_irn_t *rirn, ir_node *irn, int *got_sink, unsigned cur_desc_walk) {
616 const ir_edge_t *edge;
617 rss_irn_t *cur_node = get_rss_irn(rss, irn);
618 ir_node *block = rss->block;
619 ir_edge_kind_t ekind[2] = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
622 if (cur_node->desc_walk >= cur_desc_walk)
624 cur_node->desc_walk = cur_desc_walk;
626 /* Stop at Barriers */
627 if (be_is_Barrier(irn))
630 /* loop over normal and dependency edges */
631 for (i = 0; i < 2; ++i) {
632 foreach_out_edge_kind(irn, edge, ekind[i]) {
633 ir_node *user = get_edge_src_irn(edge);
635 /* skip ignore nodes as they do not really contribute to register pressure */
636 if (arch_irn_is(rss->arch_env, user, ignore))
640 (a) mode_X means Jump -> out_edge
641 (b) Phi as user of a normal node -> out-edge
643 if (get_irn_mode(user) == mode_X || is_Phi(user)) {
652 //if (arch_get_irn_reg_class(rss->arch_env, user, -1) == rss->cls)
653 collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
656 /* check if user lives in block and is not a control flow node */
657 if (get_nodes_block(user) == block) {
658 if (! plist_has_value(rirn->descendant_list, user)) {
659 plist_insert_back(rirn->descendant_list, user);
660 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", user));
662 collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
664 else if (! *got_sink) {
666 /* user lives out of block: add sink as descendant if not already done */
667 plist_insert_back(rirn->descendant_list, _sink);
669 DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", _sink));
677 * Handles a single consumer.
679 static void collect_single_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *consumer, int *got_sink) {
680 ir_node *block = rss->block;
682 assert(! is_Proj(consumer) && "Cannot handle Projs");
684 if (! is_Phi(consumer) && ! is_Block(consumer) && get_nodes_block(consumer) == block) {
685 if (! arch_irn_is(rss->arch_env, consumer, ignore) && ! plist_has_value(rss_irn->consumer_list, consumer)) {
686 plist_insert_back(rss_irn->consumer_list, consumer);
687 DBG((rss->dbg, LEVEL_2, "\t\tconsumer %+F\n", consumer));
691 rss_irn->live_out = 1;
692 DBG((rss->dbg, LEVEL_2, "\t\tlive out %+F", consumer));
694 plist_insert_back(rss_irn->consumer_list, _sink);
696 DB((rss->dbg, LEVEL_2, ", %+F added instead", _sink));
698 DB((rss->dbg, LEVEL_2, "\n"));
703 * Collect all nodes consuming the value(s) produced by current node.
705 static void collect_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *irn, int *got_sink) {
706 const ir_edge_t *edge;
708 ir_edge_kind_t ekind[2] = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
709 rss_irn_t *cur_node = get_rss_irn(rss, irn);
711 if (cur_node->havecons)
713 cur_node->havecons = 1;
715 for (i = 0; i < 2; ++i) {
716 foreach_out_edge_kind(irn, edge, ekind[i]) {
717 ir_node *consumer = get_edge_src_irn(edge);
719 if (is_Proj(consumer)) {
720 //if (arch_get_irn_reg_class(rss->arch_env, consumer, -1) == rss->cls)
721 collect_consumer(rss, rss_irn, consumer, got_sink);
724 collect_single_consumer(rss, rss_irn, consumer, got_sink);
730 * Collects all consumer and descendant of a irn.
732 static void collect_node_info(rss_t *rss, ir_node *irn) {
733 static unsigned cur_desc_walk = 0;
734 rss_irn_t *rss_irn = get_rss_irn(rss, irn);
737 assert(! is_Proj(irn) && "Cannot handle Projs.");
739 /* check if node info is already available */
740 if (rss_irn->handled)
742 //phase_reinit_single_irn_data(&rss->ph, irn);
744 DBG((rss->dbg, LEVEL_1, "\tcomputing consumers of %+F:\n", irn));
746 /* collect all consumer */
748 collect_consumer(rss, rss_irn, irn, &got_sink);
750 /* build sorted consumer array */
751 rss_irn->consumer = build_sorted_array_from_list(rss_irn->consumer_list, phase_obst(&rss->ph));
753 DBG((rss->dbg, LEVEL_1, "\tcompute descendants of %+F:\n", irn));
755 /* collect descendants */
757 collect_descendants(rss, rss_irn, irn, &got_sink, ++cur_desc_walk);
759 /* build sorted descendant array */
760 rss_irn->descendants = build_sorted_array_from_list(rss_irn->descendant_list, phase_obst(&rss->ph));
762 rss_irn->handled = 1;
766 * Checks if v is a potential killer of u.
767 * v is in pkill(u) iff descendants(v) cut consumer(u) is v
769 * @param rss The rss object
770 * @param v The node to check for killer
771 * @param u The potentially killed value
772 * @return 1 if v is in pkill(u), 0 otherwise
774 static int is_potential_killer(rss_t *rss, rss_irn_t *v, rss_irn_t *u) {
780 assert(is_Sink(v->irn) || ((plist_count(v->descendant_list) > 0 && v->descendants) || 1));
781 assert(is_Sink(u->irn) || ((plist_count(u->consumer_list) > 0 && u->consumer) || 1));
783 /* as we loop over the list: loop over the shorter one */
784 if (plist_count(v->descendant_list) > plist_count(u->consumer_list)) {
785 list = u->consumer_list;
786 arr = v->descendants;
789 list = v->descendant_list;
793 /* for each list element: try to find element in array */
794 foreach_plist(list, el) {
795 ir_node *irn = plist_element_get_value(el);
796 ir_node *match = BSEARCH_IRN_ARR(irn, arr);
798 if (match && match != irn)
806 * Update descendants, consumer and pkiller list for the given irn.
808 static void update_node_info(rss_t *rss, ir_node *irn, ir_node *pk_irn) {
809 rss_irn_t *node = get_rss_irn(rss, irn);
810 rss_irn_t *pkiller = get_rss_irn(rss, pk_irn);
812 /* add consumer and rebuild the consumer array */
813 if (! plist_has_value(node->consumer_list, pk_irn)) {
814 plist_insert_back(node->consumer_list, pk_irn);
815 node->consumer = build_sorted_array_from_list(node->consumer_list, phase_obst(&rss->ph));
818 /* add pk_irn as descendant, add it's descendants to irn's and rebuild array */
819 if (! plist_has_value(node->descendant_list, pk_irn)) {
822 plist_insert_back(node->descendant_list, pk_irn);
824 foreach_plist(pkiller->descendant_list, el) {
825 ir_node *desc = plist_element_get_value(el);
827 if (! plist_has_value(node->descendant_list, desc))
828 plist_insert_back(node->descendant_list, desc);
831 node->descendants = build_sorted_array_from_list(node->descendant_list, phase_obst(&rss->ph));
837 * Compute the potential killing set PK.
839 static void compute_pkill_set(rss_t *rss) {
840 plist_element_t *u_el, *v_el;
842 foreach_plist(rss->nodes, u_el) {
843 ir_node *u_irn = plist_element_get_value(u_el);
844 rss_irn_t *u = get_rss_irn(rss, u_irn);
846 DBG((rss->dbg, LEVEL_1, "\tcomputing potential killers of %+F:\n", u_irn));
848 /* check each consumer if it is a potential killer */
849 foreach_plist(u->consumer_list, v_el) {
850 ir_node *v_irn = plist_element_get_value(v_el);
851 rss_irn_t *v = get_rss_irn(rss, v_irn);
853 /* check, if v is a potential killer of u */
854 if (is_potential_killer(rss, v, u)) {
855 if (! plist_has_value(u->pkiller_list, v_irn))
856 plist_insert_back(u->pkiller_list, v_irn);
858 DBG((rss->dbg, LEVEL_2, "\t\tpotential killer %+F\n", v_irn));
865 if (rss->opts->dump_flags & RSS_DUMP_PKG)
866 debug_vcg_dump_pkg(rss, NULL, 0);
870 * Build set of killing edges (from values to their potential killers)
872 static void build_kill_edges(rss_t *rss, pset *epk) {
873 plist_element_t *el, *k_el;
875 foreach_plist(rss->nodes, el) {
876 ir_node *irn = plist_element_get_value(el);
877 rss_irn_t *rirn = get_rss_irn(rss, irn);
879 DBG((rss->dbg, LEVEL_3, "\t\tbuilding kill edges for %+F:\n", irn));
881 foreach_plist(rirn->pkiller_list, k_el) {
882 ir_node *pkiller = plist_element_get_value(k_el);
883 rss_edge_t *ke = obstack_alloc(phase_obst(&rss->ph), sizeof(*ke));
889 DBG((rss->dbg, LEVEL_3, "\t\t\ttarget %+F\n", pkiller));
891 pset_insert(epk, ke, HASH_RSS_EDGE(ke));
897 /* print the given cbc for debugging purpose */
898 static void debug_print_cbc(firm_dbg_module_t *mod, cbc_t *cbc) {
899 ir_nodeset_iterator_t iter;
903 DBG((mod, LEVEL_3, "\t\tS = set of parents:\n"));
904 foreach_ir_nodeset(&cbc->parents, n, iter) {
905 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
907 DBG((mod, LEVEL_3, "\t\tT = set of children:\n"));
908 foreach_ir_nodeset(&cbc->children, n, iter) {
909 DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
911 DBG((mod, LEVEL_3, "\t\tE = Edges from producers to consumers\n"));
912 foreach_pset(cbc->kill_edges, ke) {
913 DBG((mod, LEVEL_3, "\t\t\t%+F -> %+F\n", ke->src, ke->tgt));
919 * Construct the bipartite decomposition.
920 * Sid-Ahmed-Ali Touati, Phd Thesis
921 * Register Pressure in Instruction Level Parallelism, p. 71
923 static void compute_bipartite_decomposition(rss_t *rss) {
924 pset *epk = new_pset(cmp_rss_edges, 10);
929 DBG((rss->dbg, LEVEL_1, "\tcomputing bipartite decomposition:\n"));
931 build_kill_edges(rss, epk);
933 foreach_plist(rss->nodes, el) {
934 ir_node *u_irn = plist_element_get_value(el);
935 rss_irn_t *u = get_rss_irn(rss, u_irn);
941 plist_element_t *el2;
943 rss_edge_t *kedge_root = NULL;
944 ir_node *t_irn, *s_irn;
945 ir_nodeset_iterator_t iter;
947 if (u->visited || u_irn == _sink)
950 DBG((rss->dbg, LEVEL_2, "\t\t%+F choosen:\n", u_irn));
952 cbc = obstack_alloc(phase_obst(&rss->ph), sizeof(*cbc));
955 /* initialize S_cb */
956 ir_nodeset_init_size(&cbc->parents, 5);
957 ir_nodeset_insert(&cbc->parents, u_irn);
958 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (init)\n", u_irn));
961 cbc->kill_edges = new_pset(cmp_rss_edges, 5);
963 /* each parent gets killed by at least one value from children */
965 /* T_cb = PK_successors(u) */
966 ir_nodeset_init_size(&cbc->children, 5);
967 foreach_plist(u->pkiller_list, el2) {
968 ir_nodeset_insert(&cbc->children, plist_element_get_value(el2));
969 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (init)\n", plist_element_get_value(el2)));
973 Now: insert the parents of all children into the parent set
974 and insert the children of all parents into the children set
975 until the sets don't change any more
977 while (p_change || c_change) {
978 ir_nodeset_iterator_t iter;
979 p_change = c_change = 0;
981 /* accumulate parents */
982 foreach_ir_nodeset(&cbc->children, t_irn, iter) {
983 foreach_pset(epk, k_edge) {
984 ir_node *val = k_edge->src;
986 if (k_edge->tgt == t_irn && ! ir_nodeset_contains(&cbc->parents, val)) {
987 ir_nodeset_insert(&cbc->parents, val);
989 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (killed by %+F)\n", val, t_irn));
994 /* accumulate children */
995 foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
996 foreach_pset(epk, k_edge) {
997 ir_node *val = k_edge->tgt;
999 if (k_edge->src == s_irn && ! ir_nodeset_contains(&cbc->children, val)) {
1000 ir_nodeset_insert(&cbc->children, val);
1002 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (kills %+F)\n", val, s_irn));
1008 /* mark all parent values as visited */
1009 foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
1010 rss_irn_t *s = get_rss_irn(rss, s_irn);
1012 /* assure bipartite property */
1014 if (ir_nodeset_contains(&cbc->children, s_irn)) {
1015 ir_nodeset_remove(&cbc->children, s_irn);
1016 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F removed from to children\n", s_irn));
1022 foreach_pset(epk, k_edge) {
1023 if (ir_nodeset_contains(&cbc->parents, k_edge->src) &&
1024 ir_nodeset_contains(&cbc->children, k_edge->tgt)) {
1025 pset_insert(cbc->kill_edges, k_edge, HASH_RSS_EDGE(k_edge));
1027 Link all k_edges which are about to be removed together.
1028 Beware: pset_remove kills the iterator.
1030 k_edge->next = kedge_root;
1031 kedge_root = k_edge;
1035 /* remove all linked k_edges */
1036 for (; kedge_root; kedge_root = kedge_root->next) {
1037 pset_remove(epk, kedge_root, HASH_RSS_EDGE(kedge_root));
1040 /* verify the cbc */
1041 foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
1044 foreach_pset(cbc->kill_edges, k_edge) {
1045 if (k_edge->src == s_irn) {
1047 pset_break(cbc->kill_edges);
1054 ir_fprintf(stderr, "Warning: parent %+F is not killed in current cbc\n", s_irn);
1057 assert(vrfy_ok && "Verification of CBC failed");
1059 /* add the connected bipartite component */
1060 if (ir_nodeset_size(&cbc->parents) > 0 &&
1061 ir_nodeset_size(&cbc->children) > 0 &&
1062 pset_count(cbc->kill_edges) > 0)
1063 pset_insert(rss->cbc_set, cbc, (unsigned)cbc->nr);
1064 DBG((rss->dbg, LEVEL_2, "\tbipartite component %d inserted:\n", cbc->nr));
1066 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2)
1067 debug_print_cbc(rss->dbg, cbc);
1071 if (rss->opts->dump_flags & RSS_DUMP_CBC)
1072 debug_vcg_dump_bipartite(rss);
1078 * Select the child with the maximum cost.
1080 static child_t *select_child_max_cost(rss_t *rss, ir_nodeset_t *x, ir_nodeset_t *y, child_t *t, cbc_t *cbc) {
1082 ir_nodeset_iterator_t iter;
1083 float max_cost = -1.0f;
1085 DBG((rss->dbg, LEVEL_2, "\t\tcomputing children costs:\n"));
1087 foreach_ir_nodeset(&cbc->children, child, iter) {
1088 rss_irn_t *r_child = get_rss_irn(rss, child);
1089 int num_unkilled_parents = 0;
1090 int num_descendants;
1094 /* get the number of unkilled parents */
1095 foreach_pset(cbc->kill_edges, k_edge) {
1096 if (k_edge->tgt == child && ir_nodeset_contains(x, k_edge->src))
1097 ++num_unkilled_parents;
1100 cost = (float)num_unkilled_parents;
1102 num_descendants = plist_count(r_child->descendant_list) + ir_nodeset_size(y);
1104 if (num_descendants > 0)
1105 cost /= (float)num_descendants;
1107 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F, #desc %d, cost %.3f\n", child, num_descendants, cost));
1109 if (cost > max_cost) {
1120 * Remove all parents from x which are killed by t_irn.
1122 static void remove_covered_parents(rss_t *rss, ir_nodeset_t *x, ir_node *t_irn, cbc_t *cbc) {
1123 rss_irn_t *t = get_rss_irn(rss, t_irn);
1126 DBG((rss->dbg, LEVEL_2, "\t\tremoving parents covered by %+F:\n", t_irn));
1128 foreach_pset(cbc->kill_edges, k_edge) {
1129 if (k_edge->tgt == t_irn && ir_nodeset_contains(x, k_edge->src)) {
1130 ir_nodeset_remove(x, k_edge->src);
1131 plist_insert_back(t->parent_list, k_edge->src);
1132 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", k_edge->src));
1137 static void update_cumulated_descendent_values(rss_t *rss, ir_nodeset_t *y, ir_node *t_irn) {
1138 rss_irn_t *t = get_rss_irn(rss, t_irn);
1139 plist_element_t *el;
1141 DBG((rss->dbg, LEVEL_2, "\t\tupdating cumulated descendant value of %+F:\n", t_irn));
1143 foreach_plist(t->descendant_list, el) {
1144 ir_nodeset_insert(y, plist_element_get_value(el));
1145 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", plist_element_get_value(el)));
1150 * Greedy-k: a heuristics for the MMA problem
1152 static void compute_killing_function(rss_t *rss) {
1154 struct obstack obst;
1156 obstack_init(&obst);
1158 rss->cbc_set = pset_new_ptr(5);
1159 compute_bipartite_decomposition(rss);
1161 /* for all bipartite components do: */
1162 foreach_pset(rss->cbc_set, cbc) {
1166 ir_nodeset_iterator_t iter;
1167 child_t **sks = NEW_ARR_F(child_t *, 20);
1172 ir_nodeset_init_size(&x, 10);
1173 ir_nodeset_init_size(&y, 10);
1175 DBG((rss->dbg, LEVEL_1, "\tcomputing SKS for cbc %d:\n", cbc->nr));
1176 DBG((rss->dbg, LEVEL_2, "\t\tinitializing parents X:\n"));
1178 /* X = S_cb (all parents are initially uncovered) */
1179 foreach_ir_nodeset(&cbc->parents, p, iter) {
1180 ir_nodeset_insert(&x, p);
1181 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", p));
1184 /* while X not empty */
1185 while (ir_nodeset_size(&x) > 0) {
1186 child_t *t = obstack_alloc(&obst, sizeof(*t));
1187 memset(t, 0, sizeof(*t));
1189 t = select_child_max_cost(rss, &x, &y, t, cbc);
1191 if (cur_len >= cur_size) {
1192 ARR_EXTO(child_t *, sks, cur_size * 2);
1196 DBG((rss->dbg, LEVEL_2, "\t\tinsert child %+F (%.3f) into SKS at pos %d\n", t->irn, t->cost, cur_len));
1199 remove_covered_parents(rss, &x, t->irn, cbc);
1200 update_cumulated_descendent_values(rss, &y, t->irn);
1203 ARR_SHRINKLEN(sks, cur_len);
1205 /* sort SKS in increasing cost order */
1206 qsort(sks, cur_len, sizeof(sks[0]), cmp_child_costs);
1208 DBG((rss->dbg, LEVEL_2, "\tprocessing SKS for cbc %d:\n", cbc->nr));
1210 /* build killing function */
1211 for (i = cur_len - 1; i >= 0; --i) { /* loop over sks in decreasing cost order */
1212 child_t *t = sks[i];
1213 rss_irn_t *rt = get_rss_irn(rss, t->irn);
1214 plist_element_t *p_el;
1216 DBG((rss->dbg, LEVEL_3, "\t\t\tkiller %+F (%.3f):\n", t->irn, t->cost));
1218 /* kill all unkilled parents of t */
1219 foreach_plist(rt->parent_list, p_el) {
1220 ir_node *par = plist_element_get_value(p_el);
1221 rss_irn_t *rpar = get_rss_irn(rss, par);
1223 if (is_Sink(rpar->killer)) {
1224 rpar->killer = t->irn;
1225 DBG((rss->dbg, LEVEL_2, "\t\tkill %+F\n", rpar->irn));
1228 DBG((rss->dbg, LEVEL_3, "\t\t\tkeeping %+F as killer for %+F\n", rpar->killer, rpar->irn));
1233 ir_nodeset_destroy(&x);
1234 ir_nodeset_destroy(&y);
1238 if (rss->opts->dump_flags & RSS_DUMP_KILL)
1239 debug_vcg_dump_kill(rss);
1241 del_pset(rss->cbc_set);
1242 obstack_free(&obst, NULL);
1246 * Adds the edge src -> tgt to the dvg. Checks if reverse edge is already there (asserts).
1248 static INLINE void add_dvg_edge(rss_t *rss, dvg_t *dvg, const ir_node *src, const ir_node *tgt, int have_source) {
1249 rss_edge_t *dvg_edge;
1253 ir_nodeset_insert(&dvg->nodes, (ir_node *) src);
1255 assert(ir_nodeset_contains(&dvg->nodes, src) && "Wrong assumption");
1257 ir_nodeset_insert(&dvg->nodes, (ir_node *) tgt);
1259 key.src = (ir_node *) tgt;
1260 key.tgt = (ir_node *) src;
1261 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1263 key.src = (ir_node *) src;
1264 key.tgt = (ir_node *) tgt;
1265 if (NULL != pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1266 /* add the edge to the DVG */
1267 dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1269 dvg_edge->src = (ir_node *) src;
1270 dvg_edge->tgt = (ir_node *) tgt;
1271 dvg_edge->next = NULL;
1273 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", src, tgt));
1274 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1279 * Computes the disjoint value DAG (DVG).
1280 * BEWARE: It is not made explicitly clear in the Touati paper,
1281 * but the DVG is meant to be build from the KILLING DAG
1283 static void compute_dvg(rss_t *rss, dvg_t *dvg) {
1284 plist_element_t *el;
1286 DBG((rss->dbg, LEVEL_1, "\tcomputing DVG:\n"));
1288 foreach_plist(rss->nodes, el) {
1289 ir_node *u_irn = plist_element_get_value(el);
1290 rss_irn_t *u = get_rss_irn(rss, u_irn);
1291 rss_irn_t *u_killer = get_rss_irn(rss, u->killer);
1294 /* TODO: omit nodes only having sink as pkiller and killing no one */
1296 /* add an edge to killer */
1297 add_dvg_edge(rss, dvg, u_irn, u->killer, 0);
1299 if (is_Sink(u->killer))
1302 /* We add an edge to every killer, from where we could be reached. */
1303 for (i = ARR_LEN_SAFE(u_killer->descendants) - 1; i >= 0; --i) {
1304 add_dvg_edge(rss, dvg, u_irn, u_killer->descendants[i], 1);
1309 foreach_plist(rss->nodes, el2) {
1310 ir_node *v_irn = plist_element_get_value(el2);
1313 There is an edge (u, v) in the DVG iff v is a descendant of the killer(u).
1315 if (BSEARCH_IRN_ARR(v_irn, u_kill->descendants)) {
1316 rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
1319 /* insert the user into the DVG and append it to the user list of u */
1320 ir_nodeset_insert(&dvg->nodes, v_irn);
1321 if (! plist_has_value(u->dvg_user_list, v_irn))
1322 plist_insert_back(u->dvg_user_list, v_irn);
1324 dvg_edge->src = u_irn;
1325 dvg_edge->tgt = v_irn;
1326 dvg_edge->next = NULL;
1331 assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
1333 /* add the edge to the DVG */
1334 DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", u_irn, v_irn));
1335 pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
1341 if (rss->opts->dump_flags & RSS_DUMP_DVG)
1342 debug_vcg_dump_dvg(rss, dvg);
1346 * Updates the dvg structure when a serialization edge from src -> tgt is added.
1348 static void update_dvg(rss_t *rss, dvg_t *dvg, rss_irn_t *src, rss_irn_t *tgt) {
1351 rss_edge_t **arr = alloca(pset_count(dvg->edges) * sizeof(arr[0]));
1354 Add an edge from serialization target to serialization src:
1355 src cannot be alive with target
1357 add_dvg_edge(rss, dvg, tgt->irn, src->irn, 0);
1359 /* Add edges to src's descendants as well, they also getting serialized. */
1360 for (i = ARR_LEN_SAFE(src->descendants) - 1; i >= 0; --i) {
1361 add_dvg_edge(rss, dvg, tgt->irn, src->descendants[i], 1);
1364 /* We also have to add edges from targets predecessors in dvg */
1366 /* We cannot insert elements into set over which we iterate ... */
1367 foreach_pset(dvg->edges, edge) {
1368 if (edge->tgt == tgt->irn) {
1373 for (i = 0; i < idx; ++i) {
1375 add_dvg_edge(rss, dvg, edge->src, src->irn, 1);
1376 for (j = ARR_LEN_SAFE(src->descendants) - 1; j >= 0; --j) {
1377 add_dvg_edge(rss, dvg, edge->src, src->descendants[j], 1);
1384 * Accumulate all descendants for root into list.
1386 static void accumulate_dvg_descendant_values(rss_t *rss, rss_irn_t *root, plist_t *list) {
1387 if (plist_count(root->dvg_user_list) > 0) {
1388 plist_element_t *el;
1390 foreach_plist(root->dvg_user_list, el) {
1391 ir_node *v_irn = plist_element_get_value(el);
1392 rss_irn_t *v = get_rss_irn(rss, v_irn);
1394 /* add v as descendant */
1395 if (! plist_has_value(list, v_irn)) {
1396 plist_insert_back(list, v_irn);
1397 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG descendant %+F\n", v_irn));
1400 /* accumulate v's descendants */
1401 accumulate_dvg_descendant_values(rss, v, list);
1407 * Builds the list of potential killers for each node
1409 * Needs the descendant list for all user as sorted array.
1411 static void build_dvg_pkiller_list(rss_t *rss, dvg_t *dvg) {
1412 ir_nodeset_iterator_t iter;
1415 foreach_nodeset(&dvg->nodes, irn, iter) {
1416 rss_irn_t *node = get_rss_irn(rss, irn);
1417 plist_element_t *el, *el2;
1419 DBG((rss->dbg, DEBUG_DVG, "\t\tbuilding pkiller list for %+F\n", irn));
1421 /* check each user */
1422 foreach_plist(node->dvg_user_list, el) {
1423 ir_node *u_irn = plist_element_get_value(el);
1425 /* is the current user u_irn not a descendant of any other user -> pkiller */
1426 foreach_plist(node->dvg_user_list, el2) {
1427 ir_node *v_irn = plist_element_get_value(el2);
1428 rss_irn_t *v = get_rss_irn(rss, v_irn);
1431 ! BSEARCH_IRN_ARR(u_irn, v->dvg_desc) &&
1432 ! plist_has_value(node->dvg_pkiller_list, u_irn))
1434 plist_insert_back(node->dvg_pkiller_list, u_irn);
1435 DBG((rss->dbg, DEBUG_DVG, "\t\t\tadd DVG pkiller %+F\n", u_irn));
1440 node->dvg_pkiller = build_sorted_array_from_list(node->dvg_pkiller_list, phase_obst(&rss->ph));
1444 if (firm_dbg_get_mask(rss->dbg) & DEBUG_DVG)
1445 debug_vcg_dump_dvg_pkiller(rss, dvg);
1452 * Compute the maximal antichain of the current DVG.
1453 * This is a reimplementation of the MAXIMAL_ANTI_CHAIN function
1454 * from the DDG library 1.1 (DAG.cpp).
1456 static ir_nodeset_t *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration) {
1457 int n = ir_nodeset_size(&dvg->nodes);
1458 int *assignment = alloca(n * sizeof(assignment[0]));
1459 int *assignment_rev = alloca(n * sizeof(assignment_rev[0]));
1460 int *idx_map = alloca(n * sizeof(idx_map[0]));
1461 hungarian_problem_t *bp;
1462 ir_nodeset_t *values, *temp;
1463 ir_nodeset_iterator_t iter;
1465 int i, j, cost, cur_chain, res;
1466 rss_edge_t *dvg_edge;
1468 #define MAP_IDX(irn) bsearch_for_index(get_irn_idx(irn), idx_map, n, 1)
1470 if (pset_count(dvg->edges) == 0)
1473 bp = hungarian_new(n, n, 1, HUNGARIAN_MATCH_NORMAL);
1476 At first, we build an index map for the nodes in the DVG,
1477 because we cannot use the irn idx therefore as the resulting
1478 bipartite data structure would have around 1.2GB.
1479 So we limit the size to the number of nodes we have in the DVG
1480 and build a sorted index map for their irn indices.
1483 foreach_ir_nodeset(&dvg->nodes, u_irn, iter) {
1484 idx_map[i++] = get_irn_idx(u_irn);
1486 qsort(idx_map, n, sizeof(idx_map[0]), cmp_int);
1488 foreach_pset(dvg->edges, dvg_edge) {
1489 int idx_u = MAP_IDX(dvg_edge->src);
1490 int idx_v = MAP_IDX(dvg_edge->tgt);
1492 /* add the entry to the bipartite data structure */
1493 hungarian_add(bp, idx_u, idx_v, 1);
1494 DBG((rss->dbg, LEVEL_3, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1495 idx_u, dvg_edge->src, idx_v, dvg_edge->tgt));
1499 Add a bipartite entry for each pair of nodes (u, v), where exists a
1500 path in the DVG from u to v, ie. connect all descendants(v) to v.
1501 desc_dvg(v) = accumulated descendants of all z in dvg_user_list(v)
1503 foreach_ir_nodeset(&dvg->nodes, u_irn, iter) {
1504 rss_irn_t *u = get_rss_irn(rss, u_irn);
1505 int idx_u_irn = MAP_IDX(u_irn);
1507 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG descendants of %+F:\n", u_irn));
1509 //plist_clear(u->dvg_desc_list);
1510 //accumulate_dvg_descendant_values(rss, u, u->dvg_desc_list);
1513 FIXME: The array is build on the phase obstack and we cannot free the data.
1514 So the array get as many times allocated as this function is called.
1517 /* build the sorted array for faster searches */
1518 //u->dvg_desc = build_sorted_array_from_list(u->dvg_desc_list, phase_obst(&rss->ph));
1520 DBG((rss->dbg, DEBUG_MAX_AC, "\t\tadding bipartite entries of %+F:\n", u_irn));
1522 /* add the bipartite entries for all descendants */
1523 for (i = ARR_LEN_SAFE(u->dvg_desc) - 1; i >= 0; --i) {
1524 rss_irn_t *desc = get_rss_irn(rss, u->dvg_desc[i]);
1525 int idx_desc = MAP_IDX(u->dvg_desc[i]);
1527 /* add the entry to the bipartite data structure */
1528 hungarian_add(bp, idx_u_irn, idx_desc, 1);
1529 DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tadd %d (%+F) -> %d (%+F)\n",
1530 idx_u_irn, u_irn, idx_desc, u->dvg_desc[i]));
1537 /* We want maximum cardinality matching */
1538 hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);
1541 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG pkiller:\n"));
1542 /* beware: the following function needs the dvg_desc array */
1543 build_dvg_pkiller_list(rss, dvg);
1546 DBG((rss->dbg, LEVEL_2, "\t\tcomputing bipartite matching\n"));
1548 The maximum cardinality bipartite matching gives us the minimal
1549 chain partition, which corresponds to the maximum anti chains.
1551 res = hungarian_solve(bp, assignment, &cost, 1);
1552 assert(res == 0 && "Bipartite matching failed!");
1555 memset(assignment_rev, -1, n * sizeof(assignment_rev[0]));
1557 /* build the reverse assignment, ie. foreach i -> j, add j -> i */
1558 for (i = 0; i < n; ++i) {
1559 if (assignment[i] >= 0) {
1560 int j = assignment[i];
1561 assignment_rev[j] = i;
1565 DBG((rss->dbg, LEVEL_2, "\t\tgot assignment with cost %d\n", cost));
1566 DBG((rss->dbg, LEVEL_3, "\t\t\tassignment --- reverse assignment\n", cost));
1567 for (i = 0; i < n; ++i) {
1568 DBG((rss->dbg, LEVEL_3, "\t\t\t%3d -> %3d %3d -> %3d\n", i, assignment[i], i, assignment_rev[i]));
1571 values = xmalloc(sizeof(values[0]));
1572 ir_nodeset_init_size(values, 10);
1574 /* Construction of the minimal chain partition */
1575 for (j = 0; j < n; ++j) {
1576 /* check nodes, which did not occur as target */
1577 if (assignment_rev[j] == -1) {
1578 int xj = idx_map[j];
1579 ir_node *xj_irn = get_idx_irn(rss->irg, xj);
1580 rss_irn_t *xj_rss = get_rss_irn(rss, xj_irn);
1581 chain_t *c = obstack_alloc(phase_obst(&rss->ph), sizeof(*c));
1584 /* there was no source for j -> we have a source of a new chain */
1585 ir_nodeset_insert(values, xj_irn);
1587 c->elements = plist_obstack_new(phase_obst(&rss->ph));
1588 c->nr = cur_chain++;
1589 plist_insert_back(c->elements, xj_irn);
1593 DBG((rss->dbg, LEVEL_2, "\t\tstarting chain %d:\n", c->nr));
1594 DBG((rss->dbg, LEVEL_2, "\t\t\t%+F (%d)", xj_irn, j));
1596 /* follow chain, having j as source */
1598 while (assignment[source] >= 0) {
1599 int target = assignment[source];
1600 int irn_idx = idx_map[target];
1601 ir_node *irn = get_idx_irn(rss->irg, irn_idx);
1602 rss_irn_t *node = get_rss_irn(rss, irn);
1604 plist_insert_back(c->elements, irn);
1607 DB((rss->dbg, LEVEL_2, " -> %+F (%d)", irn, target));
1609 /* new source = last target */
1613 DB((rss->dbg, LEVEL_2, "\n"));
1618 Computing the maximal antichain: Select an element from each
1619 chain such, such it is parallel with the others.
1621 DBG((rss->dbg, LEVEL_1, "\t\tcomputing set of saturation values (MAX AC)\n"));
1622 DBG((rss->dbg, LEVEL_3, "\t\tstarting with:\n"));
1625 if (firm_dbg_get_mask(rss->dbg) & LEVEL_3)
1626 dump_nodeset(values, "\t\t\t");
1632 We need an explicit array for the values as
1633 we cannot iterate multiple times over the same
1634 set at the same time. :-(((((
1635 TODO Matze: now we can, so rewrite this...
1637 int n = ir_nodeset_size(values);
1639 ir_node **val_arr = NEW_ARR_F(ir_node *, n);
1641 foreach_ir_nodeset(values, u_irn, iter)
1642 val_arr[i++] = u_irn;
1645 ir_nodeset_destroy(temp);
1649 temp = xmalloc(sizeof(temp[0]));
1650 ir_nodeset_init_size(temp, 10);
1652 /* Select all nodes from current value set, having another node in the set as descendant. */
1653 for (i = 0; i < n; ++i) {
1654 rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1656 for (j = 0; j < n; ++j) {
1660 key.src = val_arr[i];
1661 key.tgt = val_arr[j];
1663 if (pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
1664 /* v[j] is descendant of u -> remove u and break */
1665 ir_nodeset_insert(temp, (ir_node *) u->irn);
1666 ir_nodeset_remove(values, u->irn);
1668 DBG((rss->dbg, LEVEL_3, "\t\t\tremoving %+F from values, adding it to temp\n", u->irn));
1676 /* Try to insert the chain predecessor of all selected u's */
1677 foreach_ir_nodeset(temp, u_irn, iter) {
1678 rss_irn_t *u = get_rss_irn(rss, u_irn);
1679 chain_t *c = u->chain;
1680 plist_element_t *el = plist_find_value(c->elements, u_irn);
1682 assert(el && "Missing element in chain!");
1684 /* If u has predecessor in chain: insert the predecessor */
1685 if (el == plist_element_get_prev(el)) {
1686 ir_nodeset_insert(values, plist_element_get_value(el));
1687 DBG((rss->dbg, LEVEL_3, "\t\t\tadding %+F to values\n", plist_element_get_value(el)));
1693 } while (ir_nodeset_size(temp) > 0);
1695 DBG((rss->dbg, LEVEL_2, "\t\tfinal set:\n"));
1697 if (firm_dbg_get_mask(rss->dbg) & LEVEL_2) {
1698 dump_nodeset(values, "\t\t\t");
1702 if (rss->opts->dump_flags & RSS_DUMP_MAXAC)
1703 debug_vcg_dump_pkg(rss, values, iteration);
1706 ir_nodeset_destroy(temp);
1716 * Computes the best serialization between two nodes of sat_vals.
1718 static serialization_t *compute_best_admissible_serialization(rss_t *rss, ir_nodeset_t *sat_vals, serialization_t *ser, int num_regs) {
1719 int n = ir_nodeset_size(sat_vals);
1720 int n_idx = ARR_LEN_SAFE(rss->idx_map);
1722 ir_node **val_arr = alloca(n * sizeof(val_arr[0]));
1723 bitset_t *bs_sv = bitset_alloca(n_idx);
1724 bitset_t *bs_vdesc = bitset_alloca(n_idx);
1725 bitset_t *bs_tmp = bitset_alloca(n_idx);
1726 bitset_t *bs_ukilldesc = bitset_alloca(n_idx);
1727 int best_benefit = INT_MAX;
1728 int best_omega2 = INT_MAX;
1729 int best_benefit_omega20 = INT_MAX;
1733 ir_nodeset_iterator_t iter;
1734 rss_edge_t min_benefit_edge = {NULL, NULL, NULL};
1735 rss_edge_t min_omega20_edge = {NULL, NULL, NULL};
1736 rss_irn_t *ser_u_omega1 = NULL, *ser_v_omega1 = NULL;
1737 rss_irn_t *ser_u_omega20 = NULL, *ser_v_omega20 = NULL;
1739 DBG((rss->dbg, LEVEL_1, "\tcomputing admissible serializations:\n"));
1742 We need an explicit array for the values as
1743 we cannot iterate multiple times over the same
1744 set at the same time. :-(((((
1747 foreach_ir_nodeset(sat_vals, irn, iter) {
1749 bitset_set(bs_sv, BLOCK_IDX_MAP(rss, irn));
1753 We build all admissible serializations and remember the best found so far.
1756 if v in pkiller(u): add edge from v to all other pkiller(u)
1757 else: for all vv in pkiller(u): add edge from v to vv if there exists no path from vv to v
1761 A node is unserializable if:
1762 - it has only one killer and this one is Sink
1763 - it kills no other values
1764 In this case there is no serialization which could
1765 reduce the registerpressure
1767 #define IS_UNSERIALIZABLE_NODE(rss_node) \
1770 (plist_count(rss_node->pkiller_list) == 1) && \
1771 is_Sink(rss_node->killer) && \
1772 (rss_node->kill_count == 0) \
1774 be_is_Barrier(rss_node->irn) || \
1775 be_is_Keep(rss_node->irn) \
1778 /* for all u in sat_vals */
1779 for (i = 0; i < n; ++i) {
1780 rss_irn_t *u = get_rss_irn(rss, val_arr[i]);
1781 plist_element_t *el;
1783 /* ignore nodes where serialization does not help */
1784 if (IS_UNSERIALIZABLE_NODE(u)) {
1785 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", u->irn));
1789 /* accumulate all descendants of all pkiller(u) */
1790 bitset_clear_all(bs_ukilldesc);
1791 foreach_plist(u->pkiller_list, el) {
1792 ir_node *irn = plist_element_get_value(el);
1793 rss_irn_t *node = get_rss_irn(rss, irn);
1796 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, irn));
1800 for (k = ARR_LEN_SAFE(node->descendants) - 1; k >= 0; --k) {
1801 if (! is_Sink(node->descendants[k]))
1802 bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, node->descendants[k]));
1806 /* for all v in sat_vals */
1807 for (j = 0; j < n; ++j) {
1808 ir_node *v_irn = val_arr[j];
1809 rss_irn_t *v = get_rss_irn(rss, v_irn);
1810 unsigned v_height = get_irn_height(rss->h, v_irn);
1811 int omega1, omega2, is_pkiller;
1813 /* v cannot be serialized with itself
1814 * ignore nodes where serialization does not help */
1815 if (i == j || IS_UNSERIALIZABLE_NODE(v)) {
1816 #ifdef DEBUG_libfirm
1818 DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", v->irn));
1823 /* get descendants of v */
1824 bitset_clear_all(bs_vdesc);
1825 bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v_irn));
1826 for (k = ARR_LEN_SAFE(v->descendants) - 1; k >= 0; --k) {
1827 if (! is_Sink(v->descendants[k]))
1828 bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v->descendants[k]));
1831 /* if v is in pkiller(u) */
1832 is_pkiller = plist_has_value(u->pkiller_list, v_irn);
1834 /* for all vv in pkiller(u) */
1835 foreach_plist(u->pkiller_list, el) {
1836 ir_node *vv_irn = plist_element_get_value(el);
1839 if (is_Sink(vv_irn) || is_cfop(vv_irn))
1843 add_edge = vv_irn != v_irn && skip_Proj(vv_irn) != skip_Proj(v_irn);
1845 add_edge = ! heights_reachable_in_block(rss->h, skip_Proj(vv_irn), skip_Proj(v_irn));
1848 As we add an edge from vv -> v, we have to make sure,
1849 that there exists no path from v to vv.
1853 unsigned vv_height = get_irn_height(rss->h, vv_irn);
1854 unsigned critical_path_cost;
1858 mu1 = | descendants(v) cut sat_vals |
1859 the number of saturating values which cannot
1860 be simultaneously alive with u
1862 bitset_copy(bs_tmp, bs_vdesc);
1863 mu1 = bitset_popcnt(bitset_and(bs_tmp, bs_sv));
1866 mu2 = | accum_desc_all_pkiller(u) without descendants(v) |
1869 bitset_copy(bs_tmp, bs_ukilldesc);
1870 mu2 = bitset_popcnt(bitset_andnot(bs_tmp, bs_vdesc));
1876 /* omega1 = mu1 - mu2 */
1882 /* omega2 = increase of critical path */
1883 critical_path_cost =
1884 v_height /* longest path from v to sink */
1885 + rss->max_height - vv_height /* longest path from source to vv */
1889 If critical_path_cost > max_height -> the new edge
1890 would increase the longest critical path by the difference.
1892 omega2 = critical_path_cost > rss->max_height ? critical_path_cost - rss->max_height : 0;
1894 /* this keeps track of the edge with the best benefit */
1895 if (omega1 >= num_regs - n && omega1 < best_benefit) {
1896 min_benefit_edge.src = v_irn;
1897 min_benefit_edge.tgt = vv_irn;
1902 best_benefit = omega1;
1903 ser->new_killer = is_pkiller;
1906 /* this keeps track of the edge with the best omega1 costs where omega2 == 0 */
1907 if (omega2 == 0 && omega1 >= num_regs - n && omega1 < best_benefit_omega20) {
1908 min_omega20_edge.src = v_irn;
1909 min_omega20_edge.tgt = vv_irn;
1914 best_benefit_omega20 = omega1;
1915 ser->new_killer = is_pkiller;
1918 best_omega2 = MIN(best_omega2, omega2);
1920 DBG((rss->dbg, LEVEL_2, "\t\tfound %+F -> %+F (w1 %d, w2 %d)\n",
1921 v_irn, vv_irn, omega1, omega2));
1923 } /* for all vv in pkiller(u) */
1924 } /* for all v in sat_vals */
1925 } /* for all u in sat_vals */
1930 if (best_omega2 == 0) {
1931 ser->u = ser_u_omega20;
1932 ser->v = ser_v_omega20;
1933 ser->edge->src = min_omega20_edge.src;
1934 ser->edge->tgt = min_omega20_edge.tgt;
1935 ser->omega1 = best_benefit_omega20;
1936 ser->omega2 = best_omega2;
1939 ser->u = ser_u_omega1;
1940 ser->v = ser_v_omega1;
1941 ser->edge->src = min_benefit_edge.src;
1942 ser->edge->tgt = min_benefit_edge.tgt;
1943 ser->omega1 = best_benefit;
1944 ser->omega2 = best_omega2;
1949 #undef IS_UNSERIALIZABLE_NODE
1953 * Perform the value serialization heuristic and add all
1954 * computed serialization edges as dependencies to the irg.
1956 static void perform_value_serialization_heuristic(rss_t *rss) {
1957 bitset_t *arch_nonign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1958 bitset_t *abi_ign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls));
1959 unsigned available_regs, iteration;
1961 ir_nodeset_t *sat_vals;
1962 pset *ser_set = new_pset(cmp_rss_edges, 20);
1964 /* available_regs = R = |arch_non_ignore_regs cut ~abi_ignore_regs| */
1965 arch_put_non_ignore_regs(rss->arch_env, rss->cls, arch_nonign_bs);
1966 be_abi_put_ignore_regs(rss->abi, rss->cls, abi_ign_bs);
1967 bitset_andnot(arch_nonign_bs, abi_ign_bs);
1968 available_regs = bitset_popcnt(arch_nonign_bs);
1969 //num_live = pset_count(rss->live_block);
1970 //available_regs -= num_live < available_regs ? num_live : 0;
1972 DBG((rss->dbg, LEVEL_1, "\n\t#available regs: %d\n\n", available_regs));
1975 At first we need to compute the disjoint value DAG (DVG = {V, E_dv}).
1976 V = set of all nodes we are currently interested in
1977 E_dv = there is an edge from u to v iff v is a descendant of killer(u), forall u, v in V
1979 ir_nodeset_init_size(&dvg.nodes, plist_count(rss->nodes));
1980 dvg.edges = new_pset(cmp_rss_edges, plist_count(rss->nodes) * 5);
1981 compute_dvg(rss, &dvg);
1984 Then we perform the heuristic serialization algorithm
1985 on the DVG which gives us all necessary serialization
1988 DBG((rss->dbg, LEVEL_1, "\tcomputing maximal antichain:\n"));
1990 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
1991 while (sat_vals && (ir_nodeset_size(sat_vals) > available_regs)) {
1992 serialization_t *ser, best_ser;
1993 rss_edge_t *edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*edge));
1994 ir_node *dep_src, *dep_tgt;
1996 best_ser.edge = edge;
1997 ser = compute_best_admissible_serialization(rss, sat_vals, &best_ser, available_regs);
1999 DBG((rss->dbg, LEVEL_1, "\tcurrent register saturation %d, target %d\n", ir_nodeset_size(sat_vals), available_regs));
2002 DBG((rss->dbg, LEVEL_1, "\tno RS improving serialization found, breaking at iteration %d\n", iteration));
2006 /* Insert the serialization as dependency edge into the irg. */
2007 DBG((rss->dbg, LEVEL_1, "\tserializing %+F -> %+F with edge %+F -> %+F and cost %d, %d\n",
2008 ser->u->irn, ser->v->irn, ser->edge->src, ser->edge->tgt, ser->omega1, ser->omega2));
2010 if (pset_find(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge)))
2011 ir_printf("WARNING: serialization %+F -> %+F computed twice!\n", ser->edge->src, ser->edge->tgt);
2014 pset_insert(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge));
2016 /* update the dvg */
2017 update_dvg(rss, &dvg, ser->v, ser->u);
2018 update_dvg(rss, &dvg, get_rss_irn(rss, ser->edge->src), get_rss_irn(rss, ser->edge->tgt));
2019 if(sat_vals != NULL) {
2020 ir_nodeset_destroy(sat_vals);
2024 dep_src = skip_Proj(ser->edge->src);
2025 dep_tgt = ser->edge->tgt;
2026 add_irn_dep(dep_src, dep_tgt);
2028 /* Update descendants, consumer and pkillers of the target */
2029 update_node_info(rss, ser->edge->tgt, ser->edge->src);
2031 /* TODO: try to find a cheaper way for updating height information */
2032 rss->max_height = heights_recompute_block(rss->h, rss->block);
2034 /* Recompute the antichain for next serialization */
2035 DBG((rss->dbg, LEVEL_1, "\tre-computing maximal antichain:\n"));
2036 sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
2039 ir_nodeset_destroy(&dvg.nodes);
2040 del_pset(dvg.edges);
2044 * Do initial calculations for a block.
2046 static void process_block(ir_node *block, void *env) {
2049 const ir_edge_t *edge;
2051 phase_init(&rss->ph, "rss block preprocessor", rss->irg, PHASE_DEFAULT_GROWTH, init_rss_irn, NULL);
2053 DBG((rss->dbg, LEVEL_1, "preprocessing block %+F\n", block));
2056 /* build an index map for all nodes in the current block */
2058 n = get_irn_n_edges(block);
2059 NEW_ARR_A(int *, rss->idx_map, n);
2060 foreach_out_edge(block, edge) {
2061 ir_node *irn = get_edge_src_irn(edge);
2062 rss->idx_map[i++] = get_irn_idx(irn);
2064 qsort(rss->idx_map, n, sizeof(rss->idx_map[0]), cmp_int);
2065 rss->max_height = heights_recompute_block(rss->h, block);
2067 /* loop over all register classes */
2068 for (i = arch_env_get_n_reg_class(rss->arch_env) - 1; i >= 0; --i) {
2069 const arch_register_class_t *cls = arch_env_get_reg_class(rss->arch_env, i);
2072 DBG((rss->dbg, LEVEL_1, "register class %s\n", arch_register_class_name(cls)));
2074 /* Get all live value at end of Block having current register class */
2075 ir_nodeset_init(&rss->live_block);
2076 be_liveness_end_of_block(rss->liveness, rss->arch_env, rss->cls, rss->block, &rss->live_block);
2078 /* reset the list of interesting nodes */
2079 plist_clear(rss->nodes);
2080 plist_insert_back(rss->nodes, _sink);
2082 /* collect all nodes having a certain register class */
2083 foreach_out_edge(block, edge) {
2084 ir_node *irn = get_edge_src_irn(edge);
2085 ir_mode *mode = get_irn_mode(irn);
2089 - mode_T nodes (the projs are asked)
2090 - mode_X nodes (control flow nodes are always scheduled last)
2091 - Keeps (they are always immediately scheduled)
2092 - Phi (same as Keep)
2094 if (mode == mode_T || mode == mode_X || is_Phi(irn))
2098 In case of a proj, we skip
2099 - Barrier (they are a Barrier :)
2101 - the Proj itself, as it's scheduled always with it's super node
2104 ir_node *pred = get_Proj_pred(irn);
2105 if (be_is_Barrier(pred) || is_Start(pred))
2109 /* calculate the descendants and consumer for each node in the block */
2110 collect_node_info(rss, skip_Proj(irn));
2112 if (be_is_Keep(irn))
2115 if (! arch_irn_is(rss->arch_env, irn, ignore) && arch_get_irn_reg_class(rss->arch_env, irn, -1) == cls) {
2116 plist_insert_back(rss->nodes, skip_Proj(irn));
2121 /* compute the potential killing set PK(G) */
2122 compute_pkill_set(rss);
2124 /* compute the killing function k* */
2125 compute_killing_function(rss);
2128 Compute the heuristic value serialization and
2129 add the necessary dependencies to the irg.
2131 perform_value_serialization_heuristic(rss);
2133 ir_nodeset_destroy(&rss->live_block);
2136 phase_free(&rss->ph);
2140 * Register the options.
2142 void be_init_schedrss(void) {
2143 lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
2144 lc_opt_entry_t *sched_grp = lc_opt_get_grp(be_grp, "sched");
2145 lc_opt_entry_t *rss_grp = lc_opt_get_grp(sched_grp, "rss");
2147 lc_opt_add_table(rss_grp, rss_option_table);
2150 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_schedrss);
2153 * Preprocess the irg for scheduling.
2155 void rss_schedule_preparation(be_irg_t *birg) {
2156 ir_graph *irg = be_get_birg_irg(birg);
2159 FIRM_DBG_REGISTER(rss.dbg, "firm.be.sched.rss");
2161 //firm_dbg_set_mask(rss.dbg, LEVEL_1 | LEVEL_2 | LEVEL_3);
2163 init_rss_special_nodes(irg);
2166 rss.arch_env = be_get_birg_arch_env(birg);
2167 rss.abi = birg->abi;
2168 rss.h = heights_new(irg);
2169 rss.nodes = plist_new();
2170 rss.opts = &rss_options;
2171 rss.liveness = be_liveness(birg);
2172 be_liveness_assure_sets(rss.liveness);
2173 irg_block_walk_graph(irg, NULL, process_block, &rss);
2174 heights_free(rss.h);
2175 plist_free(rss.nodes);
2176 be_liveness_free(rss.liveness);
2178 if (birg->main_env->options->dump_flags & DUMP_SCHED)
2179 be_dump(rss.irg, "-rss", dump_ir_block_graph);