X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=ir%2Fbe%2Fbeschedrss.c;h=633e89776e2a1b72f5967003efd4cbcd03a52965;hb=e7ba741cdd9599ce05d7989bff60a1c6137ee0b5;hp=69cad879f51f77c3dee6453f4fc97e1927641df2;hpb=8975cfb21a64bbf19bbd240777c76567e1c06667;p=libfirm diff --git a/ir/be/beschedrss.c b/ir/be/beschedrss.c index 69cad879f..633e89776 100644 --- a/ir/be/beschedrss.c +++ b/ir/be/beschedrss.c @@ -1,12 +1,32 @@ +/* + * Copyright (C) 1995-2007 University of Karlsruhe. All right reserved. + * + * This file is part of libFirm. + * + * This file may be distributed and/or modified under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation and appearing in the file LICENSE.GPL included in the + * packaging of this file. + * + * Licensees holding valid libFirm Professional Edition licenses may use + * this file in accordance with the libFirm Commercial License. + * Agreement provided with the Software. + * + * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE + * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE. + */ + /** + * @file + * @brief Implementation of a register saturating list scheduler. + * @author Christian Wuerdig + * @date 29.08.2006 + * @version $Id$ + * * Implementation of a register saturating list scheduler * as described in: Sid-Ahmed-Ali Touati * Register Saturation in Superscalar and VLIW Codes - * - * @license This file protected by GPL - GNU GENERAL PUBLIC LICENSE. - * @author Christian Wuerdig - * @date 29.08.2006 - * @cvs-id $Id$ */ #ifdef HAVE_CONFIG_H #include "config.h" @@ -23,6 +43,7 @@ #include "ircons_t.h" #include "irphase_t.h" #include "irgwalk.h" +#include "irdump.h" #include "irtools.h" #include "irbitset.h" #include "irprintf.h" @@ -33,23 +54,27 @@ #include "height.h" #include "beabi.h" +#include "bemodule.h" #include "benode_t.h" #include "besched_t.h" -#include "beschedmris.h" +#include "beirg_t.h" + +#include +#include -#define DEBUG_NODEINFO 1 << 0 -#define DEBUG_PKILL 1 << 1 -#define DEBUG_BIPARTITE 1 << 2 -#define DEBUG_SKS 1 << 3 -#define DEBUG_DVG 1 << 4 -#define DEBUG_SER_HEUR 1 << 5 -#define DEBUG_MAX_AC 1 << 6 + +#define ARR_LEN_SAFE(arr) ((arr) != NULL ? ARR_LEN((arr)) : 0) #define HASH_RSS_EDGE(edge) ((get_irn_node_nr((edge)->src) << 16) | (get_irn_node_nr((edge)->tgt) & 0xFFFF)) #define BSEARCH_IRN_ARR(val, arr) \ - bsearch(&(val), (arr), ARR_LEN((arr)), sizeof((arr)[0]), cmp_irn_idx) + bsearch(&(val), (arr), ARR_LEN_SAFE((arr)), sizeof((arr)[0]), cmp_irn_idx) + +#define BLOCK_IDX_MAP(rss, irn) bsearch_for_index(get_irn_idx((irn)), (rss)->idx_map, ARR_LEN_SAFE((rss)->idx_map), 1) -#define BLOCK_IDX_MAP(rss, irn) bsearch_for_index(get_irn_idx((irn)), (rss)->idx_map, ARR_LEN((rss)->idx_map), 1) +/* the rss options */ +typedef struct _rss_opts_t { + int dump_flags; +} rss_opts_t; /* Represents a child with associated costs */ typedef struct _child { @@ -66,22 +91,15 @@ typedef struct _rss_edge { /* Represents a connected bipartite component. */ typedef struct _cbc { - nodeset *parents; /**< = S a set of value producers */ - nodeset *children; /**< = T a set of value consumers */ + ir_nodeset_t parents; /**< = S a set of value producers */ + ir_nodeset_t children; /**< = T a set of value consumers */ pset *kill_edges; /**< = E a set of edges (t in T, s in S) such as each s in S gets killed by at least one t in T */ int nr; /**< a deterministic index for set insertion (used as hash) */ } cbc_t; -/* Represents a serialization edge with associated costs. */ -typedef struct _serialization { - rss_edge_t *edge; - int omega1; - int omega2; -} serialization_t; - /* Represents a disjoint value DAG. */ typedef struct _dvg { - nodeset *nodes; + ir_nodeset_t nodes; pset *edges; } dvg_t; @@ -96,36 +114,38 @@ typedef struct _rss_irn { ir_node **consumer; /**< Sorted consumer array (needed for faster access) */ plist_t *parent_list; /**< List of parents */ - ir_node **parents; /**< Sorted parent array (needed for faster access) */ + plist_t *pkiller_list; /**< List of potential killers */ plist_t *descendant_list; /**< List of descendants */ ir_node **descendants; /**< Sorted descendant array (needed for faster access) */ - plist_t *pkiller_list; /**< List of potential killers */ - ir_node **pkillers; /**< Sorted pkiller array (needed for faster access) */ - - plist_t *dvg_desc_list; /**< List of all descendants in the DVG */ - ir_node **dvg_desc; /**< Sorted dvg descendant array (needed for faster access) */ - - plist_t *dvg_pkiller_list; /**< List of potential killers in the DVG */ - ir_node **dvg_pkiller; /**< Sorted dvg pkiller array (needed for faster access) */ - - plist_t *kill_value_list; /**< List of values getting potentially killed by this node */ - plist_t *dvg_user_list; /**< List of users in the disjoint value DAG DVG */ - ir_node *killer; /**< The selected unique killer */ ir_node *irn; /**< The corresponding firm node to this rss_irn */ chain_t *chain; /**< The chain, this node is associated to */ + unsigned desc_walk; /**< visited flag for collecting descendants */ + unsigned kill_count; /**< number of nodes killed by this one */ + unsigned live_out : 1; /**< irn has consumers outside of it's block */ unsigned visited : 1; /**< visited flag for bipartite decomposition */ + unsigned havecons : 1; /**< visited flag collect consumer */ unsigned handled : 1; /**< flag indicating whether or not the list structures have been build */ unsigned dumped : 1; /**< flag indication whether or not this node was dumped */ } rss_irn_t; +/* Represents a serialization edge with associated costs. */ +typedef struct _serialization { + rss_irn_t *u; /* the top node */ + rss_irn_t *v; /* the node about to be serialized after u */ + rss_edge_t *edge; /* the edge selected for the serialization */ + int omega1; /* estimated: available regs - RS reduction */ + int omega2; /* increase in critical path length */ + int new_killer; +} serialization_t; + typedef struct _rss { - phase_t ph; /**< Phase to hold some data */ + ir_phase ph; /**< Phase to hold some data */ heights_t *h; /**< The current height object */ ir_graph *irg; /**< The irg to preprocess */ plist_t *nodes; /**< The list of interesting nodes */ @@ -135,6 +155,9 @@ typedef struct _rss { ir_node *block; /**< The current block in progress. */ int *idx_map; /**< Mapping irn indices to per block indices */ unsigned max_height; /**< maximum height in the current block */ + rss_opts_t *opts; /**< The options */ + be_lv_t *liveness; /**< The liveness information for this irg */ + ir_nodeset_t live_block; /**< Values alive at end of block */ const arch_register_class_t *cls; /**< The current register class */ DEBUG_ONLY(firm_dbg_module_t *dbg); } rss_t; @@ -145,29 +168,84 @@ typedef struct _rss { * We need some special nodes: * a source and a sink for all live-in and live-out values of a block */ - -static enum { +enum { iro_rss_Source, iro_rss_Sink, iro_rss_last }; +/** The opcode of the rss_Source node once allocated. */ +static ir_op *op_rss_Source; +/** The opcode of the rss_Sink node once allocated. */ +static ir_op *op_rss_Sink; + +/** The rss_Source node of the current graph. */ static ir_node *_source = NULL; +/** The rss_Sink node of the current graph. */ static ir_node *_sink = NULL; #define is_Source(irn) ((irn) == _source) #define is_Sink(irn) ((irn) == _sink) +enum { + RSS_DUMP_NONE = 0, + RSS_DUMP_CBC = 1 << 0, + RSS_DUMP_PKG = 1 << 1, + RSS_DUMP_KILL = 1 << 2, + RSS_DUMP_DVG = 1 << 3, + RSS_DUMP_MAXAC = 1 << 4, + RSS_DUMP_ALL = (RSS_DUMP_MAXAC << 1) - 1, +}; + +static rss_opts_t rss_options = { + RSS_DUMP_NONE, +}; + +static const lc_opt_enum_int_items_t dump_items[] = { + { "none", RSS_DUMP_NONE }, + { "cbc", RSS_DUMP_CBC }, + { "pkg", RSS_DUMP_PKG }, + { "kill", RSS_DUMP_KILL }, + { "dvg", RSS_DUMP_DVG }, + { "maxac", RSS_DUMP_MAXAC }, + { "all", RSS_DUMP_ALL }, + { NULL, 0 } +}; + +static lc_opt_enum_int_var_t dump_var = { + &rss_options.dump_flags, dump_items +}; + +static const lc_opt_table_entry_t rss_option_table[] = { + LC_OPT_ENT_ENUM_MASK("dump", "dump phases", &dump_var), + LC_OPT_LAST +}; + +/****************************************************************************** + * _ _ __ _ _ + * | | | | / _| | | (_) + * | |__ ___| |_ __ ___ _ __ | |_ _ _ _ __ ___| |_ _ ___ _ __ ___ + * | '_ \ / _ \ | '_ \ / _ \ '__| | _| | | | '_ \ / __| __| |/ _ \| '_ \/ __| + * | | | | __/ | |_) | __/ | | | | |_| | | | | (__| |_| | (_) | | | \__ \ + * |_| |_|\___|_| .__/ \___|_| |_| \__,_|_| |_|\___|\__|_|\___/|_| |_|___/ + * | | + * |_| + ******************************************************************************/ + /** - * Acquire opcodes and create source and sink nodes. + * Acquire opcodes if needed and create source and sink nodes. */ static void init_rss_special_nodes(ir_graph *irg) { - ir_node *block = get_irg_start_block(irg); - int iro_rss_base = get_next_ir_opcodes(iro_rss_last); - ir_op *op_rss_Source = new_ir_op(iro_rss_base + iro_rss_Source, "rss_Source", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL); - ir_op *op_rss_Sink = new_ir_op(iro_rss_base + iro_rss_Sink, "rss_Sink", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL); - _source = new_ir_node(NULL, irg, block, op_rss_Source, mode_ANY, 0, NULL); - _sink = new_ir_node(NULL, irg, block, op_rss_Sink, mode_ANY, 0, NULL); + ir_node *block; + + if (op_rss_Source == NULL) { + int iro_rss_base = get_next_ir_opcodes(iro_rss_last); + op_rss_Source = new_ir_op(iro_rss_base + iro_rss_Source, "rss_Source", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL); + op_rss_Sink = new_ir_op(iro_rss_base + iro_rss_Sink, "rss_Sink", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL); + } + block = get_irg_start_block(irg); + _source = new_ir_node(NULL, irg, block, op_rss_Source, mode_ANY, 0, NULL); + _sink = new_ir_node(NULL, irg, block, op_rss_Sink, mode_ANY, 0, NULL); } static int cmp_int(const void *a, const void *b) { @@ -218,13 +296,6 @@ static int bsearch_for_index(int key, int *arr, size_t len, int force) { return -1; } -static void dump_nodeset(nodeset *ns, const char *prefix) { - ir_node *irn; - foreach_nodeset(ns, irn) { - ir_printf("%s%+F\n", prefix, irn); - } -} - static ir_node **build_sorted_array_from_list(plist_t *irn_list, struct obstack *obst) { plist_element_t *el; int i = 0; @@ -242,12 +313,36 @@ static ir_node **build_sorted_array_from_list(plist_t *irn_list, struct obstack return arr; } +/***************************************************** + * _ _ _ + * | | | | (_) + * __| | ___| |__ _ _ __ _ __ _ _ _ __ __ _ + * / _` |/ _ \ '_ \| | | |/ _` |/ _` | | '_ \ / _` | + * | (_| | __/ |_) | |_| | (_| | (_| | | | | | (_| | + * \__,_|\___|_.__/ \__,_|\__, |\__, |_|_| |_|\__, | + * __/ | __/ | __/ | + * |___/ |___/ |___/ + * + *****************************************************/ + +#ifdef DEBUG_libfirm +static void dump_nodeset(ir_nodeset_t *ns, const char *prefix) { + ir_nodeset_iterator_t iter; + ir_node *irn; + + ir_nodeset_iterator_init(&iter, ns); + while( (irn = ir_nodeset_iterator_next(&iter)) != NULL ) { + ir_fprintf(stderr, "%s%+F\n", prefix, irn); + } +} +#endif + static void build_file_name(rss_t *rss, const char *suffix, size_t suf_len, char *buf, size_t len) { const char *irg_name; memset(buf, 0, len); irg_name = get_entity_name(get_irg_entity(rss->irg)); - snprintf(buf, len - suf_len, "%s-%s-block-%d", + snprintf(buf, len - suf_len, "%s-%s-block-%ld", irg_name, arch_register_class_name(rss->cls), get_irn_node_nr(rss->block)); strcat(buf, suffix); } @@ -268,14 +363,15 @@ static void debug_vcg_dump_bipartite(rss_t *rss) { fprintf(f, "manhattan_edges: yes\n\n"); foreach_pset(rss->cbc_set, cbc) { + ir_nodeset_iterator_t iter; ir_node *n; rss_edge_t *ke; fprintf(f, "graph: { titel: \"cbc %d\" label: \"cbc %d\" status:clustered color:yellow\n", cbc->nr, cbc->nr); - foreach_nodeset(cbc->parents, n) { + foreach_ir_nodeset(&cbc->parents, n, iter) { ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n); } - foreach_nodeset(cbc->children, n) { + foreach_ir_nodeset(&cbc->children, n, iter) { ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n); } foreach_pset(cbc->kill_edges, ke) { @@ -290,10 +386,10 @@ static void debug_vcg_dump_bipartite(rss_t *rss) { /* Dump the computed killing function as vcg. */ static void debug_vcg_dump_kill(rss_t *rss) { - FILE *f; - char file_name[256]; - static const char suffix[] = "-RSS-KILL.vcg"; + FILE *f; + char file_name[256]; plist_element_t *el; + static const char suffix[] = "-RSS-KILL.vcg"; build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name)); f = fopen(file_name, "w"); @@ -303,11 +399,13 @@ static void debug_vcg_dump_kill(rss_t *rss) { fprintf(f, "layoutalgorithm: mindepth\n"); fprintf(f, "manhattan_edges: yes\n\n"); - /* first: reset dumped flag of all nodes */ - foreach_plist(rss->nodes, el) { - ir_node *irn = plist_element_get_value(el); - rss_irn_t *rirn = get_rss_irn(rss, irn); - rirn->dumped = 0; + { + /* reset dump_flag */ + ir_node *irn; + foreach_phase_irn(&rss->ph, irn) { + rss_irn_t *node = get_rss_irn(rss, irn); + node->dumped = 0; + } } /* dump all nodes and their killers */ @@ -335,13 +433,22 @@ static void debug_vcg_dump_kill(rss_t *rss) { } /* Dumps the potential killing DAG (PKG) as vcg. */ -static void debug_vcg_dump_pkg(rss_t *rss) { - FILE *f; - char file_name[256]; - static const char suffix[] = "-RSS-PKG.vcg"; - plist_element_t *el; +static void debug_vcg_dump_pkg(rss_t *rss, ir_nodeset_t *max_ac, int iteration) { + FILE *f; + char file_name[256]; + char *suffix = alloca(32); + static const char suffix1[] = "-RSS-PKG.vcg"; + static const char suffix2[] = "-RSS-PKG-MAXAC.vcg"; + plist_element_t *el; + + if (! max_ac) { + snprintf(suffix, 32, "%s", suffix1); + } + else { + snprintf(suffix, 32, "-%02d%s", iteration, suffix2); + } - build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name)); + build_file_name(rss, suffix, strlen(suffix) + 1, file_name, sizeof(file_name)); f = fopen(file_name, "w"); ir_fprintf(f, "graph: { title: \"potential killing DAG of %+F, block %d\"\n", rss->irg, get_irn_node_nr(rss->block)); @@ -349,20 +456,46 @@ static void debug_vcg_dump_pkg(rss_t *rss) { fprintf(f, "layoutalgorithm: mindepth\n"); fprintf(f, "manhattan_edges: yes\n\n"); + { + /* reset dump_flag */ + ir_node *irn; + foreach_phase_irn(&rss->ph, irn) { + rss_irn_t *node = get_rss_irn(rss, irn); + node->dumped = 0; + } + } + foreach_plist(rss->nodes, el) { ir_node *irn = plist_element_get_value(el); rss_irn_t *rirn = get_rss_irn(rss, irn); + char *c1 = ""; plist_element_t *k_el; - ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn); - rirn->dumped = 1; + /* dump selected saturating values in yellow */ + if (max_ac && ir_nodeset_contains(max_ac, irn)) + c1 = "color:yellow"; + + if (! rirn->dumped) { + if (rirn->chain) + ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F c%d\" %s }\n", get_irn_node_nr(irn), irn, rirn->chain->nr, c1); + else + ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(irn), irn, c1); + rirn->dumped = 1; + } foreach_plist(rirn->pkiller_list, k_el) { ir_node *pkiller = plist_element_get_value(k_el); rss_irn_t *pk_rirn = get_rss_irn(rss, pkiller); + char *c2 = ""; + + if (max_ac && ir_nodeset_contains(max_ac, pkiller)) + c2 = "color:yellow"; if (! pk_rirn->dumped) { - ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(pkiller), pkiller); + if (pk_rirn->chain) + ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F c%d\" %s }\n", get_irn_node_nr(pkiller), pkiller, pk_rirn->chain->nr, c2); + else + ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(pkiller), pkiller, c2); pk_rirn->dumped = 1; } ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n", @@ -378,6 +511,7 @@ static void debug_vcg_dump_dvg(rss_t *rss, dvg_t *dvg) { static const char suffix[] = "-RSS-DVG.vcg"; FILE *f; char file_name[256]; + ir_nodeset_iterator_t iter; ir_node *irn; rss_edge_t *edge; @@ -390,15 +524,12 @@ static void debug_vcg_dump_dvg(rss_t *rss, dvg_t *dvg) { fprintf(f, "manhattan_edges: yes\n\n"); /* dump all nodes */ - foreach_nodeset(dvg->nodes, irn) { + foreach_ir_nodeset(&dvg->nodes, irn, iter) { ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn); } /* dump all edges */ foreach_pset(dvg->edges, edge) { - rss_irn_t *src = get_rss_irn(rss, edge->src); - rss_irn_t *tgt = get_rss_irn(rss, edge->tgt); - ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n", get_irn_node_nr(edge->src), get_irn_node_nr(edge->tgt)); } @@ -407,11 +538,13 @@ static void debug_vcg_dump_dvg(rss_t *rss, dvg_t *dvg) { fclose(f); } +#if 0 /* Dumps the PKG(DVG). */ static void debug_vcg_dump_dvg_pkiller(rss_t *rss, dvg_t *dvg) { static const char suffix[] = "-RSS-DVG-PKG.vcg"; FILE *f; char file_name[256]; + ir_nodeset_iterator_t iter; ir_node *irn; build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name)); @@ -423,12 +556,12 @@ static void debug_vcg_dump_dvg_pkiller(rss_t *rss, dvg_t *dvg) { fprintf(f, "manhattan_edges: yes\n\n"); /* dump all nodes */ - foreach_nodeset(dvg->nodes, irn) { + foreach_ir_nodeset(&dvg->nodes, irn, iter) { ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn); } /* dump all edges */ - foreach_nodeset(dvg->nodes, irn) { + foreach_ir_nodeset(&dvg->nodes, irn, iter) { rss_irn_t *node = get_rss_irn(rss, irn); plist_element_t *el; @@ -441,40 +574,35 @@ static void debug_vcg_dump_dvg_pkiller(rss_t *rss, dvg_t *dvg) { fprintf(f, "}\n"); fclose(f); } +#endif /* if 0 */ /** * In case there is no rss information for irn, initialize it. */ -static void *init_rss_irn(phase_t *ph, ir_node *irn, void *old) { - rss_irn_t *res = phase_alloc(ph, sizeof(res[0])); - - res->descendant_list = plist_obstack_new(phase_obst(ph)); - res->descendants = NULL; - - res->consumer_list = plist_obstack_new(phase_obst(ph)); - res->consumer = NULL; +static void *init_rss_irn(ir_phase *ph, ir_node *irn, void *old) { + rss_irn_t *res = old ? old : phase_alloc(ph, sizeof(res[0])); - res->pkiller_list = plist_obstack_new(phase_obst(ph)); - res->pkillers = NULL; + res->descendant_list = plist_obstack_new(phase_obst(ph)); + res->descendants = NULL; - res->parent_list = plist_obstack_new(phase_obst(ph)); - res->parents = NULL; + res->consumer_list = plist_obstack_new(phase_obst(ph)); + res->consumer = NULL; - res->dvg_desc_list = plist_obstack_new(phase_obst(ph)); - res->dvg_desc = NULL; + res->pkiller_list = plist_obstack_new(phase_obst(ph)); - res->kill_value_list = plist_obstack_new(phase_obst(ph)); - res->dvg_user_list = plist_obstack_new(phase_obst(ph)); - res->dvg_pkiller_list = plist_obstack_new(phase_obst(ph)); + res->parent_list = plist_obstack_new(phase_obst(ph)); - res->killer = NULL; - res->irn = irn; - res->chain = NULL; + res->killer = NULL; + res->irn = irn; + res->chain = NULL; - res->live_out = 0; - res->visited = 0; - res->handled = 0; - res->dumped = 0; + res->kill_count = 0; + res->desc_walk = 0; + res->live_out = 0; + res->visited = 0; + res->handled = 0; + res->dumped = 0; + res->havecons = 0; return res; } @@ -482,31 +610,63 @@ static void *init_rss_irn(phase_t *ph, ir_node *irn, void *old) { /** * Collect all nodes data dependent on current node. */ -static void collect_descendants(rss_t *rss, rss_irn_t *rirn, ir_node *irn, int *got_sink) { +static void collect_descendants(rss_t *rss, rss_irn_t *rirn, ir_node *irn, int *got_sink, unsigned cur_desc_walk) { const ir_edge_t *edge; - ir_node *block = rss->block; + rss_irn_t *cur_node = get_rss_irn(rss, irn); + ir_node *block = rss->block; + ir_edge_kind_t ekind[2] = { EDGE_KIND_NORMAL, EDGE_KIND_DEP }; + int i; - foreach_out_edge(irn, edge) { - ir_node *user = get_edge_src_irn(edge); + if (cur_node->desc_walk >= cur_desc_walk) + return; + cur_node->desc_walk = cur_desc_walk; - /* skip ignore nodes as they do not really contribute to register presssure */ - if (arch_irn_is(rss->arch_env, user, ignore)) - continue; + /* Stop at Barriers */ + if (be_is_Barrier(irn)) + return; - /* check if user lives in block and is not a control flow node */ - if (get_nodes_block(user) == block && get_irn_mode(user) != mode_X) { - /* skip mode_T nodes */ - if (get_irn_mode(user) != mode_T && ! plist_has_value(rirn->descendant_list, user)) { - plist_insert_back(rirn->descendant_list, user); - DBG((rss->dbg, DEBUG_NODEINFO, "\t\tdescendant %+F\n", user)); + /* loop over normal and dependency edges */ + for (i = 0; i < 2; ++i) { + foreach_out_edge_kind(irn, edge, ekind[i]) { + ir_node *user = get_edge_src_irn(edge); + + /* skip ignore nodes as they do not really contribute to register pressure */ + if (arch_irn_is(rss->arch_env, user, ignore)) + continue; + + /* + (a) mode_X means Jump -> out_edge + (b) Phi as user of a normal node -> out-edge + */ + if (get_irn_mode(user) == mode_X || is_Phi(user)) { + if (! *got_sink) + goto force_sink; + else + continue; + } + + if (is_Proj(user)) { + + //if (arch_get_irn_reg_class(rss->arch_env, user, -1) == rss->cls) + collect_descendants(rss, rirn, user, got_sink, cur_desc_walk); + } + else { + /* check if user lives in block and is not a control flow node */ + if (get_nodes_block(user) == block) { + if (! plist_has_value(rirn->descendant_list, user)) { + plist_insert_back(rirn->descendant_list, user); + DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", user)); + } + collect_descendants(rss, rirn, user, got_sink, cur_desc_walk); + } + else if (! *got_sink) { +force_sink: + /* user lives out of block: add sink as descendant if not already done */ + plist_insert_back(rirn->descendant_list, _sink); + *got_sink = 1; + DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", _sink)); + } } - collect_descendants(rss, rirn, user, got_sink); - } - else if (! *got_sink) { - /* user lives out of block: add sink as descendant if not already done */ - plist_insert_back(rirn->descendant_list, _sink); - *got_sink = 1; - DBG((rss->dbg, DEBUG_NODEINFO, "\t\tdescendant %+F\n", _sink)); } } } @@ -514,133 +674,85 @@ static void collect_descendants(rss_t *rss, rss_irn_t *rirn, ir_node *irn, int * /** * Handles a single consumer. */ -static int collect_single_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *consumer, int got_sink) { +static void collect_single_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *consumer, int *got_sink) { ir_node *block = rss->block; - if (get_nodes_block(consumer) == block) { - /* the consumer of a mode_T node are it's projs */ - if (get_irn_mode(consumer) == mode_T) { - const ir_edge_t *cons_edge; - - DBG((rss->dbg, DEBUG_NODEINFO, "\t\tmode_T consumer %+F skipped\n", consumer)); - foreach_out_edge(consumer, cons_edge) { - ir_node *cons_proj = get_edge_src_irn(cons_edge); - - assert(get_nodes_block(cons_proj) == block && "Proj in wrong block!"); - - /* skip ignore nodes, as they do not really contribute to register pressure */ - if (arch_irn_is(rss->arch_env, cons_proj, ignore)) - continue; + assert(! is_Proj(consumer) && "Cannot handle Projs"); - plist_insert_back(rss_irn->consumer_list, cons_proj); - DBG((rss->dbg, DEBUG_NODEINFO, "\t\t\treal consumer %+F\n", cons_proj)); - } - } - else if (! arch_irn_is(rss->arch_env, consumer, ignore)) { + if (! is_Phi(consumer) && ! is_Block(consumer) && get_nodes_block(consumer) == block) { + if (! arch_irn_is(rss->arch_env, consumer, ignore) && ! plist_has_value(rss_irn->consumer_list, consumer)) { plist_insert_back(rss_irn->consumer_list, consumer); - DBG((rss->dbg, DEBUG_NODEINFO, "\t\tconsumer %+F\n", consumer)); + DBG((rss->dbg, LEVEL_2, "\t\tconsumer %+F\n", consumer)); } } else { rss_irn->live_out = 1; - DBG((rss->dbg, DEBUG_NODEINFO, "\t\tlive out %+F", consumer)); - if (! got_sink) { + DBG((rss->dbg, LEVEL_2, "\t\tlive out %+F", consumer)); + if (! *got_sink) { plist_insert_back(rss_irn->consumer_list, _sink); - got_sink = 1; - DB((rss->dbg, DEBUG_NODEINFO, ", %+F added instead", _sink)); + *got_sink = 1; + DB((rss->dbg, LEVEL_2, ", %+F added instead", _sink)); } - DB((rss->dbg, DEBUG_NODEINFO, "\n")); + DB((rss->dbg, LEVEL_2, "\n")); } - - return got_sink; } /** * Collect all nodes consuming the value(s) produced by current node. */ -static void collect_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *irn) { - const ir_edge_t *edge; - int got_sink = 0; - - foreach_out_edge(irn, edge) { - ir_node *consumer = get_edge_src_irn(edge); - got_sink = collect_single_consumer(rss, rss_irn, consumer, got_sink); - } -} - -#if 0 -/** - * We need to build the consumer and descendant list for _source. - */ -static void collect_node_info_source(rss_t *rss) { +static void collect_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *irn, int *got_sink) { const ir_edge_t *edge; - rss_irn_t *rirn = get_rss_irn(rss, _source); - ir_node *block = rss->block; + int i; + ir_edge_kind_t ekind[2] = { EDGE_KIND_NORMAL, EDGE_KIND_DEP }; + rss_irn_t *cur_node = get_rss_irn(rss, irn); - if (rirn->handled) + if (cur_node->havecons) return; + cur_node->havecons = 1; - foreach_out_edge(block, edge) { - ir_node *irn = get_edge_src_irn(edge); - int i; - - for (i = get_irn_arity(n) - 1; i >= 0; --i) { + for (i = 0; i < 2; ++i) { + foreach_out_edge_kind(irn, edge, ekind[i]) { + ir_node *consumer = get_edge_src_irn(edge); + if (is_Proj(consumer)) { + //if (arch_get_irn_reg_class(rss->arch_env, consumer, -1) == rss->cls) + collect_consumer(rss, rss_irn, consumer, got_sink); + } + else + collect_single_consumer(rss, rss_irn, consumer, got_sink); } } } -static void reset_node_info(rss_irn_t *rss_irn) { - /* Beware: array data resides on phase obstack, so it gets removed automatically */ - - plist_clear(rss_irn->consumer_list); - rss_irn->consumer = NULL; - - plist_clear(rss_irn->parent_list); - rss_irn->parents = NULL; - - plist_clear(rss_irn->descendant_list); - rss_irn->descendants = NULL; - - plist_clear(rss_irn->pkiller_list); - rss_irn->pkillers = NULL; - - plist_clear(rss_irn->kill_value_list); - - rss_irn->killer = NULL; - rss_irn->live_out = 0; - rss_irn->visited = 0; - rss_irn->handled = 0; -} -#endif - /** * Collects all consumer and descendant of a irn. */ static void collect_node_info(rss_t *rss, ir_node *irn) { - rss_irn_t *rss_irn = get_rss_irn(rss, irn); - int got_sink; + static unsigned cur_desc_walk = 0; + rss_irn_t *rss_irn = get_rss_irn(rss, irn); + int got_sink; - assert(get_irn_mode(irn) != mode_T && "Cannot handle mode_T nodes."); + assert(! is_Proj(irn) && "Cannot handle Projs."); /* check if node info is already available */ if (rss_irn->handled) return; + //phase_reinit_single_irn_data(&rss->ph, irn); - DBG((rss->dbg, DEBUG_NODEINFO, "\tcomputing consumers of %+F:\n", irn)); + DBG((rss->dbg, LEVEL_1, "\tcomputing consumers of %+F:\n", irn)); /* collect all consumer */ got_sink = 0; - collect_consumer(rss, rss_irn, irn); + collect_consumer(rss, rss_irn, irn, &got_sink); /* build sorted consumer array */ rss_irn->consumer = build_sorted_array_from_list(rss_irn->consumer_list, phase_obst(&rss->ph)); - DBG((rss->dbg, DEBUG_NODEINFO, "\tcompute descendants of %+F:\n", irn)); + DBG((rss->dbg, LEVEL_1, "\tcompute descendants of %+F:\n", irn)); /* collect descendants */ got_sink = 0; - collect_descendants(rss, rss_irn, irn, &got_sink); + collect_descendants(rss, rss_irn, irn, &got_sink, ++cur_desc_walk); /* build sorted descendant array */ rss_irn->descendants = build_sorted_array_from_list(rss_irn->descendant_list, phase_obst(&rss->ph)); @@ -661,6 +773,7 @@ static int is_potential_killer(rss_t *rss, rss_irn_t *v, rss_irn_t *u) { plist_t *list; ir_node **arr; plist_element_t *el; + (void) rss; assert(is_Sink(v->irn) || ((plist_count(v->descendant_list) > 0 && v->descendants) || 1)); assert(is_Sink(u->irn) || ((plist_count(u->consumer_list) > 0 && u->consumer) || 1)); @@ -687,6 +800,37 @@ static int is_potential_killer(rss_t *rss, rss_irn_t *v, rss_irn_t *u) { return 1; } +/** + * Update descendants, consumer and pkiller list for the given irn. + */ +static void update_node_info(rss_t *rss, ir_node *irn, ir_node *pk_irn) { + rss_irn_t *node = get_rss_irn(rss, irn); + rss_irn_t *pkiller = get_rss_irn(rss, pk_irn); + + /* add consumer and rebuild the consumer array */ + if (! plist_has_value(node->consumer_list, pk_irn)) { + plist_insert_back(node->consumer_list, pk_irn); + node->consumer = build_sorted_array_from_list(node->consumer_list, phase_obst(&rss->ph)); + } + + /* add pk_irn as descendant, add it's descendants to irn's and rebuild array */ + if (! plist_has_value(node->descendant_list, pk_irn)) { + plist_element_t *el; + + plist_insert_back(node->descendant_list, pk_irn); + + foreach_plist(pkiller->descendant_list, el) { + ir_node *desc = plist_element_get_value(el); + + if (! plist_has_value(node->descendant_list, desc)) + plist_insert_back(node->descendant_list, desc); + } + + node->descendants = build_sorted_array_from_list(node->descendant_list, phase_obst(&rss->ph)); + } + +} + /** * Compute the potential killing set PK. */ @@ -697,7 +841,7 @@ static void compute_pkill_set(rss_t *rss) { ir_node *u_irn = plist_element_get_value(u_el); rss_irn_t *u = get_rss_irn(rss, u_irn); - DBG((rss->dbg, DEBUG_PKILL, "\tcomputing potential killers of %+F:\n", u_irn)); + DBG((rss->dbg, LEVEL_1, "\tcomputing potential killers of %+F:\n", u_irn)); /* check each consumer if it is a potential killer */ foreach_plist(u->consumer_list, v_el) { @@ -708,19 +852,16 @@ static void compute_pkill_set(rss_t *rss) { if (is_potential_killer(rss, v, u)) { if (! plist_has_value(u->pkiller_list, v_irn)) plist_insert_back(u->pkiller_list, v_irn); - if (! plist_has_value(v->kill_value_list, u_irn)) - plist_insert_back(v->kill_value_list, u_irn); - DBG((rss->dbg, DEBUG_PKILL, "\t\tpotential killer %+F\n", v_irn)); + v->kill_count++; + DBG((rss->dbg, LEVEL_2, "\t\tpotential killer %+F\n", v_irn)); } } u->killer = _sink; } - DEBUG_ONLY( - if (firm_dbg_get_mask(rss->dbg) & DEBUG_PKILL) - debug_vcg_dump_pkg(rss); - ) + if (rss->opts->dump_flags & RSS_DUMP_PKG) + debug_vcg_dump_pkg(rss, NULL, 0); } /** @@ -733,6 +874,8 @@ static void build_kill_edges(rss_t *rss, pset *epk) { ir_node *irn = plist_element_get_value(el); rss_irn_t *rirn = get_rss_irn(rss, irn); + DBG((rss->dbg, LEVEL_3, "\t\tbuilding kill edges for %+F:\n", irn)); + foreach_plist(rirn->pkiller_list, k_el) { ir_node *pkiller = plist_element_get_value(k_el); rss_edge_t *ke = obstack_alloc(phase_obst(&rss->ph), sizeof(*ke)); @@ -741,29 +884,34 @@ static void build_kill_edges(rss_t *rss, pset *epk) { ke->tgt = pkiller; ke->next = NULL; + DBG((rss->dbg, LEVEL_3, "\t\t\ttarget %+F\n", pkiller)); + pset_insert(epk, ke, HASH_RSS_EDGE(ke)); } } } +#ifdef DEBUG_libfirm /* print the given cbc for debugging purpose */ static void debug_print_cbc(firm_dbg_module_t *mod, cbc_t *cbc) { + ir_nodeset_iterator_t iter; ir_node *n; rss_edge_t *ke; - DBG((mod, DEBUG_BIPARTITE, "\t\tS = set of parents:\n")); - foreach_nodeset(cbc->parents, n) { - DBG((mod, DEBUG_BIPARTITE, "\t\t\t%+F\n", n)); + DBG((mod, LEVEL_3, "\t\tS = set of parents:\n")); + foreach_ir_nodeset(&cbc->parents, n, iter) { + DBG((mod, LEVEL_3, "\t\t\t%+F\n", n)); } - DBG((mod, DEBUG_BIPARTITE, "\t\tT = set of children:\n")); - foreach_nodeset(cbc->children, n) { - DBG((mod, DEBUG_BIPARTITE, "\t\t\t%+F\n", n)); + DBG((mod, LEVEL_3, "\t\tT = set of children:\n")); + foreach_ir_nodeset(&cbc->children, n, iter) { + DBG((mod, LEVEL_3, "\t\t\t%+F\n", n)); } - DBG((mod, DEBUG_BIPARTITE, "\t\tE = Edges from producers to consumers\n")); + DBG((mod, LEVEL_3, "\t\tE = Edges from producers to consumers\n")); foreach_pset(cbc->kill_edges, ke) { - DBG((mod, DEBUG_BIPARTITE, "\t\t\t%+F -> %+F\n", ke->src, ke->tgt)); + DBG((mod, LEVEL_3, "\t\t\t%+F -> %+F\n", ke->src, ke->tgt)); } } +#endif /** * Construct the bipartite decomposition. @@ -776,7 +924,7 @@ static void compute_bipartite_decomposition(rss_t *rss) { plist_element_t *el; - DBG((rss->dbg, DEBUG_BIPARTITE, "\tcomputing bipartite decomposition:\n")); + DBG((rss->dbg, LEVEL_1, "\tcomputing bipartite decomposition:\n")); build_kill_edges(rss, epk); @@ -785,25 +933,27 @@ static void compute_bipartite_decomposition(rss_t *rss) { rss_irn_t *u = get_rss_irn(rss, u_irn); int p_change = 1; int c_change = 1; + int vrfy_ok = 1; cbc_t *cbc; plist_element_t *el2; rss_edge_t *k_edge; rss_edge_t *kedge_root = NULL; ir_node *t_irn, *s_irn; + ir_nodeset_iterator_t iter; if (u->visited || u_irn == _sink) continue; - DBG((rss->dbg, DEBUG_BIPARTITE, "\t\t%+F choosen:\n", u_irn)); + DBG((rss->dbg, LEVEL_2, "\t\t%+F choosen:\n", u_irn)); cbc = obstack_alloc(phase_obst(&rss->ph), sizeof(*cbc)); cbc->nr = cur_num++; /* initialize S_cb */ - cbc->parents = new_nodeset(5); - nodeset_insert(cbc->parents, u_irn); - DBG((rss->dbg, DEBUG_BIPARTITE, "\t\t\t%+F added to parents (init)\n", u_irn)); + ir_nodeset_init_size(&cbc->parents, 5); + ir_nodeset_insert(&cbc->parents, u_irn); + DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (init)\n", u_irn)); /* E_cb = empty */ cbc->kill_edges = new_pset(cmp_rss_edges, 5); @@ -811,10 +961,10 @@ static void compute_bipartite_decomposition(rss_t *rss) { /* each parent gets killed by at least one value from children */ /* T_cb = PK_successors(u) */ - cbc->children = new_nodeset(5); + ir_nodeset_init_size(&cbc->children, 5); foreach_plist(u->pkiller_list, el2) { - nodeset_insert(cbc->children, plist_element_get_value(el2)); - DBG((rss->dbg, DEBUG_BIPARTITE, "\t\t\t%+F added to children (init)\n", plist_element_get_value(el2))); + ir_nodeset_insert(&cbc->children, plist_element_get_value(el2)); + DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (init)\n", plist_element_get_value(el2))); } /* @@ -823,55 +973,53 @@ static void compute_bipartite_decomposition(rss_t *rss) { until the sets don't change any more */ while (p_change || c_change) { + ir_nodeset_iterator_t iter; p_change = c_change = 0; /* accumulate parents */ - foreach_nodeset(cbc->children, t_irn) { - rss_irn_t *t = get_rss_irn(rss, t_irn); - plist_element_t *kvl_el; + foreach_ir_nodeset(&cbc->children, t_irn, iter) { + foreach_pset(epk, k_edge) { + ir_node *val = k_edge->src; - foreach_plist(t->kill_value_list, kvl_el) { - ir_node *val = plist_element_get_value(kvl_el); - - if (! nodeset_find(cbc->parents, val)) { - nodeset_insert(cbc->parents, val); + if (k_edge->tgt == t_irn && ! ir_nodeset_contains(&cbc->parents, val)) { + ir_nodeset_insert(&cbc->parents, val); p_change = 1; - DBG((rss->dbg, DEBUG_BIPARTITE, "\t\t\t%+F added to parents\n", val)); + DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (killed by %+F)\n", val, t_irn)); } } } /* accumulate children */ - foreach_nodeset(cbc->parents, s_irn) { - rss_irn_t *s = get_rss_irn(rss, s_irn); - plist_element_t *pkl_el; - - foreach_plist(s->pkiller_list, pkl_el) { - ir_node *val = plist_element_get_value(pkl_el); + foreach_ir_nodeset(&cbc->parents, s_irn, iter) { + foreach_pset(epk, k_edge) { + ir_node *val = k_edge->tgt; - if (! nodeset_find(cbc->children, val)) { - nodeset_insert(cbc->children, val); + if (k_edge->src == s_irn && ! ir_nodeset_contains(&cbc->children, val)) { + ir_nodeset_insert(&cbc->children, val); c_change = 1; - DBG((rss->dbg, DEBUG_BIPARTITE, "\t\t\t%+F added to children\n", val)); + DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (kills %+F)\n", val, s_irn)); } } } } /* mark all parent values as visited */ - foreach_nodeset(cbc->parents, s_irn) { + foreach_ir_nodeset(&cbc->parents, s_irn, iter) { rss_irn_t *s = get_rss_irn(rss, s_irn); s->visited = 1; /* assure bipartite property */ - if (nodeset_find(cbc->children, s_irn)) { - nodeset_remove(cbc->children, s_irn); - DBG((rss->dbg, DEBUG_BIPARTITE, "\t\t\t%+F removed from to children\n", s_irn)); +#if 0 + if (ir_nodeset_contains(&cbc->children, s_irn)) { + ir_nodeset_remove(&cbc->children, s_irn); + DBG((rss->dbg, LEVEL_3, "\t\t\t%+F removed from to children\n", s_irn)); } +#endif } /* update edges */ foreach_pset(epk, k_edge) { - if (nodeset_find(cbc->parents, k_edge->src) && nodeset_find(cbc->children, k_edge->tgt)) { + if (ir_nodeset_contains(&cbc->parents, k_edge->src) && + ir_nodeset_contains(&cbc->children, k_edge->tgt)) { pset_insert(cbc->kill_edges, k_edge, HASH_RSS_EDGE(k_edge)); /* Link all k_edges which are about to be removed together. @@ -887,13 +1035,38 @@ static void compute_bipartite_decomposition(rss_t *rss) { pset_remove(epk, kedge_root, HASH_RSS_EDGE(kedge_root)); } + /* verify the cbc */ + foreach_ir_nodeset(&cbc->parents, s_irn, iter) { + int is_killed = 0; + + foreach_pset(cbc->kill_edges, k_edge) { + if (k_edge->src == s_irn) { + is_killed = 1; + pset_break(cbc->kill_edges); + break; + } + } + + if (! is_killed) { + vrfy_ok = 0; + ir_fprintf(stderr, "Warning: parent %+F is not killed in current cbc\n", s_irn); + } + } + assert(vrfy_ok && "Verification of CBC failed"); + /* add the connected bipartite component */ - pset_insert(rss->cbc_set, cbc, (unsigned)cbc->nr); - DBG((rss->dbg, DEBUG_BIPARTITE, "\tbipartite component %d inserted:\n", cbc->nr)); - DEBUG_ONLY(debug_print_cbc(rss->dbg, cbc)); + if (ir_nodeset_size(&cbc->parents) > 0 && + ir_nodeset_size(&cbc->children) > 0 && + pset_count(cbc->kill_edges) > 0) + pset_insert(rss->cbc_set, cbc, (unsigned)cbc->nr); + DBG((rss->dbg, LEVEL_2, "\tbipartite component %d inserted:\n", cbc->nr)); + DEBUG_ONLY( + if (firm_dbg_get_mask(rss->dbg) & LEVEL_2) + debug_print_cbc(rss->dbg, cbc); + ); } - if (firm_dbg_get_mask(rss->dbg) & DEBUG_BIPARTITE) + if (rss->opts->dump_flags & RSS_DUMP_CBC) debug_vcg_dump_bipartite(rss); del_pset(epk); @@ -902,13 +1075,14 @@ static void compute_bipartite_decomposition(rss_t *rss) { /** * Select the child with the maximum cost. */ -static child_t *select_child_max_cost(rss_t *rss, nodeset *x, nodeset *y, child_t *t, cbc_t *cbc) { +static child_t *select_child_max_cost(rss_t *rss, ir_nodeset_t *x, ir_nodeset_t *y, child_t *t, cbc_t *cbc) { ir_node *child; + ir_nodeset_iterator_t iter; float max_cost = -1.0f; - DBG((rss->dbg, DEBUG_SKS, "\t\tcomputing children costs:\n")); + DBG((rss->dbg, LEVEL_2, "\t\tcomputing children costs:\n")); - foreach_nodeset(cbc->children, child) { + foreach_ir_nodeset(&cbc->children, child, iter) { rss_irn_t *r_child = get_rss_irn(rss, child); int num_unkilled_parents = 0; int num_descendants; @@ -917,18 +1091,18 @@ static child_t *select_child_max_cost(rss_t *rss, nodeset *x, nodeset *y, child_ /* get the number of unkilled parents */ foreach_pset(cbc->kill_edges, k_edge) { - if (k_edge->tgt == child && nodeset_find(x, k_edge->src)) + if (k_edge->tgt == child && ir_nodeset_contains(x, k_edge->src)) ++num_unkilled_parents; } cost = (float)num_unkilled_parents; - num_descendants = plist_count(r_child->descendant_list) + nodeset_count(y); + num_descendants = plist_count(r_child->descendant_list) + ir_nodeset_size(y); if (num_descendants > 0) cost /= (float)num_descendants; - DBG((rss->dbg, DEBUG_SKS, "\t\t\t%+F, #desc %d, cost %.3f\n", child, num_descendants, cost)); + DBG((rss->dbg, LEVEL_3, "\t\t\t%+F, #desc %d, cost %.3f\n", child, num_descendants, cost)); if (cost > max_cost) { t->irn = child; @@ -943,30 +1117,30 @@ static child_t *select_child_max_cost(rss_t *rss, nodeset *x, nodeset *y, child_ /** * Remove all parents from x which are killed by t_irn. */ -static void remove_covered_parents(rss_t *rss, nodeset *x, ir_node *t_irn, cbc_t *cbc) { +static void remove_covered_parents(rss_t *rss, ir_nodeset_t *x, ir_node *t_irn, cbc_t *cbc) { rss_irn_t *t = get_rss_irn(rss, t_irn); rss_edge_t *k_edge; - DBG((rss->dbg, DEBUG_SKS, "\t\tremoving parents covered by %+F:\n", t_irn)); + DBG((rss->dbg, LEVEL_2, "\t\tremoving parents covered by %+F:\n", t_irn)); foreach_pset(cbc->kill_edges, k_edge) { - if (k_edge->tgt == t_irn && nodeset_find(x, k_edge->src)) { - nodeset_remove(x, k_edge->src); + if (k_edge->tgt == t_irn && ir_nodeset_contains(x, k_edge->src)) { + ir_nodeset_remove(x, k_edge->src); plist_insert_back(t->parent_list, k_edge->src); - DBG((rss->dbg, DEBUG_SKS, "\t\t\t%+F\n", k_edge->src)); + DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", k_edge->src)); } } } -static void update_cumulated_descendent_values(rss_t *rss, nodeset *y, ir_node *t_irn) { +static void update_cumulated_descendent_values(rss_t *rss, ir_nodeset_t *y, ir_node *t_irn) { rss_irn_t *t = get_rss_irn(rss, t_irn); plist_element_t *el; - DBG((rss->dbg, DEBUG_SKS, "\t\tupdating cumulated descendant value of %+F:\n", t_irn)); + DBG((rss->dbg, LEVEL_2, "\t\tupdating cumulated descendant value of %+F:\n", t_irn)); foreach_plist(t->descendant_list, el) { - nodeset_insert(y, plist_element_get_value(el)); - DBG((rss->dbg, DEBUG_SKS, "\t\t\t%+F\n", plist_element_get_value(el))); + ir_nodeset_insert(y, plist_element_get_value(el)); + DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", plist_element_get_value(el))); } } @@ -985,39 +1159,43 @@ static void compute_killing_function(rss_t *rss) { /* for all bipartite components do: */ foreach_pset(rss->cbc_set, cbc) { ir_node *p; - nodeset *x = new_nodeset(10); - nodeset *y = new_nodeset(10); + ir_nodeset_t x; + ir_nodeset_t y; + ir_nodeset_iterator_t iter; child_t **sks = NEW_ARR_F(child_t *, 20); int cur_len = 0; int cur_size = 20; int i; - DBG((rss->dbg, DEBUG_SKS, "\tcomputing SKS for cbc %d:\n", cbc->nr)); - DBG((rss->dbg, DEBUG_SKS, "\t\tinitializing parents X:\n")); + ir_nodeset_init_size(&x, 10); + ir_nodeset_init_size(&y, 10); + + DBG((rss->dbg, LEVEL_1, "\tcomputing SKS for cbc %d:\n", cbc->nr)); + DBG((rss->dbg, LEVEL_2, "\t\tinitializing parents X:\n")); /* X = S_cb (all parents are initially uncovered) */ - foreach_nodeset(cbc->parents, p) { - nodeset_insert(x, p); - DBG((rss->dbg, DEBUG_SKS, "\t\t\t%+F\n", p)); + foreach_ir_nodeset(&cbc->parents, p, iter) { + ir_nodeset_insert(&x, p); + DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", p)); } /* while X not empty */ - while (nodeset_count(x) > 0) { + while (ir_nodeset_size(&x) > 0) { child_t *t = obstack_alloc(&obst, sizeof(*t)); memset(t, 0, sizeof(*t)); - t = select_child_max_cost(rss, x, y, t, cbc); + t = select_child_max_cost(rss, &x, &y, t, cbc); if (cur_len >= cur_size) { ARR_EXTO(child_t *, sks, cur_size * 2); cur_size *= 2; } - DBG((rss->dbg, DEBUG_SKS, "\t\tinsert child %+F (%.3f) into SKS at pos %d\n", t->irn, t->cost, cur_len)); + DBG((rss->dbg, LEVEL_2, "\t\tinsert child %+F (%.3f) into SKS at pos %d\n", t->irn, t->cost, cur_len)); sks[cur_len++] = t; - remove_covered_parents(rss, x, t->irn, cbc); - update_cumulated_descendent_values(rss, y, t->irn); + remove_covered_parents(rss, &x, t->irn, cbc); + update_cumulated_descendent_values(rss, &y, t->irn); } ARR_SHRINKLEN(sks, cur_len); @@ -1025,7 +1203,7 @@ static void compute_killing_function(rss_t *rss) { /* sort SKS in increasing cost order */ qsort(sks, cur_len, sizeof(sks[0]), cmp_child_costs); - DBG((rss->dbg, DEBUG_SKS, "\tprocessing SKS for cbc %d:\n", cbc->nr)); + DBG((rss->dbg, LEVEL_2, "\tprocessing SKS for cbc %d:\n", cbc->nr)); /* build killing function */ for (i = cur_len - 1; i >= 0; --i) { /* loop over sks in decreasing cost order */ @@ -1033,7 +1211,7 @@ static void compute_killing_function(rss_t *rss) { rss_irn_t *rt = get_rss_irn(rss, t->irn); plist_element_t *p_el; - DBG((rss->dbg, DEBUG_SKS, "\t\t\tkiller %+F (%.3f):\n", t->irn, t->cost)); + DBG((rss->dbg, LEVEL_3, "\t\t\tkiller %+F (%.3f):\n", t->irn, t->cost)); /* kill all unkilled parents of t */ foreach_plist(rt->parent_list, p_el) { @@ -1042,70 +1220,86 @@ static void compute_killing_function(rss_t *rss) { if (is_Sink(rpar->killer)) { rpar->killer = t->irn; - DBG((rss->dbg, DEBUG_SKS, "\t\tkill %+F\n", rpar->irn)); + DBG((rss->dbg, LEVEL_2, "\t\tkill %+F\n", rpar->irn)); } else { - DBG((rss->dbg, DEBUG_SKS, "\t\t\tkeeping %+F as killer for %+F\n", rpar->killer, rpar->irn)); + DBG((rss->dbg, LEVEL_3, "\t\t\tkeeping %+F as killer for %+F\n", rpar->killer, rpar->irn)); } } } - del_nodeset(x); - del_nodeset(y); + ir_nodeset_destroy(&x); + ir_nodeset_destroy(&y); DEL_ARR_F(sks); } - DEBUG_ONLY( - if (firm_dbg_get_mask(rss->dbg) & DEBUG_SKS) - debug_vcg_dump_kill(rss); - ); + if (rss->opts->dump_flags & RSS_DUMP_KILL) + debug_vcg_dump_kill(rss); del_pset(rss->cbc_set); obstack_free(&obst, NULL); } +/** + * Adds the edge src -> tgt to the dvg. Checks if reverse edge is already there (asserts). + */ +static INLINE void add_dvg_edge(rss_t *rss, dvg_t *dvg, ir_node *src, ir_node *tgt, int have_source) { + rss_edge_t *dvg_edge; + rss_edge_t key; + + if (! have_source) + ir_nodeset_insert(&dvg->nodes, src); + else + assert(ir_nodeset_contains(&dvg->nodes, src) && "Wrong assumption"); + + ir_nodeset_insert(&dvg->nodes, tgt); + + key.src = tgt; + key.tgt = src; + assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!"); + + key.src = src; + key.tgt = tgt; + if (NULL != pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) { + /* add the edge to the DVG */ + dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge)); + + dvg_edge->src = src; + dvg_edge->tgt = tgt; + dvg_edge->next = NULL; + + DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", src, tgt)); + pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge)); + } +} + /** * Computes the disjoint value DAG (DVG). * BEWARE: It is not made explicitly clear in the Touati paper, * but the DVG is meant to be build from the KILLING DAG */ static void compute_dvg(rss_t *rss, dvg_t *dvg) { - plist_element_t *el, *el2; + plist_element_t *el; - DBG((rss->dbg, DEBUG_DVG, "\tcomputing DVG:\n")); + DBG((rss->dbg, LEVEL_1, "\tcomputing DVG:\n")); foreach_plist(rss->nodes, el) { - ir_node *u_irn = plist_element_get_value(el); - rss_irn_t *u = get_rss_irn(rss, u_irn); - ir_node *old_killer = NULL; - ir_node *cur_killer = u->killer; - - nodeset_insert(dvg->nodes, u_irn); - - /* We add an edge to every killer, from where we could be reached. */ - while (cur_killer != old_killer) { /* sink kills itself */ - rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge)); - rss_irn_t *c_killer = get_rss_irn(rss, cur_killer); - rss_edge_t key; + ir_node *u_irn = plist_element_get_value(el); + rss_irn_t *u = get_rss_irn(rss, u_irn); + rss_irn_t *u_killer = get_rss_irn(rss, u->killer); + int i; - nodeset_insert(dvg->nodes, cur_killer); + /* TODO: omit nodes only having sink as pkiller and killing no one */ - /* add an edge to our killer */ - dvg_edge->src = u_irn; - dvg_edge->tgt = cur_killer; - dvg_edge->next = NULL; + /* add an edge to killer */ + add_dvg_edge(rss, dvg, u_irn, u->killer, 0); - key.src = cur_killer; - key.tgt = u_irn; - assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!"); - - /* add the edge to the DVG */ - DBG((rss->dbg, DEBUG_DVG, "\t\tadd edge %+F -> %+F\n", u_irn, cur_killer)); - pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge)); + if (is_Sink(u->killer)) + continue; - /* descent to the next killer */ - old_killer = cur_killer; - cur_killer = c_killer->killer; + /* We add an edge to every killer, from where we could be reached. */ + for (i = ARR_LEN_SAFE(u_killer->descendants) - 1; i >= 0; --i) { + add_dvg_edge(rss, dvg, u_irn, u_killer->descendants[i], 1); } #if 0 @@ -1121,7 +1315,7 @@ static void compute_dvg(rss_t *rss, dvg_t *dvg) { rss_edge_t key; /* insert the user into the DVG and append it to the user list of u */ - nodeset_insert(dvg->nodes, v_irn); + ir_nodeset_insert(&dvg->nodes, v_irn); if (! plist_has_value(u->dvg_user_list, v_irn)) plist_insert_back(u->dvg_user_list, v_irn); @@ -1135,19 +1329,55 @@ static void compute_dvg(rss_t *rss, dvg_t *dvg) { assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!"); /* add the edge to the DVG */ - DBG((rss->dbg, DEBUG_DVG, "\t\tadd edge %+F -> %+F\n", u_irn, v_irn)); + DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", u_irn, v_irn)); pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge)); } } #endif /* if 0 */ } - DEBUG_ONLY( - if (firm_dbg_get_mask(rss->dbg) & DEBUG_DVG) - debug_vcg_dump_dvg(rss, dvg); - ); + if (rss->opts->dump_flags & RSS_DUMP_DVG) + debug_vcg_dump_dvg(rss, dvg); } +/** + * Updates the dvg structure when a serialization edge from src -> tgt is added. + */ +static void update_dvg(rss_t *rss, dvg_t *dvg, rss_irn_t *src, rss_irn_t *tgt) { + int i, j, idx; + rss_edge_t *edge; + rss_edge_t **arr = alloca(pset_count(dvg->edges) * sizeof(arr[0])); + + /* + Add an edge from serialization target to serialization src: + src cannot be alive with target + */ + add_dvg_edge(rss, dvg, tgt->irn, src->irn, 0); + + /* Add edges to src's descendants as well, they also getting serialized. */ + for (i = ARR_LEN_SAFE(src->descendants) - 1; i >= 0; --i) { + add_dvg_edge(rss, dvg, tgt->irn, src->descendants[i], 1); + } + + /* We also have to add edges from targets predecessors in dvg */ + idx = 0; + /* We cannot insert elements into set over which we iterate ... */ + foreach_pset(dvg->edges, edge) { + if (edge->tgt == tgt->irn) { + arr[idx++] = edge; + } + } + + for (i = 0; i < idx; ++i) { + edge = arr[i]; + add_dvg_edge(rss, dvg, edge->src, src->irn, 1); + for (j = ARR_LEN_SAFE(src->descendants) - 1; j >= 0; --j) { + add_dvg_edge(rss, dvg, edge->src, src->descendants[j], 1); + } + } +} + +#if 0 /** * Accumulate all descendants for root into list. */ @@ -1177,9 +1407,10 @@ static void accumulate_dvg_descendant_values(rss_t *rss, rss_irn_t *root, plist_ * Needs the descendant list for all user as sorted array. */ static void build_dvg_pkiller_list(rss_t *rss, dvg_t *dvg) { + ir_nodeset_iterator_t iter; ir_node *irn; - foreach_nodeset(dvg->nodes, irn) { + foreach_nodeset(&dvg->nodes, irn, iter) { rss_irn_t *node = get_rss_irn(rss, irn); plist_element_t *el, *el2; @@ -1213,20 +1444,23 @@ static void build_dvg_pkiller_list(rss_t *rss, dvg_t *dvg) { ); } +#endif /* if 0 */ + /** * Compute the maximal antichain of the current DVG. * This is a reimplementation of the MAXIMAL_ANTI_CHAIN function * from the DDG library 1.1 (DAG.cpp). */ -static nodeset *compute_maximal_antichain(rss_t *rss, dvg_t *dvg) { - int n = nodeset_count(dvg->nodes); +static ir_nodeset_t *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration) { + int n = ir_nodeset_size(&dvg->nodes); int *assignment = alloca(n * sizeof(assignment[0])); int *assignment_rev = alloca(n * sizeof(assignment_rev[0])); int *idx_map = alloca(n * sizeof(idx_map[0])); hungarian_problem_t *bp; - nodeset *values, *temp; + ir_nodeset_t *values, *temp; + ir_nodeset_iterator_t iter; ir_node *u_irn; - int i, j, cost, cur_chain; + int i, j, cost, cur_chain, res; rss_edge_t *dvg_edge; #define MAP_IDX(irn) bsearch_for_index(get_irn_idx(irn), idx_map, n, 1) @@ -1244,7 +1478,7 @@ static nodeset *compute_maximal_antichain(rss_t *rss, dvg_t *dvg) { and build a sorted index map for their irn indices. */ i = 0; - foreach_nodeset(dvg->nodes, u_irn) { + foreach_ir_nodeset(&dvg->nodes, u_irn, iter) { idx_map[i++] = get_irn_idx(u_irn); } qsort(idx_map, n, sizeof(idx_map[0]), cmp_int); @@ -1255,7 +1489,7 @@ static nodeset *compute_maximal_antichain(rss_t *rss, dvg_t *dvg) { /* add the entry to the bipartite data structure */ hungarian_add(bp, idx_u, idx_v, 1); - DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tadd %d (%+F) -> %d (%+F)\n", + DBG((rss->dbg, LEVEL_3, "\t\t\tadd %d (%+F) -> %d (%+F)\n", idx_u, dvg_edge->src, idx_v, dvg_edge->tgt)); } #if 0 @@ -1264,7 +1498,7 @@ static nodeset *compute_maximal_antichain(rss_t *rss, dvg_t *dvg) { path in the DVG from u to v, ie. connect all descendants(v) to v. desc_dvg(v) = accumulated descendants of all z in dvg_user_list(v) */ - foreach_nodeset(dvg->nodes, u_irn) { + foreach_ir_nodeset(&dvg->nodes, u_irn, iter) { rss_irn_t *u = get_rss_irn(rss, u_irn); int idx_u_irn = MAP_IDX(u_irn); @@ -1284,7 +1518,7 @@ static nodeset *compute_maximal_antichain(rss_t *rss, dvg_t *dvg) { DBG((rss->dbg, DEBUG_MAX_AC, "\t\tadding bipartite entries of %+F:\n", u_irn)); /* add the bipartite entries for all descendants */ - for (i = ARR_LEN(u->dvg_desc) - 1; i >= 0; --i) { + for (i = ARR_LEN_SAFE(u->dvg_desc) - 1; i >= 0; --i) { rss_irn_t *desc = get_rss_irn(rss, u->dvg_desc[i]); int idx_desc = MAP_IDX(u->dvg_desc[i]); @@ -1301,17 +1535,19 @@ static nodeset *compute_maximal_antichain(rss_t *rss, dvg_t *dvg) { /* We want maximum cardinality matching */ hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL); +#if 0 DBG((rss->dbg, DEBUG_DVG, "\t\tcomputing DVG pkiller:\n")); /* beware: the following function needs the dvg_desc array */ build_dvg_pkiller_list(rss, dvg); +#endif - DBG((rss->dbg, DEBUG_MAX_AC, "\t\tcomputing bipartite matching\n")); + DBG((rss->dbg, LEVEL_2, "\t\tcomputing bipartite matching\n")); /* The maximum cardinality bipartite matching gives us the minimal chain partition, which corresponds to the maximum anti chains. */ - cost = hungarian_solve(bp, assignment); - assert(cost >= 0 && "Bipartite matching failed!"); + res = hungarian_solve(bp, assignment, &cost, 1); + assert(res == 0 && "Bipartite matching failed!"); hungarian_free(bp); memset(assignment_rev, -1, n * sizeof(assignment_rev[0])); @@ -1324,14 +1560,14 @@ static nodeset *compute_maximal_antichain(rss_t *rss, dvg_t *dvg) { } } - DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tgot assignment with cost %d\n", cost)); - DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tassignment --- reverse assignment\n", cost)); + DBG((rss->dbg, LEVEL_2, "\t\tgot assignment with cost %d\n", cost)); + DBG((rss->dbg, LEVEL_3, "\t\t\tassignment --- reverse assignment\n", cost)); for (i = 0; i < n; ++i) { - DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\t%3d -> %3d %3d -> %3d\n", i, assignment[i], i, assignment_rev[i])); + DBG((rss->dbg, LEVEL_3, "\t\t\t%3d -> %3d %3d -> %3d\n", i, assignment[i], i, assignment_rev[i])); } - - values = new_nodeset(10); + values = xmalloc(sizeof(values[0])); + ir_nodeset_init_size(values, 10); cur_chain = 0; /* Construction of the minimal chain partition */ for (j = 0; j < n; ++j) { @@ -1344,7 +1580,7 @@ static nodeset *compute_maximal_antichain(rss_t *rss, dvg_t *dvg) { int source; /* there was no source for j -> we have a source of a new chain */ - nodeset_insert(values, xj_irn); + ir_nodeset_insert(values, xj_irn); c->elements = plist_obstack_new(phase_obst(&rss->ph)); c->nr = cur_chain++; @@ -1352,8 +1588,8 @@ static nodeset *compute_maximal_antichain(rss_t *rss, dvg_t *dvg) { xj_rss->chain = c; - DBG((rss->dbg, DEBUG_MAX_AC, "\t\tstarting chain %d:\n", c->nr)); - DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\t%+F (%d)", xj_irn, j)); + DBG((rss->dbg, LEVEL_2, "\t\tstarting chain %d:\n", c->nr)); + DBG((rss->dbg, LEVEL_2, "\t\t\t%+F (%d)", xj_irn, j)); /* follow chain, having j as source */ source = j; @@ -1366,13 +1602,13 @@ static nodeset *compute_maximal_antichain(rss_t *rss, dvg_t *dvg) { plist_insert_back(c->elements, irn); node->chain = c; - DB((rss->dbg, DEBUG_MAX_AC, " -> %+F (%d)", irn, target)); + DB((rss->dbg, LEVEL_2, " -> %+F (%d)", irn, target)); /* new source = last target */ source = target; } - DB((rss->dbg, DEBUG_MAX_AC, "\n")); + DB((rss->dbg, LEVEL_2, "\n")); } } @@ -1380,52 +1616,63 @@ static nodeset *compute_maximal_antichain(rss_t *rss, dvg_t *dvg) { Computing the maximal antichain: Select an element from each chain such, such it is parallel with the others. */ - DBG((rss->dbg, DEBUG_MAX_AC, "\t\tcomputing set of saturation values (MAX AC)\n")); - DBG((rss->dbg, DEBUG_MAX_AC, "\t\tstarting with:\n")); - dump_nodeset(values, "\t\t\t"); + DBG((rss->dbg, LEVEL_1, "\t\tcomputing set of saturation values (MAX AC)\n")); + DBG((rss->dbg, LEVEL_3, "\t\tstarting with:\n")); + + DEBUG_ONLY( + if (firm_dbg_get_mask(rss->dbg) & LEVEL_3) + dump_nodeset(values, "\t\t\t"); + ) + temp = NULL; do { /* We need an explicit array for the values as we cannot iterate multiple times over the same set at the same time. :-((((( + TODO Matze: now we can, so rewrite this... */ - int n = nodeset_count(values); + int n = ir_nodeset_size(values); int i = 0; ir_node **val_arr = NEW_ARR_F(ir_node *, n); - foreach_nodeset(values, u_irn) + foreach_ir_nodeset(values, u_irn, iter) val_arr[i++] = u_irn; - if (temp) - del_nodeset(temp); + if (temp) { + ir_nodeset_destroy(temp); + free(temp); + } - temp = new_nodeset(10); + temp = xmalloc(sizeof(temp[0])); + ir_nodeset_init_size(temp, 10); /* Select all nodes from current value set, having another node in the set as descendant. */ for (i = 0; i < n; ++i) { rss_irn_t *u = get_rss_irn(rss, val_arr[i]); - int j; for (j = 0; j < n; ++j) { if (i != j) { - rss_edge_t *entry; rss_edge_t key; -// BSEARCH_IRN_ARR(val_arr[j], u->dvg_desc)) - /* v[j] is descendant of u -> remove u and break */ - nodeset_insert(temp, u->irn); - nodeset_remove(values, u->irn); + key.src = val_arr[i]; + key.tgt = val_arr[j]; - DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tremoving %+F from values, adding it to temp\n", u->irn)); + if (pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) { + /* v[j] is descendant of u -> remove u and break */ + ir_nodeset_insert(temp, u->irn); + ir_nodeset_remove(values, u->irn); - break; + DBG((rss->dbg, LEVEL_3, "\t\t\tremoving %+F from values, adding it to temp\n", u->irn)); + + break; + } } } } /* Try to insert the chain predecessor of all selected u's */ - foreach_nodeset(temp, u_irn) { + foreach_ir_nodeset(temp, u_irn, iter) { rss_irn_t *u = get_rss_irn(rss, u_irn); chain_t *c = u->chain; plist_element_t *el = plist_find_value(c->elements, u_irn); @@ -1433,43 +1680,61 @@ static nodeset *compute_maximal_antichain(rss_t *rss, dvg_t *dvg) { assert(el && "Missing element in chain!"); /* If u has predecessor in chain: insert the predecessor */ - if (el = plist_element_get_prev(el)) { - nodeset_insert(values, plist_element_get_value(el)); - DBG((rss->dbg, DEBUG_MAX_AC, "\t\t\tadding %+F to values\n", plist_element_get_value(el))); + if (el == plist_element_get_prev(el)) { + ir_nodeset_insert(values, plist_element_get_value(el)); + DBG((rss->dbg, LEVEL_3, "\t\t\tadding %+F to values\n", plist_element_get_value(el))); } } DEL_ARR_F(val_arr); - } while (nodeset_count(temp) > 0); + } while (ir_nodeset_size(temp) > 0); - DBG((rss->dbg, DEBUG_MAX_AC, "\t\tfinal set:\n")); - dump_nodeset(values, "\t\t\t"); + DBG((rss->dbg, LEVEL_2, "\t\tfinal set:\n")); + DEBUG_ONLY( + if (firm_dbg_get_mask(rss->dbg) & LEVEL_2) { + dump_nodeset(values, "\t\t\t"); + } + ); + + if (rss->opts->dump_flags & RSS_DUMP_MAXAC) + debug_vcg_dump_pkg(rss, values, iteration); - del_nodeset(temp); + if(temp != NULL) { + ir_nodeset_destroy(temp); + free(temp); + } return values; #undef MAP_IDX } -static serialization_t *compute_best_admissible_serialization(rss_t *rss, nodeset *sat_vals, serialization_t *ser, int num_regs) { - int n = nodeset_count(sat_vals); - int n_idx = ARR_LEN(rss->idx_map); +/** + * Computes the best serialization between two nodes of sat_vals. + */ +static serialization_t *compute_best_admissible_serialization(rss_t *rss, ir_nodeset_t *sat_vals, serialization_t *ser, int num_regs) { + int n = ir_nodeset_size(sat_vals); + int n_idx = ARR_LEN_SAFE(rss->idx_map); int i = 0; ir_node **val_arr = alloca(n * sizeof(val_arr[0])); bitset_t *bs_sv = bitset_alloca(n_idx); bitset_t *bs_vdesc = bitset_alloca(n_idx); bitset_t *bs_tmp = bitset_alloca(n_idx); bitset_t *bs_ukilldesc = bitset_alloca(n_idx); - unsigned best_benefit = UINT_MAX; - unsigned best_omega2 = UINT_MAX; - unsigned best_benefit_omega20 = UINT_MAX; - int has_positive_omega1 = 0; + int best_benefit = INT_MAX; + int best_omega2 = INT_MAX; + int best_benefit_omega20 = INT_MAX; + int has_omega1 = 0; int j, k; ir_node *irn; + ir_nodeset_iterator_t iter; rss_edge_t min_benefit_edge; rss_edge_t min_omega20_edge; + rss_irn_t *ser_u_omega1 = NULL, *ser_v_omega1 = NULL; + rss_irn_t *ser_u_omega20 = NULL, *ser_v_omega20 = NULL; + + DBG((rss->dbg, LEVEL_1, "\tcomputing admissible serializations:\n")); /* We need an explicit array for the values as @@ -1477,7 +1742,7 @@ static serialization_t *compute_best_admissible_serialization(rss_t *rss, nodese set at the same time. :-((((( */ - foreach_nodeset(sat_vals, irn) { + foreach_ir_nodeset(sat_vals, irn, iter) { val_arr[i++] = irn; bitset_set(bs_sv, BLOCK_IDX_MAP(rss, irn)); } @@ -1486,20 +1751,42 @@ static serialization_t *compute_best_admissible_serialization(rss_t *rss, nodese We build all admissible serializations and remember the best found so far. For u in sat_vals: For v in sat_val: - if v in pkiller(u): add edge to v from all other pkiller(u) - else: for all uu in pkiller(u): add edge to v if there exists no path from v to uu - + if v in pkiller(u): add edge from v to all other pkiller(u) + else: for all vv in pkiller(u): add edge from v to vv if there exists no path from vv to v */ +/* + A node is unserializable if: + - it has only one killer and this one is Sink + - it kills no other values + In this case there is no serialization which could + reduce the registerpressure +*/ +#define IS_UNSERIALIZABLE_NODE(rss_node) \ + ( \ + ( \ + (plist_count(rss_node->pkiller_list) == 1) && \ + is_Sink(rss_node->killer) && \ + (rss_node->kill_count == 0) \ + ) || \ + be_is_Barrier(rss_node->irn) || \ + be_is_Keep(rss_node->irn) \ + ) + /* for all u in sat_vals */ for (i = 0; i < n; ++i) { - rss_irn_t *u = get_rss_irn(rss, val_arr[i]); - int u_height = get_irn_height(rss->h, val_arr[i]); + rss_irn_t *u = get_rss_irn(rss, val_arr[i]); plist_element_t *el; + /* ignore nodes where serialization does not help */ + if (IS_UNSERIALIZABLE_NODE(u)) { + DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", u->irn)); + continue; + } + /* accumulate all descendants of all pkiller(u) */ bitset_clear_all(bs_ukilldesc); - foreach_plist(u->dvg_pkiller_list, el) { + foreach_plist(u->pkiller_list, el) { ir_node *irn = plist_element_get_value(el); rss_irn_t *node = get_rss_irn(rss, irn); @@ -1508,9 +1795,9 @@ static serialization_t *compute_best_admissible_serialization(rss_t *rss, nodese else continue; - for (k = ARR_LEN(node->dvg_desc) - 1; k >= 0; --k) { - if (! is_Sink(node->dvg_desc[k])) - bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, node->dvg_desc[k])); + for (k = ARR_LEN_SAFE(node->descendants) - 1; k >= 0; --k) { + if (! is_Sink(node->descendants[k])) + bitset_set(bs_ukilldesc, BLOCK_IDX_MAP(rss, node->descendants[k])); } } @@ -1519,30 +1806,39 @@ static serialization_t *compute_best_admissible_serialization(rss_t *rss, nodese ir_node *v_irn = val_arr[j]; rss_irn_t *v = get_rss_irn(rss, v_irn); unsigned v_height = get_irn_height(rss->h, v_irn); - unsigned omega1, omega2, is_pkiller; + int omega1, omega2, is_pkiller; - if (i == j) + /* v cannot be serialized with itself + * ignore nodes where serialization does not help */ + if (i == j || IS_UNSERIALIZABLE_NODE(v)) { + if (i != j) + DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", v->irn)); continue; + } /* get descendants of v */ bitset_clear_all(bs_vdesc); - for (k = ARR_LEN(v->dvg_desc) - 1; k >= 0; --k) { - if (! is_Sink(v->dvg_desc[k])) - bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v->dvg_desc[k])); + bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v_irn)); + for (k = ARR_LEN_SAFE(v->descendants) - 1; k >= 0; --k) { + if (! is_Sink(v->descendants[k])) + bitset_set(bs_vdesc, BLOCK_IDX_MAP(rss, v->descendants[k])); } /* if v is in pkiller(u) */ - is_pkiller = BSEARCH_IRN_ARR(val_arr[j], u->dvg_pkiller) != NULL ? 1 : 0; + is_pkiller = plist_has_value(u->pkiller_list, v_irn); /* for all vv in pkiller(u) */ - for (k = ARR_LEN(u->dvg_pkiller) - 1; k >= 0; --k) { - ir_node *vv_irn = u->dvg_pkiller[k]; + foreach_plist(u->pkiller_list, el) { + ir_node *vv_irn = plist_element_get_value(el); int add_edge; - if (is_Sink(vv_irn)) + if (is_Sink(vv_irn) || is_cfop(vv_irn)) continue; - add_edge = is_pkiller ? k != j : ! heights_reachable_in_block(rss->h, v_irn, vv_irn); + if (is_pkiller) + add_edge = vv_irn != v_irn && skip_Proj(vv_irn) != skip_Proj(v_irn); + else + add_edge = ! heights_reachable_in_block(rss->h, skip_Proj(vv_irn), skip_Proj(v_irn)); /* As we add an edge from vv -> v, we have to make sure, @@ -1551,7 +1847,8 @@ static serialization_t *compute_best_admissible_serialization(rss_t *rss, nodese if (add_edge) { unsigned vv_height = get_irn_height(rss->h, vv_irn); - unsigned mu1, mu2, critical_path_cost; + unsigned critical_path_cost; + unsigned mu1, mu2; /* mu1 = | descendants(v) cut sat_vals | @@ -1572,13 +1869,11 @@ static serialization_t *compute_best_admissible_serialization(rss_t *rss, nodese mu2 = 0; } - assert(mu1 >= mu2); - /* omega1 = mu1 - mu2 */ omega1 = mu1 - mu2; - if (omega1 > 0) - has_positive_omega1 = 1; + if (omega1 != 0) + has_omega1 = 1; /* omega2 = increase of critical path */ critical_path_cost = @@ -1593,37 +1888,52 @@ static serialization_t *compute_best_admissible_serialization(rss_t *rss, nodese omega2 = critical_path_cost > rss->max_height ? critical_path_cost - rss->max_height : 0; /* this keeps track of the edge with the best benefit */ - if (num_regs - omega1 < best_benefit) { - min_benefit_edge.src = vv_irn; - min_benefit_edge.tgt = v_irn; + if (omega1 >= num_regs - n && omega1 < best_benefit) { + min_benefit_edge.src = v_irn; + min_benefit_edge.tgt = vv_irn; - best_benefit = num_regs - omega1; + ser_u_omega1 = u; + ser_v_omega1 = v; + + best_benefit = omega1; + ser->new_killer = is_pkiller; } /* this keeps track of the edge with the best omega1 costs where omega2 == 0 */ - if (omega2 == 0 && (num_regs - omega1 < best_benefit_omega20)) { - min_omega20_edge.src = vv_irn; - min_omega20_edge.tgt = v_irn; + if (omega2 == 0 && omega1 >= num_regs - n && omega1 < best_benefit_omega20) { + min_omega20_edge.src = v_irn; + min_omega20_edge.tgt = vv_irn; + + ser_u_omega20 = u; + ser_v_omega20 = v; - best_benefit_omega20 = num_regs - omega1; + best_benefit_omega20 = omega1; + ser->new_killer = is_pkiller; } best_omega2 = MIN(best_omega2, omega2); + + DBG((rss->dbg, LEVEL_2, "\t\tfound %+F -> %+F (w1 %d, w2 %d)\n", + v_irn, vv_irn, omega1, omega2)); } /* if add_edge */ } /* for all vv in pkiller(u) */ } /* for all v in sat_vals */ } /* for all u in sat_vals */ - if (! has_positive_omega1) + if (! has_omega1) return NULL; if (best_omega2 == 0) { + ser->u = ser_u_omega20; + ser->v = ser_v_omega20; ser->edge->src = min_omega20_edge.src; ser->edge->tgt = min_omega20_edge.tgt; ser->omega1 = best_benefit_omega20; ser->omega2 = best_omega2; } else { + ser->u = ser_u_omega1; + ser->v = ser_v_omega1; ser->edge->src = min_benefit_edge.src; ser->edge->tgt = min_benefit_edge.tgt; ser->omega1 = best_benefit; @@ -1631,6 +1941,8 @@ static serialization_t *compute_best_admissible_serialization(rss_t *rss, nodese } return ser; + +#undef IS_UNSERIALIZABLE_NODE } /** @@ -1640,25 +1952,28 @@ static serialization_t *compute_best_admissible_serialization(rss_t *rss, nodese static void perform_value_serialization_heuristic(rss_t *rss) { bitset_t *arch_nonign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls)); bitset_t *abi_ign_bs = bitset_alloca(arch_register_class_n_regs(rss->cls)); - int available_regs; + unsigned available_regs, iteration; dvg_t dvg; - nodeset *sat_vals; + ir_nodeset_t *sat_vals; + pset *ser_set = new_pset(cmp_rss_edges, 20); /* available_regs = R = |arch_non_ignore_regs cut ~abi_ignore_regs| */ arch_put_non_ignore_regs(rss->arch_env, rss->cls, arch_nonign_bs); be_abi_put_ignore_regs(rss->abi, rss->cls, abi_ign_bs); bitset_andnot(arch_nonign_bs, abi_ign_bs); - available_regs = bitset_popcnt(arch_nonign_bs); + available_regs = bitset_popcnt(arch_nonign_bs); + //num_live = pset_count(rss->live_block); + //available_regs -= num_live < available_regs ? num_live : 0; - DBG((rss->dbg, DEBUG_SER_HEUR, "\n\t#available regs: %d\n\n", available_regs)); + DBG((rss->dbg, LEVEL_1, "\n\t#available regs: %d\n\n", available_regs)); /* At first we need to compute the disjoint value DAG (DVG = {V, E_dv}). V = set of all nodes we are currently interested in E_dv = there is an edge from u to v iff v is a descendant of killer(u), forall u, v in V */ - dvg.nodes = new_nodeset(plist_count(rss->nodes)); - dvg.edges = new_pset(cmp_rss_edges, 20); + ir_nodeset_init_size(&dvg.nodes, plist_count(rss->nodes)); + dvg.edges = new_pset(cmp_rss_edges, plist_count(rss->nodes) * 5); compute_dvg(rss, &dvg); /* @@ -1666,40 +1981,58 @@ static void perform_value_serialization_heuristic(rss_t *rss) { on the DVG which gives us all necessary serialization edges. */ - DBG((rss->dbg, DEBUG_MAX_AC, "\tcomputing maximal antichain:\n")); - sat_vals = compute_maximal_antichain(rss, &dvg); - while (sat_vals && (nodeset_count(sat_vals) > available_regs)) { + DBG((rss->dbg, LEVEL_1, "\tcomputing maximal antichain:\n")); + iteration = 0; + sat_vals = compute_maximal_antichain(rss, &dvg, iteration++); + while (sat_vals && (ir_nodeset_size(sat_vals) > available_regs)) { serialization_t *ser, best_ser; - rss_edge_t edge; - rss_irn_t *tgt; + rss_edge_t *edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*edge)); + ir_node *dep_src, *dep_tgt; - best_ser.edge = &edge; + best_ser.edge = edge; ser = compute_best_admissible_serialization(rss, sat_vals, &best_ser, available_regs); - tgt = get_rss_irn(rss, ser->edge->tgt); - - DBG((rss->dbg, DEBUG_SER_HEUR, "\tcurrent register saturation %d, target %d\n", nodeset_count(sat_vals), available_regs)); - /* BEWARE: Update dvg_user_list when inserting a serialization edge !!! */ - plist_insert_back(tgt->dvg_user_list, ser->edge->src); - pset_insert(dvg.edges, ser->edge, HASH_RSS_EDGE(ser->edge)); - del_nodeset(sat_vals); + DBG((rss->dbg, LEVEL_1, "\tcurrent register saturation %d, target %d\n", ir_nodeset_size(sat_vals), available_regs)); - /* TODO: Might be better to update the dvg descendants here as well, instead of recalculating them */ + if (! ser) { + DBG((rss->dbg, LEVEL_1, "\tno RS improving serialization found, breaking at iteration %d\n", iteration)); + break; + } /* Insert the serialization as dependency edge into the irg. */ - DBG((rss->dbg, DEBUG_SER_HEUR, "\tinserting serialization %+F -> %+F with cost %d, %d\n", - ser->edge->src, ser->edge->tgt, ser->omega1, ser->omega2)); - add_irn_dep(ser->edge->src, ser->edge->tgt); + DBG((rss->dbg, LEVEL_1, "\tserializing %+F -> %+F with edge %+F -> %+F and cost %d, %d\n", + ser->u->irn, ser->v->irn, ser->edge->src, ser->edge->tgt, ser->omega1, ser->omega2)); + + if (pset_find(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge))) + ir_printf("WARNING: serialization %+F -> %+F computed twice!\n", ser->edge->src, ser->edge->tgt); + + + pset_insert(ser_set, ser->edge, HASH_RSS_EDGE(ser->edge)); + + /* update the dvg */ + update_dvg(rss, &dvg, ser->v, ser->u); + update_dvg(rss, &dvg, get_rss_irn(rss, ser->edge->src), get_rss_irn(rss, ser->edge->tgt)); + if(sat_vals != NULL) { + ir_nodeset_destroy(sat_vals); + free(sat_vals); + } + + dep_src = skip_Proj(ser->edge->src); + dep_tgt = ser->edge->tgt; + add_irn_dep(dep_src, dep_tgt); + + /* Update descendants, consumer and pkillers of the target */ + update_node_info(rss, ser->edge->tgt, ser->edge->src); /* TODO: try to find a cheaper way for updating height information */ rss->max_height = heights_recompute_block(rss->h, rss->block); /* Recompute the antichain for next serialization */ - DBG((rss->dbg, DEBUG_MAX_AC, "\tre-computing maximal antichain:\n")); - sat_vals = compute_maximal_antichain(rss, &dvg); + DBG((rss->dbg, LEVEL_1, "\tre-computing maximal antichain:\n")); + sat_vals = compute_maximal_antichain(rss, &dvg, iteration++); } - del_nodeset(dvg.nodes); + ir_nodeset_destroy(&dvg.nodes); del_pset(dvg.edges); } @@ -1711,7 +2044,7 @@ static void process_block(ir_node *block, void *env) { int i, n; const ir_edge_t *edge; - phase_init(&rss->ph, "rss block preprocessor", rss->irg, PHASE_DEFAULT_GROWTH, init_rss_irn); + phase_init(&rss->ph, "rss block preprocessor", rss->irg, PHASE_DEFAULT_GROWTH, init_rss_irn, NULL); DBG((rss->dbg, LEVEL_1, "preprocessing block %+F\n", block)); rss->block = block; @@ -1734,23 +2067,51 @@ static void process_block(ir_node *block, void *env) { rss->cls = cls; DBG((rss->dbg, LEVEL_1, "register class %s\n", arch_register_class_name(cls))); + /* Get all live value at end of Block having current register class */ + ir_nodeset_init(&rss->live_block); + be_liveness_end_of_block(rss->liveness, rss->arch_env, rss->cls, rss->block, &rss->live_block); + /* reset the list of interesting nodes */ plist_clear(rss->nodes); plist_insert_back(rss->nodes, _sink); /* collect all nodes having a certain register class */ foreach_out_edge(block, edge) { - ir_node *irn = get_edge_src_irn(edge); + ir_node *irn = get_edge_src_irn(edge); + ir_mode *mode = get_irn_mode(irn); - if (get_irn_mode(irn) == mode_T) + /* + We skip: + - mode_T nodes (the projs are asked) + - mode_X nodes (control flow nodes are always scheduled last) + - Keeps (they are always immediately scheduled) + - Phi (same as Keep) + */ + if (mode == mode_T || mode == mode_X || is_Phi(irn)) continue; - if (! arch_irn_is(rss->arch_env, irn, ignore) && arch_get_irn_reg_class(rss->arch_env, irn, -1) == cls) { - plist_insert_back(rss->nodes, irn); + /* + In case of a proj, we skip + - Barrier (they are a Barrier :) + - Start + - the Proj itself, as it's scheduled always with it's super node + */ + if (is_Proj(irn)) { + ir_node *pred = get_Proj_pred(irn); + if (be_is_Barrier(pred) || is_Start(pred)) + continue; + } - /* calculate the descendants and consumer for each node in the block */ - collect_node_info(rss, irn); + /* calculate the descendants and consumer for each node in the block */ + collect_node_info(rss, skip_Proj(irn)); + + if (be_is_Keep(irn)) + continue; + + if (! arch_irn_is(rss->arch_env, irn, ignore) && arch_get_irn_reg_class(rss->arch_env, irn, -1) == cls) { + plist_insert_back(rss->nodes, skip_Proj(irn)); } + //} } /* compute the potential killing set PK(G) */ @@ -1764,29 +2125,51 @@ static void process_block(ir_node *block, void *env) { add the necessary dependencies to the irg. */ perform_value_serialization_heuristic(rss); + + ir_nodeset_destroy(&rss->live_block); } phase_free(&rss->ph); } +/** + * Register the options. + */ +void be_init_schedrss(void) { + lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be"); + lc_opt_entry_t *sched_grp = lc_opt_get_grp(be_grp, "sched"); + lc_opt_entry_t *rss_grp = lc_opt_get_grp(sched_grp, "rss"); + + lc_opt_add_table(rss_grp, rss_option_table); +} + +BE_REGISTER_MODULE_CONSTRUCTOR(be_init_schedrss); + /** * Preprocess the irg for scheduling. */ void rss_schedule_preparation(const be_irg_t *birg) { + ir_graph *irg = be_get_birg_irg(birg); rss_t rss; FIRM_DBG_REGISTER(rss.dbg, "firm.be.sched.rss"); - firm_dbg_set_mask(rss.dbg, 255); + //firm_dbg_set_mask(rss.dbg, LEVEL_1 | LEVEL_2 | LEVEL_3); - init_rss_special_nodes(birg->irg); + init_rss_special_nodes(irg); - rss.irg = birg->irg; - rss.arch_env = birg->main_env->arch_env; + rss.irg = irg; + rss.arch_env = be_get_birg_arch_env(birg); rss.abi = birg->abi; - rss.h = heights_new(birg->irg); + rss.h = heights_new(irg); rss.nodes = plist_new(); - irg_block_walk_graph(birg->irg, NULL, process_block, &rss); + rss.opts = &rss_options; + rss.liveness = be_liveness(irg); + irg_block_walk_graph(irg, NULL, process_block, &rss); heights_free(rss.h); plist_free(rss.nodes); + be_liveness_free(rss.liveness); + + if (birg->main_env->options->dump_flags & DUMP_SCHED) + be_dump(rss.irg, "-rss", dump_ir_block_graph); }