X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=ir%2Fbe%2Fbecopyopt.c;h=584da53204051c891735d0b19390c1d3eddeeb5a;hb=75bdba692afeb0617e59ddc2ea08e0662c356e03;hp=84f602e12b5a23b28538038fc915819d26c84e3c;hpb=8a483b248ca6a7041b1b246f64304890a0c894a0;p=libfirm diff --git a/ir/be/becopyopt.c b/ir/be/becopyopt.c index 84f602e12..584da5320 100644 --- a/ir/be/becopyopt.c +++ b/ir/be/becopyopt.c @@ -1,23 +1,44 @@ +/* + * Copyright (C) 1995-2007 University of Karlsruhe. All right reserved. + * + * This file is part of libFirm. + * + * This file may be distributed and/or modified under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation and appearing in the file LICENSE.GPL included in the + * packaging of this file. + * + * Licensees holding valid libFirm Professional Edition licenses may use + * this file in accordance with the libFirm Commercial License. + * Agreement provided with the Software. + * + * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE + * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE. + */ + /** - * Author: Daniel Grund - * Date: 12.04.2005 - * Copyright: (c) Universitaet Karlsruhe - * Licence: This file protected by GPL - GNU GENERAL PUBLIC LICENSE. + * @file + * @brief Copy minimization driver. + * @author Daniel Grund + * @date 12.04.2005 + * @version $Id$ + * + * Main file for the optimization reducing the copies needed for: + * - Phi coalescing + * - Register-constrained nodes + * - Two-address code instructions */ #ifdef HAVE_CONFIG_H #include "config.h" #endif -#ifdef HAVE_ALLOCA_H -#include -#endif -#ifdef HAVE_MALLOC_H -#include -#endif #include "execfreq.h" #include "xmalloc.h" #include "debug.h" #include "pmap.h" +#include "raw_bitset.h" +#include "irnode.h" #include "irgraph.h" #include "irgwalk.h" #include "irprog.h" @@ -28,11 +49,12 @@ #include "irphase_t.h" #include "irprintf_t.h" - -#include "bearch.h" +#include "bemodule.h" +#include "bearch_t.h" #include "benode_t.h" #include "beutil.h" #include "beifg_t.h" +#include "beintlive_t.h" #include "becopyopt_t.h" #include "becopystat.h" #include "belive_t.h" @@ -41,11 +63,12 @@ #include "benodesets.h" #include "bejavacoal.h" #include "bestatevent.h" +#include "beirg_t.h" +#include "error.h" -#ifdef WITH_LIBCORE #include #include -#endif /* WITH_LIBCORE */ +#include #define DUMP_BEFORE 1 #define DUMP_AFTER 2 @@ -56,14 +79,13 @@ #define COST_FUNC_LOOP 2 #define COST_FUNC_ALL_ONE 3 -static int dump_flags = 0; -static int style_flags = 0; -static int do_stats = 0; +static unsigned dump_flags = 0; +static unsigned style_flags = 0; +static unsigned do_stats = 0; static cost_fct_t cost_func = co_get_costs_exec_freq; -static int algo = CO_ALGO_HEUR2; -static int improve = 1; +static unsigned algo = CO_ALGO_HEUR4; +static int improve = 1; -#ifdef WITH_LIBCORE static const lc_opt_enum_mask_items_t dump_items[] = { { "before", DUMP_BEFORE }, { "after", DUMP_AFTER }, @@ -88,6 +110,7 @@ static const lc_opt_enum_mask_items_t algo_items[] = { #ifdef WITH_JVM { "heur3", CO_ALGO_HEUR3 }, #endif /* WITH_JVM */ + { "heur4", CO_ALGO_HEUR4 }, #ifdef WITH_ILP { "ilp", CO_ALGO_ILP }, #endif /* WITH_ILP */ @@ -134,22 +157,29 @@ extern void be_co_ilp_register_options(lc_opt_entry_t *grp); extern void be_co2_register_options(lc_opt_entry_t *grp); extern void be_co3_register_options(lc_opt_entry_t *grp); -void co_register_options(lc_opt_entry_t *grp) +void be_init_copycoal(void) { - lc_opt_entry_t *co_grp = lc_opt_get_grp(grp, "co"); - lc_opt_add_table(co_grp, options); + lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be"); + lc_opt_entry_t *ra_grp = lc_opt_get_grp(be_grp, "ra"); + lc_opt_entry_t *chordal_grp = lc_opt_get_grp(ra_grp, "chordal"); + lc_opt_entry_t *co_grp = lc_opt_get_grp(chordal_grp, "co"); - be_co2_register_options(co_grp); - be_co3_register_options(co_grp); -#ifdef WITH_ILP - be_co_ilp_register_options(co_grp); -#endif + lc_opt_add_table(co_grp, options); } -#endif +BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copycoal); #undef QUICK_AND_DIRTY_HACK +static int nodes_interfere(const be_chordal_env_t *env, const ir_node *a, const ir_node *b) +{ + if (env->ifg) + return be_ifg_connected(env->ifg, a, b); + else + return values_interfere(env->birg, a, b); +} + + /****************************************************************************** _____ _ / ____| | | @@ -163,9 +193,6 @@ void co_register_options(lc_opt_entry_t *grp) DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;) -void be_copy_opt_init(void) { -} - copy_opt_t *new_copy_opt(be_chordal_env_t *chordal_env, cost_fct_t get_costs) { const char *s1, *s2, *s3; @@ -197,7 +224,7 @@ void free_copy_opt(copy_opt_t *co) { } int co_is_optimizable_root(const copy_opt_t *co, ir_node *irn) { - arch_register_req_t req; + const arch_register_req_t *req; const arch_register_t *reg; if (arch_irn_is(co->aenv, irn, ignore)) @@ -207,43 +234,13 @@ int co_is_optimizable_root(const copy_opt_t *co, ir_node *irn) { if (arch_register_type_is(reg, ignore)) return 0; - if (is_Reg_Phi(irn) || is_Perm_Proj(co->aenv, irn) || is_2addr_code(co->aenv, irn, &req)) + req = arch_get_register_req(co->aenv, irn, -1); + if (is_Reg_Phi(irn) || is_Perm_Proj(co->aenv, irn) || is_2addr_code(req)) return 1; return 0; } -int co_is_optimizable_arg(const copy_opt_t *co, ir_node *irn) { - const ir_edge_t *edge; - const arch_register_t *reg; - - assert(0 && "Is buggy and obsolete. Do not use"); - - if (arch_irn_is(co->aenv, irn, ignore)) - return 0; - - reg = arch_get_irn_register(co->aenv, irn); - if (arch_register_type_is(reg, ignore)) - return 0; - - foreach_out_edge(irn, edge) { - ir_node *n = edge->src; - - if (!nodes_interfere(co->cenv, irn, n) || irn == n) { - arch_register_req_t req; - arch_get_register_req(co->aenv, &req, n, -1); - - if(is_Reg_Phi(n) || - is_Perm(co->aenv, n) || - (arch_register_req_is(&req, should_be_same) && req.other_same == irn) - ) - return 1; - } - } - - return 0; -} - int co_get_costs_loop_depth(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) { int cost = 0; ir_loop *loop; @@ -267,7 +264,7 @@ int co_get_costs_exec_freq(const copy_opt_t *co, ir_node *root, ir_node* arg, in int res; ir_node *root_bl = get_nodes_block(root); ir_node *copy_bl = is_Phi(root) ? get_Block_cfgpred_block(root_bl, pos) : root_bl; - res = get_block_execfreq_ulong(co->cenv->exec_freq, copy_bl); + res = get_block_execfreq_ulong(co->cenv->birg->exec_freq, copy_bl); /* don't allow values smaller than one. */ return res < 1 ? 1 : res; @@ -379,7 +376,6 @@ static int ou_max_ind_set_costs(unit_t *ou) { static void co_collect_units(ir_node *irn, void *env) { copy_opt_t *co = env; unit_t *unit; - arch_register_req_t req; if (!is_curr_reg_class(co, irn)) return; @@ -418,22 +414,25 @@ static void co_collect_units(ir_node *irn, void *env) { /* Else insert the argument of the phi to the members of this ou */ DBG((dbg, LEVEL_1, "\t Member: %+F\n", arg)); - /* Check if arg has occurred at a prior position in the arg/list */ - arg_pos = 0; - for (o=0; onode_count; ++o) - if (unit->nodes[o] == arg) { - arg_pos = o; - break; + if (! arch_irn_is(co->aenv, arg, ignore)) { + /* Check if arg has occurred at a prior position in the arg/list */ + arg_pos = 0; + for (o=1; onode_count; ++o) { + if (unit->nodes[o] == arg) { + arg_pos = o; + break; + } } - if (!arg_pos) { /* a new argument */ - /* insert node, set costs */ - unit->nodes[unit->node_count] = arg; - unit->costs[unit->node_count] = co->get_costs(co, irn, arg, i); - unit->node_count++; - } else { /* arg has occured before in same phi */ - /* increase costs for existing arg */ - unit->costs[arg_pos] += co->get_costs(co, irn, arg, i); + if (!arg_pos) { /* a new argument */ + /* insert node, set costs */ + unit->nodes[unit->node_count] = arg; + unit->costs[unit->node_count] = co->get_costs(co, irn, arg, i); + unit->node_count++; + } else { /* arg has occured before in same phi */ + /* increase costs for existing arg */ + unit->costs[arg_pos] += co->get_costs(co, irn, arg, i); + } } } unit->nodes = xrealloc(unit->nodes, unit->node_count * sizeof(*unit->nodes)); @@ -449,21 +448,26 @@ static void co_collect_units(ir_node *irn, void *env) { unit->nodes[0] = irn; unit->nodes[1] = get_Perm_src(irn); unit->costs[1] = co->get_costs(co, irn, unit->nodes[1], -1); - } else - - /* Src == Tgt of a 2-addr-code instruction */ - if (is_2addr_code(co->aenv, irn, &req)) { - ir_node *other = req.other_same; - if (!nodes_interfere(co->cenv, irn, other)) { - unit->nodes = xmalloc(2 * sizeof(*unit->nodes)); - unit->costs = xmalloc(2 * sizeof(*unit->costs)); - unit->node_count = 2; - unit->nodes[0] = irn; - unit->nodes[1] = other; - unit->costs[1] = co->get_costs(co, irn, other, -1); + } else { + const arch_register_req_t *req = + arch_get_register_req(co->aenv, irn, -1); + + /* Src == Tgt of a 2-addr-code instruction */ + if (is_2addr_code(req)) { + ir_node *other = get_irn_n(skip_Proj(irn), req->other_same); + if (!arch_irn_is(co->aenv, other, ignore) && + !nodes_interfere(co->cenv, irn, other)) { + unit->nodes = xmalloc(2 * sizeof(*unit->nodes)); + unit->costs = xmalloc(2 * sizeof(*unit->costs)); + unit->node_count = 2; + unit->nodes[0] = irn; + unit->nodes[1] = other; + unit->costs[1] = co->get_costs(co, irn, other, -1); + } + } else { + assert(0 && "This is not an optimizable node!"); } - } else - assert(0 && "This is not an optimizable node!"); + } /* Insert the new unit at a position according to its costs */ if (unit->node_count > 1) { @@ -704,15 +708,14 @@ static int compare_affinity_node_t(const void *k1, const void *k2, size_t size) static void add_edge(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) { affinity_node_t new_node, *node; - neighb_t new_nbr, *nbr; - int allocnew; + neighb_t *nbr; + int allocnew = 1; new_node.irn = n1; new_node.degree = 0; new_node.neighbours = NULL; node = set_insert(co->nodes, &new_node, sizeof(new_node), nodeset_hash(new_node.irn)); - allocnew = 1; for (nbr = node->neighbours; nbr; nbr = nbr->next) if (nbr->irn == n2) { allocnew = 0; @@ -721,11 +724,11 @@ static void add_edge(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) { /* if we did not find n2 in n1's neighbourhood insert it */ if (allocnew) { - obstack_grow(&co->obst, &new_nbr, sizeof(new_nbr)); - nbr = obstack_finish(&co->obst); + nbr = obstack_alloc(&co->obst, sizeof(*nbr)); nbr->irn = n2; nbr->costs = 0; nbr->next = node->neighbours; + node->neighbours = nbr; node->degree++; } @@ -744,7 +747,6 @@ static INLINE void add_edges(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs static void build_graph_walker(ir_node *irn, void *env) { copy_opt_t *co = env; int pos, max; - arch_register_req_t req; const arch_register_t *reg; if (!is_curr_reg_class(co, irn) || arch_irn_is(co->aenv, irn, ignore)) @@ -768,8 +770,15 @@ static void build_graph_walker(ir_node *irn, void *env) { } /* 2-address code */ - else if (is_2addr_code(co->aenv, irn, &req)) - add_edges(co, irn, req.other_same, co->get_costs(co, irn, req.other_same, 0)); + else { + const arch_register_req_t *req = + arch_get_register_req(co->aenv, irn, -1); + if (is_2addr_code(req)) { + ir_node *other = get_irn_n(skip_Proj(irn), req->other_same); + if (! arch_irn_is(co->aenv, other, ignore)) + add_edges(co, irn, other, co->get_costs(co, irn, other, 0)); + } + } } void co_build_graph_structure(copy_opt_t *co) { @@ -806,7 +815,6 @@ void co_dump_appel_graph(const copy_opt_t *co, FILE *f) { be_ifg_t *ifg = co->cenv->ifg; int *color_map = alloca(co->cls->n_regs * sizeof(color_map[0])); - bitset_t *adm = bitset_alloca(co->cls->n_regs); ir_node *irn; void *it, *nit; @@ -839,17 +847,15 @@ void co_dump_appel_graph(const copy_opt_t *co, FILE *f) int idx = PTR_TO_INT(get_irn_link(irn)); affinity_node_t *a = get_affinity_info(co, irn); - arch_register_req_t req; + const arch_register_req_t *req; ir_node *adj; - arch_get_register_req(co->aenv, &req, irn, BE_OUT_POS(0)); - if(arch_register_req_is(&req, limited)) { - bitset_clear_all(adm); - req.limited(req.limited_env, adm); - for(i = 0; i < co->cls->n_regs; ++i) - if(!bitset_is_set(adm, i) && color_map[i] >= 0) + req = arch_get_register_req(co->aenv, irn, BE_OUT_POS(0)); + if(arch_register_req_is(req, limited)) { + for(i = 0; i < co->cls->n_regs; ++i) { + if(!rbitset_is_set(req->limited, i) && color_map[i] >= 0) fprintf(f, "%d %d -1\n", color_map[i], idx); - + } } @@ -877,7 +883,7 @@ void co_dump_appel_graph(const copy_opt_t *co, FILE *f) } typedef struct _appel_clique_walker_t { - phase_t ph; + ir_phase ph; const copy_opt_t *co; int curr_nr; int node_count; @@ -915,7 +921,7 @@ static int appel_aff_weight(const appel_clique_walker_t *env, ir_node *bl) #endif } -static void *appel_clique_walker_irn_init(phase_t *phase, ir_node *irn, void *old) +static void *appel_clique_walker_irn_init(ir_phase *phase, ir_node *irn, void *old) { appel_block_info_t *res = NULL; @@ -1001,6 +1007,7 @@ static void appel_walker(ir_node *bl, void *data) struct obstack *obst = &env->obst; void *base = obstack_base(obst); pset *live = pset_new_ptr_default(); + be_lv_t *lv = env->co->cenv->birg->lv; int n_insns = 0; int n_nodes = 0; @@ -1033,8 +1040,8 @@ static void appel_walker(ir_node *bl, void *data) irn = insn->next_insn; } - DBG((env->co->cenv->dbg, LEVEL_2, "%+F\n", bl)); - be_liveness_end_of_block(env->co->cenv->lv, env->co->aenv, env->co->cls, bl, live); + DBG((dbg, LEVEL_2, "%+F\n", bl)); + be_liveness_end_of_block(lv, env->co->aenv, env->co->cls, bl, live); /* Generate the bad and ugly. */ for(i = n_insns - 1; i >= 0; --i) { @@ -1156,10 +1163,11 @@ void co_dump_appel_graph_cliques(const copy_opt_t *co, FILE *f) int n_colors; appel_clique_walker_t env; bitset_t *adm = bitset_alloca(co->cls->n_regs); + be_lv_t *lv = co->cenv->birg->lv; - be_liveness_recompute(co->cenv->lv); + be_liveness_recompute(lv); obstack_init(&env.obst); - phase_init(&env.ph, "appel_clique_dumper", co->irg, PHASE_DEFAULT_GROWTH, appel_clique_walker_irn_init); + phase_init(&env.ph, "appel_clique_dumper", co->irg, PHASE_DEFAULT_GROWTH, appel_clique_walker_irn_init, NULL); env.curr_nr = co->cls->n_regs; env.co = co; env.f = f; @@ -1265,25 +1273,27 @@ static void ifg_dump_node_attr(FILE *f, void *self, ir_node *irn) { co_ifg_dump_t *env = self; const arch_register_t *reg = arch_get_irn_register(env->co->aenv, irn); - arch_register_req_t req; + const arch_register_req_t *req; int limited; - arch_get_register_req(env->co->aenv, &req, irn, BE_OUT_POS(0)); - limited = arch_register_req_is(&req, limited); + req = arch_get_register_req(env->co->aenv, irn, BE_OUT_POS(0)); + limited = arch_register_req_is(req, limited); if(env->flags & CO_IFG_DUMP_LABELS) { ir_fprintf(f, "label=\"%+F", irn); +#if 0 + // TODO fix this... if((env->flags & CO_IFG_DUMP_CONSTR) && limited) { bitset_t *bs = bitset_alloca(env->co->cls->n_regs); req.limited(req.limited_env, bs); ir_fprintf(f, "\\n%B", bs); } +#endif ir_fprintf(f, "\" "); - } - - else + } else { fprintf(f, "label=\"\" shape=point " ); + } if(env->flags & CO_IFG_DUMP_SHAPE) fprintf(f, "shape=%s ", limited ? "diamond" : "ellipse"); @@ -1363,20 +1373,21 @@ static int void_algo(copy_opt_t *co) */ typedef struct { - co_algo_t *algo; + co_algo_t *algo; const char *name; - int can_improve_existing; + int can_improve_existing; } co_algo_info_t; static co_algo_info_t algos[] = { - { void_algo, "none", 0 }, - { co_solve_heuristic, "heur1", 0 }, - { co_solve_heuristic_new, "heur2", 0 }, - { co_solve_heuristic_java, "heur3", 0 }, + { void_algo, "none", 0 }, + { co_solve_heuristic, "heur1", 0 }, + { co_solve_heuristic_new, "heur2", 0 }, + { co_solve_heuristic_java, "heur3", 0 }, + { co_solve_heuristic_mst, "heur4", 0 }, #ifdef WITH_ILP - { co_solve_ilp2, "ilp", 1 }, + { co_solve_ilp2, "ilp", 1 }, #endif - { NULL, "", 0 } + { NULL, "", 0 } }; /* @@ -1388,17 +1399,29 @@ static co_algo_info_t algos[] = { */ +static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char *suffix) +{ + FILE *result; + char buf[1024]; + + ir_snprintf(buf, sizeof(buf), "%s%F_%s%s", prefix, env->irg, env->cls->name, suffix); + result = fopen(buf, "wt"); + if(result == NULL) { + panic("Couldn't open '%s' for writing.", buf); + } + + return result; +} + void co_driver(be_chordal_env_t *cenv) { -#ifdef WITH_LIBCORE - lc_timer_t *timer = lc_timer_register("firm.be.copyopt", "runtime"); -#endif + lc_timer_t *timer = lc_timer_register("firm.be.copyopt", "runtime"); co_complete_stats_t before, after; - copy_opt_t *co; - co_algo_t *algo_func; - int was_optimal = 0; + copy_opt_t *co; + co_algo_t *algo_func; + int was_optimal = 0; - if(algo < 0 || algo >= CO_ALGO_LAST) + if (algo < 0 || algo >= CO_ALGO_LAST) return; co = new_copy_opt(cenv, cost_func); @@ -1417,24 +1440,28 @@ void co_driver(be_chordal_env_t *cenv) be_stat_ev_ull("co_init_unsat", before.unsatisfied_edges); /* Dump the interference graph in Appel's format. */ - if(dump_flags & DUMP_APPEL) { - FILE *f = be_chordal_open(cenv, "", ".apl"); + if (dump_flags & DUMP_APPEL) { + FILE *f = my_open(cenv, "", ".apl"); co_dump_appel_graph(co, f); fclose(f); } - if(dump_flags & DUMP_BEFORE) { - FILE *f = be_chordal_open(cenv, "", "-before.dot"); + if (dump_flags & DUMP_BEFORE) { + FILE *f = my_open(cenv, "", "-before.dot"); co_dump_ifg_dot(co, f, style_flags); fclose(f); } /* if the algo can improve results, provide an initial solution with heur3 */ - if(improve && algos[algo].can_improve_existing) { + if (improve && algos[algo].can_improve_existing) { co_complete_stats_t stats; - /* produce a heuristical solution */ + /* produce a heuristic solution */ +#ifdef WITH_JVM co_solve_heuristic_java(co); +#else + co_solve_heuristic(co); +#endif /* WITH_JVM */ /* do the stats and provide the current costs */ co_complete_stats(co, &stats); @@ -1443,34 +1470,29 @@ void co_driver(be_chordal_env_t *cenv) #ifdef WITH_JVM /* start the JVM here so that it does not tamper the timing. */ - if(algo == CO_ALGO_HEUR3) + if (algo == CO_ALGO_HEUR3) be_java_coal_start_jvm(); -#endif +#endif /* WITH_JVM */ algo_func = algos[algo].algo; -#ifdef WITH_LIBCORE + /* perform actual copy minimization */ lc_timer_reset_and_start(timer); -#endif - was_optimal = algo_func(co); - -#ifdef WITH_LIBCORE lc_timer_stop(timer); - be_stat_ev("co_time", lc_timer_elapsed_msec(timer)); -#endif + be_stat_ev("co_time", lc_timer_elapsed_msec(timer)); be_stat_ev_ull("co_optimal", was_optimal); - if(dump_flags & DUMP_AFTER) { - FILE *f = be_chordal_open(cenv, "", "-after.dot"); + if (dump_flags & DUMP_AFTER) { + FILE *f = my_open(cenv, "", "-after.dot"); co_dump_ifg_dot(co, f, style_flags); fclose(f); } co_complete_stats(co, &after); - if(do_stats) { + if (do_stats) { ulong64 optimizable_costs = after.max_costs - after.inevit_costs; ulong64 evitable = after.costs - after.inevit_costs;