X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=ir%2Fbe%2Fbecopyopt.c;h=316e31a03ca2a69f601fd56a15facdcd02120451;hb=cbfbedae75798a9830fb0ef090189345ede85dc8;hp=cc663946f893b539e01bfff8ff5564dea8c3a034;hpb=d94bd29f5604ce7d99a084c921ea97d44ef8e8b2;p=libfirm diff --git a/ir/be/becopyopt.c b/ir/be/becopyopt.c index cc663946f..316e31a03 100644 --- a/ir/be/becopyopt.c +++ b/ir/be/becopyopt.c @@ -1,183 +1,1384 @@ /** - * @author Daniel Grund - * @date 12.04.2005 + * Author: Daniel Grund + * Date: 12.04.2005 + * Copyright: (c) Universitaet Karlsruhe + * Licence: This file protected by GPL - GNU GENERAL PUBLIC LICENSE. */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif +#ifdef HAVE_ALLOCA_H +#include +#endif +#ifdef HAVE_MALLOC_H +#include +#endif -#include "becopyopt.h" +#include "execfreq.h" +#include "xmalloc.h" +#include "debug.h" +#include "pmap.h" +#include "irgraph.h" +#include "irgwalk.h" +#include "irprog.h" +#include "irloop_t.h" +#include "iredges_t.h" +#include "phiclass.h" +#include "irbitset.h" +#include "irphase_t.h" +#include "irprintf_t.h" + + +#include "bearch.h" +#include "benode_t.h" +#include "beutil.h" +#include "beifg_t.h" +#include "becopyopt_t.h" #include "becopystat.h" +#include "belive_t.h" +#include "beinsn_t.h" +#include "besched_t.h" -#define DEBUG_LVL 0 //SET_LEVEL_1 -static firm_dbg_module_t *dbg = NULL; +#define DUMP_BEFORE 1 +#define DUMP_AFTER 2 +#define DUMP_APPEL 4 +#define DUMP_ALL 2 * DUMP_APPEL - 1 -#define is_curr_reg_class(irn) (co->isa->get_irn_reg_class(irn)==co->cls) -#define is_optimizable(irn) (is_Phi(irn) || is_Copy(irn)) +#define COST_FUNC_FREQ 1 +#define COST_FUNC_LOOP 2 +#define COST_FUNC_ALL_ONE 3 -/** - * Builds an optimization unit for a given optimizable irn (root). - * This opt-unit is inserted in the main structure co. - * If an arg of root itself is optimizable process this arg before with a - * recursive call. For handling this situation and loops co->root is used - * to remember all roots. - */ -static void co_append_unit(copy_opt_t *co, const ir_node *root) { - int i, arity; - unit_t *unit; - DBG((dbg, LEVEL_1, "\t Root: %n\n", root)); - /* check if we encountered this root earlier */ - if (pset_find_ptr(co->roots, root)) - return; - pset_insert_ptr(co->roots, root); +static int dump_flags = 0; +static int style_flags = 0; +static int do_stats = 0; +static cost_fct_t cost_func = co_get_costs_exec_freq; +static int algo = CO_ALGO_HEUR2; - assert(is_curr_reg_class(root) && "node is in wrong register class!"); +#ifdef WITH_LIBCORE +static const lc_opt_enum_mask_items_t dump_items[] = { + { "before", DUMP_BEFORE }, + { "after", DUMP_AFTER }, + { "appel", DUMP_APPEL }, + { "all", DUMP_ALL }, + { NULL, 0 } +}; - /* init unit */ - arity = get_irn_arity(root); - unit = calloc(1, sizeof(*unit)); - unit->co = co; - unit->interf = 0; - unit->node_count = 1; - unit->nodes = malloc((arity+1) * sizeof(*unit->nodes)); - unit->nodes[0] = root; - INIT_LIST_HEAD(&unit->queue); +static const lc_opt_enum_mask_items_t style_items[] = { + { "color", CO_IFG_DUMP_COLORS }, + { "labels", CO_IFG_DUMP_LABELS }, + { "constr", CO_IFG_DUMP_CONSTR }, + { "shape", CO_IFG_DUMP_SHAPE }, + { "full", 2 * CO_IFG_DUMP_SHAPE - 1 }, + { NULL, 0 } +}; - /* check all args */ - for (i=0; inodes[unit->node_count++] = arg; - } else - unit->interf++; - } - } - unit->nodes = realloc(unit->nodes, unit->node_count * sizeof(*unit->nodes)); - list_add_tail(&unit->units, &co->units); - /* Init mis_size to node_count. So get_lower_bound returns correct results. - * - Now it can be called even before the heuristic has run. - * - And it will return correct results for units with nodecount 1 which are - * not optimized during the heuristic and have therefor delivered wrong results for get_lower_bound - */ - unit->mis_size = unit->node_count; +static const lc_opt_enum_mask_items_t algo_items[] = { + { "none", CO_ALGO_NONE }, + { "heur", CO_ALGO_HEUR }, + { "heur2", CO_ALGO_HEUR2 }, + { "heur3", CO_ALGO_HEUR3 }, + { "ilp", CO_ALGO_ILP }, + { NULL, 0 } +}; -} +static const lc_opt_enum_func_ptr_items_t cost_func_items[] = { + { "freq", co_get_costs_exec_freq }, + { "loop", co_get_costs_loop_depth }, + { "one", co_get_costs_all_one }, + { NULL, 0 } +}; -static void co_collect_in_block(ir_node *block, void *env) { - copy_opt_t *co = env; - struct list_head *head = &get_ra_block_info(block)->border_head; - border_t *curr; +static lc_opt_enum_mask_var_t dump_var = { + &dump_flags, dump_items +}; - list_for_each_entry_reverse(border_t, curr, head, list) - if (curr->is_def && curr->is_real && is_optimizable(curr->irn)) - co_append_unit(co, curr->irn); +static lc_opt_enum_mask_var_t style_var = { + &style_flags, style_items +}; + +static lc_opt_enum_mask_var_t algo_var = { + &algo, algo_items +}; + +static lc_opt_enum_func_ptr_var_t cost_func_var = { + &cost_func, cost_func_items +}; + +static const lc_opt_table_entry_t options[] = { + LC_OPT_ENT_ENUM_INT ("algo", "select copy optimization algo (heur, heur2, heur3, ilp)", &algo_var), + LC_OPT_ENT_ENUM_FUNC_PTR ("cost", "select a cost function (freq, loop, one)", &cost_func_var), + LC_OPT_ENT_ENUM_MASK ("dump", "dump ifg before or after copy optimization", &dump_var), + LC_OPT_ENT_ENUM_MASK ("style", "dump style for ifg dumping", &style_var), + LC_OPT_ENT_BOOL ("stats", "dump statistics after each optimization", &do_stats), + { NULL } +}; + +/* Insert additional options registration functions here. */ +extern void be_co_ilp_register_options(lc_opt_entry_t *grp); +extern void be_co2_register_options(lc_opt_entry_t *grp); +extern void be_co3_register_options(lc_opt_entry_t *grp); + +void co_register_options(lc_opt_entry_t *grp) +{ + lc_opt_entry_t *co_grp = lc_opt_get_grp(grp, "co"); + lc_opt_add_table(co_grp, options); + + be_co2_register_options(co_grp); + be_co3_register_options(co_grp); +#ifdef WITH_ILP + be_co_ilp_register_options(co_grp); +#endif } +#endif -static void co_collect_units(copy_opt_t *co) { - DBG((dbg, LEVEL_1, "\tCollecting optimization units\n")); - co->roots = pset_new_ptr(64); - dom_tree_walk_irg(co->irg, co_collect_in_block, NULL, co); - del_pset(co->roots); + +#undef QUICK_AND_DIRTY_HACK + +/****************************************************************************** + _____ _ + / ____| | | + | | __ ___ _ __ ___ _ __ __ _| | + | | |_ |/ _ \ '_ \ / _ \ '__/ _` | | + | |__| | __/ | | | __/ | | (_| | | + \_____|\___|_| |_|\___|_| \__,_|_| + + ******************************************************************************/ + +DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;) + +void be_copy_opt_init(void) { } -copy_opt_t *new_copy_opt(ir_graph *irg, const arch_isa_if_t *isa, const arch_register_class_t *cls) { +copy_opt_t *new_copy_opt(be_chordal_env_t *chordal_env, cost_fct_t get_costs) +{ const char *s1, *s2, *s3; int len; - dbg = firm_dbg_register("ir.be.copyopt"); - firm_dbg_set_mask(dbg, DEBUG_LVL); + copy_opt_t *co; + + FIRM_DBG_REGISTER(dbg, "ir.be.copyopt"); - copy_opt_t *co = calloc(1, sizeof(*co)); - co->irg = irg; - co->isa = isa; - co->cls = cls; + co = xcalloc(1, sizeof(*co)); + co->cenv = chordal_env; + co->aenv = chordal_env->birg->main_env->arch_env; + co->irg = chordal_env->irg; + co->cls = chordal_env->cls; + co->get_costs = get_costs; s1 = get_irp_prog_name(); s2 = get_entity_name(get_irg_entity(co->irg)); - s3 = cls->name; + s3 = chordal_env->cls->name; len = strlen(s1) + strlen(s2) + strlen(s3) + 5; - co->name = malloc(len); - if (!strcmp(co->name, DEBUG_IRG)) - firm_dbg_set_mask(dbg, -1); + co->name = xmalloc(len); snprintf(co->name, len, "%s__%s__%s", s1, s2, s3); - INIT_LIST_HEAD(&co->units); - co_collect_units(co); return co; } void free_copy_opt(copy_opt_t *co) { + xfree(co->name); + free(co); +} + +int co_is_optimizable_root(const copy_opt_t *co, ir_node *irn) { + arch_register_req_t req; + const arch_register_t *reg; + + if (arch_irn_is(co->aenv, irn, ignore)) + return 0; + + reg = arch_get_irn_register(co->aenv, irn); + if (arch_register_type_is(reg, ignore)) + return 0; + + if (is_Reg_Phi(irn) || is_Perm_Proj(co->aenv, irn) || is_2addr_code(co->aenv, irn, &req)) + return 1; + + return 0; +} + +int co_is_optimizable_arg(const copy_opt_t *co, ir_node *irn) { + const ir_edge_t *edge; + const arch_register_t *reg; + + assert(0 && "Is buggy and obsolete. Do not use"); + + if (arch_irn_is(co->aenv, irn, ignore)) + return 0; + + reg = arch_get_irn_register(co->aenv, irn); + if (arch_register_type_is(reg, ignore)) + return 0; + + foreach_out_edge(irn, edge) { + ir_node *n = edge->src; + + if (!nodes_interfere(co->cenv, irn, n) || irn == n) { + arch_register_req_t req; + arch_get_register_req(co->aenv, &req, n, -1); + + if(is_Reg_Phi(n) || + is_Perm(co->aenv, n) || + (arch_register_req_is(&req, should_be_same) && req.other_same == irn) + ) + return 1; + } + } + + return 0; +} + +int co_get_costs_loop_depth(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) { + int cost = 0; + ir_loop *loop; + ir_node *root_block = get_nodes_block(root); + + if (is_Phi(root)) { + /* for phis the copies are placed in the corresponding pred-block */ + loop = get_irn_loop(get_Block_cfgpred_block(root_block, pos)); + } else { + /* a perm places the copy in the same block as it resides */ + loop = get_irn_loop(root_block); + } + if (loop) { + int d = get_loop_depth(loop); + cost = d*d; + } + return cost+1; +} + +int co_get_costs_exec_freq(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) { + ir_node *root_bl = get_nodes_block(root); + ir_node *copy_bl = is_Phi(root) ? get_Block_cfgpred_block(root_bl, pos) : root_bl; + unsigned long freq = get_block_execfreq_ulong(co->cenv->exec_freq, copy_bl); + return freq > 0 ? (int) freq : 1; +} + + +int co_get_costs_all_one(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) { + return 1; +} + +/****************************************************************************** + ____ _ _ _ _ _ _____ _ + / __ \ | | | | | | (_) | / ____| | + | | | |_ __ | |_| | | |_ __ _| |_ ___ | (___ | |_ ___ _ __ __ _ __ _ ___ + | | | | '_ \| __| | | | '_ \| | __/ __| \___ \| __/ _ \| '__/ _` |/ _` |/ _ \ + | |__| | |_) | |_| |__| | | | | | |_\__ \ ____) | || (_) | | | (_| | (_| | __/ + \____/| .__/ \__|\____/|_| |_|_|\__|___/ |_____/ \__\___/|_| \__,_|\__, |\___| + | | __/ | + |_| |___/ + ******************************************************************************/ + +/** + * Determines a maximum weighted independent set with respect to + * the interference and conflict edges of all nodes in a qnode. + */ +static int ou_max_ind_set_costs(unit_t *ou) { + be_chordal_env_t *chordal_env = ou->co->cenv; + ir_node **safe, **unsafe; + int i, o, safe_count, safe_costs, unsafe_count, *unsafe_costs; + bitset_t *curr; + int max, pos, curr_weight, best_weight = 0; + + /* assign the nodes into two groups. + * safe: node has no interference, hence it is in every max stable set. + * unsafe: node has an interference + */ + safe = alloca((ou->node_count-1) * sizeof(*safe)); + safe_costs = 0; + safe_count = 0; + unsafe = alloca((ou->node_count-1) * sizeof(*unsafe)); + unsafe_costs = alloca((ou->node_count-1) * sizeof(*unsafe_costs)); + unsafe_count = 0; + for(i=1; inode_count; ++i) { + int is_safe = 1; + for(o=1; onode_count; ++o) { + if (i==o) + continue; + if (nodes_interfere(chordal_env, ou->nodes[i], ou->nodes[o])) { + unsafe_costs[unsafe_count] = ou->costs[i]; + unsafe[unsafe_count] = ou->nodes[i]; + ++unsafe_count; + is_safe = 0; + break; + } + } + if (is_safe) { + safe_costs += ou->costs[i]; + safe[safe_count++] = ou->nodes[i]; + } + } + + + /* now compute the best set out of the unsafe nodes*/ + if (unsafe_count > MIS_HEUR_TRIGGER) { + bitset_t *best = bitset_alloca(unsafe_count); + /* Heuristik: Greedy trial and error form index 0 to unsafe_count-1 */ + for (i=0; i best_weight) { + best_weight = curr_weight; + } + + no_stable_set: + bitset_minus1(curr); + } + } + + return safe_costs+best_weight; +} + +static void co_collect_units(ir_node *irn, void *env) { + copy_opt_t *co = env; + unit_t *unit; + arch_register_req_t req; + + if (!is_curr_reg_class(co, irn)) + return; + if (!co_is_optimizable_root(co, irn)) + return; + + /* Init a new unit */ + unit = xcalloc(1, sizeof(*unit)); + unit->co = co; + unit->node_count = 1; + INIT_LIST_HEAD(&unit->queue); + + /* Phi with some/all of its arguments */ + if (is_Reg_Phi(irn)) { + int i, arity; + + /* init */ + arity = get_irn_arity(irn); + unit->nodes = xmalloc((arity+1) * sizeof(*unit->nodes)); + unit->costs = xmalloc((arity+1) * sizeof(*unit->costs)); + unit->nodes[0] = irn; + + /* fill */ + for (i=0; icenv, irn, arg)) { + unit->inevitable_costs += co->get_costs(co, irn, arg, i); + continue; + } + + /* Else insert the argument of the phi to the members of this ou */ + DBG((dbg, LEVEL_1, "\t Member: %+F\n", arg)); + + /* Check if arg has occurred at a prior position in the arg/list */ + arg_pos = 0; + for (o=0; onode_count; ++o) + if (unit->nodes[o] == arg) { + arg_pos = o; + break; + } + + if (!arg_pos) { /* a new argument */ + /* insert node, set costs */ + unit->nodes[unit->node_count] = arg; + unit->costs[unit->node_count] = co->get_costs(co, irn, arg, i); + unit->node_count++; + } else { /* arg has occured before in same phi */ + /* increase costs for existing arg */ + unit->costs[arg_pos] += co->get_costs(co, irn, arg, i); + } + } + unit->nodes = xrealloc(unit->nodes, unit->node_count * sizeof(*unit->nodes)); + unit->costs = xrealloc(unit->costs, unit->node_count * sizeof(*unit->costs)); + } else + + /* Proj of a perm with corresponding arg */ + if (is_Perm_Proj(co->aenv, irn)) { + assert(!nodes_interfere(co->cenv, irn, get_Perm_src(irn))); + unit->nodes = xmalloc(2 * sizeof(*unit->nodes)); + unit->costs = xmalloc(2 * sizeof(*unit->costs)); + unit->node_count = 2; + unit->nodes[0] = irn; + unit->nodes[1] = get_Perm_src(irn); + unit->costs[1] = co->get_costs(co, irn, unit->nodes[1], -1); + } else + + /* Src == Tgt of a 2-addr-code instruction */ + if (is_2addr_code(co->aenv, irn, &req)) { + ir_node *other = req.other_same; + if (!nodes_interfere(co->cenv, irn, other)) { + unit->nodes = xmalloc(2 * sizeof(*unit->nodes)); + unit->costs = xmalloc(2 * sizeof(*unit->costs)); + unit->node_count = 2; + unit->nodes[0] = irn; + unit->nodes[1] = other; + unit->costs[1] = co->get_costs(co, irn, other, -1); + } + } else + assert(0 && "This is not an optimizable node!"); + + /* Insert the new unit at a position according to its costs */ + if (unit->node_count > 1) { + int i; + struct list_head *tmp; + + /* Determine the maximum costs this unit can cause: all_nodes_cost */ + for(i=1; inode_count; ++i) { + unit->sort_key = MAX(unit->sort_key, unit->costs[i]); + unit->all_nodes_costs += unit->costs[i]; + } + + /* Determine the minimal costs this unit will cause: min_nodes_costs */ + unit->min_nodes_costs += unit->all_nodes_costs - ou_max_ind_set_costs(unit); + /* Insert the new ou according to its sort_key */ + tmp = &co->units; + while (tmp->next != &co->units && list_entry_units(tmp->next)->sort_key > unit->sort_key) + tmp = tmp->next; + list_add(&unit->units, tmp); + } else { + free(unit); + } +} + +#ifdef QUICK_AND_DIRTY_HACK + +static int compare_ous(const void *k1, const void *k2) { + const unit_t *u1 = *((const unit_t **) k1); + const unit_t *u2 = *((const unit_t **) k2); + int i, o, u1_has_constr, u2_has_constr; + arch_register_req_t req; + const arch_env_t *aenv = u1->co->aenv; + + /* Units with constraints come first */ + u1_has_constr = 0; + for (i=0; inode_count; ++i) { + arch_get_register_req(aenv, &req, u1->nodes[i], -1); + if (arch_register_req_is(&req, limited)) { + u1_has_constr = 1; + break; + } + } + + u2_has_constr = 0; + for (i=0; inode_count; ++i) { + arch_get_register_req(aenv, &req, u2->nodes[i], -1); + if (arch_register_req_is(&req, limited)) { + u2_has_constr = 1; + break; + } + } + + if (u1_has_constr != u2_has_constr) + return u2_has_constr - u1_has_constr; + + /* Now check, whether the two units are connected */ +#if 0 + for (i=0; inode_count; ++i) + for (o=0; onode_count; ++o) + if (u1->nodes[i] == u2->nodes[o]) + return 0; +#endif + + /* After all, the sort key decides. Greater keys come first. */ + return u2->sort_key - u1->sort_key; + +} + +/** + * Sort the ou's according to constraints and their sort_key + */ +static void co_sort_units(copy_opt_t *co) { + int i, count = 0, costs; + unit_t *ou, **ous; + + /* get the number of ous, remove them form the list and fill the array */ + list_for_each_entry(unit_t, ou, &co->units, units) + count++; + ous = alloca(count * sizeof(*ous)); + + costs = co_get_max_copy_costs(co); + + i = 0; + list_for_each_entry(unit_t, ou, &co->units, units) + ous[i++] = ou; + + INIT_LIST_HEAD(&co->units); + + assert(count == i && list_empty(&co->units)); + + for (i=0; inodes[0]); + + qsort(ous, count, sizeof(*ous), compare_ous); + + ir_printf("\n\n"); + for (i=0; inodes[0]); + + /* reinsert into list in correct order */ + for (i=0; iunits, &co->units); + + assert(costs == co_get_max_copy_costs(co)); +} +#endif + +void co_build_ou_structure(copy_opt_t *co) { + DBG((dbg, LEVEL_1, "\tCollecting optimization units\n")); + INIT_LIST_HEAD(&co->units); + irg_walk_graph(co->irg, co_collect_units, NULL, co); +#ifdef QUICK_AND_DIRTY_HACK + co_sort_units(co); +#endif +} + +void co_free_ou_structure(copy_opt_t *co) { unit_t *curr, *tmp; - free(co->name); + ASSERT_OU_AVAIL(co); list_for_each_entry_safe(unit_t, curr, tmp, &co->units, units) { - free(curr->nodes); - free(curr); + xfree(curr->nodes); + xfree(curr->costs); + xfree(curr); } + co->units.next = NULL; } -int co_get_copy_count(copy_opt_t *co) { +/* co_solve_heuristic() is implemented in becopyheur.c */ + +int co_get_max_copy_costs(const copy_opt_t *co) { int i, res = 0; unit_t *curr; + + ASSERT_OU_AVAIL(co); + list_for_each_entry(unit_t, curr, &co->units, units) { - int root_col = get_irn_color(curr->nodes[0]); - res += curr->interf; + res += curr->inevitable_costs; for (i=1; inode_count; ++i) - if (root_col != get_irn_color(curr->nodes[i])) - res++; + res += curr->costs[i]; } return res; } -int co_get_lower_bound(copy_opt_t *co) { +int co_get_inevit_copy_costs(const copy_opt_t *co) { int res = 0; unit_t *curr; + + ASSERT_OU_AVAIL(co); + list_for_each_entry(unit_t, curr, &co->units, units) - res += curr->interf + curr->node_count - curr->mis_size; + res += curr->inevitable_costs; return res; } -int co_get_interferer_count(copy_opt_t *co) { +int co_get_copy_costs(const copy_opt_t *co) { + int i, res = 0; + unit_t *curr; + + ASSERT_OU_AVAIL(co); + + list_for_each_entry(unit_t, curr, &co->units, units) { + int root_col = get_irn_col(co, curr->nodes[0]); + DBG((dbg, LEVEL_1, " %3d costs for root %+F color %d\n", curr->inevitable_costs, curr->nodes[0], root_col)); + res += curr->inevitable_costs; + for (i=1; inode_count; ++i) { + int arg_col = get_irn_col(co, curr->nodes[i]); + if (root_col != arg_col) { + DBG((dbg, LEVEL_1, " %3d for arg %+F color %d\n", curr->costs[i], curr->nodes[i], arg_col)); + res += curr->costs[i]; + } + } + } + return res; +} + +int co_get_lower_bound(const copy_opt_t *co) { int res = 0; unit_t *curr; + + ASSERT_OU_AVAIL(co); + list_for_each_entry(unit_t, curr, &co->units, units) - res += curr->interf; + res += curr->inevitable_costs + curr->min_nodes_costs; return res; } -/** - * Needed for result checking - */ -static void co_collect_for_checker(ir_node *block, void *env) { +/****************************************************************************** + _____ _ _____ _ + / ____| | | / ____| | + | | __ _ __ __ _ _ __ | |__ | (___ | |_ ___ _ __ __ _ __ _ ___ + | | |_ | '__/ _` | '_ \| '_ \ \___ \| __/ _ \| '__/ _` |/ _` |/ _ \ + | |__| | | | (_| | |_) | | | | ____) | || (_) | | | (_| | (_| | __/ + \_____|_| \__,_| .__/|_| |_| |_____/ \__\___/|_| \__,_|\__, |\___| + | | __/ | + |_| |___/ + ******************************************************************************/ + +static int compare_affinity_node_t(const void *k1, const void *k2, size_t size) { + const affinity_node_t *n1 = k1; + const affinity_node_t *n2 = k2; + + return (n1->irn != n2->irn); +} + +static void add_edge(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) { + affinity_node_t new_node, *node; + neighb_t new_nbr, *nbr; + int allocnew; + + new_node.irn = n1; + new_node.degree = 0; + new_node.neighbours = NULL; + node = set_insert(co->nodes, &new_node, sizeof(new_node), HASH_PTR(new_node.irn)); + + allocnew = 1; + for (nbr = node->neighbours; nbr; nbr = nbr->next) + if (nbr->irn == n2) { + allocnew = 0; + break; + } + + /* if we did not find n2 in n1's neighbourhood insert it */ + if (allocnew) { + obstack_grow(&co->obst, &new_nbr, sizeof(new_nbr)); + nbr = obstack_finish(&co->obst); + nbr->irn = n2; + nbr->costs = 0; + nbr->next = node->neighbours; + node->neighbours = nbr; + node->degree++; + } + + /* now nbr points to n1's neighbour-entry of n2 */ + nbr->costs += costs; +} + +static INLINE void add_edges(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) { + if (! be_ifg_connected(co->cenv->ifg, n1, n2)) { + add_edge(co, n1, n2, costs); + add_edge(co, n2, n1, costs); + } +} + +static void build_graph_walker(ir_node *irn, void *env) { copy_opt_t *co = env; - struct list_head *head = &get_ra_block_info(block)->border_head; - border_t *curr; + int pos, max; + arch_register_req_t req; + const arch_register_t *reg; + + if (!is_curr_reg_class(co, irn) || arch_irn_is(co->aenv, irn, ignore)) + return; + + reg = arch_get_irn_register(co->aenv, irn); + if (arch_register_type_is(reg, ignore)) + return; - list_for_each_entry_reverse(border_t, curr, head, list) - if (curr->is_def && curr->is_real && is_curr_reg_class(curr->irn)) - obstack_ptr_grow(&co->ob, curr->irn); + /* Phis */ + if (is_Reg_Phi(irn)) + for (pos=0, max=get_irn_arity(irn); posget_costs(co, irn, arg, pos)); + } + + /* Perms */ + else if (is_Perm_Proj(co->aenv, irn)) { + ir_node *arg = get_Perm_src(irn); + add_edges(co, irn, arg, co->get_costs(co, irn, arg, 0)); + } + + /* 2-address code */ + else if (is_2addr_code(co->aenv, irn, &req)) + add_edges(co, irn, req.other_same, co->get_costs(co, irn, req.other_same, 0)); } -/** - * This O(n^2) checker checks, if two ifg-connected nodes have the same color. - */ -void co_check_allocation(copy_opt_t *co) { - ir_node **nodes, *n1, *n2; - int i, o; - - obstack_init(&co->ob); - dom_tree_walk_irg(co->irg, co_collect_for_checker, NULL, co); - obstack_ptr_grow(&co->ob, NULL); - - nodes = (ir_node **) obstack_finish(&co->ob); - for (i = 0, n1 = nodes[i]; n1; n1 = nodes[++i]) { - assert(! (is_allocatable_irn(n1) && get_irn_color(n1) == NO_COLOR)); - for (o = i+1, n2 = nodes[o]; n2; n2 = nodes[++o]) - if (phi_ops_interfere(n1, n2) && get_irn_color(n1) == get_irn_color(n2)) { - DBG((dbg, 0, "Error: %n in %n and %n in %n have the same color.\n", n1, get_nodes_block(n1), n2, get_nodes_block(n2))); - assert(0 && "Interfering values have the same color!"); +void co_build_graph_structure(copy_opt_t *co) { + obstack_init(&co->obst); + co->nodes = new_set(compare_affinity_node_t, 32); + + irg_walk_graph(co->irg, build_graph_walker, NULL, co); +} + +void co_free_graph_structure(copy_opt_t *co) { + ASSERT_GS_AVAIL(co); + + del_set(co->nodes); + obstack_free(&co->obst, NULL); + co->nodes = NULL; +} + +/* co_solve_ilp1() co_solve_ilp2() are implemented in becopyilpX.c */ + +int co_gs_is_optimizable(copy_opt_t *co, ir_node *irn) { + affinity_node_t new_node, *n; + + ASSERT_GS_AVAIL(co); + + new_node.irn = irn; + n = set_find(co->nodes, &new_node, sizeof(new_node), HASH_PTR(new_node.irn)); + if (n) { + return (n->degree > 0); + } else + return 0; +} + +void co_dump_appel_graph(const copy_opt_t *co, FILE *f) +{ + be_ifg_t *ifg = co->cenv->ifg; + int *color_map = alloca(co->cls->n_regs * sizeof(color_map[0])); + bitset_t *adm = bitset_alloca(co->cls->n_regs); + + ir_node *irn; + void *it, *nit; + int i, n, n_regs; + + n_regs = 0; + for(i = 0; i < co->cls->n_regs; ++i) { + const arch_register_t *reg = &co->cls->regs[i]; + color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_regs++; + } + + /* + * n contains the first node number. + * the values below n are the pre-colored register nodes + */ + + it = be_ifg_nodes_iter_alloca(ifg); + nit = be_ifg_neighbours_iter_alloca(ifg); + + n = n_regs; + be_ifg_foreach_node(ifg, it, irn) { + if(!arch_irn_is(co->aenv, irn, ignore)) + set_irn_link(irn, INT_TO_PTR(n++)); + } + + fprintf(f, "%d %d\n", n, n_regs); + + be_ifg_foreach_node(ifg, it, irn) { + if(!arch_irn_is(co->aenv, irn, ignore)) { + int idx = PTR_TO_INT(get_irn_link(irn)); + affinity_node_t *a = get_affinity_info(co, irn); + + arch_register_req_t req; + ir_node *adj; + + arch_get_register_req(co->aenv, &req, irn, BE_OUT_POS(0)); + if(arch_register_req_is(&req, limited)) { + bitset_clear_all(adm); + req.limited(req.limited_env, adm); + for(i = 0; i < co->cls->n_regs; ++i) + if(!bitset_is_set(adm, i) && color_map[i] >= 0) + fprintf(f, "%d %d -1\n", color_map[i], idx); + + } + + + be_ifg_foreach_neighbour(ifg, nit, irn, adj) { + if(!arch_irn_is(co->aenv, adj, ignore)) { + int adj_idx = PTR_TO_INT(get_irn_link(adj)); + if(idx < adj_idx) + fprintf(f, "%d %d -1\n", idx, adj_idx); + } + } + + if(a) { + neighb_t *n; + + co_gs_foreach_neighb(a, n) { + if(!arch_irn_is(co->aenv, n->irn, ignore)) { + int n_idx = PTR_TO_INT(get_irn_link(n->irn)); + if(idx < n_idx) + fprintf(f, "%d %d %d\n", idx, n_idx, n->costs); + } + } + } + } + } +} + +typedef struct _appel_clique_walker_t { + phase_t ph; + const copy_opt_t *co; + int curr_nr; + int node_count; + FILE *f; + int dumb; + int *color_map; + struct obstack obst; +} appel_clique_walker_t; + +typedef struct _appel_block_info_t { + int *live_end_nr; + int *live_in_nr; + int *phi_nr; + ir_node **live_end; + ir_node **live_in; + ir_node **phi; + int n_live_end; + int n_live_in; + int n_phi; +} appel_block_info_t; + +static int appel_aff_weight(const appel_clique_walker_t *env, ir_node *bl) +{ +#if 0 + double freq = get_block_execfreq(env->co->cenv->execfreq, bl); + int res = (int) freq; + return res == 0 ? 1 : res; +#else + ir_loop *loop = get_irn_loop(bl); + if(loop) { + int d = get_loop_depth(loop); + return 1 + d * d; + } + return 1; +#endif +} + +static void *appel_clique_walker_irn_init(phase_t *phase, ir_node *irn, void *old) +{ + appel_block_info_t *res = NULL; + + if(is_Block(irn)) { + appel_clique_walker_t *d = (void *) phase; + res = phase_alloc(phase, sizeof(res[0])); + res->phi_nr = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end_nr)); + res->live_end_nr = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end_nr)); + res->live_in_nr = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in_nr)); + res->live_end = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end)); + res->live_in = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in)); + res->phi = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in)); + } + + return res; +} + +typedef struct _insn_list_t { + be_insn_t *insn; + struct list_head list; +} insn_list_t; + +static int appel_get_live_end_nr(appel_clique_walker_t *env, ir_node *bl, ir_node *irn) +{ + appel_block_info_t *bli = phase_get_irn_data(&env->ph, bl); + int i; + + for(i = 0; i < bli->n_live_end; ++i) + if(bli->live_end[i] == irn) + return bli->live_end_nr[i]; + + return -1; +} + +static int appel_dump_clique(appel_clique_walker_t *env, pset *live, ir_node *bl, int curr_nr, int start_nr) +{ + ir_node **live_arr = alloca(env->co->cls->n_regs * sizeof(live_arr[0])); + ir_node *irn; + int n_live; + int j; + + n_live = 0; + foreach_pset(live, irn) + live_arr[n_live++] = irn; + + /* dump the live after clique */ + if(!env->dumb) { + for(j = 0; j < n_live; ++j) { + int k; + + for(k = j + 1; k < n_live; ++k) { + fprintf(env->f, "%d %d -1 ", curr_nr + j, curr_nr + k); + } + fprintf(env->f, "\n"); + } + } + + /* dump the affinities */ + for(j = 0; !env->dumb && j < n_live; ++j) { + ir_node *irn = live_arr[j]; + int old_nr = PTR_TO_INT(get_irn_link(irn)); + + /* if the node was already live in the last insn dump the affinity */ + if(old_nr > start_nr) { + int weight = appel_aff_weight(env, bl); + fprintf(env->f, "%d %d %d\n", old_nr, curr_nr + j, weight); + } + } + + /* set the current numbers into the link field. */ + for(j = 0; j < n_live; ++j) { + ir_node *irn = live_arr[j]; + set_irn_link(irn, INT_TO_PTR(curr_nr + j)); + } + + return curr_nr + n_live; +} + +static void appel_walker(ir_node *bl, void *data) +{ + appel_clique_walker_t *env = data; + appel_block_info_t *bli = phase_get_or_set_irn_data(&env->ph, bl); + struct obstack *obst = &env->obst; + void *base = obstack_base(obst); + pset *live = pset_new_ptr_default(); + + int n_insns = 0; + int n_nodes = 0; + int start_nr = env->curr_nr; + int curr_nr = start_nr; + + be_insn_env_t insn_env; + int i, j; + ir_node *irn; + be_insn_t **insns; + + insn_env.aenv = env->co->aenv; + insn_env.cls = env->co->cls; + insn_env.obst = obst; + insn_env.ignore_colors = env->co->cenv->ignore_colors; + + /* Guess how many insns will be in this block. */ + sched_foreach(bl, irn) + n_nodes++; + + bli->n_phi = 0; + insns = malloc(n_nodes * sizeof(insns[0])); + + /* Put all insns in an array. */ + irn = sched_first(bl); + while(!sched_is_end(irn)) { + be_insn_t *insn; + insn = be_scan_insn(&insn_env, irn); + insns[n_insns++] = insn; + irn = insn->next_insn; + } + + DBG((env->co->cenv->dbg, LEVEL_2, "%+F\n", bl)); + be_liveness_end_of_block(env->co->cenv->lv, env->co->aenv, env->co->cls, bl, live); + + /* Generate the bad and ugly. */ + for(i = n_insns - 1; i >= 0; --i) { + be_insn_t *insn = insns[i]; + + /* The first live set has to be saved in the block border set. */ + if(i == n_insns - 1) { + j = 0; + foreach_pset(live, irn) { + bli->live_end[j] = irn; + bli->live_end_nr[j] = curr_nr + j; + ++j; + } + bli->n_live_end = j; + } + + if(!env->dumb) { + for(j = 0; j < insn->use_start; ++j) { + ir_node *op = insn->ops[j].carrier; + bitset_t *adm = insn->ops[j].regs; + int k; + int nr; + + if(!insn->ops[j].has_constraints) + continue; + + nr = 0; + foreach_pset(live, irn) { + if(irn == op) { + pset_break(live); + break; + } + ++nr; + } + + assert(nr < pset_count(live)); + + for(k = 0; k < env->co->cls->n_regs; ++k) { + int mapped_col = env->color_map[k]; + if(mapped_col >= 0 && !bitset_is_set(adm, k) && !bitset_is_set(env->co->cenv->ignore_colors, k)) + fprintf(env->f, "%d %d -1\n", curr_nr + nr, mapped_col); + } + } + } + + /* dump the clique and update the stuff. */ + curr_nr = appel_dump_clique(env, live, bl, curr_nr, start_nr); + + /* remove all defs. */ + for(j = 0; j < insn->use_start; ++j) + pset_remove_ptr(live, insn->ops[j].carrier); + + if(is_Phi(insn->irn) && arch_irn_consider_in_reg_alloc(env->co->aenv, env->co->cls, insn->irn)) { + bli->phi[bli->n_phi] = insn->irn; + bli->phi_nr[bli->n_phi] = PTR_TO_INT(get_irn_link(insn->irn)); + bli->n_phi++; + } + + /* add all uses */ + else + for(j = insn->use_start; j < insn->n_ops; ++j) + pset_insert_ptr(live, insn->ops[j].carrier); + } + + /* print the start clique. */ + curr_nr = appel_dump_clique(env, live, bl, curr_nr, start_nr); + + i = 0; + foreach_pset(live, irn) { + bli->live_in[i] = irn; + bli->live_in_nr[i] = PTR_TO_INT(get_irn_link(irn)); + ++i; + } + bli->n_live_in = i; + + del_pset(live); + free(insns); + obstack_free(obst, base); + env->curr_nr = curr_nr; +} + +static void appel_inter_block_aff(ir_node *bl, void *data) +{ + appel_clique_walker_t *env = data; + appel_block_info_t *bli = phase_get_irn_data(&env->ph, bl); + + int i, j, n; + + for(i = 0; i < bli->n_live_in; ++i) { + ir_node *irn = bli->live_in[i]; + + for(j = 0, n = get_Block_n_cfgpreds(bl); j < n; ++j) { + ir_node *pred = get_Block_cfgpred_block(bl, j); + + int nr = appel_get_live_end_nr(env, pred, irn); + assert(nr >= 0); + fprintf(env->f, "%d %d 1\n", bli->live_in_nr[i], nr); + } + } + + for(i = 0; i < bli->n_phi; ++i) { + ir_node *irn = bli->phi[i]; + + for(j = 0, n = get_Block_n_cfgpreds(bl); j < n; ++j) { + ir_node *pred = get_Block_cfgpred_block(bl, j); + ir_node *op = get_irn_n(irn, j); + + int nr = appel_get_live_end_nr(env, pred, op); + assert(nr >= 0); + fprintf(env->f, "%d %d 1\n", bli->phi_nr[i], nr); + } + } + +} + +void co_dump_appel_graph_cliques(const copy_opt_t *co, FILE *f) +{ + int i; + int n_colors; + appel_clique_walker_t env; + bitset_t *adm = bitset_alloca(co->cls->n_regs); + + be_liveness_recompute(co->cenv->lv); + obstack_init(&env.obst); + phase_init(&env.ph, "appel_clique_dumper", co->irg, PHASE_DEFAULT_GROWTH, appel_clique_walker_irn_init); + env.curr_nr = co->cls->n_regs; + env.co = co; + env.f = f; + + bitset_copy(adm, co->cenv->ignore_colors); + bitset_flip_all(adm); + + /* Make color map. */ + env.color_map = alloca(co->cls->n_regs * sizeof(env.color_map[0])); + for(i = 0, n_colors = 0; i < co->cls->n_regs; ++i) { + const arch_register_t *reg = &co->cls->regs[i]; + env.color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_colors++; + } + + env.dumb = 1; + env.curr_nr = n_colors; + irg_block_walk_graph(co->irg, firm_clear_link, NULL, NULL); + irg_block_walk_graph(co->irg, appel_walker, NULL, &env); + + fprintf(f, "%d %d\n", env.curr_nr, n_colors); + + /* make the first k nodes interfere */ + for(i = 0; i < n_colors; ++i) { + int j; + for(j = i + 1; j < n_colors; ++j) + fprintf(f, "%d %d -1 ", i, j); + fprintf(f, "\n"); + } + + env.dumb = 0; + env.curr_nr = n_colors; + irg_block_walk_graph(co->irg, firm_clear_link, NULL, NULL); + irg_block_walk_graph(co->irg, appel_walker, NULL, &env); + irg_block_walk_graph(co->irg, appel_inter_block_aff, NULL, &env); + obstack_free(&env.obst, NULL); +} + +/* + ___ _____ ____ ____ ___ _____ ____ _ + |_ _| ___/ ___| | _ \ / _ \_ _| | _ \ _ _ _ __ ___ _ __ (_)_ __ __ _ + | || |_ | | _ | | | | | | || | | | | | | | | '_ ` _ \| '_ \| | '_ \ / _` | + | || _|| |_| | | |_| | |_| || | | |_| | |_| | | | | | | |_) | | | | | (_| | + |___|_| \____| |____/ \___/ |_| |____/ \__,_|_| |_| |_| .__/|_|_| |_|\__, | + |_| |___/ +*/ + +static const char *get_dot_color_name(int col) +{ + static const char *names[] = { + "blue", + "red", + "green", + "yellow", + "cyan", + "magenta", + "orange", + "chocolate", + "beige", + "navy", + "darkgreen", + "darkred", + "lightPink", + "chartreuse", + "lightskyblue", + "linen", + "pink", + "lightslateblue", + "mintcream", + "red", + "darkolivegreen", + "mediumblue", + "mistyrose", + "salmon", + "darkseagreen", + "mediumslateblue" + "moccasin", + "tomato", + "forestgreen", + "darkturquoise", + "palevioletred" + }; + + return col < sizeof(names)/sizeof(names[0]) ? names[col] : "white"; +} + +typedef struct _co_ifg_dump_t { + const copy_opt_t *co; + unsigned flags; +} co_ifg_dump_t; + +static void ifg_dump_graph_attr(FILE *f, void *self) +{ + fprintf(f, "overlap=scale"); +} + +static int ifg_is_dump_node(void *self, ir_node *irn) +{ + co_ifg_dump_t *cod = self; + return !arch_irn_is(cod->co->aenv, irn, ignore); +} + +static void ifg_dump_node_attr(FILE *f, void *self, ir_node *irn) +{ + co_ifg_dump_t *env = self; + const arch_register_t *reg = arch_get_irn_register(env->co->aenv, irn); + arch_register_req_t req; + int limited; + + arch_get_register_req(env->co->aenv, &req, irn, BE_OUT_POS(0)); + limited = arch_register_req_is(&req, limited); + + if(env->flags & CO_IFG_DUMP_LABELS) { + ir_fprintf(f, "label=\"%+F", irn); + + if((env->flags & CO_IFG_DUMP_CONSTR) && limited) { + bitset_t *bs = bitset_alloca(env->co->cls->n_regs); + req.limited(req.limited_env, bs); + ir_fprintf(f, "\\n%B", bs); + } + ir_fprintf(f, "\" "); + } + + else + fprintf(f, "label=\"\" shape=point " ); + + if(env->flags & CO_IFG_DUMP_SHAPE) + fprintf(f, "shape=%s ", limited ? "diamond" : "ellipse"); + + if(env->flags & CO_IFG_DUMP_COLORS) + fprintf(f, "style=filled color=%s ", get_dot_color_name(reg->index)); +} + +static void ifg_dump_at_end(FILE *file, void *self) +{ + co_ifg_dump_t *env = self; + affinity_node_t *a; + + co_gs_foreach_aff_node(env->co, a) { + const arch_register_t *ar = arch_get_irn_register(env->co->aenv, a->irn); + unsigned aidx = get_irn_idx(a->irn); + neighb_t *n; + + co_gs_foreach_neighb(a, n) { + const arch_register_t *nr = arch_get_irn_register(env->co->aenv, n->irn); + unsigned nidx = get_irn_idx(n->irn); + + if(aidx < nidx) { + const char *color = nr == ar ? "blue" : "red"; + fprintf(file, "\tn%d -- n%d [weight=0.01 ", aidx, nidx); + if(env->flags & CO_IFG_DUMP_LABELS) + fprintf(file, "label=\"%d\" ", n->costs); + if(env->flags & CO_IFG_DUMP_COLORS) + fprintf(file, "color=%s ", color); + else + fprintf(file, "style=dashed"); + fprintf(file, "];\n"); } + } } - obstack_free(&co->ob, NULL); - DBG((dbg, 2, "The checker seems to be happy!\n")); +} + + +static be_ifg_dump_dot_cb_t ifg_dot_cb = { + ifg_is_dump_node, + ifg_dump_graph_attr, + ifg_dump_node_attr, + NULL, + NULL, + ifg_dump_at_end +}; + + + +void co_dump_ifg_dot(const copy_opt_t *co, FILE *f, unsigned flags) +{ + co_ifg_dump_t cod; + + cod.co = co; + cod.flags = flags; + be_ifg_dump_dot(co->cenv->ifg, co->irg, f, &ifg_dot_cb, &cod); +} + + +void co_solve_park_moon(copy_opt_t *opt) +{ + +} + +static int void_algo(copy_opt_t *co) +{ + return 0; +} + +/* + _ _ _ _ _ + / \ | | __ _ ___ _ __(_) |_| |__ _ __ ___ ___ + / _ \ | |/ _` |/ _ \| '__| | __| '_ \| '_ ` _ \/ __| + / ___ \| | (_| | (_) | | | | |_| | | | | | | | \__ \ + /_/ \_\_|\__, |\___/|_| |_|\__|_| |_|_| |_| |_|___/ + |___/ +*/ + +static co_algo_t *algos[] = { + void_algo, + co_solve_heuristic, + co_solve_heuristic_new, + co_solve_heuristic_java, +#ifdef WITH_ILP + co_solve_ilp2 +#endif +}; + +/* + __ __ _ ____ _ + | \/ | __ _(_)_ __ | _ \ _ __(_)_ _____ _ __ + | |\/| |/ _` | | '_ \ | | | | '__| \ \ / / _ \ '__| + | | | | (_| | | | | | | |_| | | | |\ V / __/ | + |_| |_|\__,_|_|_| |_| |____/|_| |_| \_/ \___|_| + +*/ + +void co_driver(be_chordal_env_t *cenv) +{ + copy_opt_t *co; + co_algo_t *algo_func; + int init_costs; + + if(algo < 0 || algo >= CO_ALGO_LAST) + return; + + co = new_copy_opt(cenv, cost_func); + co_build_ou_structure(co); + co_build_graph_structure(co); + init_costs = co_get_copy_costs(co); + + /* Dump the interference graph in Appel's format. */ + if(dump_flags & DUMP_APPEL) { + FILE *f = be_chordal_open(cenv, "", ".apl"); + co_dump_appel_graph(co, f); + fclose(f); + } + + if(dump_flags & DUMP_BEFORE) { + FILE *f = be_chordal_open(cenv, "", "-before.dot"); + co_dump_ifg_dot(co, f, style_flags); + fclose(f); + } + + algo_func = algos[algo]; + algo_func(co); + + if(dump_flags & DUMP_AFTER) { + FILE *f = be_chordal_open(cenv, "", "-after.dot"); + co_dump_ifg_dot(co, f, style_flags); + fclose(f); + } + + if(do_stats) { + int optimizable_costs = co_get_max_copy_costs(co) - co_get_lower_bound(co); + int remaining = co_get_copy_costs(co); + int evitable = remaining - co_get_lower_bound(co); + + ir_printf("%30F %10s %10d%10d%10d%10d", cenv->irg, cenv->cls->name, + co_get_max_copy_costs(co), init_costs, + co_get_inevit_copy_costs(co), co_get_lower_bound(co)); + + if(optimizable_costs > 0) + printf("%10d %5.2f\n", remaining, (evitable * 100.0) / optimizable_costs); + else + printf("%10d %5s\n", remaining, "-"); + } + + co_free_graph_structure(co); + co_free_ou_structure(co); + free_copy_opt(co); }