X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;ds=sidebyside;f=ir%2Fbe%2Fbespillbelady3.c;h=1b4d40c7dc74fff5d97635ed9ed36137cbb05232;hb=fcb579b8959da1d7563b1a7b9f008a423ffdf75a;hp=32f9ef20d70b153df9203f2f1b8a5b513c3ccddc;hpb=cd038d3027364295ee3056eebc9929b55f4514ef;p=libfirm diff --git a/ir/be/bespillbelady3.c b/ir/be/bespillbelady3.c index 32f9ef20d..1b4d40c7d 100644 --- a/ir/be/bespillbelady3.c +++ b/ir/be/bespillbelady3.c @@ -25,14 +25,16 @@ * @version $Id$ * * TODO: - * - handle phis correctly, decide wether we should spill them - * - merge multiple start worksets of blocks + * - handle phis correctly, decide whether we should spill them ~ok + * - merge multiple start worksets of blocks ~ok * - how to and when to do the tentative phase... */ #ifdef HAVE_CONFIG_H #include "config.h" #endif +#include + #include "debug.h" #include "list.h" #include "pdeq.h" @@ -40,6 +42,8 @@ #include "irnode_t.h" #include "irprintf.h" #include "iredges_t.h" +#include "irloop_t.h" +#include "irgwalk.h" #include "execfreq.h" #include "bemodule.h" @@ -49,36 +53,68 @@ #include "besched_t.h" #include "be_t.h" +#ifndef NDEBUG +#define EXPENSIVE_CHECKS +#endif + DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;) +typedef struct loop_edge_t loop_edge_t; +struct loop_edge_t { + ir_node *block; + int pos; + loop_edge_t *next; +}; + +typedef struct loop_info_t loop_info_t; +struct loop_info_t { + ir_loop *loop; + loop_edge_t *entry_edges; + loop_edge_t *exit_edges; + size_t max_register_pressure; +}; + typedef struct worklist_entry_t worklist_entry_t; struct worklist_entry_t { struct list_head head; ir_node *value; - unsigned timestep; ir_node *reload_point; + ir_loop *unused_livethrough_loop; }; typedef struct worklist_t worklist_t; struct worklist_t { struct list_head live_values; size_t n_live_values; - unsigned current_timestep; + ir_visited_t visited; +}; + +typedef struct block_info_t block_info_t; +struct block_info_t { + worklist_t *start_worklist; + worklist_t *end_worklist; }; -static const arch_env_t *arch_env; static const arch_register_class_t *cls; static struct obstack obst; static spill_env_t *senv; static size_t n_regs; -static int tentative_mode; +static size_t max_register_pressure; +static bool tentative_mode; +static bool should_have_reached_fixpoint; +static bool do_push_unused_livethroughs; static ir_exec_freq *exec_freq; +static ir_visited_t worklist_visited; -static void init_worklist(worklist_t *worklist, unsigned timestep) +static worklist_t *new_worklist(void) { + worklist_t *worklist = obstack_alloc(&obst, sizeof(worklist[0])); + memset(worklist, 0, sizeof(worklist[0])); + INIT_LIST_HEAD(&worklist->live_values); worklist->n_live_values = 0; - worklist->current_timestep = timestep; + + return worklist; } static void mark_irn_not_visited(ir_node *node) @@ -86,6 +122,23 @@ static void mark_irn_not_visited(ir_node *node) set_irn_visited(node, get_irg_visited(current_ir_graph) - 1); } +static bool worklist_contains(const ir_node *node) +{ + return irn_visited(node); +} + +static block_info_t *get_block_info(ir_node *block) +{ + block_info_t *info = get_irn_link(block); + if (info != NULL) + return info; + + info = obstack_alloc(&obst, sizeof(info[0])); + memset(info, 0, sizeof(info[0])); + set_irn_link(block, info); + return info; +} + static void deactivate_worklist(const worklist_t *worklist) { struct list_head *entry; @@ -93,7 +146,7 @@ static void deactivate_worklist(const worklist_t *worklist) list_for_each(entry, &worklist->live_values) { worklist_entry_t *wl_entry = list_entry(entry, worklist_entry_t, head); - assert(irn_visited(wl_entry->value)); + assert(worklist_contains(wl_entry->value)); mark_irn_not_visited(wl_entry->value); set_irn_link(wl_entry->value, NULL); } @@ -106,51 +159,83 @@ static void activate_worklist(const worklist_t *worklist) list_for_each(entry, &worklist->live_values) { worklist_entry_t *wl_entry = list_entry(entry, worklist_entry_t, head); - ir_node *value = wl_entry->value; - - assert(irn_not_visited(value)); - mark_irn_visited(value); - set_irn_link(value, wl_entry); + assert(!worklist_contains(wl_entry->value)); + mark_irn_visited(wl_entry->value); + set_irn_link(wl_entry->value, wl_entry); } } -static worklist_t *duplicate_worklist(const worklist_t *worklist, - ir_node *block, - ir_node *succ_block, int succ_pos) +/** + * duplicate a worklist and directly activates it + */ +static void fill_and_activate_worklist(worklist_t *new_worklist, + const worklist_t *worklist, ir_node *block, ir_node *succ_block, + int succ_pos) { - ir_node *reload_point = NULL; + ir_node *reload_point = NULL; + size_t n_live_values = 0; struct list_head *entry; - worklist_t *new_worklist = obstack_alloc(&obst, sizeof(new_worklist[0])); - INIT_LIST_HEAD(&new_worklist->live_values); - - if(succ_block != NULL && get_Block_n_cfgpreds(succ_block) > 1) { + if (succ_block != NULL && + (get_Block_n_cfgpreds(succ_block) > 1 + || get_irn_n_edges_kind(block, EDGE_KIND_BLOCK) > 1)) { reload_point = be_get_end_of_block_insertion_point(block); } - new_worklist->current_timestep = worklist->current_timestep; - new_worklist->n_live_values = worklist->n_live_values; - list_for_each(entry, &worklist->live_values) { - worklist_entry_t *wl_entry - = list_entry(entry, worklist_entry_t, head); - worklist_entry_t *new_entry - = obstack_alloc(&obst, sizeof(new_entry[0])); - ir_node *value = wl_entry->value; + worklist_entry_t *wl_entry = list_entry(entry, worklist_entry_t, head); + ir_node *value = wl_entry->value; + worklist_entry_t *new_entry; - if(is_Phi(value) && get_nodes_block(value) == succ_block) { + if (new_worklist->n_live_values >= n_regs) + break; + + if (is_Phi(value) && get_nodes_block(value) == succ_block) { value = get_Phi_pred(value, succ_pos); + + /* can happen for unknown phi preds */ + if (!arch_irn_consider_in_reg_alloc(cls, value)) + continue; } - new_entry->value = value; - new_entry->timestep = wl_entry->timestep; - if(reload_point != NULL) { + if (irn_visited_else_mark(value)) + continue; + + new_entry = obstack_alloc(&obst, sizeof(new_entry[0])); + memset(new_entry, 0, sizeof(new_entry[0])); + + new_entry->value = value; + if (reload_point != NULL) { new_entry->reload_point = reload_point; } else { new_entry->reload_point = wl_entry->reload_point; } list_add_tail(&new_entry->head, &new_worklist->live_values); + ++n_live_values; + + set_irn_link(value, new_entry); + new_worklist->n_live_values++; + } +} + +static worklist_t *duplicate_worklist(const worklist_t *worklist) +{ + worklist_t *new_worklist; + struct list_head *entry; + + new_worklist = obstack_alloc(&obst, sizeof(new_worklist[0])); + memset(new_worklist, 0, sizeof(new_worklist[0])); + INIT_LIST_HEAD(&new_worklist->live_values); + new_worklist->n_live_values = worklist->n_live_values; + + list_for_each(entry, &worklist->live_values) { + worklist_entry_t *wl_entry = list_entry(entry, worklist_entry_t, head); + worklist_entry_t *new_entry + = obstack_alloc(&obst, sizeof(new_entry[0])); + + memcpy(new_entry, wl_entry, sizeof(new_entry[0])); + list_add_tail(&new_entry->head, &new_worklist->live_values); } return new_worklist; @@ -161,66 +246,147 @@ static void print_worklist(const worklist_t *worklist, int level) { struct list_head *entry; - DB((dbg, level, "%d values (TS %u): ", worklist->n_live_values, - worklist->current_timestep)); + DB((dbg, level, "%d values: ", worklist->n_live_values)); list_for_each(entry, &worklist->live_values) { worklist_entry_t *wl_entry = list_entry(entry, worklist_entry_t, head); - //DB((dbg, level, "%+F(%+F) ", wl_entry->value, - // wl_entry->reload_point)); DB((dbg, level, "%+F ", wl_entry->value)); } } #endif -static void place_reload(worklist_entry_t *entry) + + +static void clear_loop_info(ir_loop *loop) { - if(tentative_mode) - return; - DB((dbg, LEVEL_1, "reload of %+F before %+F", entry->value, - entry->reload_point)); - be_add_reload(senv, entry->value, entry->reload_point, cls, 1); + int n_elements = get_loop_n_elements(loop); + int i; + + loop->link = NULL; + for (i = 0; i < n_elements; ++i) { + loop_element element = get_loop_element(loop, i); + if (*element.kind != k_ir_loop) + continue; + + clear_loop_info(element.son); + } } -static void spill_non_live_nodes(const worklist_t *worklist) +static loop_info_t *get_loop_info(ir_loop *loop) { - struct list_head *entry; + loop_info_t *info = loop->link; + if (info != NULL) + return info; + + info = obstack_alloc(&obst, sizeof(info[0])); + memset(info, 0, sizeof(info[0])); + info->loop = loop; + loop->link = info; + return info; +} - list_for_each(entry, &worklist->live_values) { - worklist_entry_t *wl_entry - = list_entry(entry, worklist_entry_t, head); - ir_node *value = wl_entry->value; +/** + * constructs loop in+out edges when called in a block walker + */ +static void construct_loop_edges(ir_node *block, void *data) +{ + int n_cfgpreds = get_Block_n_cfgpreds(block); + ir_loop *loop = get_irn_loop(block); + int i; + + (void) data; + + for (i = 0; i < n_cfgpreds; ++i) { + ir_node *cfgpred_block = get_Block_cfgpred_block(block, i); + ir_loop *cfgpred_loop = get_irn_loop(cfgpred_block); + loop_edge_t *edge; + bool is_exit_edge; + ir_loop *l, *goal; - if(irn_visited(value)) + if (cfgpred_loop == loop) continue; - place_reload(wl_entry); + /* critical edges are splitted, so we can't jump from 1 loop + * directly into another without going out of it */ + assert(get_loop_depth(cfgpred_loop) != get_loop_depth(loop)); + + /* edge out of loop */ + if (get_loop_depth(cfgpred_loop) > get_loop_depth(loop)) { + is_exit_edge = true; + l = cfgpred_loop; + goal = loop; + } else { + is_exit_edge = false; + l = loop; + goal = cfgpred_loop; + } + + /* this might be a jump out/in multiple loops */ + do { + loop_info_t *l_info = get_loop_info(l); + + edge = obstack_alloc(&obst, sizeof(edge[0])); + memset(edge, 0, sizeof(edge[0])); + edge->block = block; + edge->pos = i; + + if (is_exit_edge) { + edge->next = l_info->exit_edges; + l_info->exit_edges = edge; + assert(get_loop_depth(loop) < get_loop_depth(l)); + } else { + edge->next = l_info->entry_edges; + l_info->entry_edges = edge; + assert(get_loop_depth(cfgpred_loop) < get_loop_depth(l)); + } + l = get_loop_outer_loop(l); + } while(l != goal); } } + + + +static void place_reload(worklist_entry_t *entry) +{ + if (tentative_mode) + return; + + DB((dbg, LEVEL_1, "reload of %+F before %+F\n", entry->value, + entry->reload_point)); + assert(entry->reload_point != NULL); + be_add_reload(senv, entry->value, entry->reload_point, cls, 1); + entry->reload_point = NULL; +} + /** * makes sure the worklist contains not more than n_regs - room_needed entries */ static void make_room(worklist_t *worklist, size_t room_needed) { - int spills_needed = worklist->n_live_values + room_needed - n_regs; - if(spills_needed > 0) { - int i = spills_needed; - struct list_head *entry = worklist->live_values.next; - for(i = spills_needed; i > 0; --i) { - struct list_head *next = entry->next; - worklist_entry_t *wl_entry - = list_entry(entry, worklist_entry_t, head); - assert(irn_visited(wl_entry->value)); - mark_irn_not_visited(wl_entry->value); - place_reload(wl_entry); - list_del(entry); + int i; + int spills_needed; + struct list_head *entry; - entry = next; - } - worklist->n_live_values -= spills_needed; + spills_needed = worklist->n_live_values + room_needed - n_regs; + if (spills_needed <= 0) + return; + + entry = worklist->live_values.next; + for(i = spills_needed; i > 0; --i) { + struct list_head *next = entry->next; + worklist_entry_t *wl_entry + = list_entry(entry, worklist_entry_t, head); + + assert(worklist_contains(wl_entry->value)); + mark_irn_not_visited(wl_entry->value); + place_reload(wl_entry); + list_del(entry); + + entry = next; } + worklist->n_live_values -= spills_needed; } /** @@ -229,16 +395,20 @@ static void make_room(worklist_t *worklist, size_t room_needed) */ static void val_used(worklist_t *worklist, ir_node *value, ir_node *sched_point) { - /* is the node in the worklist already? */ + /* already in the worklist? move around, otherwise add at back */ worklist_entry_t *entry = get_irn_link(value); - if(irn_visited(value)) { + + assert(arch_irn_consider_in_reg_alloc(cls, value)); + + if (worklist_contains(value)) { assert(entry != NULL); - assert(irn_visited(value)); list_del(&entry->head); } else { - if(entry == NULL) { + if (entry == NULL) { entry = obstack_alloc(&obst, sizeof(entry[0])); + memset(entry, 0, sizeof(entry[0])); + entry->value = value; set_irn_link(value, entry); } @@ -247,7 +417,6 @@ static void val_used(worklist_t *worklist, ir_node *value, ir_node *sched_point) mark_irn_visited(value); } - entry->timestep = worklist->current_timestep; entry->reload_point = sched_point; list_add_tail(&entry->head, &worklist->live_values); } @@ -259,29 +428,30 @@ static void worklist_remove(worklist_t *worklist, ir_node *value) list_del(&entry->head); --worklist->n_live_values; - assert(irn_visited(value)); + assert(worklist_contains(value)); mark_irn_not_visited(value); } +static void update_max_pressure(worklist_t *worklist) +{ + if (worklist->n_live_values > max_register_pressure) + max_register_pressure = worklist->n_live_values; +} + static void do_spilling(ir_node *block, worklist_t *worklist) { ir_node *node; assert(worklist != NULL); -#ifdef DEBUG_libfirm - DB((dbg, LEVEL_1, "worklist at end of %+F:", block)); - print_worklist(worklist, LEVEL_1); - DB((dbg, LEVEL_1, "\n")); -#endif - sched_foreach_reverse(block, node) { int i, arity; size_t n_defs = 0; DB((dbg, LEVEL_2, "\t%+F... ", node)); + update_max_pressure(worklist); - if(is_Phi(node)) { + if (is_Phi(node)) { ir_node *node2; /* TODO: if we have some free registers, then we could decide to * not spill some phis (but not for phis where at least 1 input is @@ -291,33 +461,35 @@ static void do_spilling(ir_node *block, worklist_t *worklist) sched_foreach_reverse_from(node, node2) { assert(is_Phi(node2)); - if(irn_visited(node2)) + if (worklist_contains(node2)) continue; - if(!arch_irn_consider_in_reg_alloc(arch_env, cls, node2)) + if (!arch_irn_consider_in_reg_alloc(cls, node2)) continue; - be_spill_phi(senv, node2); + if (!tentative_mode) + be_spill_phi(senv, node2); } + DB((dbg, LEVEL_2, "\n")); break; } /* remove values defined by this instruction from the workset. Values * defined but not in the workset need free registers */ - if(get_irn_mode(node) == mode_T) { + if (get_irn_mode(node) == mode_T) { const ir_edge_t *edge; foreach_out_edge(node, edge) { ir_node *proj = get_edge_src_irn(edge); - if(!arch_irn_consider_in_reg_alloc(arch_env, cls, proj)) + if (!arch_irn_consider_in_reg_alloc(cls, proj)) continue; - if(irn_visited(proj)) { + if (worklist_contains(proj)) { worklist_remove(worklist, proj); } else { ++n_defs; } } - } else if(arch_irn_consider_in_reg_alloc(arch_env, cls, node)) { - if(irn_visited(node)) { + } else if (arch_irn_consider_in_reg_alloc(cls, node)) { + if (worklist_contains(node)) { worklist_remove(worklist, node); } else { n_defs = 1; @@ -327,12 +499,12 @@ static void do_spilling(ir_node *block, worklist_t *worklist) /* make sure we have enough free registers for the spills */ make_room(worklist, n_defs); - /* put all values by the instruction into the workset */ + /* put all values used by the instruction into the workset */ arity = get_irn_arity(node); for(i = 0; i < arity; ++i) { ir_node *use = get_irn_n(node, i); - if(!arch_irn_consider_in_reg_alloc(arch_env, cls, use)) + if (!arch_irn_consider_in_reg_alloc(cls, use)) continue; val_used(worklist, use, node); @@ -342,14 +514,14 @@ static void do_spilling(ir_node *block, worklist_t *worklist) * some */ make_room(worklist, 0); - ++worklist->current_timestep; - #ifdef DEBUG_libfirm print_worklist(worklist, LEVEL_2); DB((dbg, LEVEL_2, "\n")); #endif } + update_max_pressure(worklist); + #ifdef DEBUG_libfirm DB((dbg, LEVEL_1, "worklist at begin of %+F:", block)); print_worklist(worklist, LEVEL_1); @@ -357,95 +529,526 @@ static void do_spilling(ir_node *block, worklist_t *worklist) #endif } -static void process_block(ir_node *block, void *env) +static bool worklists_equal(const worklist_t *wl1, const worklist_t *wl2) +{ + const struct list_head *i1 = &wl1->live_values; + const struct list_head *i2 = &wl2->live_values; + + for ( ; i1 != &wl1->live_values && i2 != &wl2->live_values; + i1 = i1->next, i2 = i2->next) { + worklist_entry_t *entry1 = list_entry(i1, worklist_entry_t, head); + worklist_entry_t *entry2 = list_entry(i2, worklist_entry_t, head); + + if (entry1->value != entry2->value) + return false; + } + /* both lists at the end */ + if (i1 != &wl1->live_values || i2 != &wl2->live_values) + return false; + + return true; +} + +static bool fill_start_worklist(worklist_t *new_worklist, ir_node *block) { - int n_preds; - const ir_edge_t *edge; - worklist_t *worklist = NULL; double best_execfreq = -1; + worklist_t *best_worklist = NULL; ir_node *best_succ_block = NULL; - int best_pos = -1; + int best_pos; + const ir_edge_t *edge; + + /* construct worklist */ + foreach_block_succ(block, edge) { + ir_node *succ_block = get_edge_src_irn(edge); + double execfreq = get_block_execfreq(exec_freq, succ_block); + block_info_t *block_info; + worklist_t *succ_worklist; + + if (execfreq < best_execfreq) + continue; + + block_info = get_block_info(succ_block); + succ_worklist = block_info->start_worklist; + + if (succ_worklist == NULL || succ_worklist->visited >= worklist_visited) + continue; + + best_execfreq = execfreq; + best_worklist = succ_worklist; + best_succ_block = succ_block; + best_pos = get_edge_src_pos(edge); + } + + if (best_worklist == NULL) + return false; + best_worklist->visited = worklist_visited; + + fill_and_activate_worklist(new_worklist, best_worklist, block, + best_succ_block, best_pos); + return true; +} + +static worklist_t *construct_start_worklist(ir_node *block) +{ + worklist_t *worklist = new_worklist(); + + ++worklist_visited; + + while(fill_start_worklist(worklist, block)) { + if (worklist->n_live_values >= n_regs) + break; + } + + return worklist; +} + +static void process_block(ir_node *block, void *env) +{ + block_info_t *block_info; + worklist_t *worklist; + int n_preds; (void) env; DB((dbg, LEVEL_1, "Processing %+F\n", block)); - /* construct worklist */ + worklist = construct_start_worklist(block); + block_info = get_block_info(block); + +#ifdef DEBUG_libfirm + DB((dbg, LEVEL_1, "worklist at end of %+F:", block)); + print_worklist(worklist, LEVEL_1); + DB((dbg, LEVEL_1, "\n")); + + if (should_have_reached_fixpoint) { + assert(worklists_equal(block_info->end_worklist, worklist)); + } +#endif + block_info->end_worklist = duplicate_worklist(worklist); + + do_spilling(block, worklist); + deactivate_worklist(worklist); + +#ifdef DEBUG_libfirm + if (should_have_reached_fixpoint) { + assert(worklists_equal(block_info->start_worklist, worklist)); + } +#endif + block_info->start_worklist = worklist; + + /* we shouldn't have any live values left at the start block */ + n_preds = get_Block_n_cfgpreds(block); + //assert(n_preds != 0 || worklist->n_live_values == 0); +} + +typedef struct block_or_loop_t block_or_loop_t; +struct block_or_loop_t { + union { + ir_node *block; + ir_loop *loop; + } v; + bool is_loop; +}; + +static block_or_loop_t *loop_blocks; +static ir_loop *current_loop; + +static void find_blocks(ir_node *block); + +static void find_in_loop(ir_loop *loop, ir_node *entry) +{ + loop_info_t *loop_info = get_loop_info(loop); + block_or_loop_t block_or_loop; + loop_edge_t *edge; + + /* simply mark 1 block in the loop to indicate that the loop was already + * processed */ + ir_node *some_block = loop_info->entry_edges->block; + if (Block_block_visited(some_block)) + return; + + block_or_loop.v.loop = loop; + block_or_loop.is_loop = true; + ARR_APP1(block_or_loop_t, loop_blocks, block_or_loop); + +#ifndef NDEBUG + { + /* we should be 1 of the entry blocks */ + loop_edge_t *edge = loop_info->entry_edges; + bool found = false; + for ( ; edge != NULL; edge = edge->next) { + if (edge->block == entry) + found = true; + } + assert(found); + } +#else + (void) entry; +#endif + + /* check all loop successors */ + for (edge = loop_info->exit_edges; edge != NULL; edge = edge->next) { + ir_node *succ = edge->block; + ir_loop *succ_loop = get_irn_loop(succ); + + if (succ_loop == current_loop) { + find_blocks(succ); + } else { + assert(get_loop_depth(succ_loop) < get_loop_depth(current_loop)); + } + } +} + +static void find_blocks(ir_node *block) +{ + const ir_edge_t *edge; + block_or_loop_t block_or_loop; + + if (Block_block_visited(block)) + return; + + block_or_loop.v.block = block; + block_or_loop.is_loop = false; + + ARR_APP1(block_or_loop_t, loop_blocks, block_or_loop); + mark_Block_block_visited(block); + foreach_block_succ(block, edge) { - ir_node *succ_block = get_edge_src_irn(edge); - double execfreq = get_block_execfreq(exec_freq, succ_block); - - if(execfreq > best_execfreq) { - worklist_t *succ_worklist = get_irn_link(succ_block); - if(succ_worklist != NULL) { - best_execfreq = execfreq; - worklist = succ_worklist; - best_succ_block = succ_block; - best_pos = get_edge_src_pos(edge); + ir_node *succ = get_edge_src_irn(edge); + + /* is it part of the current loop? */ + ir_loop *loop = get_irn_loop(succ); + if (loop != current_loop) { + /* a sub-loop? */ + if (get_loop_depth(loop) > get_loop_depth(current_loop)) { + find_in_loop(loop, succ); + } else { + /* parent loop: we're not interested in the block */ } + } else { + find_blocks(succ); } } - if(worklist == NULL) { - /* only the end-block has 0 successors */ - assert(block == get_irg_end_block(get_irn_irg(block))); +} - worklist = obstack_alloc(&obst, sizeof(worklist[0])); - init_worklist(worklist, 0); - } else { - worklist = duplicate_worklist(worklist, block, best_succ_block, - best_pos); - activate_worklist(worklist); - - /* now we could have live values in the succ worklists that are not - * live anymore in the worklist we picked. We need reloads for them. - */ - if(!tentative_mode) { - foreach_block_succ(block, edge) { - ir_node *succ_block = get_edge_src_irn(edge); - worklist_t *succ_worklist = get_irn_link(succ_block); - - spill_non_live_nodes(succ_worklist); +/** + * append an entry to a worklist. WARNING: The entry must not already be in the + * worklist. + */ +static void worklist_append(worklist_t *worklist, ir_node *value, + ir_node *reload_point, + ir_loop *unused_livethrough_loop) +{ + worklist_entry_t *entry = obstack_alloc(&obst, sizeof(entry[0])); + memset(entry, 0, sizeof(entry[0])); + +#ifdef EXPENSIVE_CHECKS + { + struct list_head *entry; + list_for_each(entry, &worklist->live_values) { + worklist_entry_t *wl_entry + = list_entry(entry, worklist_entry_t, head); + assert(wl_entry->value != value); + } + } +#endif + + entry->value = value; + entry->reload_point = reload_point; + entry->unused_livethrough_loop = unused_livethrough_loop; + list_add_tail(&entry->head, &worklist->live_values); + ++worklist->n_live_values; + assert(worklist->n_live_values <= n_regs); +} + +static void push_unused_livethrough(loop_info_t *loop_info, ir_node *value) +{ + loop_edge_t *edge; + ++worklist_visited; + + /* add the value to all loop exit and entry blocks */ + for (edge = loop_info->exit_edges; edge != NULL; edge = edge->next) { + ir_node *block + = get_Block_cfgpred_block(edge->block, edge->pos); + const block_info_t *info = get_block_info(block); + worklist_t *worklist = info->end_worklist; + ir_node *reload_point = NULL; + + if (worklist->visited >= worklist_visited) + continue; + worklist->visited = worklist_visited; + + /* TODO: we need a smarter mechanism here, that makes the reloader place + * reload nodes on all loop exits... */ + + worklist_append(worklist, value, reload_point, loop_info->loop); + } + edge = loop_info->entry_edges; + for ( ; edge != NULL; edge = edge->next) { + ir_node *entry_block = edge->block; + const block_info_t *info = get_block_info(entry_block); + worklist_t *worklist = info->start_worklist; + ir_node *pred_block; + ir_node *reload_point; + + if (worklist->visited >= worklist_visited) + continue; + worklist->visited = worklist_visited; + + pred_block = get_Block_cfgpred_block(entry_block, edge->pos); + reload_point = be_get_end_of_block_insertion_point(pred_block); + + worklist_append(worklist, value, reload_point, loop_info->loop); + } + + set_irn_link(value, NULL); + ++loop_info->max_register_pressure; +} + +static void push_unused_livethroughs(loop_info_t *loop_info) +{ + loop_edge_t *edge; + + /* we can only push unused livethroughs if register pressure inside the loop + * was low enough */ + if (loop_info->max_register_pressure >= n_regs) + return; + + /* find unused livethroughs: register pressure in the loop was low enough + * which means that we had no spills which implies that at every point in + * the loop all*/ + for (edge = loop_info->exit_edges; edge != NULL; edge = edge->next) { + ir_node *block = edge->block; + const block_info_t *info = get_block_info(block); + worklist_t *start_worklist = info->start_worklist; + ir_node *exit_block; + const block_info_t *exit_info; + worklist_t *end_worklist; + struct list_head *entry; + + if (start_worklist == NULL) + continue; + + exit_block = get_Block_cfgpred_block(edge->block, edge->pos); + exit_info = get_block_info(exit_block); + end_worklist = exit_info->end_worklist; + + activate_worklist(end_worklist); + /* all values contained in the start_worklist, which are not available + * in the end_worklist, must be unused livethroughs */ + + list_for_each(entry, &start_worklist->live_values) { + worklist_entry_t *wl_entry + = list_entry(entry, worklist_entry_t, head); + ir_node *value = wl_entry->value; + + if (loop_info->max_register_pressure >= n_regs) + break; + + + if (!worklist_contains(value)) { + /* add it to all loop exits */ + DB((dbg, LEVEL_2, "Found unused livethrough: %+F (regpressure in loop %d)\n", value, loop_info->max_register_pressure)); + push_unused_livethrough(loop_info, value); + /* the value should now be part of end_worklist, so mark it */ + mark_irn_visited(value); } } + + deactivate_worklist(end_worklist); } +} - do_spilling(block, worklist); - deactivate_worklist(worklist); +static void process_block_or_loop(const block_or_loop_t *block_or_loop) +{ + if (block_or_loop->is_loop) { + loop_info_t *loop_info = get_loop_info(block_or_loop->v.loop); - set_irn_link(block, worklist); + if (do_push_unused_livethroughs) + push_unused_livethroughs(loop_info); - /* we shouldn't have any live values left at the start block */ - n_preds = get_Block_n_cfgpreds(block); - assert(n_preds != 0 || worklist->n_live_values == 0); + if (loop_info->max_register_pressure > max_register_pressure) + max_register_pressure = loop_info->max_register_pressure; + + return; + } + process_block(block_or_loop->v.block, NULL); +} + +static void process_loop(ir_loop *loop) +{ + int n_elements = get_loop_n_elements(loop); + int i, len; + loop_info_t *loop_info; + loop_edge_t *edge; + ir_node *some_block; + + /* first handle all sub-loops */ + for (i = 0; i < n_elements; ++i) { + loop_element element = get_loop_element(loop, i); + if (*element.kind != k_ir_loop) + continue; + + process_loop(element.son); + } + + /* create a postorder of the blocks */ + loop_info = get_loop_info(loop); + edge = loop_info->entry_edges; + if (edge != NULL) { + some_block = edge->block; + } else { + assert(loop == get_irg_loop(current_ir_graph)); + some_block = get_irg_start_block(current_ir_graph); + } + + loop_blocks = NEW_ARR_F(block_or_loop_t,0); + current_loop = loop; + + ir_reserve_resources(current_ir_graph, IR_RESOURCE_BLOCK_VISITED); + inc_irg_block_visited(current_ir_graph); + find_blocks(some_block); + /* for endless loops the end-block might be unreachable */ + if (loop == get_irg_loop(current_ir_graph)) { + find_blocks(get_irg_end_block(current_ir_graph)); + } + ir_free_resources(current_ir_graph, IR_RESOURCE_BLOCK_VISITED); + + DB((dbg, LEVEL_3, "Block List for loop %p\n", loop)); + len = ARR_LEN(loop_blocks); + for (i = 0; i < len; ++i) { + block_or_loop_t *block_or_loop = &loop_blocks[i]; + if (block_or_loop->is_loop) { + DB((dbg, LEVEL_3, " L-%p", block_or_loop->v.loop)); + } else { + DB((dbg, LEVEL_3, " B-%+F", block_or_loop->v.block)); + } + } + DB((dbg, LEVEL_3, "\n")); + + max_register_pressure = 0; + + /* run1: tentative phase */ + tentative_mode = true; + for (i = len-1; i >= 0; --i) { + process_block_or_loop(&loop_blocks[i]); + } + + /* run2: tentative phase - should reach fixpoint */ + tentative_mode = true; + for (i = len-1; i >= 0; --i) { + process_block_or_loop(&loop_blocks[i]); + } + +#ifndef NDEBUG + /* run3: tentative phase - check fixpoint */ + tentative_mode = true; + should_have_reached_fixpoint = true; + for (i = len-1; i >= 0; --i) { + process_block_or_loop(&loop_blocks[i]); + } + should_have_reached_fixpoint = false; +#endif + + /* run4: add spills/reloads */ + tentative_mode = false; + do_push_unused_livethroughs = true; + for (i = len-1; i >= 0; --i) { + process_block_or_loop(&loop_blocks[i]); + } + do_push_unused_livethroughs = false; + + loop_info->max_register_pressure = max_register_pressure; + DB((dbg, LEVEL_2, "Regpressure in loop %p: %u\n", loop, + (unsigned) max_register_pressure)); + + DEL_ARR_F(loop_blocks); +} + +static void fix_block_borders(ir_node *block, void *data) +{ + block_info_t *block_info = get_block_info(block); + worklist_t *start_worklist = block_info->start_worklist; + int n_cfgpreds = get_Block_n_cfgpreds(block); + ir_loop *loop = get_irn_loop(block); + int i; + + (void) data; + assert(start_worklist != NULL); + + for (i = 0; i < n_cfgpreds; ++i) { + ir_node *pred_block = get_Block_cfgpred_block(block, i); + block_info_t *pred_block_info = get_block_info(pred_block); + worklist_t *end_worklist = pred_block_info->end_worklist; + ir_loop *pred_loop = get_irn_loop(pred_block); + bool is_loop_entry = false; + struct list_head *entry; + + assert(end_worklist != NULL); + + if (get_loop_depth(pred_loop) < get_loop_depth(loop)) { + is_loop_entry = true; + } + + /* reload missing values */ + activate_worklist(end_worklist); + + list_for_each(entry, &start_worklist->live_values) { + worklist_entry_t *wl_entry + = list_entry(entry, worklist_entry_t, head); + ir_node *value = wl_entry->value; + + if (is_Phi(value) && get_nodes_block(value) == block) { + value = get_irn_n(value, i); + + /* we might have unknowns as argument for the phi */ + if (!arch_irn_consider_in_reg_alloc(cls, value)) + continue; + } + + if (worklist_contains(value)) + continue; + if (wl_entry->unused_livethrough_loop != NULL && !is_loop_entry) + continue; + + be_add_reload_on_edge(senv, value, block, i, cls, 1); + } + + deactivate_worklist(end_worklist); + } } static void be_spill_belady3(be_irg_t *birg, const arch_register_class_t *ncls) { ir_graph *irg = be_get_birg_irg(birg); - cls = ncls; - n_regs = cls->n_regs - be_put_ignore_regs(birg, cls, NULL); + cls = ncls; + n_regs = cls->n_regs - be_put_ignore_regs(birg, cls, NULL); - if(n_regs == 0) + /* shortcut for register classes with ignore regs only */ + if (n_regs == 0) return; - arch_env = be_get_birg_arch_env(birg); - exec_freq = be_get_birg_exec_freq(birg); - tentative_mode = 0; + worklist_visited = 0; + exec_freq = be_get_birg_exec_freq(birg); be_clear_links(irg); - set_using_irn_link(irg); - set_using_irn_visited(irg); + ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED | IR_RESOURCE_IRN_LINK + | IR_RESOURCE_LOOP_LINK); inc_irg_visited(irg); obstack_init(&obst); senv = be_new_spill_env(birg); - /* do a post-order walk over the CFG to make sure we have a maximum number - * of preds processed before entering a block */ - irg_block_edges_walk(get_irg_start_block(irg), NULL, process_block, NULL); + assure_cf_loop(irg); + clear_loop_info(get_irg_loop(irg)); + irg_block_walk_graph(irg, construct_loop_edges, NULL, NULL); + + process_loop(get_irg_loop(current_ir_graph)); + + irg_block_walk_graph(irg, fix_block_borders, NULL, NULL); - clear_using_irn_link(irg); - clear_using_irn_visited(irg); + ir_free_resources(irg, IR_RESOURCE_IRN_VISITED | IR_RESOURCE_IRN_LINK + | IR_RESOURCE_LOOP_LINK); be_insert_spills_reloads(senv);