X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=ir%2Fbe%2Fbespillbelady3.c;h=2bac5bba474eb78216ef908c45d63721b529b2da;hb=0378ccc621a98322785250f96d85d3b32dc0cad1;hp=738148d61c529fbc7841a7d4ad66bd784ef8c76a;hpb=8763e45ea951e77d82e11eda1ac40d0acc58c24b;p=libfirm

diff --git a/ir/be/bespillbelady3.c b/ir/be/bespillbelady3.c
index 738148d61..2bac5bba4 100644
--- a/ir/be/bespillbelady3.c
+++ b/ir/be/bespillbelady3.c
@@ -29,9 +29,7 @@
  *   - merge multiple start worksets of blocks ~ok
  *   - how to and when to do the tentative phase...
  */
-#ifdef HAVE_CONFIG_H
 #include "config.h"
-#endif
 
 #include <stdbool.h>
 
@@ -49,10 +47,14 @@
 #include "bemodule.h"
 #include "bespill.h"
 #include "beutil.h"
-#include "bespilloptions.h"
-#include "besched_t.h"
+#include "bespillutil.h"
+#include "besched.h"
 #include "be_t.h"
 
+#ifndef NDEBUG
+#define EXPENSIVE_CHECKS
+#endif
+
 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
 
 typedef struct loop_edge_t loop_edge_t;
@@ -64,6 +66,7 @@ struct loop_edge_t {
 
 typedef struct loop_info_t loop_info_t;
 struct loop_info_t {
+	ir_loop     *loop;
 	loop_edge_t *entry_edges;
 	loop_edge_t *exit_edges;
 	size_t       max_register_pressure;
@@ -74,13 +77,16 @@ struct worklist_entry_t {
 	struct list_head  head;
 	ir_node          *value;
 	ir_node          *reload_point;
-	worklist_entry_t *next_reload;
+	ir_loop          *unused_livethrough_loop;
 };
 
+#define foreach_worklist(entry, wl) list_for_each_entry(worklist_entry_t, entry, &(wl)->live_values, head)
+
 typedef struct worklist_t worklist_t;
 struct worklist_t {
 	struct list_head  live_values;
 	size_t            n_live_values;
+	ir_visited_t      visited;
 };
 
 typedef struct block_info_t block_info_t;
@@ -89,19 +95,24 @@ struct block_info_t {
 	worklist_t *end_worklist;
 };
 
-static const arch_env_t            *arch_env;
+/** The register class we currently run on. */
 static const arch_register_class_t *cls;
 static struct obstack               obst;
 static spill_env_t                 *senv;
 static size_t                       n_regs;
 static size_t                       max_register_pressure;
 static bool                         tentative_mode;
-static bool                         should_have_reached_fixpoint;
+static bool                         do_push_unused_livethroughs;
+/** Execution frequency for the current graph. */
 static ir_exec_freq                *exec_freq;
+static ir_visited_t                 worklist_visited;
+#ifdef DEBUG_libfirm
+static bool                         should_have_reached_fixpoint;
+#endif
 
 static worklist_t *new_worklist(void)
 {
-	worklist_t *worklist = obstack_alloc(&obst, sizeof(worklist[0]));
+	worklist_t *worklist = OALLOCZ(&obst, worklist_t);
 
 	INIT_LIST_HEAD(&worklist->live_values);
 	worklist->n_live_values    = 0;
@@ -125,48 +136,43 @@ static block_info_t *get_block_info(ir_node *block)
 	if (info != NULL)
 		return info;
 
-	info = obstack_alloc(&obst, sizeof(info[0]));
-	memset(info, 0, sizeof(info[0]));
+	info = OALLOCZ(&obst, block_info_t);
 	set_irn_link(block, info);
 	return info;
 }
 
 static void deactivate_worklist(const worklist_t *worklist)
 {
-	struct list_head *entry;
+	worklist_entry_t *entry;
 
-	list_for_each(entry, &worklist->live_values) {
-		worklist_entry_t *wl_entry
-			= list_entry(entry, worklist_entry_t, head);
-		assert(worklist_contains(wl_entry->value));
-		mark_irn_not_visited(wl_entry->value);
-		set_irn_link(wl_entry->value, NULL);
+	foreach_worklist(entry, worklist) {
+		assert(worklist_contains(entry->value));
+		mark_irn_not_visited(entry->value);
+		set_irn_link(entry->value, NULL);
 	}
 }
 
 static void activate_worklist(const worklist_t *worklist)
 {
-	struct list_head *entry;
+	worklist_entry_t *entry;
 
-	list_for_each(entry, &worklist->live_values) {
-		worklist_entry_t *wl_entry
-			= list_entry(entry, worklist_entry_t, head);
-		assert(!worklist_contains(wl_entry->value));
-		mark_irn_visited(wl_entry->value);
-		set_irn_link(wl_entry->value, wl_entry);
+	foreach_worklist(entry, worklist) {
+		assert(!worklist_contains(entry->value));
+		mark_irn_visited(entry->value);
+		set_irn_link(entry->value, entry);
 	}
 }
 
 /**
  * duplicate a worklist and directly activates it
  */
-static worklist_t *duplicate_activate_worklist(const worklist_t *worklist,
-		ir_node *block, ir_node *succ_block, int succ_pos)
+static void fill_and_activate_worklist(worklist_t *new_worklist,
+		const worklist_t *worklist, ir_node *block, ir_node *succ_block,
+		int succ_pos)
 {
 	ir_node          *reload_point  = NULL;
 	size_t            n_live_values = 0;
-	worklist_t       *new_worklist;
-	struct list_head *entry;
+	worklist_entry_t *entry;
 
 	if (succ_block != NULL &&
 			(get_Block_n_cfgpreds(succ_block) > 1
@@ -174,50 +180,39 @@ static worklist_t *duplicate_activate_worklist(const worklist_t *worklist,
 		reload_point = be_get_end_of_block_insertion_point(block);
 	}
 
-	new_worklist = obstack_alloc(&obst, sizeof(new_worklist[0]));
-	INIT_LIST_HEAD(&new_worklist->live_values);
-
-	list_for_each(entry, &worklist->live_values) {
-		worklist_entry_t *wl_entry  = list_entry(entry, worklist_entry_t, head);
-		ir_node          *value     = wl_entry->value;
+	foreach_worklist(entry, worklist) {
+		ir_node          *value = entry->value;
 		worklist_entry_t *new_entry;
 
+		if (new_worklist->n_live_values >= n_regs)
+			break;
+
 		if (is_Phi(value) && get_nodes_block(value) == succ_block) {
 			value = get_Phi_pred(value, succ_pos);
 
 			/* can happen for unknown phi preds */
-			if (!arch_irn_consider_in_reg_alloc(arch_env, cls, value))
+			if (!arch_irn_consider_in_reg_alloc(cls, value))
 				continue;
 		}
 
-		new_entry = obstack_alloc(&obst, sizeof(new_entry[0]));
+		if (irn_visited_else_mark(value))
+			continue;
+
+		new_entry = OALLOCZ(&obst, worklist_entry_t);
+
 		new_entry->value = value;
 		if (reload_point != NULL) {
 			new_entry->reload_point = reload_point;
 		} else {
-			new_entry->reload_point = wl_entry->reload_point;
+			new_entry->reload_point = entry->reload_point;
 		}
-		new_entry->next_reload = NULL;
 
-		if (irn_not_visited(value)) {
-			list_add_tail(&new_entry->head, &new_worklist->live_values);
-			++n_live_values;
+		list_add_tail(&new_entry->head, &new_worklist->live_values);
+		++n_live_values;
 
-			mark_irn_visited(value);
-			set_irn_link(value, new_entry);
-		}
-#if 0
-		else {
-			/* the only way we might visit a value again, is when we get it as
-			 * phi predecessor */
-			assert(is_Phi(wl_entry->value)
-					&& get_nodes_block(wl_entry->value) == succ_block);
-		}
-#endif
+		set_irn_link(value, new_entry);
+		new_worklist->n_live_values++;
 	}
-	new_worklist->n_live_values = n_live_values;
-
-	return new_worklist;
 }
 
 static worklist_t *duplicate_worklist(const worklist_t *worklist)
@@ -225,14 +220,13 @@ static worklist_t *duplicate_worklist(const worklist_t *worklist)
 	worklist_t       *new_worklist;
 	struct list_head *entry;
 
-	new_worklist = obstack_alloc(&obst, sizeof(new_worklist[0]));
+	new_worklist = OALLOCZ(&obst, worklist_t);
 	INIT_LIST_HEAD(&new_worklist->live_values);
 	new_worklist->n_live_values = worklist->n_live_values;
 
 	list_for_each(entry, &worklist->live_values) {
 		worklist_entry_t *wl_entry  = list_entry(entry, worklist_entry_t, head);
-		worklist_entry_t *new_entry
-			= obstack_alloc(&obst, sizeof(new_entry[0]));
+		worklist_entry_t *new_entry	= OALLOC(&obst, worklist_entry_t);
 
 		memcpy(new_entry, wl_entry, sizeof(new_entry[0]));
 		list_add_tail(&new_entry->head, &new_worklist->live_values);
@@ -257,7 +251,12 @@ static void print_worklist(const worklist_t *worklist, int level)
 #endif
 
 
-
+/**
+ * Clear the link fields of a loop and all
+ * its child loops.
+ *
+ * @param loop  the root loop
+ */
 static void clear_loop_info(ir_loop *loop)
 {
 	int n_elements = get_loop_n_elements(loop);
@@ -279,8 +278,8 @@ static loop_info_t *get_loop_info(ir_loop *loop)
 	if (info != NULL)
 		return info;
 
-	info = obstack_alloc(&obst, sizeof(info[0]));
-	memset(info, 0, sizeof(info[0]));
+	info = OALLOCZ(&obst, loop_info_t);
+	info->loop = loop;
 	loop->link = info;
 	return info;
 }
@@ -325,26 +324,21 @@ static void construct_loop_edges(ir_node *block, void *data)
 		do {
 			loop_info_t *l_info = get_loop_info(l);
 
-			edge = obstack_alloc(&obst, sizeof(edge[0]));
-			memset(edge, 0, sizeof(edge[0]));
+			edge = OALLOCZ(&obst, loop_edge_t);
 			edge->block = block;
 			edge->pos   = i;
 
 			if (is_exit_edge) {
-				ir_fprintf(stderr, "Loop %p exit edge %+F:%d\n",
-				           l, block, i);
 				edge->next         = l_info->exit_edges;
 				l_info->exit_edges = edge;
 				assert(get_loop_depth(loop) < get_loop_depth(l));
 			} else {
-				ir_fprintf(stderr, "Loop %p entry edge %+F:%d\n",
-				           l, block, i);
 				edge->next          = l_info->entry_edges;
 				l_info->entry_edges = edge;
 				assert(get_loop_depth(cfgpred_loop) < get_loop_depth(l));
 			}
 			l = get_loop_outer_loop(l);
-		} while(l != goal);
+		} while (l != goal);
 	}
 }
 
@@ -356,61 +350,13 @@ static void place_reload(worklist_entry_t *entry)
 	if (tentative_mode)
 		return;
 
-	/* walk list of reload points */
-	do {
-		DB((dbg, LEVEL_1, "reload of %+F before %+F\n", entry->value,
-		    entry->reload_point));
-		assert(entry->reload_point != NULL);
-		be_add_reload(senv, entry->value, entry->reload_point, cls, 1);
-		entry->reload_point = NULL;
-
-		entry = entry->next_reload;
-	} while(entry != NULL);
+	DB((dbg, LEVEL_1, "reload of %+F before %+F\n", entry->value,
+		entry->reload_point));
+	assert(entry->reload_point != NULL);
+	be_add_reload(senv, entry->value, entry->reload_point, cls, 1);
+	entry->reload_point = NULL;
 }
 
-#if 0
-/**
- * does stuff required for non-active worklists: spills all values not live
- * in the active worklist; links live values to the current worklist
- */
-static void process_non_active_worklist(const worklist_t *worklist,
-                                        worklist_t *current_worklist)
-{
-	struct list_head *entry;
-
-	list_for_each(entry, &worklist->live_values) {
-		worklist_entry_t *wl_entry = list_entry(entry, worklist_entry_t, head);
-		ir_node          *value    = wl_entry->value;
-
-		if (!worklist_contains(value)) {
-			/* if we still have room in the worklist, then we can simply take
-			 * the value */
-			if (current_worklist->n_live_values < n_regs) {
-				worklist_entry_t *new_entry
-					= obstack_alloc(&obst, sizeof(new_entry[0]));
-
-				new_entry->value        = value;
-				new_entry->reload_point = wl_entry->reload_point;
-				new_entry->next_reload  = wl_entry->next_reload;
-				set_irn_link(value, new_entry);
-				mark_irn_visited(value);
-				list_add(&new_entry->head, &current_worklist->live_values);
-				++current_worklist->n_live_values;
-			} else {
-				/* value is not in current worklist, place reloads */
-				place_reload(wl_entry);
-			}
-		} else {
-			/* value is in current worklist, link it with the reload point
-			 * from this path */
-			worklist_entry_t *active_entry = get_irn_link(value);
-			wl_entry->next_reload          = active_entry->next_reload;
-			active_entry->next_reload      = wl_entry;
-		}
-	}
-}
-#endif
-
 /**
  * makes sure the worklist contains not more than n_regs - room_needed entries
  */
@@ -425,7 +371,7 @@ static void make_room(worklist_t *worklist, size_t room_needed)
 		return;
 
 	entry = worklist->live_values.next;
-	for(i = spills_needed; i > 0; --i) {
+	for (i = spills_needed; i > 0; --i) {
 		struct list_head *next = entry->next;
 		worklist_entry_t *wl_entry
 			= list_entry(entry, worklist_entry_t, head);
@@ -449,7 +395,7 @@ static void val_used(worklist_t *worklist, ir_node *value, ir_node *sched_point)
 	/* already in the worklist? move around, otherwise add at back */
 	worklist_entry_t *entry = get_irn_link(value);
 
-	assert(arch_irn_consider_in_reg_alloc(arch_env, cls, value));
+	assert(arch_irn_consider_in_reg_alloc(cls, value));
 
 	if (worklist_contains(value)) {
 		assert(entry != NULL);
@@ -457,7 +403,7 @@ static void val_used(worklist_t *worklist, ir_node *value, ir_node *sched_point)
 		list_del(&entry->head);
 	} else {
 		if (entry == NULL) {
-			entry        = obstack_alloc(&obst, sizeof(entry[0]));
+			entry        = OALLOCZ(&obst, worklist_entry_t);
 			entry->value = value;
 			set_irn_link(value, entry);
 		}
@@ -467,7 +413,6 @@ static void val_used(worklist_t *worklist, ir_node *value, ir_node *sched_point)
 	}
 
 	entry->reload_point = sched_point;
-	entry->next_reload  = NULL;
 	list_add_tail(&entry->head, &worklist->live_values);
 }
 
@@ -513,7 +458,7 @@ static void do_spilling(ir_node *block, worklist_t *worklist)
 
 				if (worklist_contains(node2))
 					continue;
-				if (!arch_irn_consider_in_reg_alloc(arch_env, cls, node2))
+				if (!arch_irn_consider_in_reg_alloc(cls, node2))
 					continue;
 
 				if (!tentative_mode)
@@ -530,7 +475,7 @@ static void do_spilling(ir_node *block, worklist_t *worklist)
 
 			foreach_out_edge(node, edge) {
 				ir_node *proj = get_edge_src_irn(edge);
-				if (!arch_irn_consider_in_reg_alloc(arch_env, cls, proj))
+				if (!arch_irn_consider_in_reg_alloc(cls, proj))
 					continue;
 				if (worklist_contains(proj)) {
 					worklist_remove(worklist, proj);
@@ -538,7 +483,7 @@ static void do_spilling(ir_node *block, worklist_t *worklist)
 					++n_defs;
 				}
 			}
-		} else if (arch_irn_consider_in_reg_alloc(arch_env, cls, node)) {
+		} else if (arch_irn_consider_in_reg_alloc(cls, node)) {
 			if (worklist_contains(node)) {
 				worklist_remove(worklist, node);
 			} else {
@@ -551,10 +496,10 @@ static void do_spilling(ir_node *block, worklist_t *worklist)
 
 		/* put all values used by the instruction into the workset */
 		arity = get_irn_arity(node);
-		for(i = 0; i < arity; ++i) {
+		for (i = 0; i < arity; ++i) {
 			ir_node *use = get_irn_n(node, i);
 
-			if (!arch_irn_consider_in_reg_alloc(arch_env, cls, use))
+			if (!arch_irn_consider_in_reg_alloc(cls, use))
 				continue;
 
 			val_used(worklist, use, node);
@@ -599,62 +544,69 @@ static bool worklists_equal(const worklist_t *wl1, const worklist_t *wl2)
 	return true;
 }
 
-static void process_block(ir_node *block, void *env)
+static bool fill_start_worklist(worklist_t *new_worklist, ir_node *block)
 {
-	block_info_t    *block_info      = NULL;
-	worklist_t      *worklist        = NULL;
 	double           best_execfreq   = -1;
+	worklist_t      *best_worklist   = NULL;
 	ir_node         *best_succ_block = NULL;
 	int              best_pos        = -1;
-	int              n_preds;
 	const ir_edge_t *edge;
 
-	(void) env;
-	DB((dbg, LEVEL_1, "Processing %+F\n", block));
-
 	/* construct worklist */
 	foreach_block_succ(block, edge) {
-		ir_node *succ_block = get_edge_src_irn(edge);
-		double   execfreq   = get_block_execfreq(exec_freq, succ_block);
-
-		if (execfreq > best_execfreq) {
-			block_info_t *block_info    = get_block_info(succ_block);
-			worklist_t   *succ_worklist = block_info->start_worklist;
-			if (succ_worklist != NULL) {
-				best_execfreq   = execfreq;
-				worklist        = succ_worklist;
-				best_succ_block = succ_block;
-				best_pos        = get_edge_src_pos(edge);
-			}
-		}
+		ir_node      *succ_block = get_edge_src_irn(edge);
+		double       execfreq    = get_block_execfreq(exec_freq, succ_block);
+		block_info_t *block_info;
+		worklist_t   *succ_worklist;
+
+		if (execfreq < best_execfreq)
+			continue;
+
+		block_info    = get_block_info(succ_block);
+		succ_worklist = block_info->start_worklist;
+
+		if (succ_worklist == NULL || succ_worklist->visited >= worklist_visited)
+			continue;
+
+		best_execfreq   = execfreq;
+		best_worklist   = succ_worklist;
+		best_succ_block = succ_block;
+		best_pos        = get_edge_src_pos(edge);
 	}
-	if (worklist == NULL) {
-		/* at least 1 successor block must have been processed unless it was
-		 * the end block */
-		assert(tentative_mode || block == get_irg_end_block(get_irn_irg(block)));
 
-		worklist = new_worklist();
-	} else {
-		worklist = duplicate_activate_worklist(worklist, block, best_succ_block,
-		                                       best_pos);
-
-		/* handle this in fix_blocks later... */
-#if 0
-		/* now we could have live values in the succ worklists that are not
-		 * live anymore in the worklist we picked. We need reloads for them. */
-		foreach_block_succ(block, edge) {
-			ir_node      *succ_block    = get_edge_src_irn(edge);
-			block_info_t *block_info    = get_block_info(succ_block);
-			worklist_t   *succ_worklist = block_info->start_worklist;
-
-			if (succ_block == best_succ_block)
-				continue;
+	if (best_worklist == NULL)
+		return false;
+	best_worklist->visited = worklist_visited;
 
-			process_non_active_worklist(succ_worklist, worklist);
-		}
-#endif
+	fill_and_activate_worklist(new_worklist, best_worklist, block,
+			best_succ_block, best_pos);
+	return true;
+}
+
+static worklist_t *construct_start_worklist(ir_node *block)
+{
+	worklist_t *worklist = new_worklist();
+
+	++worklist_visited;
+
+	while (fill_start_worklist(worklist, block)) {
+		if (worklist->n_live_values >= n_regs)
+			break;
 	}
 
+	return worklist;
+}
+
+static void process_block(ir_node *block, void *env)
+{
+	block_info_t    *block_info;
+	worklist_t      *worklist;
+	int              n_preds;
+
+	(void) env;
+	DB((dbg, LEVEL_1, "Processing %+F\n", block));
+
+	worklist   = construct_start_worklist(block);
 	block_info = get_block_info(block);
 
 #ifdef DEBUG_libfirm
@@ -680,16 +632,13 @@ static void process_block(ir_node *block, void *env)
 
 	/* we shouldn't have any live values left at the start block */
 	n_preds = get_Block_n_cfgpreds(block);
-	assert(n_preds != 0 || worklist->n_live_values == 0);
+	//assert(n_preds != 0 || worklist->n_live_values == 0);
 }
 
-typedef struct block_or_loop_t block_or_loop_t;
-struct block_or_loop_t {
-	union {
-		ir_node *block;
-		ir_loop *loop;
-	} v;
-	bool is_loop;
+typedef union block_or_loop_t block_or_loop_t;
+union block_or_loop_t {
+	ir_node *block;
+	ir_loop *loop;
 };
 
 static block_or_loop_t *loop_blocks;
@@ -709,8 +658,7 @@ static void find_in_loop(ir_loop *loop, ir_node *entry)
 	if (Block_block_visited(some_block))
 		return;
 
-	block_or_loop.v.loop  = loop;
-	block_or_loop.is_loop = true;
+	block_or_loop.loop = loop;
 	ARR_APP1(block_or_loop_t, loop_blocks, block_or_loop);
 
 #ifndef NDEBUG
@@ -719,12 +667,17 @@ static void find_in_loop(ir_loop *loop, ir_node *entry)
 		loop_edge_t *edge  = loop_info->entry_edges;
 		bool         found = false;
 		for ( ; edge != NULL; edge = edge->next) {
-			if (edge->block == entry)
+			if (edge->block == entry) {
 				found = true;
+				break;
+			}
 		}
 		assert(found);
 	}
+#else
+	(void) entry;
 #endif
+
 	/* check all loop successors */
 	for (edge = loop_info->exit_edges; edge != NULL; edge = edge->next) {
 		ir_node *succ      = edge->block;
@@ -746,8 +699,7 @@ static void find_blocks(ir_node *block)
 	if (Block_block_visited(block))
 		return;
 
-	block_or_loop.v.block = block;
-	block_or_loop.is_loop = false;
+	block_or_loop.block = block;
 
 	ARR_APP1(block_or_loop_t, loop_blocks, block_or_loop);
 	mark_Block_block_visited(block);
@@ -770,18 +722,143 @@ static void find_blocks(ir_node *block)
 	}
 }
 
-static void process_block_or_loop(const block_or_loop_t *block_or_loop)
+/**
+ * append an entry to a worklist. WARNING: The entry must not already be in the
+ * worklist.
+ */
+static void worklist_append(worklist_t *worklist, ir_node *value,
+                            ir_node *reload_point,
+							ir_loop *unused_livethrough_loop)
+{
+	worklist_entry_t *entry;
+
+#ifdef EXPENSIVE_CHECKS
+	foreach_worklist(entry, worklist) {
+		assert(entry->value != value);
+	}
+#endif
+
+	entry = OALLOCZ(&obst, worklist_entry_t);
+	entry->value                   = value;
+	entry->reload_point            = reload_point;
+	entry->unused_livethrough_loop = unused_livethrough_loop;
+	list_add_tail(&entry->head, &worklist->live_values);
+	++worklist->n_live_values;
+	assert(worklist->n_live_values <= n_regs);
+}
+
+static void push_unused_livethrough(loop_info_t *loop_info, ir_node *value)
+{
+	loop_edge_t *edge;
+	++worklist_visited;
+
+	/* add the value to all loop exit and entry blocks */
+	for (edge = loop_info->exit_edges; edge != NULL; edge = edge->next) {
+		ir_node            *block
+			= get_Block_cfgpred_block(edge->block, edge->pos);
+		const block_info_t *info     = get_block_info(block);
+		worklist_t         *worklist = info->end_worklist;
+		ir_node            *reload_point = NULL;
+
+		if (worklist->visited >= worklist_visited)
+			continue;
+		worklist->visited = worklist_visited;
+
+		/* TODO: we need a smarter mechanism here, that makes the reloader place
+		 * reload nodes on all loop exits... */
+
+		worklist_append(worklist, value, reload_point, loop_info->loop);
+	}
+	edge = loop_info->entry_edges;
+	for ( ; edge != NULL; edge = edge->next) {
+		ir_node            *entry_block = edge->block;
+		const block_info_t *info        = get_block_info(entry_block);
+		worklist_t         *worklist    = info->start_worklist;
+		ir_node            *pred_block;
+		ir_node            *reload_point;
+
+		if (worklist->visited >= worklist_visited)
+			continue;
+		worklist->visited = worklist_visited;
+
+		pred_block   = get_Block_cfgpred_block(entry_block, edge->pos);
+		reload_point = be_get_end_of_block_insertion_point(pred_block);
+
+		worklist_append(worklist, value, reload_point, loop_info->loop);
+	}
+
+	set_irn_link(value, NULL);
+	++loop_info->max_register_pressure;
+}
+
+static void push_unused_livethroughs(loop_info_t *loop_info)
+{
+	loop_edge_t *edge;
+
+	/* we can only push unused livethroughs if register pressure inside the loop
+	 * was low enough */
+	if (loop_info->max_register_pressure >= n_regs)
+		return;
+
+	/* find unused livethroughs: register pressure in the loop was low enough
+	 * which means that we had no spills which implies that at every point in
+	 * the loop all*/
+	for (edge = loop_info->exit_edges; edge != NULL; edge = edge->next) {
+		ir_node            *block          = edge->block;
+		const block_info_t *info           = get_block_info(block);
+		worklist_t         *start_worklist = info->start_worklist;
+		ir_node            *exit_block;
+		const block_info_t *exit_info;
+		worklist_t         *end_worklist;
+		struct list_head   *entry;
+
+		if (start_worklist == NULL)
+			continue;
+
+		exit_block   = get_Block_cfgpred_block(edge->block, edge->pos);
+		exit_info    = get_block_info(exit_block);
+		end_worklist = exit_info->end_worklist;
+
+		activate_worklist(end_worklist);
+		/* all values contained in the start_worklist, which are not available
+		 * in the end_worklist, must be unused livethroughs */
+
+		list_for_each(entry, &start_worklist->live_values) {
+			worklist_entry_t *wl_entry
+				= list_entry(entry, worklist_entry_t, head);
+			ir_node *value = wl_entry->value;
+
+			if (loop_info->max_register_pressure >= n_regs)
+				break;
+
+
+			if (!worklist_contains(value)) {
+				/* add it to all loop exits */
+				DB((dbg, LEVEL_2, "Found unused livethrough: %+F (regpressure in loop %d)\n", value, loop_info->max_register_pressure));
+				push_unused_livethrough(loop_info, value);
+				/* the value should now be part of end_worklist, so mark it */
+				mark_irn_visited(value);
+			}
+		}
+
+		deactivate_worklist(end_worklist);
+	}
+}
+
+static void process_block_or_loop(const block_or_loop_t block_or_loop)
 {
-	if (block_or_loop->is_loop) {
-		loop_info_t *loop_info = get_loop_info(block_or_loop->v.loop);
+	if (block_or_loop.loop->kind == k_ir_loop) {
+		loop_info_t *loop_info = get_loop_info(block_or_loop.loop);
+
+		if (do_push_unused_livethroughs)
+			push_unused_livethroughs(loop_info);
 
 		if (loop_info->max_register_pressure > max_register_pressure)
 			max_register_pressure = loop_info->max_register_pressure;
 
-		/* TODO: push unused livethroughs... */
 		return;
 	}
-	process_block(block_or_loop->v.block, NULL);
+	process_block(block_or_loop.block, NULL);
 }
 
 static void process_loop(ir_loop *loop)
@@ -811,7 +888,7 @@ static void process_loop(ir_loop *loop)
 		some_block = get_irg_start_block(current_ir_graph);
 	}
 
-	loop_blocks  = NEW_ARR_F(block_or_loop_t,0);
+	loop_blocks  = NEW_ARR_F(block_or_loop_t, 0);
 	current_loop = loop;
 
 	ir_reserve_resources(current_ir_graph, IR_RESOURCE_BLOCK_VISITED);
@@ -823,30 +900,30 @@ static void process_loop(ir_loop *loop)
 	}
 	ir_free_resources(current_ir_graph, IR_RESOURCE_BLOCK_VISITED);
 
-	fprintf(stderr, "Block List for loop %p\n", loop);
+	DB((dbg, LEVEL_3, "Block List for loop %p\n", loop));
 	len = ARR_LEN(loop_blocks);
 	for (i = 0; i < len; ++i) {
-		block_or_loop_t *block_or_loop = &loop_blocks[i];
-		if (block_or_loop->is_loop) {
-			ir_fprintf(stderr, " L-%p", block_or_loop->v.loop);
+		block_or_loop_t block_or_loop = loop_blocks[i];
+		if (block_or_loop.loop->kind == k_ir_loop) {
+			DB((dbg, LEVEL_3, " L-%p", block_or_loop.loop));
 		} else {
-			ir_fprintf(stderr, " B-%+F", block_or_loop->v.block);
+			DB((dbg, LEVEL_3, " B-%+F", block_or_loop.block));
 		}
 	}
-	fprintf(stderr, "\n");
+	DB((dbg, LEVEL_3, "\n"));
 
 	max_register_pressure = 0;
 
 	/* run1: tentative phase */
 	tentative_mode = true;
 	for (i = len-1; i >= 0; --i) {
-		process_block_or_loop(&loop_blocks[i]);
+		process_block_or_loop(loop_blocks[i]);
 	}
 
 	/* run2: tentative phase - should reach fixpoint */
 	tentative_mode = true;
 	for (i = len-1; i >= 0; --i) {
-		process_block_or_loop(&loop_blocks[i]);
+		process_block_or_loop(loop_blocks[i]);
 	}
 
 #ifndef NDEBUG
@@ -854,20 +931,22 @@ static void process_loop(ir_loop *loop)
 	tentative_mode               = true;
 	should_have_reached_fixpoint = true;
 	for (i = len-1; i >= 0; --i) {
-		process_block_or_loop(&loop_blocks[i]);
+		process_block_or_loop(loop_blocks[i]);
 	}
 	should_have_reached_fixpoint = false;
 #endif
 
 	/* run4: add spills/reloads */
-	tentative_mode = false;
+	tentative_mode              = false;
+	do_push_unused_livethroughs = true;
 	for (i = len-1; i >= 0; --i) {
-		process_block_or_loop(&loop_blocks[i]);
+		process_block_or_loop(loop_blocks[i]);
 	}
+	do_push_unused_livethroughs = false;
 
 	loop_info->max_register_pressure = max_register_pressure;
-	ir_fprintf(stderr, "Regpressure in loop %p: %u\n", loop,
-	           (unsigned) max_register_pressure);
+	DB((dbg, LEVEL_2, "Regpressure in loop %p: %u\n", loop,
+				(unsigned) max_register_pressure));
 
 	DEL_ARR_F(loop_blocks);
 }
@@ -877,6 +956,7 @@ static void fix_block_borders(ir_node *block, void *data)
 	block_info_t *block_info     = get_block_info(block);
 	worklist_t   *start_worklist = block_info->start_worklist;
 	int           n_cfgpreds     = get_Block_n_cfgpreds(block);
+	ir_loop      *loop           = get_irn_loop(block);
 	int           i;
 
 	(void) data;
@@ -886,29 +966,34 @@ static void fix_block_borders(ir_node *block, void *data)
 		ir_node      *pred_block      = get_Block_cfgpred_block(block, i);
 		block_info_t *pred_block_info = get_block_info(pred_block);
 		worklist_t   *end_worklist    = pred_block_info->end_worklist;
-		struct list_head *entry;
+		ir_loop      *pred_loop       = get_irn_loop(pred_block);
+		bool          is_loop_entry   = false;
+		worklist_entry_t *entry;
 
 		assert(end_worklist != NULL);
 
+		if (get_loop_depth(pred_loop) < get_loop_depth(loop)) {
+			is_loop_entry = true;
+		}
+
 		/* reload missing values */
 		activate_worklist(end_worklist);
 
-		list_for_each(entry, &start_worklist->live_values) {
-			worklist_entry_t *wl_entry
-				= list_entry(entry, worklist_entry_t, head);
-			ir_node          *value = wl_entry->value;
+		foreach_worklist(entry, start_worklist) {
+			ir_node *value = entry->value;
+
+			if (!is_loop_entry && entry->unused_livethrough_loop != NULL)
+				continue;
 
 			if (is_Phi(value) && get_nodes_block(value) == block) {
 				value = get_irn_n(value, i);
 
 				/* we might have unknowns as argument for the phi */
-				if (!arch_irn_consider_in_reg_alloc(arch_env, cls, value))
+				if (!arch_irn_consider_in_reg_alloc(cls, value))
 					continue;
 			}
-
 			if (worklist_contains(value))
 				continue;
-
 			be_add_reload_on_edge(senv, value, block, i, cls, 1);
 		}
 
@@ -916,6 +1001,12 @@ static void fix_block_borders(ir_node *block, void *data)
 	}
 }
 
+/**
+ * Run the spill algorithm.
+ *
+ * @param birg  the graph to run on
+ * @param ncls  the register class to run on
+ */
 static void be_spill_belady3(be_irg_t *birg, const arch_register_class_t *ncls)
 {
 	ir_graph *irg = be_get_birg_irg(birg);
@@ -927,8 +1018,8 @@ static void be_spill_belady3(be_irg_t *birg, const arch_register_class_t *ncls)
 	if (n_regs == 0)
 		return;
 
-	arch_env  = be_get_birg_arch_env(birg);
-	exec_freq = be_get_birg_exec_freq(birg);
+	worklist_visited = 0;
+	exec_freq        = be_get_birg_exec_freq(birg);
 
 	be_clear_links(irg);
 	ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED | IR_RESOURCE_IRN_LINK
@@ -942,20 +1033,7 @@ static void be_spill_belady3(be_irg_t *birg, const arch_register_class_t *ncls)
 	clear_loop_info(get_irg_loop(irg));
 	irg_block_walk_graph(irg, construct_loop_edges, NULL, NULL);
 
-	process_loop(get_irg_loop(current_ir_graph));
-
-#if 0
-	/* do a post-order walk over the CFG to make sure we have a maximum number
-	 * of preds processed before entering a block */
-	tentative_mode = 1;
-	irg_block_edges_walk(get_irg_start_block(irg), NULL, process_block, NULL);
-
-	tentative_mode = 1;
-	irg_block_edges_walk(get_irg_start_block(irg), NULL, process_block, NULL);
-
-	tentative_mode = 0;
-	irg_block_edges_walk(get_irg_start_block(irg), NULL, process_block, NULL);
-#endif
+	process_loop(get_irg_loop(irg));
 
 	irg_block_walk_graph(irg, fix_block_borders, NULL, NULL);