X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=ir%2Fopt%2Fopt_osr.c;h=101fb54c3f21210487d59b31ef6c2a9db82de842;hb=d16d39df6772995a29ecdc8de1904ccb2e523599;hp=26b130841c74902a8f0392ab4ca82d5035fe6004;hpb=754f75ba7836c706b5360afbb30e84cd14d59317;p=libfirm

diff --git a/ir/opt/opt_osr.c b/ir/opt/opt_osr.c
index 26b130841..101fb54c3 100644
--- a/ir/opt/opt_osr.c
+++ b/ir/opt/opt_osr.c
@@ -1,7 +1,7 @@
 /**
  * Project:     libFIRM
- * File name:   ir/opt/opt_osr.
- * Purpose:     Operator Strength Reduction,
+ * File name:   ir/opt/opt_osr.c
+ * Purpose:     Operator Strength Reduction, based on
  *              Keith D. Cooper, L. Taylor Simpson, Christopher A. Vick
  * Author:      Michael Beck
  * Modified by:
@@ -14,13 +14,6 @@
 #include "config.h"
 #endif
 
-#ifdef HAVE_MALLOC_H
-#include <malloc.h>
-#endif
-#ifdef HAVE_ALLOCA_H
-#include <alloca.h>
-#endif
-
 #include "opt_osr.h"
 #include "irgraph.h"
 #include "ircons.h"
@@ -30,13 +23,17 @@
 #include "irgmod.h"
 #include "irflag_t.h"
 #include "irgwalk.h"
+#include "irouts.h"
 #include "debug.h"
 #include "obst.h"
 #include "set.h"
 #include "tv.h"
 #include "hashptr.h"
 #include "irtools.h"
+#include "irloop_t.h"
 #include "array.h"
+#include "firmstat.h"
+#include "xmalloc.h"
 
 /** The debug handle. */
 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
@@ -69,27 +66,29 @@ typedef struct iv_env {
 	unsigned replaced;      /**< number of replaced ops */
 	unsigned lftr_replaced; /**< number of applied linear function test replacements */
 	unsigned flags;         /**< additional flags */
+	/** Function called to process a SCC. */
+	void (*process_scc)(scc *pscc, struct iv_env *env);
 } iv_env;
 
 /**
  * An entry in the (op, node, node) -> node map.
  */
-typedef struct quad_t {
-	opcode  code;  /**< the opcode of the reduced operation */
-	ir_node *op1;  /**< the first operand the reduced operation */
-	ir_node *op2;  /**< the second operand of the reduced operation */
+typedef struct quadruple_t {
+	ir_opcode code;  /**< the opcode of the reduced operation */
+	ir_node   *op1;  /**< the first operand the reduced operation */
+	ir_node   *op2;  /**< the second operand of the reduced operation */
 
-	ir_node *res; /**< the reduced operation */
-} quad_t;
+	ir_node   *res; /**< the reduced operation */
+} quadruple_t;
 
 /**
  * A LFTR edge.
  */
 typedef struct LFTR_edge {
-	ir_node *src;   /**< the source node */
-	ir_node *dst;   /**< the destination node */
-	opcode  code;   /**< the opcode that must be applied */
-	ir_node *rc;    /**< the region const that must be applied */
+	ir_node   *src;   /**< the source node */
+	ir_node   *dst;   /**< the destination node */
+	ir_opcode code;   /**< the opcode that must be applied */
+	ir_node   *rc;    /**< the region const that must be applied */
 } LFTR_edge;
 
 /* forward */
@@ -105,6 +104,7 @@ static int LFTR_cmp(const void *e1, const void *e2, size_t size) {
 	return l1->src != l2->src;
 }
 
+#if 0
 /**
  * Find a LFTR edge.
  */
@@ -115,11 +115,12 @@ static LFTR_edge *LFTR_find(ir_node *src, iv_env *env) {
 
 	return set_find(env->lftr_edges, &key, sizeof(key), HASH_PTR(src));
 }
+#endif
 
 /**
  * Add a LFTR edge.
  */
-static void LFTR_add(ir_node *src, ir_node *dst, opcode code, ir_node *rc, iv_env *env) {
+static void LFTR_add(ir_node *src, ir_node *dst, ir_opcode code, ir_node *rc, iv_env *env) {
 	LFTR_edge key;
 
 	key.src  = src;
@@ -127,7 +128,11 @@ static void LFTR_add(ir_node *src, ir_node *dst, opcode code, ir_node *rc, iv_en
 	key.code = code;
 	key.rc   = rc;
 
-	assert(LFTR_find(src, env) == NULL);
+	/*
+	 * There might be more than one edge here. This is rather bad
+	 * because we currently store only one.
+	 */
+//	assert(LFTR_find(src, env) == NULL);
 	set_insert(env->lftr_edges, &key, sizeof(key), HASH_PTR(src));
 }
 
@@ -148,6 +153,9 @@ static node_entry *get_irn_ne(ir_node *irn, iv_env *env) {
 /**
  * Check if irn is an IV.
  *
+ * @param irn  the node to check
+ * @param env  the environment
+ *
  * @returns the header if it is one, NULL else
  */
 static ir_node *is_iv(ir_node *irn, iv_env *env) {
@@ -156,31 +164,42 @@ static ir_node *is_iv(ir_node *irn, iv_env *env) {
 
 /**
  * Check if irn is a region constant.
+ * The block of irn must strictly dominate the header block.
+ *
+ * @param irn           the node to check
+ * @param header_block  the header block of the induction variable
  */
 static int is_rc(ir_node *irn, ir_node *header_block) {
 	ir_node *block = get_nodes_block(irn);
 
-	return block_dominates(block, header_block);
+	return (block != header_block) && block_dominates(block, header_block);
 }
 
 /**
  * Set compare function for the quad set.
  */
 static int quad_cmp(const void *e1, const void *e2, size_t size) {
-	const quad_t *c1 = e1;
-	const quad_t *c2 = e2;
+	const quadruple_t *c1 = e1;
+	const quadruple_t *c2 = e2;
 
 	return c1->code != c2->code || c1->op1 != c2->op1 || c1->op2 != c2->op2;
 }
 
 /**
  * Check if an reduced operation was already calculated.
+ *
+ * @param code  the opcode of the operation
+ * @param op1   the first operand of the operation
+ * @param op2   the second operand of the operation
+ * @param env   the environment
+ *
+ * @return the already reduced node or NULL if this operation is not yet reduced
  */
-static ir_node *search(opcode code, ir_node *op1, ir_node *op2, iv_env *env) {
-	quad_t key, *entry;
+static ir_node *search(ir_opcode code, ir_node *op1, ir_node *op2, iv_env *env) {
+	quadruple_t key, *entry;
 
 	key.code = code;
-	key.op1 = op2;
+	key.op1 = op1;
 	key.op2 = op2;
 
 	entry = set_find(env->quad_map, &key, sizeof(key),
@@ -191,13 +210,19 @@ static ir_node *search(opcode code, ir_node *op1, ir_node *op2, iv_env *env) {
 }
 
 /**
- * Add an reduced operation was already calculated.
+ * Add a reduced operation.
+ *
+ * @param code    the opcode of the operation
+ * @param op1     the first operand of the operation
+ * @param op2     the second operand of the operation
+ * @param result  the result of the reduced operation
+ * @param env     the environment
  */
-static void add(opcode code, ir_node *op1, ir_node *op2, ir_node *result, iv_env *env) {
-	quad_t key;
+static void add(ir_opcode code, ir_node *op1, ir_node *op2, ir_node *result, iv_env *env) {
+	quadruple_t key;
 
 	key.code = code;
-	key.op1  = op2;
+	key.op1  = op1;
 	key.op2  = op2;
 	key.res  = result;
 
@@ -209,21 +234,32 @@ static void add(opcode code, ir_node *op1, ir_node *op2, ir_node *result, iv_env
  * Find a location where to place a bin-op whose operands are in
  * block1 and block2.
  *
+ * @param block1  the block of the first operand
+ * @param block2  the block of the second operand
+ *
  * Note that we know here that such a place must exists. Moreover, this means
  * that either block1 dominates block2 or vice versa. So, just return
  * the "smaller" one.
  */
 static ir_node *find_location(ir_node *block1, ir_node *block2) {
 	if (block_dominates(block1, block2))
-		return block1;
+		return block2;
 	assert(block_dominates(block2, block1));
-	return block2;
+	return block1;
 }
 
 /**
- * create an op1 code op1 operation.
+ * Create a node that executes an op1 code op2 operation.
+ *
+ * @param code   the opcode to execute
+ * @param db     debug info to add to the new node
+ * @param op1    the first operand
+ * @param op2    the second operand
+ * @param mode   the mode of the new operation
+ *
+ * @return the newly created node
  */
-static ir_node *do_apply(opcode code, dbg_info *db, ir_node *op1, ir_node *op2, ir_mode *mode) {
+static ir_node *do_apply(ir_opcode code, dbg_info *db, ir_node *op1, ir_node *op2, ir_mode *mode) {
 	ir_graph *irg = current_ir_graph;
 	ir_node *result;
 	ir_node *block = find_location(get_nodes_block(op1), get_nodes_block(op2));
@@ -247,9 +283,17 @@ static ir_node *do_apply(opcode code, dbg_info *db, ir_node *op1, ir_node *op2,
 
 /**
  * The Apply operation.
+ *
+ * @param orig   the node that represents the original operation and determines
+ *               the opcode, debug-info and mode of a newly created one
+ * @param op1    the first operand
+ * @param op2    the second operand
+ * @param env    the environment
+ *
+ * @return the newly created node
  */
 static ir_node *apply(ir_node *orig, ir_node *op1, ir_node *op2, iv_env *env) {
-	opcode code = get_irn_opcode(orig);
+	ir_opcode code = get_irn_opcode(orig);
 	ir_node *result = search(code, op1, op2, env);
 
 	if (! result) {
@@ -265,17 +309,24 @@ static ir_node *apply(ir_node *orig, ir_node *op1, ir_node *op2, iv_env *env) {
 		}
 		else {
 			result = do_apply(code, db, op1, op2, get_irn_mode(orig));
-			get_irn_ne(result, env)->header = NULL;
-		}
+			get_irn_ne(result, env)->header = NULL;		}
 	}
 	return result;
 }
 
 /**
  * The Reduce operation.
+ *
+ * @param orig   the node that represents the original operation and determines
+ *               the opcode, debug-info and mode of a newly created one
+ * @param iv     the induction variable
+ * @param rc     the region constant
+ * @param env    the environment
+ *
+ * @return the reduced node
  */
 static ir_node *reduce(ir_node *orig, ir_node *iv, ir_node *rc, iv_env *env) {
-	opcode code = get_irn_opcode(orig);
+	ir_opcode code = get_irn_opcode(orig);
 	ir_node *result = search(code, iv, rc, env);
 
 	if (! result) {
@@ -284,11 +335,12 @@ static ir_node *reduce(ir_node *orig, ir_node *iv, ir_node *rc, iv_env *env) {
 		ir_mode *mode = get_irn_mode(orig);
 
 		result = exact_copy(iv);
-		if (mode_is_reference(mode)) {
-			/* bad case: we replace a reference mode calculation.
-			   assure that the new IV will be a reference one */
-			set_irn_mode(result, mode);
-		}
+
+		/* Beware: we must always create a new induction variable with the same mode
+		   as the node we are replacing. Especially this means the mode might be changed
+		   from P to I and back. This is always possible, because we have only Phi, Add
+		   and Sub nodes. */
+		set_irn_mode(result, mode);
 		add(code, iv, rc, result, env);
 		DB((dbg, LEVEL_3, "   Created new %+F for %+F (%s %+F)\n", result, iv,
 			get_irn_opname(orig), rc));
@@ -310,50 +362,64 @@ static ir_node *reduce(ir_node *orig, ir_node *iv, ir_node *rc, iv_env *env) {
 			else if (is_Phi(result))
 				o = apply(orig, o, rc, env);
 			else {
-				switch (code) {
-				case iro_Mul:
+				if (code == iro_Mul)
 					o = apply(orig, o, rc, env);
-					break;
-				}
 			}
 			set_irn_n(result, i, o);
 		}
 	}
+	else {
+		DB((dbg, LEVEL_3, "   Already Created %+F for %+F (%s %+F)\n", result, iv,
+			get_irn_opname(orig), rc));
+	}
 	return result;
 }
 
 /**
- * Do the replacement operation.
+ * The Replace operation: returns 1 if irn was handed to Reduce, 0 if it was skipped.
  *
  * @param irn   the node that will be replaced
  * @param iv    the induction variable
  * @param rc    the region constant
  * @param env   the environment
  */
-static void replace(ir_node *irn, ir_node *iv, ir_node *rc, iv_env *env) {
+static int replace(ir_node *irn, ir_node *iv, ir_node *rc, iv_env *env) {
 	ir_node *result;
-
-	DB((dbg, LEVEL_2, "  Replacing %+F\n", irn));
-
-	result = reduce(irn, iv, rc, env);
-	if (result && result != irn) {
-		node_entry *e, *iv_e;
-
-		exchange(irn, result);
-		e = get_irn_ne(result, env);
-		iv_e = get_irn_ne(iv, env);
-		e->header = iv_e->header;
+	ir_loop *iv_loop  = get_irn_loop(get_nodes_block(iv));
+	ir_loop *irn_loop = get_irn_loop(get_nodes_block(irn));
+
+	/* only replace nodes whose loop depth is at least that of the induction variable */
+	if (get_loop_depth(irn_loop) >= get_loop_depth(iv_loop)) {
+		DB((dbg, LEVEL_2, "  Replacing %+F\n", irn));
+
+		result = reduce(irn, iv, rc, env);
+		if (result != irn) {
+			node_entry *e, *iv_e;
+
+			hook_strength_red(current_ir_graph, irn);
+			exchange(irn, result);
+			e = get_irn_ne(result, env);
+			iv_e = get_irn_ne(iv, env);
+			e->header = iv_e->header;
+		}
+		return 1;
 	}
+	return 0;
 }
 
 /**
- * check if a node can be replaced.
+ * Check if a node can be replaced (+, -, *).
+ *
+ * @param irn   the node to check
+ * @param env   the environment
+ *
+ * @return non-zero if irn should be Replace'd
  */
 static int check_replace(ir_node *irn, iv_env *env) {
-	ir_node *left, *right, *iv, *rc;
-	ir_op   *op  = get_irn_op(irn);
-	opcode  code = get_op_code(op);
-	ir_node *liv, *riv;
+	ir_node   *left, *right, *iv, *rc;
+	ir_op     *op  = get_irn_op(irn);
+	ir_opcode code = get_op_code(op);
+	ir_node   *liv, *riv;
 
 	switch (code) {
 	case iro_Mul:
@@ -369,28 +435,47 @@ static int check_replace(ir_node *irn, iv_env *env) {
 		if (liv && is_rc(right, liv)) {
 			iv = left; rc = right;
 		}
-		else if (is_op_commutative(op) &&
-			riv && is_rc(left, riv)) {
+		else if (riv && is_op_commutative(op) &&
+			            is_rc(left, riv)) {
 			iv = right; rc = left;
 		}
 
 		if (iv) {
-			replace(irn, iv, rc, env);
-			++env->replaced;
-			return 1;
+			if (code == iro_Mul && env->flags & osr_flag_ignore_x86_shift) {
+				if (is_Const(rc)) {
+					tarval *tv = get_Const_tarval(rc);
+
+					if (tarval_is_long(tv)) {
+						long value = get_tarval_long(tv);
+
+						if (value == 2 || value == 4 || value == 8) {
+							/* do not reduce multiplications by 2, 4, 8 */
+							break;
+						}
+					}
+				}
+			}
+
+			return replace(irn, iv, rc, env);
 		}
 		break;
+	default:
+		break;
 	}
 	return 0;
 }
 
 /**
- * check which SCC's are induction variables
+ * Check which SCC's are induction variables.
+ *
+ * @param pscc  a SCC
+ * @param env   the environment
  */
 static void classify_iv(scc *pscc, iv_env *env) {
 	ir_node *irn, *next, *header = NULL;
-	node_entry *h, *b;
-	int j;
+	node_entry *b, *h = NULL;
+	int j, only_phi, num_outside;
+	ir_node *out_rc;
 
 	/* find the header block for this scc */
 	for (irn = pscc->head; irn; irn = next) {
@@ -413,6 +498,9 @@ static void classify_iv(scc *pscc, iv_env *env) {
 	}
 
 	/* check if this scc contains only Phi, Add or Sub nodes */
+	only_phi    = 1;
+	num_outside = 0;
+	out_rc      = NULL;
 	for (irn = pscc->head; irn; irn = next) {
 		node_entry *e = get_irn_ne(irn, env);
 
@@ -420,6 +508,8 @@ static void classify_iv(scc *pscc, iv_env *env) {
 		switch (get_irn_opcode(irn)) {
 		case iro_Add:
 		case iro_Sub:
+			only_phi = 0;
+			/* fall through */
 		case iro_Phi:
 			for (j = get_irn_arity(irn) - 1; j >= 0; --j) {
 				ir_node *pred  = get_irn_n(irn, j);
@@ -431,6 +521,12 @@ static void classify_iv(scc *pscc, iv_env *env) {
 						/* not an induction variable */
 						goto fail;
 					}
+					if (! out_rc) {
+						out_rc = pred;
+						++num_outside;
+					} else if (out_rc != pred) {
+						++num_outside;
+					}
 				}
 			}
 			break;
@@ -440,14 +536,29 @@ static void classify_iv(scc *pscc, iv_env *env) {
 		}
 	}
 	/* found an induction variable */
-	DB((dbg, LEVEL_2, "  Found an induction variable in %+F\n", pscc->head));
+	DB((dbg, LEVEL_2, "  Found an induction variable:\n  "));
+	if (only_phi && num_outside == 1) {
+		/* a phi cycle with only one real predecessor can be collapsed */
+		DB((dbg, LEVEL_2, "  Found an USELESS Phi cycle:\n  "));
+
+		for (irn = pscc->head; irn; irn = next) {
+			node_entry *e = get_irn_ne(irn, env);
+			next = e->next;
+			e->header = NULL;
+			exchange(irn, out_rc);
+		}
+		++env->replaced;
+		return;
+	}
 
 	/* set the header for every node in this scc */
 	for (irn = pscc->head; irn; irn = next) {
 		node_entry *e = get_irn_ne(irn, env);
 		e->header = header;
 		next = e->next;
+		DB((dbg, LEVEL_2, " %+F,", irn));
 	}
+	DB((dbg, LEVEL_2, "\n"));
 	return;
 
 fail:
@@ -461,7 +572,10 @@ fail:
 }
 
 /**
- * Process a SCC given as a list.
+ * Process a SCC for the operator strength reduction.
+ *
+ * @param pscc  the SCC
+ * @param env   the environment
  */
 static void process_scc(scc *pscc, iv_env *env) {
 	ir_node *head = pscc->head;
@@ -486,14 +600,90 @@ static void process_scc(scc *pscc, iv_env *env) {
 	if (e->next == NULL) {
 		/* this SCC has only a single member */
 		check_replace(head, env);
-	}
-	else {
+	} else {
 		classify_iv(pscc, env);
 	}
 }
 
+/**
+ * If an SCC consists only of Phi nodes sharing one input from outside the SCC, exchange all of them for it.
+ */
+static void remove_phi_cycle(scc *pscc, iv_env *env) {
+	ir_node *irn, *next;
+	int j;
+	ir_node *out_rc;
+
+	/* check that this scc contains only Phi nodes with one common outside input */
+	out_rc      = NULL;
+	for (irn = pscc->head; irn; irn = next) {
+		node_entry *e = get_irn_ne(irn, env);
+
+		next = e->next;
+		if (! is_Phi(irn))
+			return;
+
+		for (j = get_irn_arity(irn) - 1; j >= 0; --j) {
+			ir_node *pred  = get_irn_n(irn, j);
+			node_entry *pe = get_irn_ne(pred, env);
+
+			if (pe->pscc != e->pscc) {
+				/* not in the same SCC, must be the only input; a second, different one aborts */
+				if (! out_rc) {
+					out_rc = pred;
+				} else if (out_rc != pred) {
+					return;
+				}
+			}
+		}
+	}
+	/* found a useless Phi cycle: every member is replaced by the single outside input */
+	DB((dbg, LEVEL_2, "  Found an USELESS Phi cycle:\n  "));
+	/* NOTE(review): out_rc is still NULL here if no member had an outside input -- confirm this cannot happen */
+	for (irn = pscc->head; irn; irn = next) {
+		node_entry *e = get_irn_ne(irn, env);
+		next = e->next;
+		e->header = NULL;
+		exchange(irn, out_rc);
+	}
+	++env->replaced;
+}
+
+/**
+ * Process a SCC for Phi cycle removal; installed as env->process_scc by remove_phi_cycles().
+ *
+ * @param pscc  the SCC
+ * @param env   the environment
+ */
+static void process_phi_only_scc(scc *pscc, iv_env *env) {
+	ir_node *head = pscc->head;
+	node_entry *e = get_irn_link(head);
+	/* debug builds only: print all members of the SCC */
+#ifdef DEBUG_libfirm
+	{
+		ir_node *irn, *next;
+
+		DB((dbg, LEVEL_4, " SCC at %p:\n ", pscc));
+		for (irn = pscc->head; irn; irn = next) {
+			node_entry *e = get_irn_link(irn);
+
+			next = e->next;
+
+			DB((dbg, LEVEL_4, " %+F,", irn));
+		}
+		DB((dbg, LEVEL_4, "\n"));
+	}
+#endif
+	/* SCCs with a single member are skipped */
+	if (e->next != NULL)
+		remove_phi_cycle(pscc, env);
+}
+
+
 /**
  * Push a node onto the stack.
+ *
+ * @param env   the environment
+ * @param n     the node to push
  */
 static void push(iv_env *env, ir_node *n) {
 	node_entry *e;
@@ -510,19 +700,21 @@ static void push(iv_env *env, ir_node *n) {
 /**
  * pop a node from the stack
  *
+ * @param env   the environment
+ *
  * @return  The topmost node
  */
 static ir_node *pop(iv_env *env)
 {
-  ir_node *n = env->stack[--env->tos];
-  node_entry *e = get_irn_ne(n, env);
+	ir_node *n = env->stack[--env->tos];
+	node_entry *e = get_irn_ne(n, env);
 
-  e->in_stack = 0;
-  return n;
+	e->in_stack = 0;
+	return n;
 }
 
 /**
- * Do Tarjan's SCC algorithm and drive OSR
+ * Do Tarjan's SCC algorithm and drive OSR.
  *
  * @param irn  start at this node
  * @param env  the environment
@@ -582,13 +774,16 @@ static void dfs(ir_node *irn, iv_env *env)
 				pscc->head = x;
 			} while (x != irn);
 
-			process_scc(pscc, env);
+			env->process_scc(pscc, env);
 		}
 	}
 }
 
 /**
- * Do the DFS by starting end the End node
+ * Do the DFS by starting at the End node of a graph.
+ *
+ * @param irg  the graph to process
+ * @param env  the environment
  */
 static void do_dfs(ir_graph *irg, iv_env *env) {
 	ir_graph *rem = current_ir_graph;
@@ -623,11 +818,16 @@ static void assign_po(ir_node *block, void *ctx) {
 	e->POnum = env->POnum++;
 }
 
+#if 0
 /**
- * follows the LFTR edges and return the last node in the chain.
+ * Follows the LFTR edges and return the last node in the chain.
  *
  * @param irn  the node that should be followed
  * @param env  the IV environment
+ *
+ * @note
+ * In the current implementation only the last edge is stored, so
+ * only one chain exists. That's why we might miss some opportunities.
  */
 static ir_node *followEdges(ir_node *irn, iv_env *env) {
 	for (;;) {
@@ -647,6 +847,13 @@ static ir_node *followEdges(ir_node *irn, iv_env *env) {
  * @param rc   the IV node that should be translated
  * @param e    the LFTR edge
  * @param env  the IV environment
+ *
+ * @return the translated region constant or NULL
+ *         if the translation was not possible
+ *
+ * @note
+ * In the current implementation only the last edge is stored, so
+ * only one chain exists. That's why we might miss some opportunities.
  */
 static ir_node *applyOneEdge(ir_node *rc, LFTR_edge *e, iv_env *env) {
 	if (env->flags & osr_flag_lftr_with_ov_check) {
@@ -702,6 +909,9 @@ static ir_node *applyOneEdge(ir_node *rc, LFTR_edge *e, iv_env *env) {
  * @param iv   the IV node that starts the LFTR edge chain
  * @param rc   the region constant that should be translated
  * @param env  the IV environment
+ *
+ * @return the translated region constant or NULL
+ *         if the translation was not possible
  */
 static ir_node *applyEdges(ir_node *iv, ir_node *rc, iv_env *env) {
 	ir_node *irn = iv;
@@ -729,7 +939,8 @@ static ir_node *applyEdges(ir_node *iv, ir_node *rc, iv_env *env) {
 }
 
 /**
- * Walker; find Cmp(iv, rc) or Cmp(rc, iv)
+ * Walker, finds Cmp(iv, rc) or Cmp(rc, iv)
+ * and tries to optimize them.
  */
 static void do_lftr(ir_node *cmp, void *ctx) {
 	iv_env *env = ctx;
@@ -749,7 +960,7 @@ static void do_lftr(ir_node *cmp, void *ctx) {
 		iv = left; rc = right;
 
 		nright = applyEdges(iv, rc, env);
-		if (nright) {
+		if (nright && nright != rc) {
 			nleft = followEdges(iv, env);
 		}
 	}
@@ -757,7 +968,7 @@ static void do_lftr(ir_node *cmp, void *ctx) {
 		iv = right; rc = left;
 
 		nleft = applyEdges(iv, rc, env);
-		if (nleft) {
+		if (nleft && nleft != rc) {
 			nright = followEdges(iv, env);
 		}
 	}
@@ -772,23 +983,44 @@ static void do_lftr(ir_node *cmp, void *ctx) {
 
 /**
  * do linear function test replacement.
+ *
+ * @param irg   the graph that should be optimized
+ * @param env   the IV environment
  */
 static void lftr(ir_graph *irg, iv_env *env) {
 	irg_walk_graph(irg, NULL, do_lftr, env);
 }
+#endif
+
+/**
+ * Walker callback (passed as the second callback to irg_walk_graph here): sets the node link to NULL
+ * and copies input -1 (the block) of a Proj's predecessor to the Proj; env is unused.
+ */
+static void clear_and_fix(ir_node *irn, void *env)
+{
+	set_irn_link(irn, NULL);
+
+	if (is_Proj(irn)) {
+		ir_node *pred = get_Proj_pred(irn);
+		set_irn_n(irn, -1, get_irn_n(pred, -1));
+	}
+}
 
 /* Performs Operator Strength Reduction for the passed graph. */
 void opt_osr(ir_graph *irg, unsigned flags) {
-	iv_env env;
+	iv_env   env;
+	ir_graph *rem;
 
-	if (! get_opt_strength_red())
+	if (! get_opt_strength_red()) {
+		/* only kill Phi cycles  */
+		remove_phi_cycles(irg);
 		return;
+	}
 
-	FIRM_DBG_REGISTER(dbg, "firm.opt.osr");
-//	firm_dbg_set_mask(dbg, SET_LEVEL_3);
+	rem = current_ir_graph;
+	current_ir_graph = irg;
 
-	/* and dominance as well */
-	assure_doms(irg);
+	FIRM_DBG_REGISTER(dbg, "firm.opt.osr");
 
 	DB((dbg, LEVEL_1, "Doing Operator Strength Reduction for %+F\n", irg));
 
@@ -802,27 +1034,86 @@ void opt_osr(ir_graph *irg, unsigned flags) {
 	env.replaced      = 0;
 	env.lftr_replaced = 0;
 	env.flags         = flags;
+	env.process_scc   = process_scc;
 
-	/* clear all links */
-	irg_walk_graph(irg, NULL, firm_clear_link, NULL);
+	/* Clear all links and move Proj nodes into
+	   the same block as their predecessors.
+	   This can improve the placement of new nodes.
+	 */
+	irg_walk_graph(irg, NULL, clear_and_fix, NULL);
+
+	/* we need dominance */
+	assure_doms(irg);
+	assure_irg_outs(irg);
 
-	/* calculate the post order number */
-	irg_block_walk_graph(irg, NULL, assign_po, &env);
+	/* calculate the post order number for blocks. */
+	irg_out_block_walk(get_irg_start_block(irg), NULL, assign_po, &env);
 
-	/* calculate the SCC's and drive OSR */
+	/* calculate the SCC's and drive OSR. */
 	do_dfs(irg, &env);
 
 	if (env.replaced) {
 		/* try linear function test replacements */
-		lftr(irg, &env);
+		//lftr(irg, &env);
 
 		set_irg_outs_inconsistent(irg);
-		set_irg_loopinfo_inconsistent(irg);
+		DB((dbg, LEVEL_1, "Replacements: %u + %u (lftr)\n\n", env.replaced, env.lftr_replaced));
 	}
-	DB((dbg, LEVEL_1, "Replacements: %u + %u (lftr)\n\n", env.replaced, env.lftr_replaced));
 
 	del_set(env.lftr_edges);
 	del_set(env.quad_map);
 	DEL_ARR_F(env.stack);
 	obstack_free(&env.obst, NULL);
+
+	current_ir_graph = rem;
+}
+
+/* Remove any Phi cycles with only one real input; current_ir_graph is set to irg for the duration and restored. */
+void remove_phi_cycles(ir_graph *irg) {
+	iv_env   env;
+	ir_graph *rem;
+
+	rem = current_ir_graph;
+	current_ir_graph = irg;
+
+	FIRM_DBG_REGISTER(dbg, "firm.opt.remove_phi");
+
+	DB((dbg, LEVEL_1, "Doing Phi cycle removement for %+F\n", irg));
+
+	obstack_init(&env.obst);
+	env.stack         = NEW_ARR_F(ir_node *, 128);
+	env.tos           = 0;
+	env.nextDFSnum    = 0;
+	env.POnum         = 0;
+	env.quad_map      = NULL; /* no set is created in this function, and none is freed below */
+	env.lftr_edges    = NULL; /* likewise */
+	env.replaced      = 0;
+	env.lftr_replaced = 0;
+	env.flags         = 0;
+	env.process_scc   = process_phi_only_scc;
+
+	/* Clear all links and move Proj nodes into
+	   the same block as their predecessors
+	   (same preparation step as in opt_osr()).
+	 */
+	irg_walk_graph(irg, NULL, clear_and_fix, NULL);
+
+	/* we need the out edges (no dominance is requested here, unlike in opt_osr()) */
+	assure_irg_outs(irg);
+
+	/* calculate the post order number for blocks. */
+	irg_out_block_walk(get_irg_start_block(irg), NULL, assign_po, &env);
+
+	/* calculate the SCC's and hand each one to env.process_scc (Phi cycle removal). */
+	do_dfs(irg, &env);
+
+	if (env.replaced) {
+		set_irg_outs_inconsistent(irg);
+                DB((dbg, LEVEL_1, "remove_phi_cycles: %u Cycles removed\n\n", env.replaced));
+	}
+
+	DEL_ARR_F(env.stack);
+	obstack_free(&env.obst, NULL);
+
+	current_ir_graph = rem;
+}