X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=ir%2Fbe%2Fia32%2Fia32_optimize.c;h=0d434a7159d31bbec25b629450fd02cf8adaff39;hb=a1a465eb2b3f54027b29f829423fffd0396937f4;hp=46d529aec9aa2b67179745892157bb4b0e1cd1ab;hpb=ba60cfd3cb10672f8cf0aa4a8dcf1b41facd9130;p=libfirm

diff --git a/ir/be/ia32/ia32_optimize.c b/ir/be/ia32/ia32_optimize.c
index 46d529aec..0d434a715 100644
--- a/ir/be/ia32/ia32_optimize.c
+++ b/ir/be/ia32/ia32_optimize.c
@@ -35,10 +35,7 @@
 #include "ia32_dbg_stat.h"
 #include "ia32_util.h"
 
-typedef struct _ia32_place_env_t {
-	ia32_code_gen_t *cg;
-	bitset_t        *visited;
-} ia32_place_env_t;
+#define AGGRESSIVE_AM
 
 typedef enum {
 	IA32_AM_CAND_NONE   = 0,
@@ -111,12 +108,12 @@ static ir_node *gen_SymConst(ia32_transform_env_t *env) {
 	if (mode_is_float(mode)) {
 		FP_USED(env->cg);
 		if (USE_SSE2(env->cg))
-			cnst = new_rd_ia32_xConst(dbg, irg, block, get_irg_no_mem(irg), mode);
+			cnst = new_rd_ia32_xConst(dbg, irg, block, mode);
 		else
-			cnst = new_rd_ia32_vfConst(dbg, irg, block, get_irg_no_mem(irg), mode);
+			cnst = new_rd_ia32_vfConst(dbg, irg, block, mode);
 	}
 	else
-		cnst = new_rd_ia32_Const(dbg, irg, block, get_irg_no_mem(irg), mode);
+		cnst = new_rd_ia32_Const(dbg, irg, block, mode);
 
 	set_ia32_Const_attr(cnst, env->irn);
 
@@ -216,13 +213,16 @@ static ir_node *gen_Const(ia32_transform_env_t *env) {
 			env->irn  = cnst;
 			env->mode = mode_P;
 			cnst      = gen_SymConst(env);
+			add_irn_dep(cnst, be_abi_get_start_barrier(env->cg->birg->abi));
 			set_Load_ptr(get_Proj_pred(load), cnst);
 			cnst      = load;
 		}
 		else {
-			cnst = new_rd_ia32_Const(dbg, irg, block, get_irg_no_mem(irg), get_irn_mode(node));
+			cnst = new_rd_ia32_Const(dbg, irg, block, get_irn_mode(node));
+			add_irn_dep(cnst, be_abi_get_start_barrier(env->cg->birg->abi));
 			set_ia32_Const_attr(cnst, node);
 		}
+
 	return cnst;
 }
 
@@ -244,11 +244,31 @@ static void ia32_transform_const(ir_node *irn, void *env) {
 	tenv.irn  = irn;
 	DEBUG_ONLY(tenv.mod = cg->mod;)
 
+#if 1
 	/* place const either in the smallest dominator of all its users or the original block */
 	if (cg->opt & IA32_OPT_PLACECNST)
 		tenv.block = node_users_smallest_common_dominator(irn, 1);
 	else
 		tenv.block = get_nodes_block(irn);
+#else
+	/* Actually, there is no real sense in placing
+	 * the Consts in the successor of the start block. */
+	{
+		ir_node *afterstart = NULL;
+		ir_node *startblock = get_irg_start_block(tenv.irg);
+		const ir_edge_t *edge;
+
+		foreach_block_succ(startblock, edge) {
+			ir_node *block = get_edge_src_irn(edge);
+			if (block != startblock) {
+				afterstart = block;
+				break;
+			}
+		}
+		assert(afterstart != NULL);
+		tenv.block = afterstart;
+	}
+#endif
 
 	switch (get_irn_opcode(irn)) {
 	case iro_Const:
@@ -271,17 +291,12 @@ static void ia32_transform_const(ir_node *irn, void *env) {
  * Transform all firm consts and assure, we visit each const only once.
  */
 static void ia32_place_consts_walker(ir_node *irn, void *env) {
-	ia32_place_env_t *penv = env;
-	opcode           opc   = get_irn_opcode(irn);
+	ia32_code_gen_t *cg = env;
 
-	/* transform only firm consts which are not already visited */
-	if ((opc != iro_Const && opc != iro_SymConst) || bitset_is_set(penv->visited, get_irn_idx(irn)))
+	if (! is_Const(irn) && ! is_SymConst(irn))
 		return;
 
-	/* mark const visited */
-	bitset_set(penv->visited, get_irn_idx(irn));
-
-	ia32_transform_const(irn, penv->cg);
+	ia32_transform_const(irn, cg);
 }
 
 /**
@@ -303,12 +318,7 @@ static void ia32_set_modes(ir_node *irn, void *env) {
  * @param cg  The ia32 codegenerator object
  */
 static void ia32_transform_all_firm_consts(ia32_code_gen_t *cg) {
-	ia32_place_env_t penv;
-
-	penv.cg      = cg;
-	penv.visited = bitset_irg_malloc(cg->irg);
-	irg_walk_graph(cg->irg, NULL, ia32_place_consts_walker, &penv);
-	bitset_free(penv.visited);
+	irg_walk_graph(cg->irg, NULL, ia32_place_consts_walker, cg);
 }
 
 /* Place all consts and change pointer arithmetics into unsigned integer arithmetics. */
@@ -462,146 +472,158 @@ static void ia32_optimize_CondJmp(ir_node *irn, ia32_code_gen_t *cg) {
 	}
 }
 
+// only optimize up to 48 stores behind IncSPs
+#define MAXPUSH_OPTIMIZE	48
+
 /**
- * Creates a Push from Store(IncSP(gp_reg_size))
+ * Tries to create pushes from IncSP/Store combinations
  */
-static void ia32_create_Push(ir_node *irn, ia32_code_gen_t *cg) {
-	ir_node *sp  = get_irn_n(irn, 0);
+static void ia32_create_Pushs(ir_node *irn, ia32_code_gen_t *cg) {
+	int i;
+	int offset;
+	ir_node *node;
+	ir_node *stores[MAXPUSH_OPTIMIZE];
+	ir_node *block = get_nodes_block(irn);
 	ir_graph *irg = cg->irg;
-	ir_node *val, *next, *push, *bl, *proj_M, *proj_res, *old_proj_M, *mem;
-	const ir_edge_t *edge;
-	heights_t *h;
+	ir_node *curr_sp;
+	ir_mode *spmode = get_irn_mode(irn);
 
-	/* do not create push if store has already an offset assigned or base is not a IncSP */
-	if (get_ia32_am_offs(irn) || ! be_is_IncSP(sp))
-		return;
+	memset(stores, 0, sizeof(stores));
 
-	/* do not create push if index is not NOREG */
-	if (arch_get_irn_register(cg->arch_env, get_irn_n(irn, 1)) !=
-		&ia32_gp_regs[REG_GP_NOREG])
-		return;
+	assert(be_is_IncSP(irn));
 
-	/* do not create push for floating point */
-	val = get_irn_n(irn, 2);
-	if (mode_is_float(get_irn_mode(val)))
+	offset = be_get_IncSP_offset(irn);
+	if(offset < 4)
 		return;
 
-	/* do not create push if IncSp doesn't expand stack or expand size is different from register size */
-	if (be_get_IncSP_direction(sp) != be_stack_dir_expand ||
-		be_get_IncSP_offset(sp) != (unsigned) get_mode_size_bytes(ia32_reg_classes[CLASS_ia32_gp].mode))
-		return;
+	/*
+	 * We first walk the schedule after the IncSP node as long as we find
+	 * suitable stores that could be transformed to a push.
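+	 * (For example: an IncSP with offset 12 followed by stores to sp+0,
+	 * sp+4 and sp+8 fills stores[0..2] and becomes three pushes, leaving
+	 * an IncSP offset of 0 so the IncSP itself can go away.)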
+ * We save them into the stores array which is sorted by the frame offset/4 + * attached to the node + */ + for(node = sched_next(irn); !sched_is_end(node); node = sched_next(node)) { + const char *am_offs; + ir_node *mem; + int offset = -1; + int n; + int storeslot; + + // it has to be a store + if(!is_ia32_Store(node)) + break; - /* do not create push, if there is a path (inside the block) from the push value to IncSP */ - h = heights_new(cg->irg); - if (get_nodes_block(val) == get_nodes_block(sp) && - heights_reachable_in_block(h, val, sp)) - { - heights_free(h); - return; - } - heights_free(h); + // it has to use our sp value + if(get_irn_n(node, 0) != irn) + continue; + // store has to be attached to NoMem + mem = get_irn_n(node, 3); + if(!is_NoMem(mem)) { + continue; + } - /* ok, translate into Push */ - edge = get_irn_out_edge_first(irn); - old_proj_M = get_edge_src_irn(edge); - bl = get_nodes_block(irn); + if( (get_ia32_am_flavour(node) & ia32_am_IS) != 0) + break; - next = sched_next(irn); - sched_remove(irn); - sched_remove(sp); + am_offs = get_ia32_am_offs(node); + if(am_offs == NULL) { + offset = 0; + } else { + // the am_offs has to be of the form "+NUMBER" + if(sscanf(am_offs, "+%d%n", &offset, &n) != 1 || am_offs[n] != '\0') { + // we shouldn't have any cases in the compiler at the moment + // that produce something different from esp+XX + assert(0); + break; + } + } - /* - build memory input: - if the IncSP points to NoMem -> just use the memory input from store - if IncSP points to somewhere else -> sync memory of IncSP and Store - */ - mem = be_get_IncSP_mem(sp); - if (mem == get_irg_no_mem(irg)) - mem = get_irn_n(irn, 3); - else { - ir_node *in[2]; + storeslot = offset / 4; + if(storeslot >= MAXPUSH_OPTIMIZE) + continue; + + // storing into the same slot twice is bad (and shouldn't happen...) 
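+		// (a second store into an occupied slot would write the same stack
+		// address twice; we stop collecting, and only the stores gathered
+		// so far are turned into pushes)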
+		if(stores[storeslot] != NULL)
+			break;
+
+		// storing at half-slots is bad
+		if(offset % 4 != 0)
+			break;
 
-		in[0] = mem;
-		in[1] = get_irn_n(irn, 3);
-		mem   = new_r_Sync(irg, bl, 2, in);
+		stores[storeslot] = node;
 	}
 
-	push = new_rd_ia32_Push(NULL, irg, bl, be_get_IncSP_pred(sp), val, mem);
-	proj_res = new_r_Proj(irg, bl, push, get_irn_mode(sp), pn_ia32_Push_stack);
-	proj_M   = new_r_Proj(irg, bl, push, mode_M, pn_ia32_Push_M);
+	curr_sp = get_irn_n(irn, 0);
 
-	/* copy a possible constant from the store */
-	set_ia32_id_cnst(push, get_ia32_id_cnst(irn));
-	set_ia32_immop_type(push, get_ia32_immop_type(irn));
+	// walk the stores in inverse order and create pushes for them
+	i = (offset / 4) - 1;
+	if(i >= MAXPUSH_OPTIMIZE) {
+		i = MAXPUSH_OPTIMIZE - 1;
+	}
 
-	/* the push must have SP out register */
-	arch_set_irn_register(cg->arch_env, push, arch_get_irn_register(cg->arch_env, sp));
+	for( ; i >= 0; --i) {
+		const ir_edge_t *edge, *next;
+		const arch_register_t *spreg;
+		ir_node *push;
+		ir_node *val, *mem;
+		ir_node *store = stores[i];
+		ir_node *noreg = ia32_new_NoReg_gp(cg);
 
-	exchange(old_proj_M, proj_M);
-	exchange(sp, proj_res);
-	sched_add_before(next, push);
-	sched_add_after(push, proj_res);
-}
+		if(store == NULL || is_Bad(store))
+			break;
 
-/**
- * Creates a Pop from IncSP(Load(sp))
- */
-static void ia32_create_Pop(ir_node *irn, ia32_code_gen_t *cg) {
-	ir_node *old_proj_M = be_get_IncSP_mem(irn);
-	ir_node *load       = skip_Proj(old_proj_M);
-	ir_node *old_proj_res = NULL;
-	ir_node *bl, *pop, *next, *proj_res, *proj_sp, *proj_M;
-	const ir_edge_t *edge;
-	const arch_register_t *reg, *sp;
+		val = get_irn_n(store, 2);
+		mem = get_irn_n(store, 3);
+		spreg = arch_get_irn_register(cg->arch_env, curr_sp);
 
-	if (! is_ia32_Load(load) || get_ia32_am_offs(load))
-		return;
+		// create a push
+		push = new_rd_ia32_Push(NULL, irg, block, noreg, noreg, val, curr_sp, mem);
+		if(get_ia32_immop_type(store) != ia32_ImmNone) {
+			copy_ia32_Immop_attr(push, store);
+		}
+		sched_add_before(irn, push);
 
-	if (arch_get_irn_register(cg->arch_env, get_irn_n(load, 1)) !=
-		&ia32_gp_regs[REG_GP_NOREG])
-		return;
-	if (arch_get_irn_register(cg->arch_env, get_irn_n(load, 0)) != cg->isa->arch_isa.sp)
-		return;
+		// create stack pointer Proj
+		curr_sp = new_r_Proj(irg, block, push, spmode, pn_ia32_Push_stack);
+		arch_set_irn_register(cg->arch_env, curr_sp, spreg);
+		sched_add_before(irn, curr_sp);
 
-	/* ok, translate into pop */
-	foreach_out_edge(load, edge) {
-		ir_node *succ = get_edge_src_irn(edge);
-		if (succ != old_proj_M) {
-			old_proj_res = succ;
-			break;
+		// rewire memory Projs of the store
+		foreach_out_edge_safe(store, edge, next) {
+			ir_node *succ = get_edge_src_irn(edge);
+
+			assert(is_Proj(succ) && get_Proj_proj(succ) == pn_ia32_Store_M);
+			set_irn_n(succ, 0, push);
 		}
-	}
-	if (! old_proj_res) {
-		assert(0);
-		return; /* should not happen */
-	}
 
-	bl = get_nodes_block(load);
+		// we can remove the store now
+		set_irn_n(store, 0, new_Bad());
+		set_irn_n(store, 1, new_Bad());
+		set_irn_n(store, 2, new_Bad());
+		set_irn_n(store, 3, new_Bad());
+		sched_remove(store);
 
-	/* IncSP is typically scheduled after the load, so remove it first */
-	sched_remove(irn);
-	next = sched_next(old_proj_res);
-	sched_remove(old_proj_res);
-	sched_remove(load);
+		offset -= 4;
+	}
 
-	reg = arch_get_irn_register(cg->arch_env, load);
-	sp  = arch_get_irn_register(cg->arch_env, irn);
+	be_set_IncSP_offset(irn, offset);
 
-	pop      = new_rd_ia32_Pop(NULL, current_ir_graph, bl, get_irn_n(irn, 0), get_irn_n(load, 2));
-	proj_res = new_r_Proj(current_ir_graph, bl, pop, get_irn_mode(old_proj_res), pn_ia32_Pop_res);
-	proj_sp  = new_r_Proj(current_ir_graph, bl, pop, get_irn_mode(irn), pn_ia32_Pop_stack);
-	proj_M   = new_r_Proj(current_ir_graph, bl, pop, mode_M, pn_ia32_Pop_M);
+	// can we remove the IncSP now?
+	if(offset == 0) {
+		const ir_edge_t *edge, *next;
 
-	exchange(old_proj_M, proj_M);
-	exchange(old_proj_res, proj_res);
-	exchange(irn, proj_sp);
+		foreach_out_edge_safe(irn, edge, next) {
+			ir_node *arg = get_edge_src_irn(edge);
+			int pos = get_edge_src_pos(edge);
 
-	arch_set_irn_register(cg->arch_env, proj_res, reg);
-	arch_set_irn_register(cg->arch_env, proj_sp, sp);
+			set_irn_n(arg, pos, curr_sp);
+		}
 
-	sched_add_before(next, proj_sp);
-	sched_add_before(proj_sp, proj_res);
-	sched_add_before(proj_res,pop);
+		set_irn_n(irn, 0, new_Bad());
+		sched_remove(irn);
+	} else {
+		set_irn_n(irn, 0, curr_sp);
+	}
 }
 
 /**
@@ -613,33 +635,23 @@ static void ia32_optimize_IncSP(ir_node *irn, ia32_code_gen_t *cg) {
 
 	if (be_is_IncSP(prev) && real_uses == 1) {
 		/* first IncSP has only one IncSP user, kill the first one */
-		unsigned       prev_offs = be_get_IncSP_offset(prev);
-		be_stack_dir_t prev_dir  = be_get_IncSP_direction(prev);
-		unsigned       curr_offs = be_get_IncSP_offset(irn);
-		be_stack_dir_t curr_dir  = be_get_IncSP_direction(irn);
+		int prev_offs = be_get_IncSP_offset(prev);
+		int curr_offs = be_get_IncSP_offset(irn);
 
-		int new_ofs = prev_offs * (prev_dir == be_stack_dir_expand ? -1 : +1) +
-		              curr_offs * (curr_dir == be_stack_dir_expand ? -1 : +1);
-
-		if (new_ofs < 0) {
-			new_ofs  = -new_ofs;
-			curr_dir = be_stack_dir_expand;
-		}
-		else
-			curr_dir = be_stack_dir_shrink;
-		be_set_IncSP_offset(prev, 0);
-		be_set_IncSP_offset(irn, (unsigned)new_ofs);
-		be_set_IncSP_direction(irn, curr_dir);
+		be_set_IncSP_offset(prev, prev_offs + curr_offs);
 
 		/* Omit the optimized IncSP */
 		be_set_IncSP_pred(irn, be_get_IncSP_pred(prev));
+
+		set_irn_n(prev, 0, new_Bad());
+		sched_remove(prev);
 	}
 }
 
 /**
  * Performs Peephole Optimizations.
  */
-void ia32_peephole_optimization(ir_node *irn, void *env) {
+static void ia32_peephole_optimize_node(ir_node *irn, void *env) {
 	ia32_code_gen_t *cg = env;
 
 	/* AMD CPUs want explicit compare before conditional jump */
@@ -649,14 +661,18 @@ void ia32_peephole_optimization(ir_node *irn, void *env) {
 		else if (is_ia32_CondJmp(irn))
 			ia32_optimize_CondJmp(irn, cg);
 	}
-	/* seems to be buggy when using Pushes */
-	else if (be_is_IncSP(irn))
-		ia32_optimize_IncSP(irn, cg);
-	else if (is_ia32_Store(irn))
-		ia32_create_Push(irn, cg);
-}
 
+	if (be_is_IncSP(irn)) {
+		// optimize_IncSP doesn't respect dependency edges yet...
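+		// (ia32_optimize_IncSP above merges two adjacent IncSPs by summing
+		// their offsets; it stays disabled here until it handles the
+		// dependency edges)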
+ //ia32_optimize_IncSP(irn, cg); + (void) ia32_optimize_IncSP; + ia32_create_Pushs(irn, cg); + } +} +void ia32_peephole_optimization(ir_graph *irg, ia32_code_gen_t *cg) { + irg_walk_graph(irg, ia32_peephole_optimize_node, NULL, cg); +} /****************************************************************** * _ _ __ __ _ @@ -745,6 +761,7 @@ static int is_addr_candidate(const ir_node *block, const ir_node *irn) { in = left; +#ifndef AGGRESSIVE_AM if (pred_is_specific_nodeblock(block, in, is_ia32_Ld)) { n = ia32_get_irn_n_edges(in); is_cand = (n == 1) ? 0 : is_cand; /* load with only one user: don't create LEA */ @@ -756,6 +773,9 @@ static int is_addr_candidate(const ir_node *block, const ir_node *irn) { n = ia32_get_irn_n_edges(in); is_cand = (n == 1) ? 0 : is_cand; /* load with only one user: don't create LEA */ } +#else + (void) n; +#endif is_cand = get_ia32_frame_ent(irn) ? 1 : is_cand; @@ -778,7 +798,7 @@ static int is_addr_candidate(const ir_node *block, const ir_node *irn) { */ static ia32_am_cand_t is_am_candidate(ia32_code_gen_t *cg, heights_t *h, const ir_node *block, ir_node *irn) { ir_node *in, *load, *other, *left, *right; - int n, is_cand = 0, cand; + int is_cand = 0, cand; if (is_ia32_Ld(irn) || is_ia32_St(irn) || is_ia32_Store8Bit(irn) || is_ia32_vfild(irn) || is_ia32_vfist(irn) || is_ia32_GetST0(irn) || is_ia32_SetST0(irn) || is_ia32_xStoreSimple(irn)) @@ -790,15 +810,23 @@ static ia32_am_cand_t is_am_candidate(ia32_code_gen_t *cg, heights_t *h, const i in = left; if (pred_is_specific_nodeblock(block, in, is_ia32_Ld)) { +#ifndef AGGRESSIVE_AM + int n; n = ia32_get_irn_n_edges(in); is_cand = (n == 1) ? 1 : is_cand; /* load with more than one user: no AM */ +#else + is_cand = 1; +#endif load = get_Proj_pred(in); other = right; - /* 8bit Loads are not supported, they cannot be used with every register */ - if (get_mode_size_bits(get_ia32_ls_mode(load)) < 16) + /* 8bit Loads are not supported (for binary ops), + * they cannot be used with every register */ + if (get_irn_arity(irn) != 4 && get_mode_size_bits(get_ia32_ls_mode(load)) < 16) { + assert(get_irn_arity(irn) == 5); is_cand = 0; + } /* If there is a data dependency of other irn from load: cannot use AM */ if (is_cand && get_nodes_block(other) == block) { @@ -814,8 +842,11 @@ static ia32_am_cand_t is_am_candidate(ia32_code_gen_t *cg, heights_t *h, const i is_cand = 0; if (pred_is_specific_nodeblock(block, in, is_ia32_Ld)) { +#ifndef AGGRESSIVE_AM + int n; n = ia32_get_irn_n_edges(in); is_cand = (n == 1) ? 1 : is_cand; /* load with more than one user: no AM */ +#endif load = get_Proj_pred(in); other = left; @@ -1005,15 +1036,24 @@ static INLINE void try_add_to_sched(ir_node *irn, ir_node *res) { * @param irn The irn to be removed from schedule */ static INLINE void try_remove_from_sched(ir_node *irn) { + int i, arity; + if (sched_is_scheduled(irn)) { if (get_irn_mode(irn) == mode_T) { const ir_edge_t *edge; foreach_out_edge(irn, edge) { ir_node *proj = get_edge_src_irn(edge); - if (sched_is_scheduled(proj)) + if (sched_is_scheduled(proj)) { + set_irn_n(proj, 0, new_Bad()); sched_remove(proj); + } } } + + arity = get_irn_arity(irn); + for(i = 0; i < arity; ++i) { + set_irn_n(irn, i, new_Bad()); + } sched_remove(irn); } } @@ -1461,7 +1501,6 @@ static void optimize_lea(ir_node *irn, void *env) { } } - /** * Checks for address mode patterns and performs the * necessary transformations. 
@@ -1471,8 +1510,7 @@ static void optimize_am(ir_node *irn, void *env) {
 	ia32_am_opt_env_t *am_opt_env = env;
 	ia32_code_gen_t *cg = am_opt_env->cg;
 	heights_t *h = am_opt_env->h;
-	ir_node *block, *noreg_gp, *noreg_fp;
-	ir_node *left, *right;
+	ir_node *block, *left, *right;
 	ir_node *store, *load, *mem_proj;
 	ir_node *succ, *addr_b, *addr_i;
 	int check_am_src = 0;
@@ -1482,9 +1520,7 @@ static void optimize_am(ir_node *irn, void *env) {
 	if (! is_ia32_irn(irn) || is_ia32_Ld(irn) || is_ia32_St(irn) || is_ia32_Store8Bit(irn))
 		return;
 
-	block    = get_nodes_block(irn);
-	noreg_gp = ia32_new_NoReg_gp(cg);
-	noreg_fp = ia32_new_NoReg_fp(cg);
+	block = get_nodes_block(irn);
 
 	DBG((mod, LEVEL_1, "checking for AM\n"));
 
@@ -1517,6 +1553,7 @@ static void optimize_am(ir_node *irn, void *env) {
 	if (get_irn_arity(irn) == 4) {
 		/* it's an "unary" operation */
 		right = left;
+		cand = IA32_AM_CAND_BOTH;
 	}
 	else {
 		right = get_irn_n(irn, 3);
@@ -1611,12 +1648,12 @@ static void optimize_am(ir_node *irn, void *env) {
 				if (get_irn_arity(irn) == 5) {
 					/* binary AMop */
 					set_irn_n(irn, 4, get_irn_n(load, 2));
-					set_irn_n(irn, 2, noreg_gp);
+					set_irn_n(irn, 2, ia32_get_admissible_noreg(cg, irn, 2));
 				}
 				else {
 					/* unary AMop */
 					set_irn_n(irn, 3, get_irn_n(load, 2));
-					set_irn_n(irn, 2, noreg_gp);
+					set_irn_n(irn, 2, ia32_get_admissible_noreg(cg, irn, 2));
 				}
 
 				/* connect the memory Proj of the Store to the op */
@@ -1673,56 +1710,61 @@ static void optimize_am(ir_node *irn, void *env) {
 		/* and right operand is a Load which only used by this irn */
 		if (check_am_src                &&
 			(cand & IA32_AM_CAND_RIGHT) &&
-			(get_irn_arity(irn) == 5)   &&
 			(ia32_get_irn_n_edges(right) == 1))
 		{
-			right = get_Proj_pred(right);
+			ir_node *load = get_Proj_pred(right);
 
-			addr_b = get_irn_n(right, 0);
-			addr_i = get_irn_n(right, 1);
+			addr_b = get_irn_n(load, 0);
+			addr_i = get_irn_n(load, 1);
 
 			/* set new base, index and attributes */
 			set_irn_n(irn, 0, addr_b);
 			set_irn_n(irn, 1, addr_i);
-			add_ia32_am_offs(irn, get_ia32_am_offs(right));
-			set_ia32_am_scale(irn, get_ia32_am_scale(right));
-			set_ia32_am_flavour(irn, get_ia32_am_flavour(right));
+			add_ia32_am_offs(irn, get_ia32_am_offs(load));
+			set_ia32_am_scale(irn, get_ia32_am_scale(load));
+			set_ia32_am_flavour(irn, get_ia32_am_flavour(load));
 			set_ia32_op_type(irn, ia32_AddrModeS);
-			set_ia32_frame_ent(irn, get_ia32_frame_ent(right));
-			set_ia32_ls_mode(irn, get_ia32_ls_mode(right));
+			set_ia32_frame_ent(irn, get_ia32_frame_ent(load));
+			set_ia32_ls_mode(irn, get_ia32_ls_mode(load));
 
-			set_ia32_am_sc(irn, get_ia32_am_sc(right));
-			if (is_ia32_am_sc_sign(right))
+			set_ia32_am_sc(irn, get_ia32_am_sc(load));
+			if (is_ia32_am_sc_sign(load))
 				set_ia32_am_sc_sign(irn);
 
 			/* clear remat flag */
 			set_ia32_flags(irn, get_ia32_flags(irn) & ~arch_irn_flags_rematerializable);
 
-			if (is_ia32_use_frame(right))
+			if (is_ia32_use_frame(load))
 				set_ia32_use_frame(irn);
 
-			/* connect to Load memory */
-			set_irn_n(irn, 4, get_irn_n(right, 2));
+			/* connect to Load memory and disconnect Load */
+			if (get_irn_arity(irn) == 5) {
+				/* binary AMop */
+				set_irn_n(irn, 4, get_irn_n(load, 2));
+				set_irn_n(irn, 3, ia32_get_admissible_noreg(cg, irn, 3));
+			} else {
+				assert(get_irn_arity(irn) == 4);
+				/* unary AMop */
+				set_irn_n(irn, 3, get_irn_n(load, 2));
+				set_irn_n(irn, 2, ia32_get_admissible_noreg(cg, irn, 2));
+			}
 
 			/* this is only needed for Compares, but currently ALL nodes
 			 * have this attribute :-) */
 			set_ia32_pncode(irn, get_inversed_pnc(get_ia32_pncode(irn)));
 
-			/* disconnect from Load */
-			set_irn_n(irn, 3, noreg_gp);
-
-			DBG_OPT_AM_S(right, irn);
+			DBG_OPT_AM_S(load, irn);
 
 			/* If Load has a memory Proj, connect it to the op */
-			mem_proj = ia32_get_proj_for_mode(right, mode_M);
+			mem_proj = ia32_get_proj_for_mode(load, mode_M);
 			if (mem_proj) {
 				set_Proj_pred(mem_proj, irn);
 				set_Proj_proj(mem_proj, 1);
 			}
 
-			try_remove_from_sched(right);
+			try_remove_from_sched(load);
 
-			DB((mod, LEVEL_1, "merged with %+F into source AM\n", right));
+			DB((mod, LEVEL_1, "merged with %+F into source AM\n", load));
 		}
 		else {
 			/* was exchanged but optimize failed: exchange back */