Do not pass the cg to transform_nodes().
[libfirm] / ir / be / ia32 / ia32_transform.c
index 3e0cc10..9e66f76 100644 (file)
@@ -29,6 +29,7 @@
 #endif
 
 #include <limits.h>
+#include <stdbool.h>
 
 #include "irargs_t.h"
 #include "irnode_t.h"
@@ -47,6 +48,7 @@
 #include "irdom.h"
 #include "archop.h"
 #include "error.h"
+#include "array_t.h"
 #include "height.h"
 
 #include "../benode_t.h"
@@ -131,46 +133,46 @@ static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
                                 ir_node *op, ir_node *orig_node);
 
 /** Return non-zero is a node represents the 0 constant. */
-static int is_Const_0(ir_node *node) {
+static bool is_Const_0(ir_node *node) {
        return is_Const(node) && is_Const_null(node);
 }
 
 /** Return non-zero is a node represents the 1 constant. */
-static int is_Const_1(ir_node *node) {
+static bool is_Const_1(ir_node *node) {
        return is_Const(node) && is_Const_one(node);
 }
 
 /** Return non-zero is a node represents the -1 constant. */
-static int is_Const_Minus_1(ir_node *node) {
+static bool is_Const_Minus_1(ir_node *node) {
        return is_Const(node) && is_Const_all_one(node);
 }
 
 /**
  * returns true if constant can be created with a simple float command
  */
-static int is_simple_x87_Const(ir_node *node)
+static bool is_simple_x87_Const(ir_node *node)
 {
        tarval *tv = get_Const_tarval(node);
        if (tarval_is_null(tv) || tarval_is_one(tv))
-               return 1;
+               return true;
 
        /* TODO: match all the other float constants */
-       return 0;
+       return false;
 }
 
 /**
  * returns true if constant can be created with a simple float command
  */
-static int is_simple_sse_Const(ir_node *node)
+static bool is_simple_sse_Const(ir_node *node)
 {
        tarval  *tv   = get_Const_tarval(node);
        ir_mode *mode = get_tarval_mode(tv);
 
        if (mode == mode_F)
-               return 1;
+               return true;
 
        if (tarval_is_null(tv) || tarval_is_one(tv))
-               return 1;
+               return true;
 
        if (mode == mode_D) {
                unsigned val = get_tarval_sub_bits(tv, 0) |
@@ -179,11 +181,11 @@ static int is_simple_sse_Const(ir_node *node)
                        (get_tarval_sub_bits(tv, 3) << 24);
                if (val == 0)
                        /* lower 32bit are zero, really a 32bit constant */
-                       return 1;
+                       return true;
        }
 
        /* TODO: match all the other float constants */
-       return 0;
+       return false;
 }
 
 /**
@@ -478,11 +480,10 @@ static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
                return 0;
 
        /* don't do AM if other node inputs depend on the load (via mem-proj) */
-       if (other != NULL && get_nodes_block(other) == block &&
-           heights_reachable_in_block(heights, other, load))
+       if (other != NULL && prevents_AM(block, load, other))
                return 0;
-       if (other2 != NULL && get_nodes_block(other2) == block &&
-           heights_reachable_in_block(heights, other2, load))
+
+       if (other2 != NULL && prevents_AM(block, load, other2))
                return 0;
 
        return 1;
@@ -493,6 +494,7 @@ struct ia32_address_mode_t {
        ia32_address_t  addr;
        ir_mode        *ls_mode;
        ir_node        *mem_proj;
+       ir_node        *am_node;
        ia32_op_type_t  op_type;
        ir_node        *new_op1;
        ir_node        *new_op2;
@@ -543,6 +545,7 @@ static void build_address(ia32_address_mode_t *am, ir_node *node)
        am->pinned   = get_irn_pinned(load);
        am->ls_mode  = get_Load_mode(load);
        am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
+       am->am_node  = node;
 
        /* construct load address */
        ia32_create_address_mode(addr, ptr, /*force=*/0);
@@ -734,20 +737,19 @@ static void match_arguments(ia32_address_mode_t *am, ir_node *block,
                }
                am->op_type = ia32_AddrModeS;
        } else {
+               am->op_type = ia32_Normal;
+
                if (flags & match_try_am) {
                        am->new_op1 = NULL;
                        am->new_op2 = NULL;
-                       am->op_type = ia32_Normal;
                        return;
                }
 
                new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
                if (new_op2 == NULL)
                        new_op2 = be_transform_node(op2);
-               am->op_type = ia32_Normal;
-               am->ls_mode = get_irn_mode(op2);
-               if (flags & match_mode_neutral)
-                       am->ls_mode = mode_Iu;
+               am->ls_mode =
+                       (flags & match_mode_neutral ? mode_Iu : get_irn_mode(op2));
        }
        if (addr->base == NULL)
                addr->base = noreg_gp;
@@ -773,7 +775,6 @@ static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
        mode = get_irn_mode(node);
        load = get_Proj_pred(am->mem_proj);
 
-       mark_irn_visited(load);
        be_set_transformed_node(load, node);
 
        if (mode != mode_T) {
@@ -811,8 +812,9 @@ static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
                         am.new_op1, am.new_op2);
        set_am_attributes(new_node, &am);
        /* we can't use source address mode anymore when using immediates */
-       if (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2))
-               set_ia32_am_support(new_node, ia32_am_None, ia32_am_arity_none);
+       if (!(flags & match_am_and_immediates) &&
+           (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
+               set_ia32_am_support(new_node, ia32_am_none);
        SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
 
        new_node = fix_mem_proj(new_node, &am);
@@ -846,23 +848,24 @@ static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
        ir_node             *src_block  = get_nodes_block(node);
        ir_node             *op1        = get_irn_n(node, n_ia32_l_binop_left);
        ir_node             *op2        = get_irn_n(node, n_ia32_l_binop_right);
+       ir_node             *eflags     = get_irn_n(node, n_ia32_l_binop_eflags);
        dbg_info            *dbgi;
-       ir_node             *block, *new_node, *eflags, *new_eflags;
+       ir_node             *block, *new_node, *new_eflags;
        ia32_address_mode_t  am;
        ia32_address_t      *addr       = &am.addr;
 
-       match_arguments(&am, src_block, op1, op2, NULL, flags);
+       match_arguments(&am, src_block, op1, op2, eflags, flags);
 
        dbgi       = get_irn_dbg_info(node);
        block      = be_transform_node(src_block);
-       eflags     = get_irn_n(node, n_ia32_l_binop_eflags);
        new_eflags = be_transform_node(eflags);
        new_node   = func(dbgi, current_ir_graph, block, addr->base, addr->index,
                        addr->mem, am.new_op1, am.new_op2, new_eflags);
        set_am_attributes(new_node, &am);
        /* we can't use source address mode anymore when using immediates */
-       if(is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2))
-               set_ia32_am_support(new_node, ia32_am_None, ia32_am_arity_none);
+       if (!(flags & match_am_and_immediates) &&
+           (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
+               set_ia32_am_support(new_node, ia32_am_none);
        SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
 
        new_node = fix_mem_proj(new_node, &am);
@@ -1170,54 +1173,28 @@ static ir_node *gen_Mul(ir_node *node) {
  *
  * @return the created ia32 Mulh node
  */
-static ir_node *gen_Mulh(ir_node *node)
-{
-       ir_node  *block     = get_nodes_block(node);
-       ir_node  *new_block = be_transform_node(block);
-       ir_graph *irg       = current_ir_graph;
-       dbg_info *dbgi      = get_irn_dbg_info(node);
-       ir_mode  *mode      = get_irn_mode(node);
-       ir_node  *op1       = get_Mulh_left(node);
-       ir_node  *op2       = get_Mulh_right(node);
-       ir_node  *proj_res_high;
-       ir_node  *new_node;
-       ia32_address_mode_t  am;
-       ia32_address_t      *addr = &am.addr;
-
-       assert(!mode_is_float(mode) && "Mulh with float not supported");
-       assert(get_mode_size_bits(mode) == 32);
-
-       match_arguments(&am, block, op1, op2, NULL, match_commutative | match_am);
+static ir_node *gen_Mulh(ir_node *node) {
+       ir_node              *block     = get_nodes_block(node);
+       ir_node              *new_block = be_transform_node(block);
+       dbg_info             *dbgi      = get_irn_dbg_info(node);
+       ir_node              *op1       = get_Mulh_left(node);
+       ir_node              *op2       = get_Mulh_right(node);
+       ir_mode              *mode      = get_irn_mode(node);
+       ir_node              *new_node;
+       ir_node              *proj_res_high;
 
        if (mode_is_signed(mode)) {
-               new_node = new_rd_ia32_IMul1OP(dbgi, irg, new_block, addr->base,
-                                              addr->index, addr->mem, am.new_op1,
-                                              am.new_op2);
+               new_node = gen_binop(node, op1, op2, new_rd_ia32_IMul1OP, match_commutative | match_am);
+               proj_res_high = new_rd_Proj(dbgi, current_ir_graph, new_block, new_node,
+                                   mode_Iu, pn_ia32_IMul1OP_res_high);
        } else {
-               new_node = new_rd_ia32_Mul(dbgi, irg, new_block, addr->base,
-                                          addr->index, addr->mem, am.new_op1,
-                                          am.new_op2);
+               new_node = gen_binop(node, op1, op2, new_rd_ia32_Mul, match_commutative | match_am);
+               proj_res_high = new_rd_Proj(dbgi, current_ir_graph, new_block, new_node,
+                                   mode_Iu, pn_ia32_Mul_res_high);
        }
-
-       set_am_attributes(new_node, &am);
-       /* we can't use source address mode anymore when using immediates */
-       if(is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2))
-               set_ia32_am_support(new_node, ia32_am_None, ia32_am_arity_none);
-       SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
-
-       assert(get_irn_mode(new_node) == mode_T);
-
-       fix_mem_proj(new_node, &am);
-
-       assert(pn_ia32_IMul1OP_res_high == pn_ia32_Mul_res_high);
-       proj_res_high = new_rd_Proj(dbgi, irg, block, new_node,
-                              mode_Iu, pn_ia32_IMul1OP_res_high);
-
        return proj_res_high;
 }
 
-
-
 /**
  * Creates an ia32 And.
  *
@@ -1315,6 +1292,49 @@ static ir_node *gen_Sub(ir_node *node) {
                        | match_am | match_immediate);
 }
 
+static ir_node *transform_AM_mem(ir_graph *const irg, ir_node *const block,
+                                 ir_node  *const src_val,
+                                 ir_node  *const src_mem,
+                                 ir_node  *const am_mem)
+{ /* Returns the memory input to use: the transformed src_mem combined with am_mem (cases below). */
+       if (is_NoMem(am_mem)) { /* am_mem carries no memory: the transformed src_mem is enough */
+               return be_transform_node(src_mem);
+       } else if (is_Proj(src_val) &&
+                  is_Proj(src_mem) &&
+                  get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
+               /* avoid memory loop: src_val and src_mem are Projs of the same node, so use am_mem alone */
+               return am_mem;
+       } else if (is_Proj(src_val) && is_Sync(src_mem)) {
+               ir_node  *const ptr_pred = get_Proj_pred(src_val);
+               int       const arity    = get_Sync_n_preds(src_mem);
+               int             n        = 0;
+               ir_node **      ins;
+               int             i;
+
+               NEW_ARR_A(ir_node*, ins, arity + 1); /* room for every Sync pred plus am_mem */
+
+               for (i = arity - 1; i >= 0; --i) {
+                       ir_node *const pred = get_Sync_pred(src_mem, i);
+
+                       /* avoid memory loop: skip Sync preds that are Projs of the node src_val projects from */
+                       if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
+                               continue;
+
+                       ins[n++] = be_transform_node(pred);
+               }
+
+               ins[n++] = am_mem; /* am_mem is always part of the rebuilt Sync */
+
+               return new_r_Sync(irg, block, n, ins);
+       } else { /* no shared predecessor detected: Sync the transformed src_mem with am_mem */
+               ir_node *ins[2];
+
+               ins[0] = be_transform_node(src_mem);
+               ins[1] = am_mem;
+               return new_r_Sync(irg, block, 2, ins);
+       }
+}
+
 /**
  * Generates an ia32 DivMod with additional infrastructure for the
  * register allocator if needed.
@@ -1364,17 +1384,7 @@ static ir_node *create_Div(ir_node *node)
        /* Beware: We don't need a Sync, if the memory predecessor of the Div node
           is the memory of the consumed address. We can have only the second op as address
           in Div nodes, so check only op2. */
-       if(!is_NoMem(mem) && skip_Proj(mem) != skip_Proj(op2)) {
-               new_mem = be_transform_node(mem);
-               if(!is_NoMem(addr->mem)) {
-                       ir_node *in[2];
-                       in[0] = new_mem;
-                       in[1] = addr->mem;
-                       new_mem = new_rd_Sync(dbgi, irg, new_block, 2, in);
-               }
-       } else {
-               new_mem = addr->mem;
-       }
+       new_mem = transform_AM_mem(irg, block, op2, mem, addr->mem);
 
        if (mode_is_signed(mode)) {
                ir_node *produceval = new_rd_ia32_ProduceVal(dbgi, irg, new_block);
@@ -1904,38 +1914,51 @@ static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
 {
        ir_node *load;
 
-       if(!is_Proj(node))
+       if (!is_Proj(node))
                return 0;
 
        /* we only use address mode if we're the only user of the load */
-       if(get_irn_n_edges(node) > 1)
+       if (get_irn_n_edges(node) > 1)
                return 0;
 
        load = get_Proj_pred(node);
-       if(!is_Load(load))
+       if (!is_Load(load))
                return 0;
-       if(get_nodes_block(load) != block)
+       if (get_nodes_block(load) != block)
                return 0;
 
-       /* Store should be attached to the load */
-       if(!is_Proj(mem) || get_Proj_pred(mem) != load)
-               return 0;
        /* store should have the same pointer as the load */
-       if(get_Load_ptr(load) != ptr)
+       if (get_Load_ptr(load) != ptr)
                return 0;
 
        /* don't do AM if other node inputs depend on the load (via mem-proj) */
-       if(other != NULL && get_nodes_block(other) == block
-                       && heights_reachable_in_block(heights, other, load))
+       if (other != NULL                   &&
+           get_nodes_block(other) == block &&
+           heights_reachable_in_block(heights, other, load)) {
                return 0;
+       }
 
-       return 1;
-}
+       if (is_Sync(mem)) {
+               int i;
 
-static void set_transformed_and_mark(ir_node *const old_node, ir_node *const new_node)
-{
-       mark_irn_visited(old_node);
-       be_set_transformed_node(old_node, new_node);
+               for (i = get_Sync_n_preds(mem) - 1; i >= 0; --i) {
+                       ir_node *const pred = get_Sync_pred(mem, i);
+
+                       if (is_Proj(pred) && get_Proj_pred(pred) == load)
+                               continue;
+
+                       if (get_nodes_block(pred) == block &&
+                           heights_reachable_in_block(heights, pred, load)) {
+                               return 0;
+                       }
+               }
+       } else {
+               /* Store should be attached to the load */
+               if (!is_Proj(mem) || get_Proj_pred(mem) != load)
+                       return 0;
+       }
+
+       return 1;
 }
 
 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
@@ -1949,6 +1972,7 @@ static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
        ir_node  *noreg_gp  = ia32_new_NoReg_gp(env_cg);
        ir_graph *irg      = current_ir_graph;
        dbg_info *dbgi;
+       ir_node  *new_mem;
        ir_node  *new_node;
        ir_node  *new_op;
        ir_node  *mem_proj;
@@ -1978,13 +2002,15 @@ static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
        if(addr->mem == NULL)
                addr->mem = new_NoMem();
 
-       dbgi  = get_irn_dbg_info(node);
-       block = be_transform_node(src_block);
+       dbgi    = get_irn_dbg_info(node);
+       block   = be_transform_node(src_block);
+       new_mem = transform_AM_mem(irg, block, am.am_node, mem, addr->mem);
+
        if(get_mode_size_bits(mode) == 8) {
                new_node = func8bit(dbgi, irg, block, addr->base, addr->index,
-                                   addr->mem, new_op);
+                                   new_mem, new_op);
        } else {
-               new_node = func(dbgi, irg, block, addr->base, addr->index, addr->mem,
+               new_node = func(dbgi, irg, block, addr->base, addr->index, new_mem,
                                new_op);
        }
        set_address(new_node, addr);
@@ -1992,9 +2018,9 @@ static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
        set_ia32_ls_mode(new_node, mode);
        SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
 
-       set_transformed_and_mark(get_Proj_pred(am.mem_proj), new_node);
+       be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
        mem_proj = be_transform_node(am.mem_proj);
-       set_transformed_and_mark(mem_proj ? mem_proj : am.mem_proj, new_node);
+       be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
 
        return new_node;
 }
@@ -2003,12 +2029,13 @@ static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
                              ir_node *ptr, ir_mode *mode,
                              construct_unop_dest_func *func)
 {
-       ir_graph *irg      = current_ir_graph;
-       ir_node *src_block = get_nodes_block(node);
-       ir_node *block;
+       ir_graph *irg       = current_ir_graph;
+       ir_node  *src_block = get_nodes_block(node);
+       ir_node  *block;
        dbg_info *dbgi;
-       ir_node *new_node;
-       ir_node *mem_proj;
+       ir_node  *new_mem;
+       ir_node  *new_node;
+       ir_node  *mem_proj;
        ia32_address_mode_t  am;
        ia32_address_t *addr = &am.addr;
        memset(&am, 0, sizeof(am));
@@ -2020,15 +2047,16 @@ static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
 
        dbgi     = get_irn_dbg_info(node);
        block    = be_transform_node(src_block);
-       new_node = func(dbgi, irg, block, addr->base, addr->index, addr->mem);
+       new_mem  = transform_AM_mem(irg, block, am.am_node, mem, addr->mem);
+       new_node = func(dbgi, irg, block, addr->base, addr->index, new_mem);
        set_address(new_node, addr);
        set_ia32_op_type(new_node, ia32_AddrModeD);
        set_ia32_ls_mode(new_node, mode);
        SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
 
-       set_transformed_and_mark(get_Proj_pred(am.mem_proj), new_node);
+       be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
        mem_proj = be_transform_node(am.mem_proj);
-       set_transformed_and_mark(mem_proj ? mem_proj : am.mem_proj, new_node);
+       be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
 
        return new_node;
 }
@@ -2101,6 +2129,8 @@ static ir_node *try_create_dest_am(ir_node *node) {
                if(is_Conv(val)) {
                        ir_node *conv_op   = get_Conv_op(val);
                        ir_mode *pred_mode = get_irn_mode(conv_op);
+                       if (!ia32_mode_needs_gp_reg(pred_mode))
+                               break;
                        if(pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
                                val = conv_op;
                                continue;
@@ -2134,9 +2164,8 @@ static ir_node *try_create_dest_am(ir_node *node) {
        case iro_Sub:
                op1      = get_Sub_left(val);
                op2      = get_Sub_right(val);
-               if(is_Const(op2)) {
-                       ir_fprintf(stderr, "Optimisation warning: not-normalize sub ,C"
-                                  "found\n");
+               if (is_Const(op2)) {
+                       ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
                }
                new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
                                         new_rd_ia32_SubMem, new_rd_ia32_SubMem8Bit,
@@ -2223,17 +2252,13 @@ static ir_node *try_create_dest_am(ir_node *node) {
        return new_node;
 }
 
-static int is_float_to_int32_conv(const ir_node *node)
+static int is_float_to_int_conv(const ir_node *node)
 {
        ir_mode  *mode = get_irn_mode(node);
        ir_node  *conv_op;
        ir_mode  *conv_mode;
 
-       if(get_mode_size_bits(mode) != 32 || !ia32_mode_needs_gp_reg(mode))
-               return 0;
-       /* don't report unsigned as conv to 32bit, because we really need to do
-        * a vfist with 64bit signed in this case */
-       if(!mode_is_signed(mode))
+       if (mode != mode_Is && mode != mode_Hs)
                return 0;
 
        if(!is_Conv(node))
@@ -2252,78 +2277,52 @@ static int is_float_to_int32_conv(const ir_node *node)
  *
  * @return the created ia32 Store node
  */
-static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns) {
-       ir_mode  *mode      = get_irn_mode(cns);
-       int      size       = get_mode_size_bits(mode);
-       tarval   *tv        = get_Const_tarval(cns);
-       ir_node  *block     = get_nodes_block(node);
-       ir_node  *new_block = be_transform_node(block);
-       ir_node  *ptr       = get_Store_ptr(node);
-       ir_node  *mem       = get_Store_mem(node);
-       ir_graph *irg       = current_ir_graph;
-       dbg_info *dbgi      = get_irn_dbg_info(node);
-       ir_node  *noreg     = ia32_new_NoReg_gp(env_cg);
-       int      ofs        = 4;
-       ir_node  *new_node;
-       ia32_address_t addr;
-
-       unsigned val = get_tarval_sub_bits(tv, 0) |
-               (get_tarval_sub_bits(tv, 1) << 8) |
-               (get_tarval_sub_bits(tv, 2) << 16) |
-               (get_tarval_sub_bits(tv, 3) << 24);
-       ir_node *imm = create_Immediate(NULL, 0, val);
-
-       /* construct store address */
-       memset(&addr, 0, sizeof(addr));
-       ia32_create_address_mode(&addr, ptr, /*force=*/0);
-
-       if (addr.base == NULL) {
-               addr.base = noreg;
-       } else {
-               addr.base = be_transform_node(addr.base);
-       }
-
-       if (addr.index == NULL) {
-               addr.index = noreg;
-       } else {
-               addr.index = be_transform_node(addr.index);
-       }
-       addr.mem = be_transform_node(mem);
-
-       new_node = new_rd_ia32_Store(dbgi, irg, new_block, addr.base,
-               addr.index, addr.mem, imm);
+static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
+{
+       ir_mode        *mode      = get_irn_mode(cns);
+       unsigned        size      = get_mode_size_bytes(mode);
+       tarval         *tv        = get_Const_tarval(cns);
+       ir_node        *block     = get_nodes_block(node);
+       ir_node        *new_block = be_transform_node(block);
+       ir_node        *ptr       = get_Store_ptr(node);
+       ir_node        *mem       = get_Store_mem(node);
+       ir_graph       *irg       = current_ir_graph;
+       dbg_info       *dbgi      = get_irn_dbg_info(node);
+       int             ofs       = 0;
+       size_t          i         = 0;
+       ir_node        *ins[4];
+       ia32_address_t  addr;
 
-       set_irn_pinned(new_node, get_irn_pinned(node));
-       set_ia32_op_type(new_node, ia32_AddrModeD);
-       set_ia32_ls_mode(new_node, mode_Iu);
+       assert(size % 4 ==  0);
+       assert(size     <= 16);
 
-       set_address(new_node, &addr);
+       build_address_ptr(&addr, ptr, mem);
 
-       /** add more stores if needed */
-       while (size > 32) {
-               unsigned val = get_tarval_sub_bits(tv, ofs) |
-                       (get_tarval_sub_bits(tv, ofs + 1) << 8) |
+       do {
+               unsigned val =
+                        get_tarval_sub_bits(tv, ofs)            |
+                       (get_tarval_sub_bits(tv, ofs + 1) <<  8) |
                        (get_tarval_sub_bits(tv, ofs + 2) << 16) |
                        (get_tarval_sub_bits(tv, ofs + 3) << 24);
                ir_node *imm = create_Immediate(NULL, 0, val);
 
-               addr.offset += 4;
-               addr.mem = new_node;
-
-               new_node = new_rd_ia32_Store(dbgi, irg, new_block, addr.base,
+               ir_node *new_node = new_rd_ia32_Store(dbgi, irg, new_block, addr.base,
                        addr.index, addr.mem, imm);
 
                set_irn_pinned(new_node, get_irn_pinned(node));
                set_ia32_op_type(new_node, ia32_AddrModeD);
                set_ia32_ls_mode(new_node, mode_Iu);
-
                set_address(new_node, &addr);
-               size -= 32;
-               ofs  += 4;
-       }
+               SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
 
-       SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
-       return new_node;
+               ins[i++] = new_node;
+
+               size        -= 4;
+               ofs         += 4;
+               addr.offset += 4;
+       } while (size != 0);
+
+       return i == 1 ? ins[0] : new_rd_Sync(dbgi, irg, new_block, i, ins);
 }
 
 /**
@@ -2412,7 +2411,7 @@ static ir_node *gen_normal_Store(ir_node *node)
                                                    addr.index, addr.mem, new_val, mode);
                }
                store = new_node;
-       } else if (!ia32_cg_config.use_sse2 && is_float_to_int32_conv(val)) {
+       } else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
                val = get_Conv_op(val);
 
                /* TODO: is this optimisation still necessary at all (middleend)? */
@@ -2462,7 +2461,7 @@ static ir_node *gen_Store(ir_node *node)
        ir_mode  *mode = get_irn_mode(val);
 
        if (mode_is_float(mode) && is_Const(val)) {
-               int transform = 1;
+               int transform;
 
                /* we are storing a floating point constant */
                if (ia32_cg_config.use_sse2) {
@@ -2557,48 +2556,6 @@ static ir_node *gen_Cond(ir_node *node) {
        return new_node;
 }
 
-/**
- * Transforms a CopyB node.
- *
- * @return The transformed node.
- */
-static ir_node *gen_CopyB(ir_node *node) {
-       ir_node  *block    = be_transform_node(get_nodes_block(node));
-       ir_node  *src      = get_CopyB_src(node);
-       ir_node  *new_src  = be_transform_node(src);
-       ir_node  *dst      = get_CopyB_dst(node);
-       ir_node  *new_dst  = be_transform_node(dst);
-       ir_node  *mem      = get_CopyB_mem(node);
-       ir_node  *new_mem  = be_transform_node(mem);
-       ir_node  *res      = NULL;
-       ir_graph *irg      = current_ir_graph;
-       dbg_info *dbgi     = get_irn_dbg_info(node);
-       int      size      = get_type_size_bytes(get_CopyB_type(node));
-       int      rem;
-
-       /* If we have to copy more than 32 bytes, we use REP MOVSx and */
-       /* then we need the size explicitly in ECX.                    */
-       if (size >= 32 * 4) {
-               rem = size & 0x3; /* size % 4 */
-               size >>= 2;
-
-               res = new_rd_ia32_Const(dbgi, irg, block, NULL, 0, size);
-               add_irn_dep(res, get_irg_frame(irg));
-
-               res = new_rd_ia32_CopyB(dbgi, irg, block, new_dst, new_src, res, new_mem, rem);
-       } else {
-               if(size == 0) {
-                       ir_fprintf(stderr, "Optimisation warning copyb %+F with size <4\n",
-                                  node);
-               }
-               res = new_rd_ia32_CopyB_i(dbgi, irg, block, new_dst, new_src, new_mem, size);
-       }
-
-       SET_IA32_ORIG_NODE(res, ia32_get_old_node_name(env_cg, node));
-
-       return res;
-}
-
 static ir_node *gen_be_Copy(ir_node *node)
 {
        ir_node *new_node = be_duplicate_node(node);
@@ -2678,10 +2635,10 @@ static ir_node *create_Ucomi(ir_node *node)
 }
 
 /**
- * helper function: checks wether all Cmp projs are Lg or Eq which is needed
+ * helper function: checks whether all Cmp projs are Lg or Eq which is needed
  * to fold an and into a test node
  */
-static int can_fold_test_and(ir_node *node)
+static bool can_fold_test_and(ir_node *node)
 {
        const ir_edge_t *edge;
 
@@ -2690,10 +2647,84 @@ static int can_fold_test_and(ir_node *node)
                ir_node *proj = get_edge_src_irn(edge);
                pn_Cmp   pnc  = get_Proj_proj(proj);
                if(pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg)
-                       return 0;
+                       return false;
        }
 
-       return 1;
+       return true;
+}
+
+/**
+ * Returns true if the upper bits of a transformed node are known to be "clean":
+ * for a mode narrower than 32 bit, the bits above the mode's width are 0
+ * (unsigned mode) or copies of the value's most significant bit (signed mode).
+ * Modes of 32 bit or more are always clean; unrecognised nodes yield false.
+ */
+static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
+{
+       assert(ia32_mode_needs_gp_reg(mode));
+       if (get_mode_size_bits(mode) >= 32)
+               return true;
+       /* a Proj is as clean as the node it projects from */
+       if (is_Proj(transformed_node))
+               return upper_bits_clean(get_Proj_pred(transformed_node), mode);
+       /* Conv: clean only if its ls_mode has the same signedness and is not wider than mode */
+       if (is_ia32_Conv_I2I(transformed_node)
+                       || is_ia32_Conv_I2I8Bit(transformed_node)) {
+               ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
+               if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
+                       return false;
+               if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
+                       return false;
+
+               return true;
+       }
+       /* unsigned only: Shr by a constant count of at least (32 - width) is clean */
+       if (is_ia32_Shr(transformed_node) && !mode_is_signed(mode)) {
+               ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
+               if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
+                       const ia32_immediate_attr_t *attr
+                               = get_ia32_immediate_attr_const(right);
+                       if (attr->symconst == 0
+                                       && (unsigned) attr->offset >= (32 - get_mode_size_bits(mode))) {
+                               return true;
+                       }
+               }
+               return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode); /* else: as clean as the shifted value */
+       }
+       /* unsigned only: And with a constant mask no larger than the mode's all-ones value is clean */
+       if (is_ia32_And(transformed_node) && !mode_is_signed(mode)) {
+               ir_node *right = get_irn_n(transformed_node, n_ia32_And_right);
+               if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
+                       const ia32_immediate_attr_t *attr
+                               = get_ia32_immediate_attr_const(right);
+                       if (attr->symconst == 0
+                                       && (unsigned) attr->offset
+                                       <= (0xffffffff >> (32 - get_mode_size_bits(mode)))) {
+                               return true;
+                       }
+               }
+               /* TODO recurse? */
+       }
+
+       /* TODO recurse on Or, Xor, ... if appropriate? */
+       /* constants: clean if the stored offset already fits the mode (note: symconst is not checked here) */
+       if (is_ia32_Immediate(transformed_node)
+                       || is_ia32_Const(transformed_node)) {
+               const ia32_immediate_attr_t *attr
+                       = get_ia32_immediate_attr_const(transformed_node);
+               if (mode_is_signed(mode)) { /* bits from (width - 1) upwards must all be equal; relies on >> of a negative value being arithmetic */
+                       long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
+                       if (shifted == 0 || shifted == -1)
+                               return true;
+               } else { /* no bit at or above the mode's width may be set */
+                       unsigned long shifted = (unsigned long) attr->offset;
+                       shifted >>= get_mode_size_bits(mode);
+                       if (shifted == 0)
+                               return true;
+               }
+       }
+       /* nothing could be proven about the upper bits */
+       return false;
 }
 
 /**
@@ -2723,64 +2754,44 @@ static ir_node *gen_Cmp(ir_node *node)
 
        assert(ia32_mode_needs_gp_reg(cmp_mode));
 
-       /* we prefer the Test instruction where possible except cases where
-        * we can use SourceAM */
+       /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
        cmp_unsigned = !mode_is_signed(cmp_mode);
-       if (is_Const_0(right)) {
-               if (is_And(left) &&
-                               get_irn_n_edges(left) == 1 &&
-                               can_fold_test_and(node)) {
-                       /* Test(and_left, and_right) */
-                       ir_node *and_left  = get_And_left(left);
-                       ir_node *and_right = get_And_right(left);
-                       ir_mode *mode      = get_irn_mode(and_left);
-
-                       match_arguments(&am, block, and_left, and_right, NULL,
-                                       match_commutative |
-                                       match_am | match_8bit_am | match_16bit_am |
-                                       match_am_and_immediates | match_immediate |
-                                       match_8bit | match_16bit);
-                       if (get_mode_size_bits(mode) == 8) {
-                               new_node = new_rd_ia32_Test8Bit(dbgi, irg, new_block, addr->base,
-                                                               addr->index, addr->mem, am.new_op1,
-                                                               am.new_op2, am.ins_permuted,
-                                                               cmp_unsigned);
-                       } else {
-                               new_node = new_rd_ia32_Test(dbgi, irg, new_block, addr->base,
-                                                           addr->index, addr->mem, am.new_op1,
-                                                           am.new_op2, am.ins_permuted, cmp_unsigned);
-                       }
+       if (is_Const_0(right)          &&
+           is_And(left)               &&
+           get_irn_n_edges(left) == 1 &&
+           can_fold_test_and(node)) {
+               /* Test(and_left, and_right) */
+               ir_node *and_left  = get_And_left(left);
+               ir_node *and_right = get_And_right(left);
+
+               /* matze: code here used mode instead of cmp_mode, I think it is always
+                * the same as cmp_mode, but I leave this here to see if this is really
+                * true...
+                */
+               assert(get_irn_mode(and_left) == cmp_mode);
+
+               match_arguments(&am, block, and_left, and_right, NULL,
+                                                                               match_commutative |
+                                                                               match_am | match_8bit_am | match_16bit_am |
+                                                                               match_am_and_immediates | match_immediate |
+                                                                               match_8bit | match_16bit);
+
+               /* use 32bit compare mode if possible since the opcode is smaller */
+               if (upper_bits_clean(am.new_op1, cmp_mode) &&
+                   upper_bits_clean(am.new_op2, cmp_mode)) {
+                       cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
+               }
+
+               if (get_mode_size_bits(cmp_mode) == 8) {
+                       new_node = new_rd_ia32_Test8Bit(dbgi, irg, new_block, addr->base,
+                                                       addr->index, addr->mem, am.new_op1,
+                                                       am.new_op2, am.ins_permuted,
+                                                       cmp_unsigned);
                } else {
-                       match_arguments(&am, block, NULL, left, NULL,
-                                       match_am | match_8bit_am | match_16bit_am |
-                                       match_8bit | match_16bit);
-                       if (am.op_type == ia32_AddrModeS) {
-                               /* Cmp(AM, 0) */
-                               ir_node *imm_zero = try_create_Immediate(right, 0);
-                               if (get_mode_size_bits(cmp_mode) == 8) {
-                                       new_node = new_rd_ia32_Cmp8Bit(dbgi, irg, new_block, addr->base,
-                                                                      addr->index, addr->mem, am.new_op2,
-                                                                      imm_zero, am.ins_permuted,
-                                                                      cmp_unsigned);
-                               } else {
-                                       new_node = new_rd_ia32_Cmp(dbgi, irg, new_block, addr->base,
-                                                                  addr->index, addr->mem, am.new_op2,
-                                                                  imm_zero, am.ins_permuted, cmp_unsigned);
-                               }
-                       } else {
-                               /* Test(left, left) */
-                               if (get_mode_size_bits(cmp_mode) == 8) {
-                                       new_node = new_rd_ia32_Test8Bit(dbgi, irg, new_block, addr->base,
-                                                                       addr->index, addr->mem, am.new_op2,
-                                                                       am.new_op2, am.ins_permuted,
-                                                                       cmp_unsigned);
-                               } else {
-                                       new_node = new_rd_ia32_Test(dbgi, irg, new_block, addr->base,
-                                                                   addr->index, addr->mem, am.new_op2,
-                                                                   am.new_op2, am.ins_permuted,
-                                                                   cmp_unsigned);
-                               }
-                       }
+                       new_node = new_rd_ia32_Test(dbgi, irg, new_block, addr->base,
+                                                   addr->index, addr->mem, am.new_op1,
+                                                   am.new_op2, am.ins_permuted,
+                                                                               cmp_unsigned);
                }
        } else {
                /* Cmp(left, right) */
@@ -2788,6 +2799,12 @@ static ir_node *gen_Cmp(ir_node *node)
                                match_commutative | match_am | match_8bit_am |
                                match_16bit_am | match_am_and_immediates |
                                match_immediate | match_8bit | match_16bit);
+               /* use 32bit compare mode if possible since the opcode is smaller */
+               if (upper_bits_clean(am.new_op1, cmp_mode) &&
+                   upper_bits_clean(am.new_op2, cmp_mode)) {
+                       cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
+               }
+
                if (get_mode_size_bits(cmp_mode) == 8) {
                        new_node = new_rd_ia32_Cmp8Bit(dbgi, irg, new_block, addr->base,
                                                       addr->index, addr->mem, am.new_op1,
@@ -2800,7 +2817,6 @@ static ir_node *gen_Cmp(ir_node *node)
                }
        }
        set_am_attributes(new_node, &am);
-       assert(cmp_mode != NULL);
        set_ia32_ls_mode(new_node, cmp_mode);
 
        SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
@@ -3103,12 +3119,12 @@ static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode) {
        ir_node  *new_node;
        int       src_bits;
 
-       /* fild can use source AM if the operand is a signed 32bit integer */
-       if (src_mode == mode_Is) {
+       /* fild can use source AM if the operand is a signed 16bit or 32bit integer */
+       if (src_mode == mode_Is || src_mode == mode_Hs) {
                ia32_address_mode_t am;
 
                match_arguments(&am, src_block, NULL, op, NULL,
-                               match_am | match_try_am);
+                               match_am | match_try_am | match_16bit | match_16bit_am);
                if (am.op_type == ia32_AddrModeS) {
                        ia32_address_t *addr = &am.addr;
 
@@ -3229,6 +3245,17 @@ static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
        match_arguments(&am, block, NULL, op, NULL,
                        match_8bit | match_16bit |
                        match_am | match_8bit_am | match_16bit_am);
+
+       if (upper_bits_clean(am.new_op2, smaller_mode)) {
+               /* unnecessary conv. in theory it shouldn't have been AM */
+               assert(is_ia32_NoReg_GP(addr->base));
+               assert(is_ia32_NoReg_GP(addr->index));
+               assert(is_NoMem(addr->mem));
+               assert(am.addr.offset == 0);
+               assert(am.addr.symconst_ent == NULL);
+               return am.new_op2;
+       }
+
        if (smaller_bits == 8) {
                new_node = new_rd_ia32_Conv_I2I8Bit(dbgi, irg, new_block, addr->base,
                                                    addr->index, addr->mem, am.new_op2,
@@ -3344,7 +3371,8 @@ static ir_node *gen_Conv(ir_node *node) {
                                        switch (get_mode_size_bits(tgt_mode)) {
                                                case 32: float_mantissa = 23 + 1; break; // + 1 for implicit 1
                                                case 64: float_mantissa = 52 + 1; break;
-                                               case 80: float_mantissa = 64 + 1; break;
+                                               case 80:
+                                               case 96: float_mantissa = 64;     break;
                                                default: float_mantissa = 0;      break;
                                        }
                                        if (float_mantissa < int_mantissa) {
@@ -3502,7 +3530,6 @@ static ir_node *gen_be_Return(ir_node *node) {
        copy_node_attr(barrier, new_barrier);
        be_duplicate_deps(barrier, new_barrier);
        be_set_transformed_node(barrier, new_barrier);
-       mark_irn_visited(barrier);
 
        /* transform normally */
        return be_duplicate_node(node);
@@ -3516,7 +3543,8 @@ static ir_node *gen_be_AddSP(ir_node *node)
        ir_node  *sz = get_irn_n(node, be_pos_AddSP_size);
        ir_node  *sp = get_irn_n(node, be_pos_AddSP_old_sp);
 
-       return gen_binop(node, sp, sz, new_rd_ia32_SubSP, match_am);
+       return gen_binop(node, sp, sz, new_rd_ia32_SubSP,
+                        match_am | match_immediate);
 }
 
 /**
@@ -3527,7 +3555,8 @@ static ir_node *gen_be_SubSP(ir_node *node)
        ir_node  *sz = get_irn_n(node, be_pos_SubSP_size);
        ir_node  *sp = get_irn_n(node, be_pos_SubSP_old_sp);
 
-       return gen_binop(node, sp, sz, new_rd_ia32_AddSP, match_am);
+       return gen_binop(node, sp, sz, new_rd_ia32_AddSP,
+                        match_am | match_immediate);
 }
 
 /**
@@ -3560,7 +3589,6 @@ static ir_node *gen_Phi(ir_node *node) {
        copy_node_attr(node, phi);
        be_duplicate_deps(node, phi);
 
-       be_set_transformed_node(node, phi);
        be_enqueue_preds(node);
 
        return phi;
@@ -3631,81 +3659,6 @@ static ir_node *gen_Bound(ir_node *node)
 }
 
 
-typedef ir_node *construct_load_func(dbg_info *db, ir_graph *irg, ir_node *block, ir_node *base, ir_node *index, \
-                                     ir_node *mem);
-
-typedef ir_node *construct_store_func(dbg_info *db, ir_graph *irg, ir_node *block, ir_node *base, ir_node *index, \
-                                      ir_node *val, ir_node *mem);
-
-/**
- * Transforms a lowered Load into a "real" one.
- */
-static ir_node *gen_lowered_Load(ir_node *node, construct_load_func func)
-{
-       ir_node  *block   = be_transform_node(get_nodes_block(node));
-       ir_node  *ptr     = get_irn_n(node, 0);
-       ir_node  *new_ptr = be_transform_node(ptr);
-       ir_node  *mem     = get_irn_n(node, 1);
-       ir_node  *new_mem = be_transform_node(mem);
-       ir_graph *irg     = current_ir_graph;
-       dbg_info *dbgi    = get_irn_dbg_info(node);
-       ir_mode  *mode    = get_ia32_ls_mode(node);
-       ir_node  *noreg   = ia32_new_NoReg_gp(env_cg);
-       ir_node  *new_op;
-
-       new_op  = func(dbgi, irg, block, new_ptr, noreg, new_mem);
-
-       set_ia32_op_type(new_op, ia32_AddrModeS);
-       set_ia32_am_offs_int(new_op, get_ia32_am_offs_int(node));
-       set_ia32_am_scale(new_op, get_ia32_am_scale(node));
-       set_ia32_am_sc(new_op, get_ia32_am_sc(node));
-       if (is_ia32_am_sc_sign(node))
-               set_ia32_am_sc_sign(new_op);
-       set_ia32_ls_mode(new_op, mode);
-       if (is_ia32_use_frame(node)) {
-               set_ia32_frame_ent(new_op, get_ia32_frame_ent(node));
-               set_ia32_use_frame(new_op);
-       }
-
-       SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
-
-       return new_op;
-}
-
-/**
- * Transforms a lowered Store into a "real" one.
- */
-static ir_node *gen_lowered_Store(ir_node *node, construct_store_func func)
-{
-       ir_node  *block   = be_transform_node(get_nodes_block(node));
-       ir_node  *ptr     = get_irn_n(node, 0);
-       ir_node  *new_ptr = be_transform_node(ptr);
-       ir_node  *val     = get_irn_n(node, 1);
-       ir_node  *new_val = be_transform_node(val);
-       ir_node  *mem     = get_irn_n(node, 2);
-       ir_node  *new_mem = be_transform_node(mem);
-       ir_graph *irg     = current_ir_graph;
-       dbg_info *dbgi    = get_irn_dbg_info(node);
-       ir_node  *noreg   = ia32_new_NoReg_gp(env_cg);
-       ir_mode  *mode    = get_ia32_ls_mode(node);
-       ir_node  *new_op;
-       long     am_offs;
-
-       new_op = func(dbgi, irg, block, new_ptr, noreg, new_val, new_mem);
-
-       am_offs = get_ia32_am_offs_int(node);
-       add_ia32_am_offs_int(new_op, am_offs);
-
-       set_ia32_op_type(new_op, ia32_AddrModeD);
-       set_ia32_ls_mode(new_op, mode);
-       set_ia32_frame_ent(new_op, get_ia32_frame_ent(node));
-       set_ia32_use_frame(new_op);
-
-       SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
-
-       return new_op;
-}
-
 static ir_node *gen_ia32_l_ShlDep(ir_node *node)
 {
        ir_node *left  = get_irn_n(node, n_ia32_l_ShlDep_val);
@@ -3755,71 +3708,6 @@ static ir_node *gen_ia32_l_Adc(ir_node *node)
                        match_mode_neutral);
 }
 
-/**
- * Transforms an ia32_l_vfild into a "real" ia32_vfild node
- *
- * @param node   The node to transform
- * @return the created ia32 vfild node
- */
-static ir_node *gen_ia32_l_vfild(ir_node *node) {
-       return gen_lowered_Load(node, new_rd_ia32_vfild);
-}
-
-/**
- * Transforms an ia32_l_Load into a "real" ia32_Load node
- *
- * @param node   The node to transform
- * @return the created ia32 Load node
- */
-static ir_node *gen_ia32_l_Load(ir_node *node) {
-       return gen_lowered_Load(node, new_rd_ia32_Load);
-}
-
-/**
- * Transforms an ia32_l_Store into a "real" ia32_Store node
- *
- * @param node   The node to transform
- * @return the created ia32 Store node
- */
-static ir_node *gen_ia32_l_Store(ir_node *node) {
-       return gen_lowered_Store(node, new_rd_ia32_Store);
-}
-
-/**
- * Transforms a l_vfist into a "real" vfist node.
- *
- * @param node   The node to transform
- * @return the created ia32 vfist node
- */
-static ir_node *gen_ia32_l_vfist(ir_node *node) {
-       ir_node  *block      = be_transform_node(get_nodes_block(node));
-       ir_node  *ptr        = get_irn_n(node, 0);
-       ir_node  *new_ptr    = be_transform_node(ptr);
-       ir_node  *val        = get_irn_n(node, 1);
-       ir_node  *new_val    = be_transform_node(val);
-       ir_node  *mem        = get_irn_n(node, 2);
-       ir_node  *new_mem    = be_transform_node(mem);
-       ir_graph *irg        = current_ir_graph;
-       dbg_info *dbgi       = get_irn_dbg_info(node);
-       ir_node  *noreg      = ia32_new_NoReg_gp(env_cg);
-       ir_mode  *mode       = get_ia32_ls_mode(node);
-       ir_node  *memres, *fist;
-       long     am_offs;
-
-       memres = gen_vfist(dbgi, irg, block, new_ptr, noreg, new_mem, new_val, &fist);
-       am_offs = get_ia32_am_offs_int(node);
-       add_ia32_am_offs_int(fist, am_offs);
-
-       set_ia32_op_type(fist, ia32_AddrModeD);
-       set_ia32_ls_mode(fist, mode);
-       set_ia32_frame_ent(fist, get_ia32_frame_ent(node));
-       set_ia32_use_frame(fist);
-
-       SET_IA32_ORIG_NODE(fist, ia32_get_old_node_name(env_cg, node));
-
-       return memres;
-}
-
 /**
  * Transforms a l_MulS into a "real" MulS node.
  *
@@ -3888,7 +3776,9 @@ static ir_node *gen_lowered_64bit_shifts(ir_node *node, ir_node *high,
 
        /* the shift amount can be any mode that is bigger than 5 bits, since all
         * other bits are ignored anyway */
-       while (is_Conv(count) && get_irn_n_edges(count) == 1) {
+       while (is_Conv(count)              &&
+              get_irn_n_edges(count) == 1 &&
+              mode_is_int(get_irn_mode(count))) {
                assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
                count = get_Conv_op(count);
        }
@@ -4001,7 +3891,7 @@ static ir_node *gen_ia32_l_FloattoLL(ir_node *node) {
  * the BAD transformer.
  */
 static ir_node *bad_transform(ir_node *node) {
-       panic("No transform function for %+F available.\n", node);
+       panic("No transform function for %+F available.", node);
        return NULL;
 }
 
@@ -4062,8 +3952,7 @@ static ir_node *gen_Proj_be_AddSP(ir_node *node) {
                return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_SubSP_M);
        }
 
-       assert(0);
-       return new_rd_Unknown(irg, get_irn_mode(node));
+       panic("No idea how to transform proj->AddSP");
 }
 
 /**
@@ -4086,8 +3975,7 @@ static ir_node *gen_Proj_be_SubSP(ir_node *node) {
                return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_AddSP_M);
        }
 
-       assert(0);
-       return new_rd_Unknown(irg, get_irn_mode(node));
+       panic("No idea how to transform proj->SubSP");
 }
 
 /**
@@ -4185,8 +4073,7 @@ static ir_node *gen_Proj_Load(ir_node *node) {
                return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, 1);
        }
 
-       assert(0);
-       return new_rd_Unknown(irg, get_irn_mode(node));
+       panic("No idea how to transform proj");
 }
 
 /**
@@ -4198,7 +4085,6 @@ static ir_node *gen_Proj_DivMod(ir_node *node) {
        ir_node  *new_pred = be_transform_node(pred);
        ir_graph *irg      = current_ir_graph;
        dbg_info *dbgi     = get_irn_dbg_info(node);
-       ir_mode  *mode     = get_irn_mode(node);
        long     proj      = get_Proj_proj(node);
 
        assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
@@ -4253,8 +4139,7 @@ static ir_node *gen_Proj_DivMod(ir_node *node) {
                break;
        }
 
-       assert(0);
-       return new_rd_Unknown(irg, mode);
+       panic("No idea how to transform proj->DivMod");
 }
 
 /**
@@ -4266,7 +4151,6 @@ static ir_node *gen_Proj_CopyB(ir_node *node) {
        ir_node  *new_pred = be_transform_node(pred);
        ir_graph *irg      = current_ir_graph;
        dbg_info *dbgi     = get_irn_dbg_info(node);
-       ir_mode  *mode     = get_irn_mode(node);
        long     proj      = get_Proj_proj(node);
 
        switch(proj) {
@@ -4281,8 +4165,7 @@ static ir_node *gen_Proj_CopyB(ir_node *node) {
                break;
        }
 
-       assert(0);
-       return new_rd_Unknown(irg, mode);
+       panic("No idea how to transform proj->CopyB");
 }
 
 /**
@@ -4294,7 +4177,6 @@ static ir_node *gen_Proj_Quot(ir_node *node) {
        ir_node  *new_pred = be_transform_node(pred);
        ir_graph *irg      = current_ir_graph;
        dbg_info *dbgi     = get_irn_dbg_info(node);
-       ir_mode  *mode     = get_irn_mode(node);
        long     proj      = get_Proj_proj(node);
 
        switch(proj) {
@@ -4318,27 +4200,77 @@ static ir_node *gen_Proj_Quot(ir_node *node) {
                break;
        }
 
-       assert(0);
-       return new_rd_Unknown(irg, mode);
+       panic("No idea how to transform proj->Quot");
 }
 
-/**
- * Transform the Thread Local Storage Proj.
- */
-static ir_node *gen_Proj_tls(ir_node *node) {
-       ir_node  *block = be_transform_node(get_nodes_block(node));
-       ir_graph *irg   = current_ir_graph;
-       dbg_info *dbgi  = NULL;
-       ir_node  *res   = new_rd_ia32_LdTls(dbgi, irg, block, mode_Iu);
+static ir_node *gen_be_Call(ir_node *node)
+{
+       dbg_info       *const dbgi      = get_irn_dbg_info(node);
+       ir_graph       *const irg       = current_ir_graph;
+       ir_node        *const src_block = get_nodes_block(node);
+       ir_node        *const block     = be_transform_node(src_block);
+       ir_node        *const src_mem   = get_irn_n(node, be_pos_Call_mem);
+       ir_node        *const src_sp    = get_irn_n(node, be_pos_Call_sp);
+       ir_node        *const sp        = be_transform_node(src_sp);
+       ir_node        *const src_ptr   = get_irn_n(node, be_pos_Call_ptr);
+       ir_node        *const noreg     = ia32_new_NoReg_gp(env_cg);
+       ia32_address_mode_t   am;
+       ia32_address_t *const addr      = &am.addr;
+       ir_node        *      mem;
+       ir_node        *      call;
+       int                   i;
+       ir_node        *      fpcw;
+       ir_node        *      eax       = noreg;
+       ir_node        *      ecx       = noreg;
+       ir_node        *      edx       = noreg;
+       unsigned        const pop       = be_Call_get_pop(node);
+       ir_type        *const call_tp   = be_Call_get_type(node);
+
+       /* Run the x87 simulator if the call returns a float value */
+       if (get_method_n_ress(call_tp) > 0) {
+               ir_type *const res_type = get_method_res_type(call_tp, 0);
+               ir_mode *const res_mode = get_type_mode(res_type);
+
+               if (res_mode != NULL && mode_is_float(res_mode)) {
+                       env_cg->do_x87_sim = 1;
+               }
+       }
 
-       return res;
-}
+       /* We do not want be_Call direct calls */
+       assert(be_Call_get_entity(node) == NULL);
 
-static ir_node *gen_be_Call(ir_node *node) {
-       ir_node *res = be_duplicate_node(node);
-       be_node_add_flags(res, -1, arch_irn_flags_modify_flags);
+       match_arguments(&am, src_block, NULL, src_ptr, src_mem,
+                       match_am | match_immediate);
 
-       return res;
+       i    = get_irn_arity(node) - 1;
+       fpcw = be_transform_node(get_irn_n(node, i--));
+       for (; i >= be_pos_Call_first_arg; --i) {
+               arch_register_req_t const *const req =
+                       arch_get_register_req(env_cg->arch_env, node, i);
+               ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
+
+               assert(req->type == arch_register_req_type_limited);
+               assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
+
+               switch (*req->limited) {
+                       case 1 << REG_EAX: assert(eax == noreg); eax = reg_parm; break;
+                       case 1 << REG_ECX: assert(ecx == noreg); ecx = reg_parm; break;
+                       case 1 << REG_EDX: assert(edx == noreg); edx = reg_parm; break;
+                       default: panic("Invalid GP register for register parameter");
+               }
+       }
+
+       mem  = transform_AM_mem(irg, block, src_ptr, src_mem, addr->mem);
+       call = new_rd_ia32_Call(dbgi, irg, block, addr->base, addr->index, mem,
+                               am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
+       set_am_attributes(call, &am);
+       call = fix_mem_proj(call, &am);
+
+       if (get_irn_pinned(node) == op_pin_state_pinned)
+               set_irn_pinned(call, op_pin_state_pinned);
+
+       SET_IA32_ORIG_NODE(call, ia32_get_old_node_name(env_cg, node));
+       return call;
 }
 
 static ir_node *gen_be_IncSP(ir_node *node) {
@@ -4351,7 +4283,8 @@ static ir_node *gen_be_IncSP(ir_node *node) {
 /**
  * Transform the Projs from a be_Call.
  */
-static ir_node *gen_Proj_be_Call(ir_node *node) {
+static ir_node *gen_Proj_be_Call(ir_node *node)
+{
        ir_node  *block       = be_transform_node(get_nodes_block(node));
        ir_node  *call        = get_Proj_pred(node);
        ir_node  *new_call    = be_transform_node(call);
@@ -4363,6 +4296,7 @@ static ir_node *gen_Proj_be_Call(ir_node *node) {
        ir_mode  *mode        = get_irn_mode(node);
        ir_node  *sse_load;
        const arch_register_class_t *cls;
+       ir_node                     *res;
 
        /* The following is kinda tricky: If we're using SSE, then we have to
         * move the result value of the call in floating point registers to an
@@ -4381,9 +4315,9 @@ static ir_node *gen_Proj_be_Call(ir_node *node) {
                        call_res_pred = get_Proj_pred(call_res_new);
                }
 
-               if (call_res_pred == NULL || be_is_Call(call_res_pred)) {
+               if (call_res_pred == NULL || is_ia32_Call(call_res_pred)) {
                        return new_rd_Proj(dbgi, irg, block, new_call, mode_M,
-                                          pn_be_Call_M_regular);
+                                          n_ia32_Call_mem);
                } else {
                        assert(is_ia32_xLoad(call_res_pred));
                        return new_rd_Proj(dbgi, irg, block, call_res_pred, mode_M,
@@ -4430,7 +4364,47 @@ static ir_node *gen_Proj_be_Call(ir_node *node) {
                mode = cls->mode;
        }
 
-       return new_rd_Proj(dbgi, irg, block, new_call, mode, proj);
+       /* Map from be_Call to ia32_Call proj number */
+       if (proj == pn_be_Call_sp) {
+               proj = pn_ia32_Call_stack;
+       } else if (proj == pn_be_Call_M_regular) {
+               proj = pn_ia32_Call_M;
+       } else {
+               arch_register_req_t const *const req    = arch_get_register_req(env_cg->arch_env, node, BE_OUT_POS(proj));
+               int                        const n_outs = get_ia32_n_res(new_call);
+               int                              i;
+
+               assert(proj      >= pn_be_Call_first_res);
+               assert(req->type == arch_register_req_type_limited);
+
+               for (i = 0; i < n_outs; ++i) {
+                       arch_register_req_t const *const new_req = get_ia32_out_req(new_call, i);
+
+                       if (new_req->type     != arch_register_req_type_limited ||
+                           new_req->cls      != req->cls                       ||
+                           *new_req->limited != *req->limited)
+                               continue;
+
+                       proj = i;
+                       break;
+               }
+               assert(i < n_outs);
+       }
+
+       res = new_rd_Proj(dbgi, irg, block, new_call, mode, proj);
+
+       /* TODO arch_set_irn_register() only operates on Projs, need variant with index */
+       switch (proj) {
+               case pn_ia32_Call_stack:
+                       arch_set_irn_register(env_cg->arch_env, res, &ia32_gp_regs[REG_ESP]);
+                       break;
+
+               case pn_ia32_Call_fpcw:
+                       arch_set_irn_register(env_cg->arch_env, res, &ia32_fp_cw_regs[REG_FPCW]);
+                       break;
+       }
+
+       return res;
 }
 
 /**
@@ -4498,8 +4472,7 @@ static ir_node *gen_Proj(ir_node *node) {
                if (proj == pn_Store_M) {
                        return be_transform_node(pred);
                } else {
-                       assert(0);
-                       return new_r_Bad(current_ir_graph);
+                       panic("No idea how to transform proj->Store");
                }
        case iro_Load:
                return gen_Proj_Load(node);
@@ -4525,18 +4498,19 @@ static ir_node *gen_Proj(ir_node *node) {
                return gen_Proj_Bound(node);
        case iro_Start:
                proj = get_Proj_proj(node);
-               if (proj == pn_Start_X_initial_exec) {
-                       ir_node *block = get_nodes_block(pred);
-                       dbg_info *dbgi = get_irn_dbg_info(node);
-                       ir_node *jump;
-
-                       /* we exchange the ProjX with a jump */
-                       block = be_transform_node(block);
-                       jump  = new_rd_Jmp(dbgi, current_ir_graph, block);
-                       return jump;
-               }
-               if (node == be_get_old_anchor(anchor_tls)) {
-                       return gen_Proj_tls(node);
+               switch (proj) {
+                       case pn_Start_X_initial_exec: {
+                               ir_node  *block     = get_nodes_block(pred);
+                               ir_node  *new_block = be_transform_node(block);
+                               dbg_info *dbgi      = get_irn_dbg_info(node);
+                               /* we exchange the ProjX with a jump */
+                               ir_node  *jump      = new_rd_Jmp(dbgi, current_ir_graph, new_block);
+
+                               return jump;
+                       }
+
+                       case pn_Start_P_tls:
+                               return gen_Proj_tls(node);
                }
                break;
 
@@ -4625,10 +4599,6 @@ static void register_transformers(void)
        GEN(ia32_l_ShrD);
        GEN(ia32_l_Sub);
        GEN(ia32_l_Sbb);
-       GEN(ia32_l_vfild);
-       GEN(ia32_l_Load);
-       GEN(ia32_l_vfist);
-       GEN(ia32_l_Store);
        GEN(ia32_l_LLtoFloat);
        GEN(ia32_l_FloattoLL);
 
@@ -4671,8 +4641,9 @@ static void register_transformers(void)
 /**
  * Pre-transform all unknown and noreg nodes.
  */
-static void ia32_pretransform_node(void *arch_cg) {
-       ia32_code_gen_t *cg = arch_cg;
+static void ia32_pretransform_node(void)
+{
+       ia32_code_gen_t *cg = env_cg;
 
        cg->unknown_gp  = be_pre_transform_node(cg->unknown_gp);
        cg->unknown_vfp = be_pre_transform_node(cg->unknown_vfp);
@@ -4684,8 +4655,8 @@ static void ia32_pretransform_node(void *arch_cg) {
 }
 
 /**
- * Walker, checks if all ia32 nodes producing more than one result have
- * its Projs, otherwise creates new Projs and keep them using a be_Keep node.
+ * Walker, checks if all ia32 nodes producing more than one result have their
+ * Projs, otherwise creates new Projs and keeps them using a be_Keep node.
  */
 static void add_missing_keep_walker(ir_node *node, void *data)
 {
@@ -4709,11 +4680,16 @@ static void add_missing_keep_walker(ir_node *node, void *data)
        assert(n_outs < (int) sizeof(unsigned) * 8);
        foreach_out_edge(node, edge) {
                ir_node *proj = get_edge_src_irn(edge);
-               int      pn   = get_Proj_proj(proj);
+               int      pn;
+
+               /* The node could be kept */
+               if (is_End(proj))
+                       continue;
 
                if (get_irn_mode(proj) == mode_M)
                        continue;
 
+               pn = get_Proj_proj(proj);
                assert(pn < n_outs);
                found_projs |= 1 << pn;
        }
@@ -4765,16 +4741,16 @@ void ia32_add_missing_keeps(ia32_code_gen_t *cg)
 }
 
 /* do the transformation */
-void ia32_transform_graph(ia32_code_gen_t *cg) {
+void ia32_transform_graph(ia32_code_gen_t *cg)
+{
        int cse_last;
-       ir_graph *irg = cg->irg;
 
        register_transformers();
        env_cg       = cg;
        initial_fpcw = NULL;
 
        BE_TIMER_PUSH(t_heights);
-       heights      = heights_new(irg);
+       heights      = heights_new(cg->irg);
        BE_TIMER_POP(t_heights);
        ia32_calculate_non_address_mode_nodes(cg->birg);
 
@@ -4783,7 +4759,7 @@ void ia32_transform_graph(ia32_code_gen_t *cg) {
        cse_last = get_opt_cse();
        set_opt_cse(0);
 
-       be_transform_graph(cg->birg, ia32_pretransform_node, cg);
+       be_transform_graph(cg->birg, ia32_pretransform_node);
 
        set_opt_cse(cse_last);