Defer decision whether to create Test(x, x) instead of Cmp(x, 0) until peephole optim...
[libfirm] / ir / be / ia32 / ia32_transform.c
index 6edace1..52d3947 100644 (file)
@@ -58,6 +58,7 @@
 #include "../be_t.h"
 
 #include "bearch_ia32_t.h"
+#include "ia32_common_transform.h"
 #include "ia32_nodes_attr.h"
 #include "ia32_transform.h"
 #include "ia32_new_nodes.h"
 
 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
 
-/** hold the current code generator during transformation */
-static ia32_code_gen_t *env_cg       = NULL;
 static ir_node         *initial_fpcw = NULL;
-static heights_t       *heights      = NULL;
 
 extern ir_op *get_op_Mulh(void);
 
@@ -125,9 +123,6 @@ typedef ir_node *construct_binop_float_func(dbg_info *db, ir_graph *irg,
 typedef ir_node *construct_unop_func(dbg_info *db, ir_graph *irg,
         ir_node *block, ir_node *op);
 
-static ir_node *try_create_Immediate(ir_node *node,
-                                     char immediate_constraint_type);
-
 static ir_node *create_immediate_or_transform(ir_node *node,
                                               char immediate_constraint_type);
 
@@ -135,142 +130,17 @@ static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
                                 dbg_info *dbgi, ir_node *block,
                                 ir_node *op, ir_node *orig_node);
 
-/**
- * Return true if a mode can be stored in the GP register set
- */
-static INLINE int mode_needs_gp_reg(ir_mode *mode) {
-       if(mode == mode_fpcw)
-               return 0;
-       if(get_mode_size_bits(mode) > 32)
-               return 0;
-       return mode_is_int(mode) || mode_is_reference(mode) || mode == mode_b;
-}
-
-/**
- * creates a unique ident by adding a number to a tag
- *
- * @param tag   the tag string, must contain a %d if a number
- *              should be added
- */
-static ident *unique_id(const char *tag)
-{
-       static unsigned id = 0;
-       char str[256];
-
-       snprintf(str, sizeof(str), tag, ++id);
-       return new_id_from_str(str);
-}
-
-/**
- * Get a primitive type for a mode.
- */
-static ir_type *get_prim_type(pmap *types, ir_mode *mode)
-{
-       pmap_entry *e = pmap_find(types, mode);
-       ir_type *res;
-
-       if (! e) {
-               char buf[64];
-               snprintf(buf, sizeof(buf), "prim_type_%s", get_mode_name(mode));
-               res = new_type_primitive(new_id_from_str(buf), mode);
-               set_type_alignment_bytes(res, 16);
-               pmap_insert(types, mode, res);
-       }
-       else
-               res = e->value;
-       return res;
-}
-
-/**
- * Creates an immediate.
- *
- * @param symconst       if set, create a SymConst immediate
- * @param symconst_sign  sign for the symconst
- * @param val            integer value for the immediate
- */
-static ir_node *create_Immediate(ir_entity *symconst, int symconst_sign, long val)
-{
-       ir_graph *irg         = current_ir_graph;
-       ir_node  *start_block = get_irg_start_block(irg);
-       ir_node  *immediate   = new_rd_ia32_Immediate(NULL, irg, start_block,
-                                                     symconst, symconst_sign, val);
-       arch_set_irn_register(env_cg->arch_env, immediate, &ia32_gp_regs[REG_GP_NOREG]);
-
-       return immediate;
-}
-
-/**
- * Get an atomic entity that is initialized with a tarval forming
- * a given constant.
- *
- * @param cnst             the node representing the constant
- */
-static ir_entity *create_float_const_entity(ir_node *cnst)
-{
-       ia32_isa_t *isa = env_cg->isa;
-       tarval *key     = get_Const_tarval(cnst);
-       pmap_entry *e   = pmap_find(isa->tv_ent, key);
-       ir_entity *res;
-       ir_graph *rem;
-
-       if (e == NULL) {
-               tarval  *tv   = key;
-               ir_mode *mode = get_tarval_mode(tv);
-               ir_type *tp;
-
-               if (! ia32_cg_config.use_sse2) {
-                       /* try to reduce the mode to produce smaller sized entities */
-                       if (mode != mode_F) {
-                               if (tarval_ieee754_can_conv_lossless(tv, mode_F)) {
-                                       mode = mode_F;
-                                       tv = tarval_convert_to(tv, mode);
-                               } else if (mode != mode_D) {
-                                       if (tarval_ieee754_can_conv_lossless(tv, mode_D)) {
-                                               mode = mode_D;
-                                               tv = tarval_convert_to(tv, mode);
-                                       }
-                               }
-                       }
-               }
-
-               if (mode == get_irn_mode(cnst)) {
-                       /* mode was not changed */
-                       tp = get_Const_type(cnst);
-                       if (tp == firm_unknown_type)
-                               tp = get_prim_type(isa->types, mode);
-               } else
-                       tp = get_prim_type(isa->types, mode);
-
-               res = new_entity(get_glob_type(), unique_id(".LC%u"), tp);
-
-               set_entity_ld_ident(res, get_entity_ident(res));
-               set_entity_visibility(res, visibility_local);
-               set_entity_variability(res, variability_constant);
-               set_entity_allocation(res, allocation_static);
-
-                /* we create a new entity here: It's initialization must resist on the
-                   const code irg */
-               rem = current_ir_graph;
-               current_ir_graph = get_const_code_irg();
-               set_atomic_ent_value(res, new_Const_type(tv, tp));
-               current_ir_graph = rem;
-
-               pmap_insert(isa->tv_ent, key, res);
-       } else {
-               res = e->value;
-       }
-
-       return res;
-}
-
+/** Return non-zero if a node represents the 0 constant. */
 static int is_Const_0(ir_node *node) {
        return is_Const(node) && is_Const_null(node);
 }
 
+/** Return non-zero if a node represents the 1 constant. */
 static int is_Const_1(ir_node *node) {
        return is_Const(node) && is_Const_one(node);
 }
 
+/** Return non-zero if a node represents the -1 constant. */
 static int is_Const_Minus_1(ir_node *node) {
        return is_Const(node) && is_Const_all_one(node);
 }
@@ -561,19 +431,6 @@ ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct) {
        return ent_cache[kct];
 }
 
-#ifndef NDEBUG
-/**
- * Prints the old node name on cg obst and returns a pointer to it.
- */
-const char *ia32_get_old_node_name(ia32_code_gen_t *cg, ir_node *irn) {
-       ia32_isa_t *isa = (ia32_isa_t *)cg->arch_env->isa;
-
-       lc_eoprintf(firm_get_arg_env(), isa->name_obst, "%+F", irn);
-       obstack_1grow(isa->name_obst, 0);
-       return obstack_finish(isa->name_obst);
-}
-#endif /* NDEBUG */
-
 /**
  * return true if the node is a Proj(Load) and could be used in source address
  * mode for another node. Will return only true if the @p other node is not
@@ -717,7 +574,9 @@ static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
        set_ia32_op_type(node, am->op_type);
        set_ia32_ls_mode(node, am->ls_mode);
        if (am->pinned == op_pin_state_pinned) {
-               set_irn_pinned(node, am->pinned);
+               /* beware: some nodes are already pinned and do not allow changing the state */
+               if (get_irn_pinned(node) != op_pin_state_pinned)
+                       set_irn_pinned(node, op_pin_state_pinned);
        }
        if (am->commutative)
                set_ia32_commutative(node);
@@ -747,8 +606,8 @@ static int is_downconv(const ir_node *node)
 
        src_mode  = get_irn_mode(get_Conv_op(node));
        dest_mode = get_irn_mode(node);
-       return mode_needs_gp_reg(src_mode)
-               && mode_needs_gp_reg(dest_mode)
+       return ia32_mode_needs_gp_reg(src_mode)
+               && ia32_mode_needs_gp_reg(dest_mode)
                && get_mode_size_bits(dest_mode) < get_mode_size_bits(src_mode);
 }
 
@@ -760,7 +619,6 @@ ir_node *ia32_skip_downconv(ir_node *node) {
        return node;
 }
 
-#if 0
 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
 {
        ir_mode  *mode = get_irn_mode(node);
@@ -778,7 +636,6 @@ static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
 
        return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
 }
-#endif
 
 /**
  * matches operands of a node into ia32 addressing/operand modes. This covers
@@ -1085,16 +942,21 @@ static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
        assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
 
        if (flags & match_mode_neutral) {
-               op1 = ia32_skip_downconv(op1);
+               op1     = ia32_skip_downconv(op1);
+               new_op1 = be_transform_node(op1);
        } else if (get_mode_size_bits(get_irn_mode(node)) != 32) {
-               panic("right shifting of non-32bit values not supported, yet");
+               new_op1 = create_upconv(op1, node);
+       } else {
+               new_op1 = be_transform_node(op1);
        }
-       new_op1 = be_transform_node(op1);
 
        /* the shift amount can be any mode that is bigger than 5 bits, since all
         * other bits are ignored anyway */
        while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
-               op2 = get_Conv_op(op2);
+               ir_node *const op = get_Conv_op(op2);
+               if (mode_is_float(get_irn_mode(op)))
+                       break;
+               op2 = op;
                assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
        }
        new_op2 = create_immediate_or_transform(op2, 0);
@@ -1665,20 +1527,20 @@ static ir_node *gen_Shrs(ir_node *node) {
 
 
 /**
- * Creates an ia32 RotL.
+ * Creates an ia32 Rol.
  *
  * @param op1   The first operator
  * @param op2   The second operator
  * @return The created ia32 RotL node
  */
-static ir_node *gen_RotL(ir_node *node, ir_node *op1, ir_node *op2) {
+static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2) {
        return gen_shift_binop(node, op1, op2, new_rd_ia32_Rol, match_immediate);
 }
 
 
 
 /**
- * Creates an ia32 RotR.
+ * Creates an ia32 Ror.
  * NOTE: There is no RotR with immediate because this would always be a RotL
  *       "imm-mode_size_bits" which can be pre-calculated.
  *
@@ -1686,7 +1548,7 @@ static ir_node *gen_RotL(ir_node *node, ir_node *op1, ir_node *op2) {
  * @param op2   The second operator
  * @return The created ia32 RotR node
  */
-static ir_node *gen_RotR(ir_node *node, ir_node *op1, ir_node *op2) {
+static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2) {
        return gen_shift_binop(node, op1, op2, new_rd_ia32_Ror, match_immediate);
 }
 
@@ -1697,16 +1559,16 @@ static ir_node *gen_RotR(ir_node *node, ir_node *op1, ir_node *op2) {
  *
  * @return The created ia32 RotL or RotR node
  */
-static ir_node *gen_Rot(ir_node *node) {
+static ir_node *gen_Rotl(ir_node *node) {
        ir_node *rotate = NULL;
-       ir_node *op1    = get_Rot_left(node);
-       ir_node *op2    = get_Rot_right(node);
+       ir_node *op1    = get_Rotl_left(node);
+       ir_node *op2    = get_Rotl_right(node);
 
-       /* Firm has only Rot (which is a RotL), so we are looking for a right (op2)
+       /* Firm has only RotL, so we are looking for a right (op2)
                 operand "-e+mode_size_bits" (it's an already modified "mode_size_bits-e",
                 that means we can create a RotR instead of an Add and a RotL */
 
-       if (get_irn_op(op2) == op_Add) {
+       if (is_Add(op2)) {
                ir_node *add = op2;
                ir_node *left = get_Add_left(add);
                ir_node *right = get_Add_right(add);
@@ -1715,19 +1577,19 @@ static ir_node *gen_Rot(ir_node *node) {
                        ir_mode *mode = get_irn_mode(node);
                        long     bits = get_mode_size_bits(mode);
 
-                       if (get_irn_op(left) == op_Minus &&
-                                       tarval_is_long(tv)       &&
-                                       get_tarval_long(tv) == bits &&
-                                       bits                == 32)
+                       if (is_Minus(left) &&
+                           tarval_is_long(tv)       &&
+                           get_tarval_long(tv) == bits &&
+                           bits                == 32)
                        {
                                DB((dbg, LEVEL_1, "RotL into RotR ... "));
-                               rotate = gen_RotR(node, op1, get_Minus_op(left));
+                               rotate = gen_Ror(node, op1, get_Minus_op(left));
                        }
                }
        }
 
        if (rotate == NULL) {
-               rotate = gen_RotL(node, op1, op2);
+               rotate = gen_Rol(node, op1, op2);
        }
 
        return rotate;
@@ -2070,6 +1932,12 @@ static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
        return 1;
 }
 
+static void set_transformed_and_mark(ir_node *const old_node, ir_node *const new_node)
+{
+       mark_irn_visited(old_node);
+       be_set_transformed_node(old_node, new_node);
+}
+
 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
                               ir_node *mem, ir_node *ptr, ir_mode *mode,
                               construct_binop_dest_func *func,
@@ -2083,6 +1951,7 @@ static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
        dbg_info *dbgi;
        ir_node  *new_node;
        ir_node  *new_op;
+       ir_node  *mem_proj;
        int       commutative;
        ia32_address_mode_t  am;
        ia32_address_t      *addr = &am.addr;
@@ -2123,6 +1992,10 @@ static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
        set_ia32_ls_mode(new_node, mode);
        SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
 
+       set_transformed_and_mark(get_Proj_pred(am.mem_proj), new_node);
+       mem_proj = be_transform_node(am.mem_proj);
+       set_transformed_and_mark(mem_proj ? mem_proj : am.mem_proj, new_node);
+
        return new_node;
 }
 
@@ -2135,6 +2008,7 @@ static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
        ir_node *block;
        dbg_info *dbgi;
        ir_node *new_node;
+       ir_node *mem_proj;
        ia32_address_mode_t  am;
        ia32_address_t *addr = &am.addr;
        memset(&am, 0, sizeof(am));
@@ -2152,13 +2026,17 @@ static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
        set_ia32_ls_mode(new_node, mode);
        SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
 
+       set_transformed_and_mark(get_Proj_pred(am.mem_proj), new_node);
+       mem_proj = be_transform_node(am.mem_proj);
+       set_transformed_and_mark(mem_proj ? mem_proj : am.mem_proj, new_node);
+
        return new_node;
 }
 
 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem) {
        ir_mode  *mode        = get_irn_mode(node);
-       ir_node  *psi_true    = get_Psi_val(node, 0);
-       ir_node  *psi_default = get_Psi_default(node);
+       ir_node  *mux_true    = get_Mux_true(node);
+       ir_node  *mux_false   = get_Mux_false(node);
        ir_graph *irg;
        ir_node  *cond;
        ir_node  *new_mem;
@@ -2174,9 +2052,9 @@ static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem) {
        if(get_mode_size_bits(mode) != 8)
                return NULL;
 
-       if(is_Const_1(psi_true) && is_Const_0(psi_default)) {
+       if(is_Const_1(mux_true) && is_Const_0(mux_false)) {
                negated = 0;
-       } else if(is_Const_0(psi_true) && is_Const_1(psi_default)) {
+       } else if(is_Const_0(mux_true) && is_Const_1(mux_false)) {
                negated = 1;
        } else {
                return NULL;
@@ -2188,7 +2066,7 @@ static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem) {
        dbgi      = get_irn_dbg_info(node);
        block     = get_nodes_block(node);
        new_block = be_transform_node(block);
-       cond      = get_Psi_cond(node, 0);
+       cond      = get_Mux_sel(node);
        flags     = get_flags_node(cond, &pnc);
        new_mem   = be_transform_node(mem);
        new_node  = new_rd_ia32_SetMem(dbgi, irg, new_block, addr.base,
@@ -2212,7 +2090,7 @@ static ir_node *try_create_dest_am(ir_node *node) {
        ir_node  *new_node;
 
        /* handle only GP modes for now... */
-       if(!mode_needs_gp_reg(mode))
+       if(!ia32_mode_needs_gp_reg(mode))
                return NULL;
 
        while(1) {
@@ -2235,7 +2113,7 @@ static ir_node *try_create_dest_am(ir_node *node) {
        if(get_nodes_block(node) != get_nodes_block(val))
                return NULL;
 
-       switch(get_irn_opcode(val)) {
+       switch (get_irn_opcode(val)) {
        case iro_Add:
                op1      = get_Add_left(val);
                op2      = get_Add_right(val);
@@ -2310,15 +2188,15 @@ static ir_node *try_create_dest_am(ir_node *node) {
                                         new_rd_ia32_SarMem, new_rd_ia32_SarMem,
                                         match_dest_am | match_immediate);
                break;
-       case iro_Rot:
-               op1      = get_Rot_left(val);
-               op2      = get_Rot_right(val);
+       case iro_Rotl:
+               op1      = get_Rotl_left(val);
+               op2      = get_Rotl_right(val);
                new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
                                         new_rd_ia32_RolMem, new_rd_ia32_RolMem,
                                         match_dest_am | match_immediate);
                break;
        /* TODO: match ROR patterns... */
-       case iro_Psi:
+       case iro_Mux:
                new_node = try_create_SetMem(val, ptr, mem);
                break;
        case iro_Minus:
@@ -2351,7 +2229,11 @@ static int is_float_to_int32_conv(const ir_node *node)
        ir_node  *conv_op;
        ir_mode  *conv_mode;
 
-       if(get_mode_size_bits(mode) != 32 || !mode_needs_gp_reg(mode))
+       if(get_mode_size_bits(mode) != 32 || !ia32_mode_needs_gp_reg(mode))
+               return 0;
+       /* don't report unsigned as conv to 32bit, because we really need to do
+        * a vfist with 64bit signed in this case */
+       if(!mode_is_signed(mode))
                return 0;
 
        if(!is_Conv(node))
@@ -2513,10 +2395,13 @@ static ir_node *gen_normal_Store(ir_node *node)
        addr.mem = be_transform_node(mem);
 
        if (mode_is_float(mode)) {
-               /* convs (and strict-convs) before stores are unnecessary if the mode
-                  is the same */
-               while (is_Conv(val) && mode == get_irn_mode(get_Conv_op(val))) {
-                       val = get_Conv_op(val);
+               /* Convs (and strict-Convs) before stores are unnecessary if the mode
+                  is the same. */
+               while (is_Conv(val) && mode == get_irn_mode(val)) {
+                       ir_node *op = get_Conv_op(val);
+                       if (!mode_is_float(get_irn_mode(op)))
+                               break;
+                       val = op;
                }
                new_val = be_transform_node(val);
                if (ia32_cg_config.use_sse2) {
@@ -2527,13 +2412,18 @@ static ir_node *gen_normal_Store(ir_node *node)
                                                    addr.index, addr.mem, new_val, mode);
                }
                store = new_node;
-       } else if (is_float_to_int32_conv(val)) {
+       } else if (!ia32_cg_config.use_sse2 && is_float_to_int32_conv(val)) {
                val = get_Conv_op(val);
 
-               /* convs (and strict-convs) before stores are unnecessary if the mode
-                  is the same */
-               while(is_Conv(val) && mode == get_irn_mode(get_Conv_op(val))) {
-                       val = get_Conv_op(val);
+               /* TODO: is this optimisation still necessary at all (middleend)? */
+               /* We can skip ALL float->float up-Convs (and strict-up-Convs) before stores. */
+               while (is_Conv(val)) {
+                       ir_node *op = get_Conv_op(val);
+                       if (!mode_is_float(get_irn_mode(op)))
+                               break;
+                       if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
+                               break;
+                       val = op;
                }
                new_val  = be_transform_node(val);
                new_node = gen_vfist(dbgi, irg, new_block, addr.base, addr.index, addr.mem, new_val, &store);
@@ -2714,7 +2604,7 @@ static ir_node *gen_be_Copy(ir_node *node)
        ir_node *new_node = be_duplicate_node(node);
        ir_mode *mode     = get_irn_mode(new_node);
 
-       if (mode_needs_gp_reg(mode)) {
+       if (ia32_mode_needs_gp_reg(mode)) {
                set_irn_mode(new_node, mode_Iu);
        }
 
@@ -2831,66 +2721,33 @@ static ir_node *gen_Cmp(ir_node *node)
                }
        }
 
-       assert(mode_needs_gp_reg(cmp_mode));
+       assert(ia32_mode_needs_gp_reg(cmp_mode));
 
-       /* we prefer the Test instruction where possible except cases where
-        * we can use SourceAM */
+       /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
        cmp_unsigned = !mode_is_signed(cmp_mode);
-       if (is_Const_0(right)) {
-               if (is_And(left) &&
-                               get_irn_n_edges(left) == 1 &&
-                               can_fold_test_and(node)) {
-                       /* Test(and_left, and_right) */
-                       ir_node *and_left  = get_And_left(left);
-                       ir_node *and_right = get_And_right(left);
-                       ir_mode *mode      = get_irn_mode(and_left);
-
-                       match_arguments(&am, block, and_left, and_right, NULL,
-                                       match_commutative |
-                                       match_am | match_8bit_am | match_16bit_am |
-                                       match_am_and_immediates | match_immediate |
-                                       match_8bit | match_16bit);
-                       if (get_mode_size_bits(mode) == 8) {
-                               new_node = new_rd_ia32_Test8Bit(dbgi, irg, new_block, addr->base,
-                                                               addr->index, addr->mem, am.new_op1,
-                                                               am.new_op2, am.ins_permuted,
-                                                               cmp_unsigned);
-                       } else {
-                               new_node = new_rd_ia32_Test(dbgi, irg, new_block, addr->base,
-                                                           addr->index, addr->mem, am.new_op1,
-                                                           am.new_op2, am.ins_permuted, cmp_unsigned);
-                       }
+       if (is_Const_0(right)          &&
+           is_And(left)               &&
+           get_irn_n_edges(left) == 1 &&
+           can_fold_test_and(node)) {
+               /* Test(and_left, and_right) */
+               ir_node *and_left  = get_And_left(left);
+               ir_node *and_right = get_And_right(left);
+               ir_mode *mode      = get_irn_mode(and_left);
+
+               match_arguments(&am, block, and_left, and_right, NULL,
+                                                                               match_commutative |
+                                                                               match_am | match_8bit_am | match_16bit_am |
+                                                                               match_am_and_immediates | match_immediate |
+                                                                               match_8bit | match_16bit);
+               if (get_mode_size_bits(mode) == 8) {
+                       new_node = new_rd_ia32_Test8Bit(dbgi, irg, new_block, addr->base,
+                                                                                                                                                       addr->index, addr->mem, am.new_op1,
+                                                                                                                                                       am.new_op2, am.ins_permuted,
+                                                                                                                                                       cmp_unsigned);
                } else {
-                       match_arguments(&am, block, NULL, left, NULL,
-                                       match_am | match_8bit_am | match_16bit_am |
-                                       match_8bit | match_16bit);
-                       if (am.op_type == ia32_AddrModeS) {
-                               /* Cmp(AM, 0) */
-                               ir_node *imm_zero = try_create_Immediate(right, 0);
-                               if (get_mode_size_bits(cmp_mode) == 8) {
-                                       new_node = new_rd_ia32_Cmp8Bit(dbgi, irg, new_block, addr->base,
-                                                                      addr->index, addr->mem, am.new_op2,
-                                                                      imm_zero, am.ins_permuted,
-                                                                      cmp_unsigned);
-                               } else {
-                                       new_node = new_rd_ia32_Cmp(dbgi, irg, new_block, addr->base,
-                                                                  addr->index, addr->mem, am.new_op2,
-                                                                  imm_zero, am.ins_permuted, cmp_unsigned);
-                               }
-                       } else {
-                               /* Test(left, left) */
-                               if (get_mode_size_bits(cmp_mode) == 8) {
-                                       new_node = new_rd_ia32_Test8Bit(dbgi, irg, new_block, addr->base,
-                                                                       addr->index, addr->mem, am.new_op2,
-                                                                       am.new_op2, am.ins_permuted,
-                                                                       cmp_unsigned);
-                               } else {
-                                       new_node = new_rd_ia32_Test(dbgi, irg, new_block, addr->base,
-                                                                   addr->index, addr->mem, am.new_op2,
-                                                                   am.new_op2, am.ins_permuted,
-                                                                   cmp_unsigned);
-                               }
-                       }
+                       new_node = new_rd_ia32_Test(dbgi, irg, new_block, addr->base,
+                                                                                                                                       addr->index, addr->mem, am.new_op1,
+                                                                                                                                       am.new_op2, am.ins_permuted, cmp_unsigned);
                }
        } else {
                /* Cmp(left, right) */
@@ -2910,7 +2767,6 @@ static ir_node *gen_Cmp(ir_node *node)
                }
        }
        set_am_attributes(new_node, &am);
-       assert(cmp_mode != NULL);
        set_ia32_ls_mode(new_node, cmp_mode);
 
        SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
@@ -2927,15 +2783,15 @@ static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
        dbg_info            *dbgi          = get_irn_dbg_info(node);
        ir_node             *block         = get_nodes_block(node);
        ir_node             *new_block     = be_transform_node(block);
-       ir_node             *val_true      = get_Psi_val(node, 0);
-       ir_node             *val_false     = get_Psi_default(node);
+       ir_node             *val_true      = get_Mux_true(node);
+       ir_node             *val_false     = get_Mux_false(node);
        ir_node             *new_node;
        match_flags_t        match_flags;
        ia32_address_mode_t  am;
        ia32_address_t      *addr;
 
        assert(ia32_cg_config.use_cmov);
-       assert(mode_needs_gp_reg(get_irn_mode(val_true)));
+       assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
 
        addr = &am.addr;
 
@@ -2985,7 +2841,7 @@ static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
 /**
  * Create instruction for an unsigned Difference or Zero.
  */
-static ir_node *create_Doz(ir_node *psi, ir_node *new_block, ir_node *a, ir_node *b) {
+static ir_node *create_Doz(ir_node *psi, ir_node *a, ir_node *b) {
        ir_graph *irg   = current_ir_graph;
        ir_mode  *mode  = get_irn_mode(psi);
        ir_node  *new_node, *sub, *sbb, *eflags, *block, *noreg, *tmpreg, *nomem;
@@ -3018,85 +2874,97 @@ static ir_node *create_Doz(ir_node *psi, ir_node *new_block, ir_node *a, ir_node
 }
 
 /**
- * Transforms a Psi node into CMov.
+ * Transforms a Mux node into CMov.
  *
  * @return The transformed node.
  */
-static ir_node *gen_Psi(ir_node *node)
+static ir_node *gen_Mux(ir_node *node)
 {
        dbg_info *dbgi        = get_irn_dbg_info(node);
        ir_node  *block       = get_nodes_block(node);
        ir_node  *new_block   = be_transform_node(block);
-       ir_node  *psi_true    = get_Psi_val(node, 0);
-       ir_node  *psi_default = get_Psi_default(node);
-       ir_node  *cond        = get_Psi_cond(node, 0);
+       ir_node  *mux_true    = get_Mux_true(node);
+       ir_node  *mux_false   = get_Mux_false(node);
+       ir_node  *cond        = get_Mux_sel(node);
        ir_mode  *mode        = get_irn_mode(node);
-       ir_node  *cmp         = get_Proj_pred(cond);
-       ir_node  *cmp_left    = get_Cmp_left(cmp);
-       ir_node  *cmp_right   = get_Cmp_right(cmp);
-       pn_Cmp   pnc          = get_Proj_proj(cond);
+       pn_Cmp   pnc;
 
-       assert(get_Psi_n_conds(node) == 1);
        assert(get_irn_mode(cond) == mode_b);
 
-       /* Note: a Psi node uses a Load two times IFF it's used in the compare AND in the result */
+       /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
        if (mode_is_float(mode)) {
+               ir_node  *cmp         = get_Proj_pred(cond);
+               ir_node  *cmp_left    = get_Cmp_left(cmp);
+               ir_node  *cmp_right   = get_Cmp_right(cmp);
+               pn_Cmp   pnc          = get_Proj_proj(cond);
+
                if (ia32_cg_config.use_sse2) {
                        if (pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le) {
-                               if (cmp_left == psi_true && cmp_right == psi_default) {
-                                       /* psi(a <= b, a, b) => MIN */
+                               if (cmp_left == mux_true && cmp_right == mux_false) {
+                                       /* Mux(a <= b, a, b) => MIN */
                                        return gen_binop(node, cmp_left, cmp_right, new_rd_ia32_xMin,
                                         match_commutative | match_am | match_two_users);
-                               } else if (cmp_left == psi_default && cmp_right == psi_true) {
-                                       /* psi(a <= b, b, a) => MAX */
+                               } else if (cmp_left == mux_false && cmp_right == mux_true) {
+                                       /* Mux(a <= b, b, a) => MAX */
                                        return gen_binop(node, cmp_left, cmp_right, new_rd_ia32_xMax,
                                         match_commutative | match_am | match_two_users);
                                }
                        } else if (pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge) {
-                               if (cmp_left == psi_true && cmp_right == psi_default) {
-                                       /* psi(a >= b, a, b) => MAX */
+                               if (cmp_left == mux_true && cmp_right == mux_false) {
+                                       /* Mux(a >= b, a, b) => MAX */
                                        return gen_binop(node, cmp_left, cmp_right, new_rd_ia32_xMax,
                                         match_commutative | match_am | match_two_users);
-                               } else if (cmp_left == psi_default && cmp_right == psi_true) {
-                                       /* psi(a >= b, b, a) => MIN */
+                               } else if (cmp_left == mux_false && cmp_right == mux_true) {
+                                       /* Mux(a >= b, b, a) => MIN */
                                        return gen_binop(node, cmp_left, cmp_right, new_rd_ia32_xMin,
                                         match_commutative | match_am | match_two_users);
                                }
                        }
                }
-               panic("cannot transform floating point Psi");
+               panic("cannot transform floating point Mux");
 
        } else {
                ir_node *flags;
                ir_node *new_node;
 
-               assert(mode_needs_gp_reg(mode));
-
-               /* check for unsigned Doz first */
-               if ((pnc & pn_Cmp_Gt) && !mode_is_signed(mode) &&
-                   is_Const_0(psi_default) && is_Sub(psi_true) &&
-                   get_Sub_left(psi_true) == cmp_left && get_Sub_right(psi_true) == cmp_right) {
-                       /* Psi(a >=u b, a - b, 0) unsigned Doz */
-                       return create_Doz(node, new_block, cmp_left, cmp_right);
-               } else if ((pnc & pn_Cmp_Lt) && !mode_is_signed(mode) &&
-                                  is_Const_0(psi_true) && is_Sub(psi_default) &&
-                                  get_Sub_left(psi_default) == cmp_left && get_Sub_right(psi_default) == cmp_right) {
-                       /* Psi(a <=u b, 0, a - b) unsigned Doz */
-                       return create_Doz(node, new_block, cmp_left, cmp_right);
+               assert(ia32_mode_needs_gp_reg(mode));
+
+               if (is_Proj(cond)) {
+                       ir_node *cmp = get_Proj_pred(cond);
+                       if (is_Cmp(cmp)) {
+                               ir_node  *cmp_left    = get_Cmp_left(cmp);
+                               ir_node  *cmp_right   = get_Cmp_right(cmp);
+                               pn_Cmp   pnc          = get_Proj_proj(cond);
+
+                               /* check for unsigned Doz first */
+                               if ((pnc & pn_Cmp_Gt) && !mode_is_signed(mode) &&
+                                       is_Const_0(mux_false) && is_Sub(mux_true) &&
+                                       get_Sub_left(mux_true) == cmp_left && get_Sub_right(mux_true) == cmp_right) {
+                                       /* Mux(a >=u b, a - b, 0) unsigned Doz */
+                                       return create_Doz(node, cmp_left, cmp_right);
+                               } else if ((pnc & pn_Cmp_Lt) && !mode_is_signed(mode) &&
+                                       is_Const_0(mux_true) && is_Sub(mux_false) &&
+                                       get_Sub_left(mux_false) == cmp_left && get_Sub_right(mux_false) == cmp_right) {
+                                       /* Mux(a <=u b, 0, a - b) unsigned Doz */
+                                       return create_Doz(node, cmp_left, cmp_right);
+                               }
+                       }
                }
 
                flags = get_flags_node(cond, &pnc);
 
-               if (is_Const(psi_true) && is_Const(psi_default)) {
+               if (is_Const(mux_true) && is_Const(mux_false)) {
                        /* both are const, good */
-                       if (is_Const_1(psi_true) && is_Const_0(psi_default)) {
+                       if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
                                new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*is_premuted=*/0);
-                       } else if (is_Const_0(psi_true) && is_Const_1(psi_default)) {
+                       } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
                                new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*is_premuted=*/1);
                        } else {
                                /* Not that simple. */
+                               goto need_cmov;
                        }
                } else {
+need_cmov:
                        new_node = create_CMov(node, cond, flags, pnc);
                }
                return new_node;
@@ -3153,7 +3021,7 @@ static ir_node *gen_x87_fp_to_gp(ir_node *node) {
 }
 
 /**
- * Creates a x87 strict Conv by placing a Sore and a Load
+ * Creates a x87 strict Conv by placing a Store and a Load
  */
 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
 {
@@ -3434,9 +3302,21 @@ static ir_node *gen_Conv(ir_node *node) {
                        } else {
                                res = gen_x87_gp_to_fp(node, src_mode);
                                if(get_Conv_strict(node)) {
-                                       res = gen_x87_strict_conv(tgt_mode, res);
-                                       SET_IA32_ORIG_NODE(get_Proj_pred(res),
-                                                          ia32_get_old_node_name(env_cg, node));
+                                       /* A strict Conv (store/load round trip) is only needed when the
+                                        * integer can carry more significant bits than the float mantissa */
+                                       size_t int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
+                                       size_t float_mantissa;
+                                       /* FIXME There is no way to get the mantissa size of a mode */
+                                       switch (get_mode_size_bits(tgt_mode)) {
+                                               case 32: float_mantissa = 23 + 1; break; /* + 1 for implicit 1 */
+                                               case 64: float_mantissa = 52 + 1; break; /* + 1 for implicit 1 */
+                                               case 80: float_mantissa = 64;     break; /* integer bit is stored explicitly */
+                                               default: float_mantissa = 0;      break; /* unknown size: keep the strict Conv */
+                                       }
+                                       if (float_mantissa < int_mantissa) {
+                                               res = gen_x87_strict_conv(tgt_mode, res);
+                                               SET_IA32_ORIG_NODE(get_Proj_pred(res), ia32_get_old_node_name(env_cg, node));
+                                       }
                                }
                                return res;
                        }
@@ -3461,135 +3341,6 @@ static ir_node *gen_Conv(ir_node *node) {
        return res;
 }
 
-static int check_immediate_constraint(long val, char immediate_constraint_type)
-{
-       switch (immediate_constraint_type) {
-       case 0:
-               return 1;
-       case 'I':
-               return val >= 0 && val <= 32;
-       case 'J':
-               return val >= 0 && val <= 63;
-       case 'K':
-               return val >= -128 && val <= 127;
-       case 'L':
-               return val == 0xff || val == 0xffff;
-       case 'M':
-               return val >= 0 && val <= 3;
-       case 'N':
-               return val >= 0 && val <= 255;
-       case 'O':
-               return val >= 0 && val <= 127;
-       default:
-               break;
-       }
-       panic("Invalid immediate constraint found");
-       return 0;
-}
-
-static ir_node *try_create_Immediate(ir_node *node,
-                                     char immediate_constraint_type)
-{
-       int          minus         = 0;
-       tarval      *offset        = NULL;
-       int          offset_sign   = 0;
-       long         val = 0;
-       ir_entity   *symconst_ent  = NULL;
-       int          symconst_sign = 0;
-       ir_mode     *mode;
-       ir_node     *cnst          = NULL;
-       ir_node     *symconst      = NULL;
-       ir_node     *new_node;
-
-       mode = get_irn_mode(node);
-       if(!mode_is_int(mode) && !mode_is_reference(mode)) {
-               return NULL;
-       }
-
-       if(is_Minus(node)) {
-               minus = 1;
-               node  = get_Minus_op(node);
-       }
-
-       if(is_Const(node)) {
-               cnst        = node;
-               symconst    = NULL;
-               offset_sign = minus;
-       } else if(is_SymConst(node)) {
-               cnst          = NULL;
-               symconst      = node;
-               symconst_sign = minus;
-       } else if(is_Add(node)) {
-               ir_node *left  = get_Add_left(node);
-               ir_node *right = get_Add_right(node);
-               if(is_Const(left) && is_SymConst(right)) {
-                       cnst          = left;
-                       symconst      = right;
-                       symconst_sign = minus;
-                       offset_sign   = minus;
-               } else if(is_SymConst(left) && is_Const(right)) {
-                       cnst          = right;
-                       symconst      = left;
-                       symconst_sign = minus;
-                       offset_sign   = minus;
-               }
-       } else if(is_Sub(node)) {
-               ir_node *left  = get_Sub_left(node);
-               ir_node *right = get_Sub_right(node);
-               if(is_Const(left) && is_SymConst(right)) {
-                       cnst          = left;
-                       symconst      = right;
-                       symconst_sign = !minus;
-                       offset_sign   = minus;
-               } else if(is_SymConst(left) && is_Const(right)) {
-                       cnst          = right;
-                       symconst      = left;
-                       symconst_sign = minus;
-                       offset_sign   = !minus;
-               }
-       } else {
-               return NULL;
-       }
-
-       if(cnst != NULL) {
-               offset = get_Const_tarval(cnst);
-               if(tarval_is_long(offset)) {
-                       val = get_tarval_long(offset);
-               } else {
-                       ir_fprintf(stderr, "Optimisation Warning: tarval from %+F is not a "
-                                  "long?\n", cnst);
-                       return NULL;
-               }
-
-               if(!check_immediate_constraint(val, immediate_constraint_type))
-                       return NULL;
-       }
-       if(symconst != NULL) {
-               if(immediate_constraint_type != 0) {
-                       /* we need full 32bits for symconsts */
-                       return NULL;
-               }
-
-               /* unfortunately the assembler/linker doesn't support -symconst */
-               if(symconst_sign)
-                       return NULL;
-
-               if(get_SymConst_kind(symconst) != symconst_addr_ent)
-                       return NULL;
-               symconst_ent = get_SymConst_entity(symconst);
-       }
-       if(cnst == NULL && symconst == NULL)
-               return NULL;
-
-       if(offset_sign && offset != NULL) {
-               offset = tarval_neg(offset);
-       }
-
-       new_node = create_Immediate(symconst_ent, symconst_sign, val);
-
-       return new_node;
-}
-
 static ir_node *create_immediate_or_transform(ir_node *node,
                                               char immediate_constraint_type)
 {
@@ -3600,483 +3351,6 @@ static ir_node *create_immediate_or_transform(ir_node *node,
        return new_node;
 }
 
-static const arch_register_req_t no_register_req = {
-       arch_register_req_type_none,
-       NULL,                         /* regclass */
-       NULL,                         /* limit bitset */
-       0,                            /* same pos */
-       0                             /* different pos */
-};
-
-/**
- * An assembler constraint.
- */
-typedef struct constraint_t constraint_t;
-struct constraint_t {
-       int                         is_in;
-       int                         n_outs;
-       const arch_register_req_t **out_reqs;
-
-       const arch_register_req_t  *req;
-       unsigned                    immediate_possible;
-       char                        immediate_type;
-};
-
-static void parse_asm_constraint(int pos, constraint_t *constraint, const char *c)
-{
-       int                          immediate_possible = 0;
-       char                         immediate_type     = 0;
-       unsigned                     limited            = 0;
-       const arch_register_class_t *cls                = NULL;
-       ir_graph                    *irg = current_ir_graph;
-       struct obstack              *obst = get_irg_obstack(irg);
-       arch_register_req_t         *req;
-       unsigned                    *limited_ptr = NULL;
-       int                          p;
-       int                          same_as = -1;
-
-       /* TODO: replace all the asserts with nice error messages */
-
-       if(*c == 0) {
-               /* a memory constraint: no need to do anything in backend about it
-                * (the dependencies are already respected by the memory edge of
-                * the node) */
-               constraint->req = &no_register_req;
-               return;
-       }
-
-       while(*c != 0) {
-               switch(*c) {
-               case ' ':
-               case '\t':
-               case '\n':
-                       break;
-
-               case 'a':
-                       assert(cls == NULL ||
-                                       (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
-                       cls      = &ia32_reg_classes[CLASS_ia32_gp];
-                       limited |= 1 << REG_EAX;
-                       break;
-               case 'b':
-                       assert(cls == NULL ||
-                                       (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
-                       cls      = &ia32_reg_classes[CLASS_ia32_gp];
-                       limited |= 1 << REG_EBX;
-                       break;
-               case 'c':
-                       assert(cls == NULL ||
-                                       (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
-                       cls      = &ia32_reg_classes[CLASS_ia32_gp];
-                       limited |= 1 << REG_ECX;
-                       break;
-               case 'd':
-                       assert(cls == NULL ||
-                                       (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
-                       cls      = &ia32_reg_classes[CLASS_ia32_gp];
-                       limited |= 1 << REG_EDX;
-                       break;
-               case 'D':
-                       assert(cls == NULL ||
-                                       (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
-                       cls      = &ia32_reg_classes[CLASS_ia32_gp];
-                       limited |= 1 << REG_EDI;
-                       break;
-               case 'S':
-                       assert(cls == NULL ||
-                                       (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
-                       cls      = &ia32_reg_classes[CLASS_ia32_gp];
-                       limited |= 1 << REG_ESI;
-                       break;
-               case 'Q':
-               case 'q': /* q means lower part of the regs only, this makes no
-                                  * difference to Q for us (we only assigne whole registers) */
-                       assert(cls == NULL ||
-                                       (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
-                       cls      = &ia32_reg_classes[CLASS_ia32_gp];
-                       limited |= 1 << REG_EAX | 1 << REG_EBX | 1 << REG_ECX |
-                                  1 << REG_EDX;
-                       break;
-               case 'A':
-                       assert(cls == NULL ||
-                                       (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
-                       cls      = &ia32_reg_classes[CLASS_ia32_gp];
-                       limited |= 1 << REG_EAX | 1 << REG_EDX;
-                       break;
-               case 'l':
-                       assert(cls == NULL ||
-                                       (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
-                       cls      = &ia32_reg_classes[CLASS_ia32_gp];
-                       limited |= 1 << REG_EAX | 1 << REG_EBX | 1 << REG_ECX |
-                                  1 << REG_EDX | 1 << REG_ESI | 1 << REG_EDI |
-                                  1 << REG_EBP;
-                       break;
-
-               case 'R':
-               case 'r':
-               case 'p':
-                       assert(cls == NULL);
-                       cls      = &ia32_reg_classes[CLASS_ia32_gp];
-                       break;
-
-               case 'f':
-               case 't':
-               case 'u':
-                       /* TODO: mark values so the x87 simulator knows about t and u */
-                       assert(cls == NULL);
-                       cls = &ia32_reg_classes[CLASS_ia32_vfp];
-                       break;
-
-               case 'Y':
-               case 'x':
-                       assert(cls == NULL);
-                       /* TODO: check that sse2 is supported */
-                       cls = &ia32_reg_classes[CLASS_ia32_xmm];
-                       break;
-
-               case 'I':
-               case 'J':
-               case 'K':
-               case 'L':
-               case 'M':
-               case 'N':
-               case 'O':
-                       assert(!immediate_possible);
-                       immediate_possible = 1;
-                       immediate_type     = *c;
-                       break;
-               case 'n':
-               case 'i':
-                       assert(!immediate_possible);
-                       immediate_possible = 1;
-                       break;
-
-               case 'g':
-                       assert(!immediate_possible && cls == NULL);
-                       immediate_possible = 1;
-                       cls                = &ia32_reg_classes[CLASS_ia32_gp];
-                       break;
-
-               case '0':
-               case '1':
-               case '2':
-               case '3':
-               case '4':
-               case '5':
-               case '6':
-               case '7':
-               case '8':
-               case '9':
-                       assert(constraint->is_in && "can only specify same constraint "
-                              "on input");
-
-                       sscanf(c, "%d%n", &same_as, &p);
-                       if(same_as >= 0) {
-                               c += p;
-                               continue;
-                       }
-                       break;
-
-               case 'm':
-                       /* memory constraint no need to do anything in backend about it
-                        * (the dependencies are already respected by the memory edge of
-                        * the node) */
-                       constraint->req    = &no_register_req;
-                       return;
-
-               case 'E': /* no float consts yet */
-               case 'F': /* no float consts yet */
-               case 's': /* makes no sense on x86 */
-               case 'X': /* we can't support that in firm */
-               case 'o':
-               case 'V':
-               case '<': /* no autodecrement on x86 */
-               case '>': /* no autoincrement on x86 */
-               case 'C': /* sse constant not supported yet */
-               case 'G': /* 80387 constant not supported yet */
-               case 'y': /* we don't support mmx registers yet */
-               case 'Z': /* not available in 32 bit mode */
-               case 'e': /* not available in 32 bit mode */
-                       panic("unsupported asm constraint '%c' found in (%+F)",
-                             *c, current_ir_graph);
-                       break;
-               default:
-                       panic("unknown asm constraint '%c' found in (%+F)", *c,
-                             current_ir_graph);
-                       break;
-               }
-               ++c;
-       }
-
-       if(same_as >= 0) {
-               const arch_register_req_t *other_constr;
-
-               assert(cls == NULL && "same as and register constraint not supported");
-               assert(!immediate_possible && "same as and immediate constraint not "
-                      "supported");
-               assert(same_as < constraint->n_outs && "wrong constraint number in "
-                      "same_as constraint");
-
-               other_constr         = constraint->out_reqs[same_as];
-
-               req                  = obstack_alloc(obst, sizeof(req[0]));
-               req->cls             = other_constr->cls;
-               req->type            = arch_register_req_type_should_be_same;
-               req->limited         = NULL;
-               req->other_same      = 1U << pos;
-               req->other_different = 0;
-
-               /* switch constraints. This is because in firm we have same_as
-                * constraints on the output constraints while in the gcc asm syntax
-                * they are specified on the input constraints */
-               constraint->req               = other_constr;
-               constraint->out_reqs[same_as] = req;
-               constraint->immediate_possible = 0;
-               return;
-       }
-
-       if(immediate_possible && cls == NULL) {
-               cls = &ia32_reg_classes[CLASS_ia32_gp];
-       }
-       assert(!immediate_possible || cls == &ia32_reg_classes[CLASS_ia32_gp]);
-       assert(cls != NULL);
-
-       if(immediate_possible) {
-               assert(constraint->is_in
-                      && "immediate make no sense for output constraints");
-       }
-       /* todo: check types (no float input on 'r' constrained in and such... */
-
-       if(limited != 0) {
-               req          = obstack_alloc(obst, sizeof(req[0]) + sizeof(unsigned));
-               limited_ptr  = (unsigned*) (req+1);
-       } else {
-               req = obstack_alloc(obst, sizeof(req[0]));
-       }
-       memset(req, 0, sizeof(req[0]));
-
-       if(limited != 0) {
-               req->type    = arch_register_req_type_limited;
-               *limited_ptr = limited;
-               req->limited = limited_ptr;
-       } else {
-               req->type    = arch_register_req_type_normal;
-       }
-       req->cls = cls;
-
-       constraint->req                = req;
-       constraint->immediate_possible = immediate_possible;
-       constraint->immediate_type     = immediate_type;
-}
-
-static void parse_clobber(ir_node *node, int pos, constraint_t *constraint,
-                          const char *clobber)
-{
-       ir_graph                    *irg  = get_irn_irg(node);
-       struct obstack              *obst = get_irg_obstack(irg);
-       const arch_register_t       *reg  = NULL;
-       int                          c;
-       size_t                       r;
-       arch_register_req_t         *req;
-       const arch_register_class_t *cls;
-       unsigned                    *limited;
-
-       (void) pos;
-
-       /* TODO: construct a hashmap instead of doing linear search for clobber
-        * register */
-       for(c = 0; c < N_CLASSES; ++c) {
-               cls = & ia32_reg_classes[c];
-               for(r = 0; r < cls->n_regs; ++r) {
-                       const arch_register_t *temp_reg = arch_register_for_index(cls, r);
-                       if(strcmp(temp_reg->name, clobber) == 0
-                                       || (c == CLASS_ia32_gp && strcmp(temp_reg->name+1, clobber) == 0)) {
-                               reg = temp_reg;
-                               break;
-                       }
-               }
-               if(reg != NULL)
-                       break;
-       }
-       if(reg == NULL) {
-               panic("Register '%s' mentioned in asm clobber is unknown\n", clobber);
-               return;
-       }
-
-       assert(reg->index < 32);
-
-       limited  = obstack_alloc(obst, sizeof(limited[0]));
-       *limited = 1 << reg->index;
-
-       req          = obstack_alloc(obst, sizeof(req[0]));
-       memset(req, 0, sizeof(req[0]));
-       req->type    = arch_register_req_type_limited;
-       req->cls     = cls;
-       req->limited = limited;
-
-       constraint->req                = req;
-       constraint->immediate_possible = 0;
-       constraint->immediate_type     = 0;
-}
-
-static int is_memory_op(const ir_asm_constraint *constraint)
-{
-       ident      *id  = constraint->constraint;
-       const char *str = get_id_str(id);
-       const char *c;
-
-       for(c = str; *c != '\0'; ++c) {
-               if(*c == 'm')
-                       return 1;
-       }
-
-       return 0;
-}
-
-/**
- * generates code for a ASM node
- */
-static ir_node *gen_ASM(ir_node *node)
-{
-       int                         i, arity;
-       ir_graph                   *irg       = current_ir_graph;
-       ir_node                    *block     = get_nodes_block(node);
-       ir_node                    *new_block = be_transform_node(block);
-       dbg_info                   *dbgi      = get_irn_dbg_info(node);
-       ir_node                   **in;
-       ir_node                    *new_node;
-       int                         out_arity;
-       int                         n_out_constraints;
-       int                         n_clobbers;
-       const arch_register_req_t **out_reg_reqs;
-       const arch_register_req_t **in_reg_reqs;
-       ia32_asm_reg_t             *register_map;
-       unsigned                    reg_map_size = 0;
-       struct obstack             *obst;
-       const ir_asm_constraint    *in_constraints;
-       const ir_asm_constraint    *out_constraints;
-       ident                     **clobbers;
-       constraint_t                parsed_constraint;
-
-       arity = get_irn_arity(node);
-       in    = alloca(arity * sizeof(in[0]));
-       memset(in, 0, arity * sizeof(in[0]));
-
-       n_out_constraints = get_ASM_n_output_constraints(node);
-       n_clobbers        = get_ASM_n_clobbers(node);
-       out_arity         = n_out_constraints + n_clobbers;
-       /* hack to keep space for mem proj */
-       if(n_clobbers > 0)
-               out_arity += 1;
-
-       in_constraints  = get_ASM_input_constraints(node);
-       out_constraints = get_ASM_output_constraints(node);
-       clobbers        = get_ASM_clobbers(node);
-
-       /* construct output constraints */
-       obst         = get_irg_obstack(irg);
-       out_reg_reqs = obstack_alloc(obst, out_arity * sizeof(out_reg_reqs[0]));
-       parsed_constraint.out_reqs = out_reg_reqs;
-       parsed_constraint.n_outs   = n_out_constraints;
-       parsed_constraint.is_in    = 0;
-
-       for(i = 0; i < out_arity; ++i) {
-               const char   *c;
-
-               if(i < n_out_constraints) {
-                       const ir_asm_constraint *constraint = &out_constraints[i];
-                       c = get_id_str(constraint->constraint);
-                       parse_asm_constraint(i, &parsed_constraint, c);
-
-                       if(constraint->pos > reg_map_size)
-                               reg_map_size = constraint->pos;
-
-                       out_reg_reqs[i] = parsed_constraint.req;
-               } else if(i < out_arity - 1) {
-                       ident *glob_id = clobbers [i - n_out_constraints];
-                       assert(glob_id != NULL);
-                       c = get_id_str(glob_id);
-                       parse_clobber(node, i, &parsed_constraint, c);
-
-                       out_reg_reqs[i+1] = parsed_constraint.req;
-               }
-       }
-       if(n_clobbers > 1)
-               out_reg_reqs[n_out_constraints] = &no_register_req;
-
-       /* construct input constraints */
-       in_reg_reqs = obstack_alloc(obst, arity * sizeof(in_reg_reqs[0]));
-       parsed_constraint.is_in = 1;
-       for(i = 0; i < arity; ++i) {
-               const ir_asm_constraint   *constraint = &in_constraints[i];
-               ident                     *constr_id  = constraint->constraint;
-               const char                *c          = get_id_str(constr_id);
-
-               parse_asm_constraint(i, &parsed_constraint, c);
-               in_reg_reqs[i] = parsed_constraint.req;
-
-               if(constraint->pos > reg_map_size)
-                       reg_map_size = constraint->pos;
-
-               if(parsed_constraint.immediate_possible) {
-                       ir_node *pred      = get_irn_n(node, i);
-                       char     imm_type  = parsed_constraint.immediate_type;
-                       ir_node *immediate = try_create_Immediate(pred, imm_type);
-
-                       if(immediate != NULL) {
-                               in[i] = immediate;
-                       }
-               }
-       }
-       reg_map_size++;
-
-       register_map = NEW_ARR_D(ia32_asm_reg_t, obst, reg_map_size);
-       memset(register_map, 0, reg_map_size * sizeof(register_map[0]));
-
-       for(i = 0; i < n_out_constraints; ++i) {
-               const ir_asm_constraint *constraint = &out_constraints[i];
-               unsigned                 pos        = constraint->pos;
-
-               assert(pos < reg_map_size);
-               register_map[pos].use_input = 0;
-               register_map[pos].valid     = 1;
-               register_map[pos].memory    = is_memory_op(constraint);
-               register_map[pos].inout_pos = i;
-               register_map[pos].mode      = constraint->mode;
-       }
-
-       /* transform inputs */
-       for(i = 0; i < arity; ++i) {
-               const ir_asm_constraint *constraint = &in_constraints[i];
-               unsigned                 pos        = constraint->pos;
-               ir_node                 *pred       = get_irn_n(node, i);
-               ir_node                 *transformed;
-
-               assert(pos < reg_map_size);
-               register_map[pos].use_input = 1;
-               register_map[pos].valid     = 1;
-               register_map[pos].memory    = is_memory_op(constraint);
-               register_map[pos].inout_pos = i;
-               register_map[pos].mode      = constraint->mode;
-
-               if(in[i] != NULL)
-                       continue;
-
-               transformed = be_transform_node(pred);
-               in[i]       = transformed;
-       }
-
-       new_node = new_rd_ia32_Asm(dbgi, irg, new_block, arity, in, out_arity,
-                                  get_ASM_text(node), register_map);
-
-       set_ia32_out_req_all(new_node, out_reg_reqs);
-       set_ia32_in_req_all(new_node, in_reg_reqs);
-
-       SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
-
-       return new_node;
-}
-
 /**
  * Transforms a FrameAddr into an ia32 Add.
  */
@@ -4222,40 +3496,6 @@ static ir_node *gen_be_SubSP(ir_node *node)
        return gen_binop(node, sp, sz, new_rd_ia32_AddSP, match_am);
 }
 
-/**
- * This function just sets the register for the Unknown node
- * as this is not done during register allocation because Unknown
- * is an "ignore" node.
- */
-static ir_node *gen_Unknown(ir_node *node) {
-       ir_mode *mode = get_irn_mode(node);
-
-       if (mode_is_float(mode)) {
-               if (ia32_cg_config.use_sse2) {
-                       return ia32_new_Unknown_xmm(env_cg);
-               } else {
-                       /* Unknown nodes are buggy in x87 simulator, use zero for now... */
-                       ir_graph *irg   = current_ir_graph;
-                       dbg_info *dbgi  = get_irn_dbg_info(node);
-                       ir_node  *block = get_irg_start_block(irg);
-                       ir_node  *ret   = new_rd_ia32_vfldz(dbgi, irg, block);
-
-                       /* Const Nodes before the initial IncSP are a bad idea, because
-                        * they could be spilled and we have no SP ready at that point yet.
-                        * So add a dependency to the initial frame pointer calculation to
-                        * avoid that situation.
-                        */
-                       add_irn_dep(ret, get_irg_frame(irg));
-                       return ret;
-               }
-       } else if (mode_needs_gp_reg(mode)) {
-               return ia32_new_Unknown_gp(env_cg);
-       } else {
-               panic("unsupported Unknown-Mode");
-       }
-       return NULL;
-}
-
 /**
  * Change some phi modes
  */
@@ -4266,7 +3506,7 @@ static ir_node *gen_Phi(ir_node *node) {
        ir_mode  *mode  = get_irn_mode(node);
        ir_node  *phi;
 
-       if(mode_needs_gp_reg(mode)) {
+       if(ia32_mode_needs_gp_reg(mode)) {
                /* we shouldn't have any 64bit stuff around anymore */
                assert(get_mode_size_bits(mode) <= 32);
                /* all integer operations are on 32bit registers now */
@@ -4299,7 +3539,6 @@ static ir_node *gen_IJmp(ir_node *node)
 {
        ir_node  *block     = get_nodes_block(node);
        ir_node  *new_block = be_transform_node(block);
-       ir_graph *irg       = current_ir_graph;
        dbg_info *dbgi      = get_irn_dbg_info(node);
        ir_node  *op        = get_IJmp_target(node);
        ir_node  *new_node;
@@ -4312,8 +3551,9 @@ static ir_node *gen_IJmp(ir_node *node)
                        match_am | match_8bit_am | match_16bit_am |
                        match_immediate | match_8bit | match_16bit);
 
-       new_node = new_rd_ia32_IJmp(dbgi, irg, new_block, addr->base, addr->index,
-                                   addr->mem, am.new_op2);
+       new_node = new_rd_ia32_IJmp(dbgi, current_ir_graph, new_block,
+                                   addr->base, addr->index, addr->mem,
+                                   am.new_op2);
        set_am_attributes(new_node, &am);
        SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
 
@@ -4322,6 +3562,41 @@ static ir_node *gen_IJmp(ir_node *node)
        return new_node;
 }
 
+/**
+ * Transform a Bound node into an ia32 Sub whose flags feed a Jcc (only a constant-zero lower bound is supported).
+ */
+static ir_node *gen_Bound(ir_node *node)
+{
+       ir_node  *new_node;
+       ir_node  *lower = get_Bound_lower(node);
+       dbg_info *dbgi  = get_irn_dbg_info(node);
+
+       if (is_Const_0(lower)) {
+               /* typical case for Java: the lower bound is the constant 0 */
+               ir_node  *sub, *res, *flags, *block;
+               ir_graph *irg  = current_ir_graph;
+
+               res = gen_binop(node, get_Bound_index(node), get_Bound_upper(node),
+                       new_rd_ia32_Sub, match_mode_neutral     | match_am | match_immediate);
+
+               block = get_nodes_block(res);
+               if (! is_Proj(res)) { /* got the node itself: make it mode_T and access it through Projs */
+                       sub = res;
+                       set_irn_mode(sub, mode_T);
+                       res = new_rd_Proj(NULL, irg, block, sub, mode_Iu, pn_ia32_res); /* NOTE(review): res is not read again below */
+               } else { /* already a Proj: its predecessor is used as the Sub */
+                       sub = get_Proj_pred(res);
+               }
+               flags = new_rd_Proj(NULL, irg, block, sub, mode_Iu, pn_ia32_Sub_flags); /* flags output of the Sub */
+               new_node = new_rd_ia32_Jcc(dbgi, irg, block, flags, pn_Cmp_Lt | ia32_pn_Cmp_unsigned); /* condition: pn_Cmp_Lt with the ia32 "unsigned" bit */
+               SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
+       } else {
+               panic("generic Bound not supported in ia32 Backend");
+       }
+       return new_node;
+}
+
+
 typedef ir_node *construct_load_func(dbg_info *db, ir_graph *irg, ir_node *block, ir_node *base, ir_node *index, \
                                      ir_node *mem);
 
@@ -4792,22 +4067,19 @@ static ir_node *gen_Proj_Load(ir_node *node) {
        dbg_info *dbgi     = get_irn_dbg_info(node);
        long     proj      = get_Proj_proj(node);
 
-
        /* loads might be part of source address mode matches, so we don't
-          transform the ProjMs yet (with the exception of loads whose result is
-          not used)
+        * transform the ProjMs yet (with the exception of loads whose result is
+        * not used)
         */
        if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
                ir_node *res;
 
-               assert(pn_ia32_Load_M == 1); /* convention: mem-result of Source-AM
-                                                                               nodes is 1 */
                /* this is needed, because sometimes we have loops that are only
                   reachable through the ProjM */
                be_enqueue_preds(node);
                /* do it in 2 steps, to silence firm verifier */
                res = new_rd_Proj(dbgi, irg, block, pred, mode_M, pn_Load_M);
-               set_Proj_proj(res, pn_ia32_Load_M);
+               set_Proj_proj(res, pn_ia32_mem);
                return res;
        }
 
@@ -5137,37 +4409,88 @@ static ir_node *gen_Proj_Cmp(ir_node *node)
              node);
 }
 
+/**
+ * Transform the Projs from a Bound: M and res are forwarded to Bound operands, the X Projs become Projs of the transformed Bound.
+ */
+static ir_node *gen_Proj_Bound(ir_node *node)
+{
+       ir_node *new_node, *block;
+       ir_node *pred = get_Proj_pred(node);
+
+       switch (get_Proj_proj(node)) {
+       case pn_Bound_M: /* memory: use the transformed memory input of the Bound */
+               return be_transform_node(get_Bound_mem(pred));
+       case pn_Bound_X_regular: /* regular control flow: Proj number pn_ia32_Jcc_true on the transformed Bound */
+               new_node = be_transform_node(pred);
+               block    = get_nodes_block(new_node);
+               return new_r_Proj(current_ir_graph, block, new_node, mode_X, pn_ia32_Jcc_true);
+       case pn_Bound_X_except: /* exceptional control flow: Proj number pn_ia32_Jcc_false on the transformed Bound */
+               new_node = be_transform_node(pred);
+               block    = get_nodes_block(new_node);
+               return new_r_Proj(current_ir_graph, block, new_node, mode_X, pn_ia32_Jcc_false);
+       case pn_Bound_res: /* result: use the transformed index operand of the Bound */
+               return be_transform_node(get_Bound_index(pred));
+       default: /* any other Proj number is rejected */
+               panic("unsupported Proj from Bound");
+       }
+}
+
+static ir_node *gen_Proj_ASM(ir_node *node) /* Transform a Proj of an ASM node; only the memory Proj gets a new number. */
+{
+       ir_node *pred;
+       ir_node *new_pred;
+       ir_node *block;
+
+       if (get_irn_mode(node) != mode_M) /* non-memory Projs are simply duplicated */
+               return be_duplicate_node(node);
+
+       pred     = get_Proj_pred(node);
+       new_pred = be_transform_node(pred);
+       block    = get_nodes_block(new_pred);
+       return new_r_Proj(current_ir_graph, block, new_pred, mode_M,
+                       get_ia32_n_res(new_pred) + 1); /* memory Proj number: get_ia32_n_res() of the transformed node plus one */
+}
+
 /**
  * Transform and potentially renumber Proj nodes.
  */
 static ir_node *gen_Proj(ir_node *node) {
-       ir_node  *pred = get_Proj_pred(node);
-       if (is_Store(pred)) {
-               long proj = get_Proj_proj(node);
+       ir_node *pred = get_Proj_pred(node);
+       long    proj;
+
+       switch (get_irn_opcode(pred)) {
+       case iro_Store:
+               proj = get_Proj_proj(node);
                if (proj == pn_Store_M) {
                        return be_transform_node(pred);
                } else {
                        assert(0);
                        return new_r_Bad(current_ir_graph);
                }
-       } else if (is_Load(pred)) {
+       case iro_Load:
                return gen_Proj_Load(node);
-       } else if (is_Div(pred) || is_Mod(pred) || is_DivMod(pred)) {
+       case iro_ASM:
+               return gen_Proj_ASM(node);
+       case iro_Div:
+       case iro_Mod:
+       case iro_DivMod:
                return gen_Proj_DivMod(node);
-       } else if (is_CopyB(pred)) {
+       case iro_CopyB:
                return gen_Proj_CopyB(node);
-       } else if (is_Quot(pred)) {
+       case iro_Quot:
                return gen_Proj_Quot(node);
-       } else if (be_is_SubSP(pred)) {
+       case beo_SubSP:
                return gen_Proj_be_SubSP(node);
-       } else if (be_is_AddSP(pred)) {
+       case beo_AddSP:
                return gen_Proj_be_AddSP(node);
-       } else if (be_is_Call(pred)) {
+       case beo_Call:
                return gen_Proj_be_Call(node);
-       } else if (is_Cmp(pred)) {
+       case iro_Cmp:
                return gen_Proj_Cmp(node);
-       } else if (get_irn_op(pred) == op_Start) {
-               long proj = get_Proj_proj(node);
+       case iro_Bound:
+               return gen_Proj_Bound(node);
+       case iro_Start:
+               proj = get_Proj_proj(node);
                if (proj == pn_Start_X_initial_exec) {
                        ir_node *block = get_nodes_block(pred);
                        dbg_info *dbgi = get_irn_dbg_info(node);
@@ -5181,26 +4504,29 @@ static ir_node *gen_Proj(ir_node *node) {
                if (node == be_get_old_anchor(anchor_tls)) {
                        return gen_Proj_tls(node);
                }
-       } else if (is_ia32_l_FloattoLL(pred)) {
-               return gen_Proj_l_FloattoLL(node);
+               break;
+
+       default:
+               if (is_ia32_l_FloattoLL(pred)) {
+                       return gen_Proj_l_FloattoLL(node);
 #ifdef FIRM_EXT_GRS
-       } else if(!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
+               } else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
 #else
-       } else {
+               } else {
 #endif
-               ir_mode *mode = get_irn_mode(node);
-               if (mode_needs_gp_reg(mode)) {
-                       ir_node *new_pred = be_transform_node(pred);
-                       ir_node *block    = be_transform_node(get_nodes_block(node));
-                       ir_node *new_proj = new_r_Proj(current_ir_graph, block, new_pred,
-                                                      mode_Iu, get_Proj_proj(node));
+                       ir_mode *mode = get_irn_mode(node);
+                       if (ia32_mode_needs_gp_reg(mode)) {
+                               ir_node *new_pred = be_transform_node(pred);
+                               ir_node *block    = be_transform_node(get_nodes_block(node));
+                               ir_node *new_proj = new_r_Proj(current_ir_graph, block, new_pred,
+                                                                                          mode_Iu, get_Proj_proj(node));
 #ifdef DEBUG_libfirm
-                       new_proj->node_nr = node->node_nr;
+                               new_proj->node_nr = node->node_nr;
 #endif
-                       return new_proj;
+                               return new_proj;
+                       }
                }
        }
-
        return be_duplicate_node(node);
 }
 
@@ -5227,7 +4553,7 @@ static void register_transformers(void)
        GEN(Shl);
        GEN(Shr);
        GEN(Shrs);
-       GEN(Rot);
+       GEN(Rotl);
 
        GEN(Quot);
 
@@ -5247,11 +4573,11 @@ static void register_transformers(void)
        GEN(Cmp);
        GEN(ASM);
        GEN(CopyB);
-       BAD(Mux);
-       GEN(Psi);
+       GEN(Mux);
        GEN(Proj);
        GEN(Phi);
        GEN(IJmp);
+       GEN(Bound);
 
        /* transform ops from intrinsic lowering */
        GEN(ia32_l_Add);
@@ -5325,7 +4651,7 @@ static void ia32_pretransform_node(void *arch_cg) {
 
 /**
  * Walker, checks if all ia32 nodes producing more than one result have
- * its Projs, other wise creates new projs and keep them using a be_Keep node.
+ * their Projs, otherwise creates new Projs and keeps them using a be_Keep node.
  */
 static void add_missing_keep_walker(ir_node *node, void *data)
 {
@@ -5351,7 +4677,10 @@ static void add_missing_keep_walker(ir_node *node, void *data)
                ir_node *proj = get_edge_src_irn(edge);
                int      pn   = get_Proj_proj(proj);
 
-               assert(get_irn_mode(proj) == mode_M || pn < n_outs);
+               if (get_irn_mode(proj) == mode_M)
+                       continue;
+
+               assert(pn < n_outs);
                found_projs |= 1 << pn;
        }
 
@@ -5362,28 +4691,28 @@ static void add_missing_keep_walker(ir_node *node, void *data)
                ir_node                     *block;
                ir_node                     *in[1];
                const arch_register_req_t   *req;
-               const arch_register_class_t *class;
+               const arch_register_class_t *cls;
 
                if(found_projs & (1 << i)) {
                        continue;
                }
 
-               req   = get_ia32_out_req(node, i);
-               class = req->cls;
-               if(class == NULL) {
+               req = get_ia32_out_req(node, i);
+               cls = req->cls;
+               if(cls == NULL) {
                        continue;
                }
-               if(class == &ia32_reg_classes[CLASS_ia32_flags]) {
+               if(cls == &ia32_reg_classes[CLASS_ia32_flags]) {
                        continue;
                }
 
                block = get_nodes_block(node);
                in[0] = new_r_Proj(current_ir_graph, block, node,
-                                  arch_register_class_mode(class), i);
+                                  arch_register_class_mode(cls), i);
                if(last_keep != NULL) {
-                       be_Keep_add_node(last_keep, class, in[0]);
+                       be_Keep_add_node(last_keep, cls, in[0]);
                } else {
-                       last_keep = be_new_Keep(class, current_ir_graph, block, 1, in);
+                       last_keep = be_new_Keep(cls, current_ir_graph, block, 1, in);
                        if(sched_is_scheduled(node)) {
                                sched_add_after(node, last_keep);
                        }
@@ -5410,9 +4739,9 @@ void ia32_transform_graph(ia32_code_gen_t *cg) {
        env_cg       = cg;
        initial_fpcw = NULL;
 
-BE_TIMER_PUSH(t_heights);
+       BE_TIMER_PUSH(t_heights);
        heights      = heights_new(irg);
-BE_TIMER_POP(t_heights);
+       BE_TIMER_POP(t_heights);
        ia32_calculate_non_address_mode_nodes(cg->birg);
 
        /* the transform phase is not safe for CSE (yet) because several nodes get