do not free the graph after emitting it (this should only change the peak memory...
[libfirm] / ir / be / ia32 / ia32_transform.c
index 3f9bab9..415100d 100644 (file)
 #include "array_t.h"
 #include "height.h"
 
-#include "../benode_t.h"
+#include "../benode.h"
 #include "../besched.h"
 #include "../beabi.h"
 #include "../beutil.h"
-#include "../beirg_t.h"
+#include "../beirg.h"
 #include "../betranshlp.h"
 #include "../be_t.h"
 
@@ -195,6 +195,19 @@ static bool is_simple_sse_Const(ir_node *node)
        return false;
 }
 
+/**
+ * Returns the node to use as base address for newly created symbols
+ * (entities): the PIC base if the backend options request position
+ * independent code, noreg_GP otherwise.
+ *
+ * @return the node from arch_code_generator_get_pic_base() when the pic
+ *         option is set, else noreg_GP
+ */
+static ir_node *get_symconst_base(void)
+{
+       if (env_cg->birg->main_env->options->pic) {
+               return arch_code_generator_get_pic_base(env_cg);
+       }
+
+       return noreg_GP;
+}
+
 /**
  * Transforms a Const.
  */
@@ -210,6 +223,7 @@ static ir_node *gen_Const(ir_node *node)
        if (mode_is_float(mode)) {
                ir_node   *res   = NULL;
                ir_node   *load;
+               ir_node   *base;
                ir_entity *floatent;
 
                if (ia32_cg_config.use_sse2) {
@@ -271,7 +285,9 @@ static ir_node *gen_Const(ir_node *node)
 #endif /* CONSTRUCT_SSE_CONST */
                                floatent = create_float_const_entity(node);
 
-                               load     = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode);
+                               base     = get_symconst_base();
+                               load     = new_bd_ia32_xLoad(dbgi, block, base, noreg_GP, nomem,
+                                                            mode);
                                set_ia32_op_type(load, ia32_AddrModeS);
                                set_ia32_am_sc(load, floatent);
                                arch_irn_add_flags(load, arch_irn_flags_rematerializable);
@@ -288,13 +304,14 @@ static ir_node *gen_Const(ir_node *node)
                                set_ia32_ls_mode(load, mode);
                        } else {
                                ir_mode *ls_mode;
+                               ir_node *base;
 
                                floatent = create_float_const_entity(node);
                                /* create_float_const_ent is smart and sometimes creates
                                   smaller entities */
                                ls_mode  = get_type_mode(get_entity_type(floatent));
-
-                               load     = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem,
+                               base     = get_symconst_base();
+                               load     = new_bd_ia32_vfld(dbgi, block, base, noreg_GP, nomem,
                                                            ls_mode);
                                set_ia32_op_type(load, ia32_AddrModeS);
                                set_ia32_am_sc(load, floatent);
@@ -370,8 +387,8 @@ static ir_node *gen_SymConst(ir_node *node)
  * @param mode   the mode for the float type (might be integer mode for SSE2 types)
  * @param align  alignment
  */
-static ir_type *ia32_create_float_type(ir_mode *mode, unsigned align) {
-       char    buf[32];
+static ir_type *ia32_create_float_type(ir_mode *mode, unsigned align)
+{
        ir_type *tp;
 
        assert(align <= 16);
@@ -380,8 +397,7 @@ static ir_type *ia32_create_float_type(ir_mode *mode, unsigned align) {
                static ir_type *int_Iu[16] = {NULL, };
 
                if (int_Iu[align] == NULL) {
-                       snprintf(buf, sizeof(buf), "int_Iu_%u", align);
-                       int_Iu[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
+                       int_Iu[align] = tp = new_type_primitive(mode);
                        /* set the specified alignment */
                        set_type_alignment_bytes(tp, align);
                }
@@ -390,8 +406,7 @@ static ir_type *ia32_create_float_type(ir_mode *mode, unsigned align) {
                static ir_type *int_Lu[16] = {NULL, };
 
                if (int_Lu[align] == NULL) {
-                       snprintf(buf, sizeof(buf), "int_Lu_%u", align);
-                       int_Lu[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
+                       int_Lu[align] = tp = new_type_primitive(mode);
                        /* set the specified alignment */
                        set_type_alignment_bytes(tp, align);
                }
@@ -400,8 +415,7 @@ static ir_type *ia32_create_float_type(ir_mode *mode, unsigned align) {
                static ir_type *float_F[16] = {NULL, };
 
                if (float_F[align] == NULL) {
-                       snprintf(buf, sizeof(buf), "float_F_%u", align);
-                       float_F[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
+                       float_F[align] = tp = new_type_primitive(mode);
                        /* set the specified alignment */
                        set_type_alignment_bytes(tp, align);
                }
@@ -410,8 +424,7 @@ static ir_type *ia32_create_float_type(ir_mode *mode, unsigned align) {
                static ir_type *float_D[16] = {NULL, };
 
                if (float_D[align] == NULL) {
-                       snprintf(buf, sizeof(buf), "float_D_%u", align);
-                       float_D[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
+                       float_D[align] = tp = new_type_primitive(mode);
                        /* set the specified alignment */
                        set_type_alignment_bytes(tp, align);
                }
@@ -420,8 +433,7 @@ static ir_type *ia32_create_float_type(ir_mode *mode, unsigned align) {
                static ir_type *float_E[16] = {NULL, };
 
                if (float_E[align] == NULL) {
-                       snprintf(buf, sizeof(buf), "float_E_%u", align);
-                       float_E[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
+                       float_E[align] = tp = new_type_primitive(mode);
                        /* set the specified alignment */
                        set_type_alignment_bytes(tp, align);
                }
@@ -434,8 +446,8 @@ static ir_type *ia32_create_float_type(ir_mode *mode, unsigned align) {
  *
  * @param tp  the atomic type
  */
-static ir_type *ia32_create_float_array(ir_type *tp) {
-       char     buf[32];
+static ir_type *ia32_create_float_array(ir_type *tp)
+{
        ir_mode  *mode = get_type_mode(tp);
        unsigned align = get_type_alignment_bytes(tp);
        ir_type  *arr;
@@ -447,22 +459,19 @@ static ir_type *ia32_create_float_array(ir_type *tp) {
 
                if (float_F[align] != NULL)
                        return float_F[align];
-               snprintf(buf, sizeof(buf), "arr_float_F_%u", align);
-               arr = float_F[align] = new_type_array(new_id_from_str(buf), 1, tp);
+               arr = float_F[align] = new_type_array(1, tp);
        } else if (mode == mode_D) {
                static ir_type *float_D[16] = {NULL, };
 
                if (float_D[align] != NULL)
                        return float_D[align];
-               snprintf(buf, sizeof(buf), "arr_float_D_%u", align);
-               arr = float_D[align] = new_type_array(new_id_from_str(buf), 1, tp);
+               arr = float_D[align] = new_type_array(1, tp);
        } else {
                static ir_type *float_E[16] = {NULL, };
 
                if (float_E[align] != NULL)
                        return float_E[align];
-               snprintf(buf, sizeof(buf), "arr_float_E_%u", align);
-               arr = float_E[align] = new_type_array(new_id_from_str(buf), 1, tp);
+               arr = float_E[align] = new_type_array(1, tp);
        }
        set_type_alignment_bytes(arr, align);
        set_type_size_bytes(arr, 2 * get_type_size_bytes(tp));
@@ -510,9 +519,8 @@ ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
                ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
 
                set_entity_ld_ident(ent, get_entity_ident(ent));
-               set_entity_visibility(ent, visibility_local);
-               set_entity_variability(ent, variability_constant);
-               set_entity_allocation(ent, allocation_static);
+               add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
+               set_entity_visibility(ent, ir_visibility_local);
 
                if (kct == ia32_ULLBIAS) {
                        ir_initializer_t *initializer = create_initializer_compound(2);
@@ -624,9 +632,10 @@ static void build_address(ia32_address_mode_t *am, ir_node *node,
        ir_node        *mem;
        ir_node        *new_mem;
 
+       /* floating point immediates */
        if (is_Const(node)) {
                ir_entity *entity  = create_float_const_entity(node);
-               addr->base         = noreg_GP;
+               addr->base         = get_symconst_base();
                addr->index        = noreg_GP;
                addr->mem          = nomem;
                addr->symconst_ent = entity;
@@ -829,6 +838,7 @@ static void match_arguments(ia32_address_mode_t *am, ir_node *block,
                }
                am->op_type = ia32_AddrModeS;
        } else {
+               ir_mode *mode;
                am->op_type = ia32_Normal;
 
                if (flags & match_try_am) {
@@ -837,11 +847,18 @@ static void match_arguments(ia32_address_mode_t *am, ir_node *block,
                        return;
                }
 
-               new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
-               if (new_op2 == NULL)
-                       new_op2 = be_transform_node(op2);
-               am->ls_mode =
-                       (flags & match_mode_neutral ? mode_Iu : get_irn_mode(op2));
+               mode = get_irn_mode(op2);
+               if (flags & match_upconv_32 && get_mode_size_bits(mode) != 32) {
+                       new_op1 = (op1 == NULL ? NULL : create_upconv(op1, NULL));
+                       if (new_op2 == NULL)
+                               new_op2 = create_upconv(op2, NULL);
+                       am->ls_mode = mode_Iu;
+               } else {
+                       new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
+                       if (new_op2 == NULL)
+                               new_op2 = be_transform_node(op2);
+                       am->ls_mode = (flags & match_mode_neutral) ? mode_Iu : mode;
+               }
        }
        if (addr->base == NULL)
                addr->base = noreg_GP;
@@ -1295,6 +1312,10 @@ static ir_node *gen_Mulh(ir_node *node)
        ir_node              *new_node;
        ir_node              *proj_res_high;
 
+       if (get_mode_size_bits(mode) != 32) {
+               panic("Mulh without 32bit size not supported in ia32 backend (%+F)", node);
+       }
+
        if (mode_is_signed(mode)) {
                new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
                proj_res_high = new_rd_Proj(dbgi, new_block, new_node, mode_Iu, pn_ia32_IMul1OP_res_high);
@@ -1519,7 +1540,7 @@ static ir_node *create_Div(ir_node *node)
                panic("invalid divmod node %+F", node);
        }
 
-       match_arguments(&am, block, op1, op2, NULL, match_am);
+       match_arguments(&am, block, op1, op2, NULL, match_am | match_upconv_32);
 
        /* Beware: We don't need a Sync, if the memory predecessor of the Div node
           is the memory of the consumed address. We can have only the second op as address
@@ -1777,8 +1798,8 @@ static ir_node *gen_Minus(ir_node *node)
                         * several AM nodes... */
                        ir_node *noreg_xmm = ia32_new_NoReg_xmm(env_cg);
 
-                       new_node = new_bd_ia32_xXor(dbgi, block, noreg_GP, noreg_GP,
-                                                   nomem, new_op, noreg_xmm);
+                       new_node = new_bd_ia32_xXor(dbgi, block, get_symconst_base(),
+                                                   noreg_GP, nomem, new_op, noreg_xmm);
 
                        size = get_mode_size_bits(mode);
                        ent  = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
@@ -1805,7 +1826,7 @@ static ir_node *gen_Minus(ir_node *node)
  */
 static ir_node *gen_Not(ir_node *node)
 {
-       ir_node *op   = get_Not_op(node);
+       ir_node *op = get_Not_op(node);
 
        assert(get_irn_mode(node) != mode_b); /* should be lowered already */
        assert (! mode_is_float(get_irn_mode(node)));
@@ -1837,8 +1858,8 @@ static ir_node *gen_Abs(ir_node *node)
 
                if (ia32_cg_config.use_sse2) {
                        ir_node *noreg_fp = ia32_new_NoReg_xmm(env_cg);
-                       new_node = new_bd_ia32_xAnd(dbgi, new_block, noreg_GP, noreg_GP,
-                                                   nomem, new_op, noreg_fp);
+                       new_node = new_bd_ia32_xAnd(dbgi, new_block, get_symconst_base(),
+                                                   noreg_GP, nomem, new_op, noreg_fp);
 
                        size = get_mode_size_bits(mode);
                        ent  = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
@@ -1944,8 +1965,17 @@ static ir_node *get_flags_node(ir_node *node, pn_Cmp *pnc_out)
                                        }
                                }
                        }
-                       flags    = be_transform_node(pred);
+                       /* add ia32 compare flags */
+                       {
+                               ir_node *l    = get_Cmp_left(pred);
+                               ir_mode *mode = get_irn_mode(l);
+                               if (mode_is_float(mode))
+                                       pnc |= ia32_pn_Cmp_float;
+                               else if (! mode_is_signed(mode))
+                                       pnc |= ia32_pn_Cmp_unsigned;
+                       }
                        *pnc_out = pnc;
+                       flags = be_transform_node(pred);
                        return flags;
                }
        }
@@ -2170,43 +2200,55 @@ static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
        return new_node;
 }
 
+static pn_Cmp ia32_get_negated_pnc(pn_Cmp pnc)
+{      /* Negate a compare relation that may have the ia32_pn_Cmp_float bit set.
+        * get_negated_pnc() wants a mode; only float vs. non-float is known
+        * from pnc here, so mode_F or mode_Iu is passed as a stand-in
+        * (NOTE(review): assumes get_negated_pnc() only looks at the mode's
+        * float-ness -- confirm). */
+       ir_mode *mode = pnc & ia32_pn_Cmp_float ? mode_F : mode_Iu;
+       return get_negated_pnc(pnc, mode);
+}
+
 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
 {
-       ir_mode  *mode        = get_irn_mode(node);
-       ir_node  *mux_true    = get_Mux_true(node);
-       ir_node  *mux_false   = get_Mux_false(node);
-       ir_node  *cond;
-       ir_node  *new_mem;
-       dbg_info *dbgi;
-       ir_node  *block;
-       ir_node  *new_block;
-       ir_node  *flags;
-       ir_node  *new_node;
-       int       negated;
-       pn_Cmp    pnc;
-       ia32_address_t addr;
+       ir_mode        *mode      = get_irn_mode(node);
+       ir_node        *mux_true  = get_Mux_true(node);
+       ir_node        *mux_false = get_Mux_false(node);
+       ir_node        *cond;
+       ir_node        *new_mem;
+       dbg_info       *dbgi;
+       ir_node        *block;
+       ir_node        *new_block;
+       ir_node        *flags;
+       ir_node        *new_node;
+       bool            negated;
+       pn_Cmp          pnc;
+       ia32_address_t  addr;
 
        if (get_mode_size_bits(mode) != 8)
                return NULL;
 
        if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
-               negated = 0;
+               negated = false;
        } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
-               negated = 1;
+               negated = true;
        } else {
                return NULL;
        }
 
+       cond  = get_Mux_sel(node);
+       flags = get_flags_node(cond, &pnc);
+       /* we can't handle the float special cases with SetM */
+       if (pnc & ia32_pn_Cmp_float)
+               return NULL;
+       if (negated)
+               pnc = ia32_get_negated_pnc(pnc);
+
        build_address_ptr(&addr, ptr, mem);
 
        dbgi      = get_irn_dbg_info(node);
        block     = get_nodes_block(node);
        new_block = be_transform_node(block);
-       cond      = get_Mux_sel(node);
-       flags     = get_flags_node(cond, &pnc);
        new_mem   = be_transform_node(mem);
-       new_node  = new_bd_ia32_SetMem(dbgi, new_block, addr.base,
-                                      addr.index, addr.mem, flags, pnc, negated);
+       new_node  = new_bd_ia32_SetccMem(dbgi, new_block, addr.base,
+                                        addr.index, addr.mem, flags, pnc);
        set_address(new_node, &addr);
        set_ia32_op_type(new_node, ia32_AddrModeD);
        set_ia32_ls_mode(new_node, mode);
@@ -2332,6 +2374,7 @@ static ir_node *try_create_dest_am(ir_node *node)
        case iro_Mux:
                new_node = try_create_SetMem(val, ptr, mem);
                break;
+
        case iro_Minus:
                op1      = get_Minus_op(val);
                new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
@@ -2457,10 +2500,9 @@ static ir_node *gen_vfist(dbg_info *dbgi, ir_node *block, ir_node *base, ir_node
        if (ia32_cg_config.use_fisttp) {
                /* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
                if other users exists */
-               const arch_register_class_t *reg_class = &ia32_reg_classes[CLASS_ia32_vfp];
                ir_node *vfisttp = new_bd_ia32_vfisttp(dbgi, block, base, index, mem, val);
                ir_node *value   = new_r_Proj(block, vfisttp, mode_E, pn_ia32_vfisttp_res);
-               be_new_Keep(reg_class, block, 1, &value);
+               be_new_Keep(block, 1, &value);
 
                new_node = new_r_Proj(block, vfisttp, mode_M, pn_ia32_vfisttp_M);
                *fist    = vfisttp;
@@ -2622,8 +2664,8 @@ static ir_node *create_Switch(ir_node *node)
                        switch_max = pn;
        }
 
-       if ((unsigned long) (switch_max - switch_min) > 256000) {
-               panic("Size of switch %+F bigger than 256000", node);
+       if ((unsigned long) (switch_max - switch_min) > 128000) {
+               panic("Size of switch %+F bigger than 128000", node);
        }
 
        if (switch_min != 0) {
@@ -2957,9 +2999,12 @@ static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
        match_arguments(&am, block, val_false, val_true, flags,
                        match_commutative | match_am | match_16bit_am | match_mode_neutral);
 
-       new_node = new_bd_ia32_CMov(dbgi, new_block, addr->base, addr->index,
-                                   addr->mem, am.new_op1, am.new_op2, new_flags,
-                                   am.ins_permuted, pnc);
+       if (am.ins_permuted)
+               pnc = ia32_get_negated_pnc(pnc);
+
+       new_node = new_bd_ia32_CMovcc(dbgi, new_block, addr->base, addr->index,
+                                     addr->mem, am.new_op1, am.new_op2, new_flags,
+                                     pnc);
        set_am_attributes(new_node, &am);
 
        SET_IA32_ORIG_NODE(new_node, node);
@@ -2973,13 +3018,13 @@ static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
  * Creates a ia32 Setcc instruction.
  */
 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
-                                 ir_node *flags, pn_Cmp pnc, ir_node *orig_node,
-                                 int ins_permuted)
+                                 ir_node *flags, pn_Cmp pnc,
+                                 ir_node *orig_node)
 {
        ir_mode *mode  = get_irn_mode(orig_node);
        ir_node *new_node;
 
-       new_node = new_bd_ia32_Set(dbgi, new_block, flags, pnc, ins_permuted);
+       new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, pnc);
        SET_IA32_ORIG_NODE(new_node, orig_node);
 
        /* we might need to conv the result up */
@@ -2995,10 +3040,15 @@ static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
 /**
  * Create instruction for an unsigned Difference or Zero.
  */
-static ir_node *create_Doz(ir_node *psi, ir_node *a, ir_node *b)
+static ir_node *create_doz(ir_node *psi, ir_node *a, ir_node *b)
 {
-       ir_mode  *mode  = get_irn_mode(psi);
-       ir_node  *new_node, *sub, *sbb, *eflags, *block;
+       ir_mode *mode  = get_irn_mode(psi);
+       ir_node *new_node;
+       ir_node *sub;
+       ir_node *sbb;
+       ir_node *not;
+       ir_node *eflags;
+       ir_node *block;
 
        dbg_info *dbgi;
 
@@ -3019,8 +3069,9 @@ static ir_node *create_Doz(ir_node *psi, ir_node *a, ir_node *b)
 
        dbgi   = get_irn_dbg_info(psi);
        sbb    = new_bd_ia32_Sbb0(dbgi, block, eflags);
+       not    = new_bd_ia32_Not(dbgi, block, sbb);
 
-       new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, sbb);
+       new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, not);
        set_ia32_commutative(new_node);
        return new_node;
 }
@@ -3069,9 +3120,8 @@ static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **ne
        ent = new_entity(get_glob_type(), ia32_unique_id(".LC%u"), tp);
 
        set_entity_ld_ident(ent, get_entity_ident(ent));
-       set_entity_visibility(ent, visibility_local);
-       set_entity_variability(ent, variability_constant);
-       set_entity_allocation(ent, allocation_static);
+       set_entity_visibility(ent, ir_visibility_local);
+       add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
 
        initializer = create_initializer_compound(2);
 
@@ -3084,6 +3134,155 @@ static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **ne
        return ent;
 }
 
+/**
+ * Possible transformations for creating a Setcc.
+ *
+ * Each value names the kind of one step in setcc_transform_t::steps.
+ * The notes on the enumerators only describe how find_const_transform()
+ * fills in the step; the code that emits the instructions is not fully
+ * visible here, so the exact instruction semantics should be checked there.
+ */
+enum setcc_transform_insn {
+       SETCC_TR_ADD,   /* step.val holds the constant that was subtracted from both values */
+       SETCC_TR_ADDxx, /* not produced by find_const_transform() */
+       SETCC_TR_LEA,   /* chosen instead of SHL when a folded ADD left a non-zero step.val */
+       SETCC_TR_LEAxx, /* (a << step.scale) + a */
+       SETCC_TR_SHL,   /* (a << step.scale) */
+       SETCC_TR_NEG,
+       SETCC_TR_NOT,   /* not produced by find_const_transform() */
+       SETCC_TR_AND,   /* step.val holds the constant operand */
+       SETCC_TR_SET,   /* the Setcc itself; recorded as the last step */
+       SETCC_TR_SBB,   /* not produced by find_const_transform() */
+};
+
+typedef struct setcc_transform { /* result of find_const_transform() */
+       unsigned num_steps;         /* number of valid entries in steps[] */
+       unsigned permutate_cmp_ins; /* non-zero: gen_Mux toggles ins_permuted on the flags node;
+                                    * find_const_transform() currently always stores 0 */
+       pn_Cmp   pnc;               /* compare relation to use (may have been negated) */
+       struct {
+               enum setcc_transform_insn  transform; /* kind of this step */
+               long val;   /* constant operand (set for ADD and AND, 0 or inherited otherwise) */
+               int  scale; /* shift amount for SHL / LEA / LEAxx steps */
+       } steps[4];
+} setcc_transform_t;
+
+/**
+ * Setcc can only produce the results 0 and 1.
+ * Find a sequence of steps that turns such a 0/1 result into the two
+ * constants t and f, and store it in res.
+ *
+ * The steps are recorded in res->steps beginning with the one that has to
+ * be applied last; SETCC_TR_SET is recorded as the final entry (gen_Mux
+ * walks the array from num_steps - 1 down to 0). The values may be swapped
+ * on the way, in which case the compare relation stored in res->pnc is the
+ * negated one.
+ *
+ * @param pnc            compare relation under which t is selected
+ * @param t              constant selected when the condition holds
+ * @param f              constant selected otherwise
+ * @param res            receives pnc, the steps and their count;
+ *                       permutate_cmp_ins is always set to 0
+ * @param can_permutate  not used by the current implementation
+ *
+ * Panics if t - f cannot be computed or if a needed constant does not fit
+ * into a long.
+ */
+static void find_const_transform(pn_Cmp pnc, tarval *t, tarval *f, setcc_transform_t *res, int can_permutate)
+{
+       unsigned step = 0;
+
+       res->num_steps = 0;
+       res->permutate_cmp_ins = 0;
+
+       if (tarval_is_null(t)) { /* t is 0: swap the values and negate the condition */
+               tarval *tmp = t;
+               t = f;
+               f = tmp;
+               pnc = ia32_get_negated_pnc(pnc);
+       } else if (tarval_cmp(t, f) == pn_Cmp_Lt) {
+               // t < f: swap (and negate the condition) so that t is the bigger one
+               tarval *tmp = t;
+               t = f;
+               f = tmp;
+               pnc = ia32_get_negated_pnc(pnc);
+       }
+       res->pnc = pnc;
+
+       if (! tarval_is_null(f)) { /* reduce to the pair (t - f, 0) and remember f as an ADD step */
+               tarval *t_sub = tarval_sub(t, f, NULL);
+
+               t = t_sub;
+               res->steps[step].transform = SETCC_TR_ADD;
+
+               if (t == tarval_bad)
+                       panic("constant subtract failed");
+               if (! tarval_is_long(f))
+                       panic("tarval is not long");
+
+               res->steps[step].val = get_tarval_long(f);
+               ++step;
+               f = tarval_sub(f, f, NULL); /* f is 0 from here on */
+               assert(tarval_is_null(f));
+       }
+
+       if (tarval_is_one(t)) { /* plain 0/1: the Setcc result is already right */
+               res->steps[step].transform = SETCC_TR_SET;
+               res->num_steps = ++step;
+               return;
+       }
+
+       if (tarval_is_minus_one(t)) { /* 0/-1: negate the Setcc result */
+               res->steps[step].transform = SETCC_TR_NEG;
+               ++step;
+               res->steps[step].transform = SETCC_TR_SET;
+               res->num_steps = ++step;
+               return;
+       }
+       if (tarval_is_long(t)) {
+               long v = get_tarval_long(t);
+
+               res->steps[step].val = 0;
+               /* In the cases 2..9 a directly preceding ADD step is reused:
+                  stepping back overwrites its transform but keeps its val. */
+               switch (v) {
+               case 9:
+                       if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
+                               --step;
+                       res->steps[step].transform = SETCC_TR_LEAxx;
+                       res->steps[step].scale     = 3; /* (a << 3) + a */
+                       break;
+               case 8:
+                       if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
+                               --step;
+                       res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
+                       res->steps[step].scale     = 3; /* (a << 3) */
+                       break;
+               case 5:
+                       if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
+                               --step;
+                       res->steps[step].transform = SETCC_TR_LEAxx;
+                       res->steps[step].scale     = 2; /* (a << 2) + a */
+                       break;
+               case 4:
+                       if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
+                               --step;
+                       res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
+                       res->steps[step].scale     = 2; /* (a << 2) */
+                       break;
+               case 3:
+                       if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
+                               --step;
+                       res->steps[step].transform = SETCC_TR_LEAxx;
+                       res->steps[step].scale     = 1; /* (a << 1) + a */
+                       break;
+               case 2:
+                       if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
+                               --step;
+                       res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
+                       res->steps[step].scale     = 1; /* (a << 1) */
+                       break;
+               case 1:
+                       /* NOTE(review): returns without recording a SET step; t == 1 is
+                          normally caught by the tarval_is_one() check above -- confirm */
+                       res->num_steps = step;
+                       return;
+               default:
+                       if (! tarval_is_single_bit(t)) {
+                               res->steps[step].transform = SETCC_TR_AND;
+                               res->steps[step].val       = v;
+                               ++step;
+                               res->steps[step].transform = SETCC_TR_NEG;
+                       } else {
+                               int v = get_tarval_lowest_bit(t); /* shadows the outer v: bit index, not the value */
+                               assert(v >= 0);
+
+                               res->steps[step].transform = SETCC_TR_SHL;
+                               res->steps[step].scale     = v;
+                       }
+               }
+               ++step;
+               res->steps[step].transform = SETCC_TR_SET;
+               res->num_steps = ++step;
+               return;
+       }
+       panic("tarval is not long");
+}
+
 /**
  * Transforms a Mux node into some code sequence.
  *
@@ -3134,6 +3333,7 @@ static ir_node *gen_Mux(ir_node *node)
                                }
                        }
                }
+
                if (is_Const(mux_true) && is_Const(mux_false)) {
                        ia32_address_mode_t am;
                        ir_node             *load;
@@ -3141,7 +3341,7 @@ static ir_node *gen_Mux(ir_node *node)
                        unsigned            scale;
 
                        flags    = get_flags_node(cond, &pnc);
-                       new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*is_premuted=*/0);
+                       new_node = create_set_32bit(dbgi, new_block, flags, pnc, node);
 
                        if (ia32_cg_config.use_sse2) {
                                /* cannot load from different mode on SSE */
@@ -3182,7 +3382,7 @@ static ir_node *gen_Mux(ir_node *node)
                        }
 
                        am.ls_mode            = new_mode;
-                       am.addr.base          = noreg_GP;
+                       am.addr.base          = get_symconst_base();
                        am.addr.index         = new_node;
                        am.addr.mem           = nomem;
                        am.addr.offset        = 0;
@@ -3223,12 +3423,12 @@ static ir_node *gen_Mux(ir_node *node)
                                        is_Const_0(mux_false) && is_Sub(mux_true) &&
                                        get_Sub_left(mux_true) == cmp_left && get_Sub_right(mux_true) == cmp_right) {
                                        /* Mux(a >=u b, a - b, 0) unsigned Doz */
-                                       return create_Doz(node, cmp_left, cmp_right);
+                                       return create_doz(node, cmp_left, cmp_right);
                                } else if ((pnc & pn_Cmp_Lt) && !mode_is_signed(mode) &&
                                        is_Const_0(mux_true) && is_Sub(mux_false) &&
                                        get_Sub_left(mux_false) == cmp_left && get_Sub_right(mux_false) == cmp_right) {
                                        /* Mux(a <=u b, 0, a - b) unsigned Doz */
-                                       return create_Doz(node, cmp_left, cmp_right);
+                                       return create_doz(node, cmp_left, cmp_right);
                                }
                        }
                }
@@ -3237,16 +3437,70 @@ static ir_node *gen_Mux(ir_node *node)
 
                if (is_Const(mux_true) && is_Const(mux_false)) {
                        /* both are const, good */
-                       if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
-                               new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*is_premuted=*/0);
-                       } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
-                               new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*is_premuted=*/1);
-                       } else {
-                               /* Not that simple. */
-                               goto need_cmov;
+                       tarval *tv_true = get_Const_tarval(mux_true);
+                       tarval *tv_false = get_Const_tarval(mux_false);
+                       setcc_transform_t res;
+                       int step;
+
+                       /* check if flags is a cmp node and we are the only user,
+                          i.e no other user yet */
+                       int permutate_allowed = 0;
+                       if (is_ia32_Cmp(flags) && get_irn_n_edges(flags) == 0) {
+                               /* yes, we can permutate its inputs */
+                               permutate_allowed = 1;
+                       }
+                       find_const_transform(pnc, tv_true, tv_false, &res, 0);
+                       new_node = node;
+                       if (res.permutate_cmp_ins) {
+                               ia32_attr_t *attr = get_ia32_attr(flags);
+                               attr->data.ins_permuted ^= 1;
+                       }
+                       for (step = (int)res.num_steps - 1; step >= 0; --step) {
+                               ir_node *imm;
+
+                               switch (res.steps[step].transform) {
+                               case SETCC_TR_ADD:
+                                       imm = ia32_immediate_from_long(res.steps[step].val);
+                                       new_node = new_bd_ia32_Add(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
+                                       break;
+                               case SETCC_TR_ADDxx:
+                                       new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
+                                       break;
+                               case SETCC_TR_LEA:
+                                       new_node = new_bd_ia32_Lea(dbgi, new_block, noreg_GP, new_node);
+                                       set_ia32_am_scale(new_node, res.steps[step].scale);
+                                       set_ia32_am_offs_int(new_node, res.steps[step].val);
+                                       break;
+                               case SETCC_TR_LEAxx:
+                                       new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
+                                       set_ia32_am_scale(new_node, res.steps[step].scale);
+                                       set_ia32_am_offs_int(new_node, res.steps[step].val);
+                                       break;
+                               case SETCC_TR_SHL:
+                                       imm = ia32_immediate_from_long(res.steps[step].scale);
+                                       new_node = new_bd_ia32_Shl(dbgi, new_block, new_node, imm);
+                                       break;
+                               case SETCC_TR_NEG:
+                                       new_node = new_bd_ia32_Neg(dbgi, new_block, new_node);
+                                       break;
+                               case SETCC_TR_NOT:
+                                       new_node = new_bd_ia32_Not(dbgi, new_block, new_node);
+                                       break;
+                               case SETCC_TR_AND:
+                                       imm = ia32_immediate_from_long(res.steps[step].val);
+                                       new_node = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
+                                       break;
+                               case SETCC_TR_SET:
+                                       new_node = create_set_32bit(dbgi, new_block, flags, res.pnc, new_node);
+                                       break;
+                               case SETCC_TR_SBB:
+                                       new_node = new_bd_ia32_Sbb0(dbgi, new_block, flags);
+                                       break;
+                               default:
+                                       panic("unknown setcc transform");
+                               }
                        }
                } else {
-need_cmov:
                        new_node = create_CMov(node, cond, flags, pnc);
                }
                return new_node;
@@ -3504,10 +3758,9 @@ static ir_node *gen_Conv(ir_node *node)
        assert(!mode_is_int(src_mode) || src_bits <= 32);
        assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
 
+       /* modeB -> X should already be lowered by the lower_mode_b pass */
        if (src_mode == mode_b) {
-               assert(mode_is_int(tgt_mode) || mode_is_reference(tgt_mode));
-               /* nothing to do, we already model bools as 0/1 ints */
-               return be_transform_node(op);
+               panic("ConvB not lowered %+F", node);
        }
 
        if (src_mode == tgt_mode) {
@@ -3528,16 +3781,6 @@ static ir_node *gen_Conv(ir_node *node)
                new_op = be_transform_node(op);
                /* we convert from float ... */
                if (mode_is_float(tgt_mode)) {
-#if 0
-                       /* Matze: I'm a bit unsure what the following is for? seems wrong
-                        * to me... */
-                       if (src_mode == mode_E && tgt_mode == mode_D
-                                       && !get_Conv_strict(node)) {
-                               DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
-                               return new_op;
-                       }
-#endif
-
                        /* ... to float */
                        if (ia32_cg_config.use_sse2) {
                                DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
@@ -3775,6 +4018,7 @@ static ir_node *gen_be_SubSP(ir_node *node)
  */
 static ir_node *gen_Phi(ir_node *node)
 {
+       const arch_register_req_t *req;
        ir_node  *block = be_transform_node(get_nodes_block(node));
        ir_graph *irg   = current_ir_graph;
        dbg_info *dbgi  = get_irn_dbg_info(node);
@@ -3786,12 +4030,17 @@ static ir_node *gen_Phi(ir_node *node)
                assert(get_mode_size_bits(mode) <= 32);
                /* all integer operations are on 32bit registers now */
                mode = mode_Iu;
+               req  = ia32_reg_classes[CLASS_ia32_gp].class_req;
        } else if (mode_is_float(mode)) {
                if (ia32_cg_config.use_sse2) {
                        mode = mode_xmm;
+                       req  = ia32_reg_classes[CLASS_ia32_xmm].class_req;
                } else {
                        mode = mode_vfp;
+                       req  = ia32_reg_classes[CLASS_ia32_vfp].class_req;
                }
+       } else {
+               req = arch_no_register_req;
        }
 
        /* phi nodes allow loops, so we use the old arguments for now
@@ -3801,11 +4050,26 @@ static ir_node *gen_Phi(ir_node *node)
        copy_node_attr(node, phi);
        be_duplicate_deps(node, phi);
 
+       arch_set_out_register_req(phi, 0, req);
+
        be_enqueue_preds(node);
 
        return phi;
 }
 
+static ir_node *gen_Jmp(ir_node *node) /* Transforms a Jmp: builds an ia32 Jmp node for it and returns that node. */
+{
+       ir_node  *block     = get_nodes_block(node);
+       ir_node  *new_block = be_transform_node(block); /* the ia32 Jmp is created in this block, not in the node's own block */
+       dbg_info *dbgi      = get_irn_dbg_info(node); /* debug info of the input node is passed on to the new node */
+       ir_node  *new_node;
+
+       new_node = new_bd_ia32_Jmp(dbgi, new_block);
+       SET_IA32_ORIG_NODE(new_node, node); /* NOTE(review): presumably records node as the origin of new_node - confirm against the macro definition */
+
+       return new_node;
+}
+
 /**
  * Transform IJmp
  */
@@ -3845,7 +4109,6 @@ static ir_node *gen_Bound(ir_node *node)
        if (is_Const_0(lower)) {
                /* typical case for Java */
                ir_node  *sub, *res, *flags, *block;
-               ir_graph *irg  = current_ir_graph;
 
                res = gen_binop(node, get_Bound_index(node), get_Bound_upper(node),
                        new_bd_ia32_Sub, match_mode_neutral     | match_am | match_immediate);
@@ -4081,7 +4344,7 @@ static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
                ir_node *count = ia32_create_Immediate(NULL, 0, 31);
                ir_node *fadd;
 
-               am.addr.base          = noreg_GP;
+               am.addr.base          = get_symconst_base();
                am.addr.index         = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
                am.addr.mem           = nomem;
                am.addr.offset        = 0;
@@ -4206,7 +4469,6 @@ static ir_node *gen_Proj_be_SubSP(ir_node *node)
        ir_node  *block    = be_transform_node(get_nodes_block(node));
        ir_node  *pred     = get_Proj_pred(node);
        ir_node  *new_pred = be_transform_node(pred);
-       ir_graph *irg      = current_ir_graph;
        dbg_info *dbgi     = get_irn_dbg_info(node);
        long     proj      = get_Proj_proj(node);
 
@@ -4301,7 +4563,7 @@ static ir_node *gen_Proj_Load(ir_node *node)
                case pn_Load_X_except:
                        /* This Load might raise an exception. Mark it. */
                        set_ia32_exc_label(new_pred, 1);
-                       return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_xLoad_X_exc);
+                       return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_vfld_X_exc);
                default:
                        break;
                }
@@ -4451,7 +4713,6 @@ static ir_node *gen_Proj_Quot(ir_node *node)
 static ir_node *gen_be_Call(ir_node *node)
 {
        dbg_info       *const dbgi      = get_irn_dbg_info(node);
-       ir_graph       *const irg       = current_ir_graph;
        ir_node        *const src_block = get_nodes_block(node);
        ir_node        *const block     = be_transform_node(src_block);
        ir_node        *const src_mem   = get_irn_n(node, be_pos_Call_mem);
@@ -4772,7 +5033,7 @@ static ir_node *gen_ffs(ir_node *node)
        flag = new_r_Proj(block, real, mode_b, pn_ia32_flags);
 
        /* sete */
-       set = new_bd_ia32_Set(dbgi, block, flag, pn_Cmp_Eq, 0);
+       set = new_bd_ia32_Setcc(dbgi, block, flag, pn_Cmp_Eq);
        SET_IA32_ORIG_NODE(set, node);
 
        /* conv to 32bit */
@@ -4842,7 +5103,7 @@ static ir_node *gen_parity(ir_node *node)
        cmp = fix_mem_proj(cmp, &am);
 
        /* setp */
-       new_node = new_bd_ia32_Set(dbgi, new_block, cmp, ia32_pn_Cmp_parity, 0);
+       new_node = new_bd_ia32_Setcc(dbgi, new_block, cmp, ia32_pn_Cmp_parity);
        SET_IA32_ORIG_NODE(new_node, node);
 
        /* conv to 32bit */
@@ -5220,7 +5481,6 @@ static ir_node *gen_Proj_be_Call(ir_node *node)
        ir_node  *block       = be_transform_node(get_nodes_block(node));
        ir_node  *call        = get_Proj_pred(node);
        ir_node  *new_call    = be_transform_node(call);
-       ir_graph *irg         = current_ir_graph;
        dbg_info *dbgi        = get_irn_dbg_info(node);
        long      proj        = get_Proj_proj(node);
        ir_mode  *mode        = get_irn_mode(node);
@@ -5249,7 +5509,8 @@ static ir_node *gen_Proj_be_Call(ir_node *node)
                assert(req->type & arch_register_req_type_limited);
 
                for (i = 0; i < n_outs; ++i) {
-                       arch_register_req_t const *const new_req = get_ia32_out_req(new_call, i);
+                       arch_register_req_t const *const new_req
+                               = arch_get_out_register_req(new_call, i);
 
                        if (!(new_req->type & arch_register_req_type_limited) ||
                            new_req->cls      != req->cls                     ||
@@ -5323,7 +5584,7 @@ static ir_node *gen_Proj_ASM(ir_node *node)
        long     pos      = get_Proj_proj(node);
 
        if (mode == mode_M) {
-               pos = arch_irn_get_n_outs(new_pred) + 1;
+               pos = arch_irn_get_n_outs(new_pred)-1;
        } else if (mode_is_int(mode) || mode_is_reference(mode)) {
                mode = mode_Iu;
        } else if (mode_is_float(mode)) {
@@ -5460,6 +5721,7 @@ static void register_transformers(void)
        GEN(Mux);
        GEN(Proj);
        GEN(Phi);
+       GEN(Jmp);
        GEN(IJmp);
        GEN(Bound);
 
@@ -5586,7 +5848,7 @@ static void add_missing_keep_walker(ir_node *node, void *data)
                        continue;
                }
 
-               req = get_ia32_out_req(node, i);
+               req = arch_get_out_register_req(node, i);
                cls = req->cls;
                if (cls == NULL) {
                        continue;
@@ -5600,7 +5862,7 @@ static void add_missing_keep_walker(ir_node *node, void *data)
                if (last_keep != NULL) {
                        be_Keep_add_node(last_keep, cls, in[0]);
                } else {
-                       last_keep = be_new_Keep(cls, block, 1, in);
+                       last_keep = be_new_Keep(block, 1, in);
                        if (sched_is_scheduled(node)) {
                                sched_add_after(node, last_keep);
                        }
@@ -5721,9 +5983,9 @@ void ia32_transform_graph(ia32_code_gen_t *cg)
        initial_fpcw  = NULL;
        no_pic_adjust = 0;
 
-       BE_TIMER_PUSH(t_heights);
+       be_timer_push(T_HEIGHTS);
        heights      = heights_new(cg->irg);
-       BE_TIMER_POP(t_heights);
+       be_timer_pop(T_HEIGHTS);
        ia32_calculate_non_address_mode_nodes(cg->birg);
 
        /* the transform phase is not safe for CSE (yet) because several nodes get