backend_marked was a buggy/wrong concept, removed it
[libfirm] / ir / be / ia32 / ia32_emitter.c
index 8909df2..4716f55 100644 (file)
@@ -282,11 +282,10 @@ void ia32_emit_source_register(const ir_node *node, int pos)
 
 static void ia32_emit_entity(ir_entity *entity, int no_pic_adjust)
 {
-       set_entity_backend_marked(entity, 1);
        be_gas_emit_entity(entity);
 
        if (get_entity_owner(entity) == get_tls_type()) {
-               if (get_entity_visibility(entity) == visibility_external_allocated) {
+               if (get_entity_linkage(entity) & IR_LINKAGE_EXTERN) {
                        be_emit_cstring("@INDNTPOFF");
                } else {
                        be_emit_cstring("@NTPOFF");
@@ -556,6 +555,7 @@ static void ia32_emit_cmp_suffix(int pnc)
                be_emit_char('p');
                return;
        }
+
        if (pnc & ia32_pn_Cmp_float || pnc & ia32_pn_Cmp_unsigned) {
                str = cmp2condition_u[pnc & 7];
        } else {
@@ -568,7 +568,9 @@ static void ia32_emit_cmp_suffix(int pnc)
 typedef enum ia32_emit_mod_t {
        EMIT_RESPECT_LS   = 1U << 0,
        EMIT_ALTERNATE_AM = 1U << 1,
-       EMIT_LONG         = 1U << 2
+       EMIT_LONG         = 1U << 2, /* set by the 'l' modifier in ia32_emitf() */
+       EMIT_HIGH_REG     = 1U << 3, /* set by '>': %R emits the high 8bit register */
+       EMIT_LOW_REG      = 1U << 4  /* set by '<': %R emits the low 8bit register */
 } ia32_emit_mod_t;
 
 /**
@@ -647,9 +649,11 @@ void ia32_emit_am(const ir_node *node)
  * %d   signed int              signed int
  *
  * x starts at 0
- * # modifier for %ASx, %D and %S uses ls mode of node to alter register width
+ * # modifier for %ASx, %D, %R, and %S uses ls mode of node to alter register width
  * * modifier does not prefix immediates with $, but AM with *
  * l modifier for %lu and %ld
+ * > modifier to output high 8bit register (ah, bh)
+ * < modifier to output low 8bit register (al, bl)
  */
 static void ia32_emitf(const ir_node *node, const char *fmt, ...)
 {
@@ -678,20 +682,19 @@ static void ia32_emitf(const ir_node *node, const char *fmt, ...)
                        break;
 
                ++fmt;
-               if (*fmt == '*') {
-                       mod |= EMIT_ALTERNATE_AM;
-                       ++fmt;
-               }
-
-               if (*fmt == '#') {
-                       mod |= EMIT_RESPECT_LS;
-                       ++fmt;
-               }
-
-               if (*fmt == 'l') {
-                       mod |= EMIT_LONG;
+               while (1) {
+                       switch(*fmt) {
+                       case '*': mod |= EMIT_ALTERNATE_AM; break;
+                       case '#': mod |= EMIT_RESPECT_LS;   break;
+                       case 'l': mod |= EMIT_LONG;         break;
+                       case '>': mod |= EMIT_HIGH_REG;     break;
+                       case '<': mod |= EMIT_LOW_REG;      break;
+                       default:
+                               goto end_of_mods;
+                       }
                        ++fmt;
                }
+end_of_mods:
 
                switch (*fmt++) {
                        case '%':
@@ -700,20 +703,20 @@ static void ia32_emitf(const ir_node *node, const char *fmt, ...)
 
                        case 'A': {
                                switch (*fmt++) {
+emit_AM:
                                        case 'M':
                                                if (mod & EMIT_ALTERNATE_AM)
                                                        be_emit_char('*');
-
                                                ia32_emit_am(node);
                                                break;
 
                                        case 'R': {
                                                const arch_register_t *reg = va_arg(ap, const arch_register_t*);
-                                               if (mod & EMIT_ALTERNATE_AM)
-                                                       be_emit_char('*');
                                                if (get_ia32_op_type(node) == ia32_AddrModeS) {
-                                                       ia32_emit_am(node);
+                                                       goto emit_AM;
                                                } else {
+                                                       if (mod & EMIT_ALTERNATE_AM)
+                                                               be_emit_char('*');
                                                        emit_register(reg, NULL);
                                                }
                                                break;
@@ -721,10 +724,8 @@ static void ia32_emitf(const ir_node *node, const char *fmt, ...)
 
                                        case 'S':
                                                if (get_ia32_op_type(node) == ia32_AddrModeS) {
-                                                       if (mod & EMIT_ALTERNATE_AM)
-                                                               be_emit_char('*');
-                                                       ia32_emit_am(node);
                                                        ++fmt;
+                                                       goto emit_AM;
                                                } else {
                                                        assert(get_ia32_op_type(node) == ia32_Normal);
                                                        goto emit_S;
@@ -772,7 +773,13 @@ static void ia32_emitf(const ir_node *node, const char *fmt, ...)
 
                        case 'R': {
                                const arch_register_t *reg = va_arg(ap, const arch_register_t*);
-                               emit_register(reg, NULL);
+                               if (mod & EMIT_HIGH_REG) {
+                                       emit_8bit_register_high(reg);
+                               } else if (mod & EMIT_LOW_REG) {
+                                       emit_8bit_register(reg);
+                               } else {
+                                       emit_register(reg, mod & EMIT_RESPECT_LS ? get_ia32_ls_mode(node) : NULL);
+                               }
                                break;
                        }
 
@@ -949,8 +956,7 @@ static ir_node *find_original_value(ir_node *node)
        }
 }
 
-static int determine_final_pnc(const ir_node *node, int flags_pos,
-                               int pnc)
+static int determine_final_pnc(const ir_node *node, int flags_pos, int pnc)
 {
        ir_node           *flags = get_irn_n(node, flags_pos);
        const ia32_attr_t *flags_attr;
@@ -1083,37 +1089,37 @@ static void emit_ia32_Jcc(const ir_node *node)
        if (pnc & ia32_pn_Cmp_float) {
                /* Some floating point comparisons require a test of the parity flag,
                 * which indicates that the result is unordered */
-               switch (pnc & 15) {
-                       case pn_Cmp_Uo: {
-                               ia32_emitf(proj_true, "\tjp %L\n");
-                               break;
-                       }
+               switch (pnc & 0x0f) {
+               case pn_Cmp_Uo: {
+                       ia32_emitf(proj_true, "\tjp %L\n");
+                       break;
+               }
 
-                       case pn_Cmp_Leg:
-                               ia32_emitf(proj_true, "\tjnp %L\n");
-                               break;
+               case pn_Cmp_Leg:
+                       ia32_emitf(proj_true, "\tjnp %L\n");
+                       break;
 
-                       case pn_Cmp_Eq:
-                       case pn_Cmp_Lt:
-                       case pn_Cmp_Le:
-                               /* we need a local label if the false proj is a fallthrough
-                                * as the falseblock might have no label emitted then */
-                               if (can_be_fallthrough(proj_false)) {
-                                       need_parity_label = 1;
-                                       ia32_emitf(proj_false, "\tjp 1f\n");
-                               } else {
-                                       ia32_emitf(proj_false, "\tjp %L\n");
-                               }
-                               goto emit_jcc;
+               case pn_Cmp_Eq:
+               case pn_Cmp_Lt:
+               case pn_Cmp_Le:
+                       /* we need a local label if the false proj is a fallthrough
+                        * as the falseblock might have no label emitted then */
+                       if (can_be_fallthrough(proj_false)) {
+                               need_parity_label = 1;
+                               ia32_emitf(proj_false, "\tjp 1f\n");
+                       } else {
+                               ia32_emitf(proj_false, "\tjp %L\n");
+                       }
+                       goto emit_jcc;
 
-                       case pn_Cmp_Ug:
-                       case pn_Cmp_Uge:
-                       case pn_Cmp_Ne:
-                               ia32_emitf(proj_true, "\tjp %L\n");
-                               goto emit_jcc;
+               case pn_Cmp_Ug:
+               case pn_Cmp_Uge:
+               case pn_Cmp_Ne:
+                       ia32_emitf(proj_true, "\tjp %L\n");
+                       goto emit_jcc;
 
-                       default:
-                               goto emit_jcc;
+               default:
+                       goto emit_jcc;
                }
        } else {
 emit_jcc:
@@ -1132,19 +1138,65 @@ emit_jcc:
        }
 }
 
-static void emit_ia32_CMov(const ir_node *node)
+/**
+ * Emits an ia32 Setcc into the result register (pn_ia32_Setcc_res).
+ *
+ * Non-float conditions need a single set<cc>. For float conditions the
+ * parity flag has to be taken into account as well (emit_ia32_Jcc notes that
+ * it indicates an unordered result), so some compares are built from two
+ * set<cc> on the low and high 8bit parts of the result register, which are
+ * then combined with andb/orb.
+ */
+static void emit_ia32_Setcc(const ir_node *node)
+{
+       const arch_register_t *dreg = get_out_reg(node, pn_ia32_Setcc_res);
+
+       pn_Cmp pnc = get_ia32_condcode(node);
+       pnc        = determine_final_pnc(node, n_ia32_Setcc_eflags, pnc);
+       if (pnc & ia32_pn_Cmp_float) {
+               switch (pnc & 0x0f) {
+               case pn_Cmp_Uo: /* unordered: the parity flag alone decides */
+                       ia32_emitf(node, "\tsetp %#R\n", dreg);
+                       return;
+
+               case pn_Cmp_Leg: /* ordered: the negated parity flag alone decides */
+                       ia32_emitf(node, "\tsetnp %#R\n", dreg);
+                       return;
+
+               case pn_Cmp_Eq:
+               case pn_Cmp_Lt:
+               case pn_Cmp_Le: /* result = condition AND not-parity */
+                       ia32_emitf(node, "\tset%P %<R\n", pnc, dreg);      /* condition -> low 8bit part */
+                       ia32_emitf(node, "\tsetnp %>R\n", dreg);           /* not-parity -> high 8bit part */
+                       ia32_emitf(node, "\tandb %>R, %<R\n", dreg, dreg); /* combine both parts */
+                       return;
+
+               case pn_Cmp_Ug:
+               case pn_Cmp_Uge:
+               case pn_Cmp_Ne: /* result = condition OR parity */
+                       ia32_emitf(node, "\tset%P %<R\n", pnc, dreg);     /* condition -> low 8bit part */
+                       ia32_emitf(node, "\tsetp %>R\n", dreg);           /* parity -> high 8bit part */
+                       ia32_emitf(node, "\torb %>R, %<R\n", dreg, dreg); /* combine both parts */
+                       return;
+
+               default: /* remaining float compares use the plain set<cc> below */
+                       break;
+               }
+       }
+       ia32_emitf(node, "\tset%P %#R\n", pnc, dreg); /* %#R: register width follows the ls mode of node */
+}
+
+static void emit_ia32_CMovcc(const ir_node *node)
 {
        const ia32_attr_t     *attr         = get_ia32_attr_const(node);
-       int                    ins_permuted = attr->data.ins_permuted;
        const arch_register_t *out          = arch_irn_get_register(node, pn_ia32_res);
        pn_Cmp                 pnc          = get_ia32_condcode(node);
        const arch_register_t *in_true;
        const arch_register_t *in_false;
 
-       pnc = determine_final_pnc(node, n_ia32_CMov_eflags, pnc);
+       pnc = determine_final_pnc(node, n_ia32_CMovcc_eflags, pnc);
+       /* although you can't set ins_permuted in the constructor it might still
+          be set by memory operand folding */
+       if (attr->data.ins_permuted)
+               pnc = ia32_get_negated_pnc(pnc);
 
-       in_true  = arch_get_irn_register(get_irn_n(node, n_ia32_CMov_val_true));
-       in_false = arch_get_irn_register(get_irn_n(node, n_ia32_CMov_val_false));
+       in_true  = arch_get_irn_register(get_irn_n(node, n_ia32_CMovcc_val_true));
+       in_false = arch_get_irn_register(get_irn_n(node, n_ia32_CMovcc_val_false));
 
        /* should be same constraint fullfilled? */
        if (out == in_false) {
@@ -1154,7 +1206,7 @@ static void emit_ia32_CMov(const ir_node *node)
 
                assert(get_ia32_op_type(node) == ia32_Normal);
 
-               ins_permuted = !ins_permuted;
+               pnc = ia32_get_negated_pnc(pnc);
 
                tmp      = in_true;
                in_true  = in_false;
@@ -1164,10 +1216,20 @@ static void emit_ia32_CMov(const ir_node *node)
                ia32_emitf(node, "\tmovl %R, %R\n", in_false, out);
        }
 
-       if (ins_permuted)
-               pnc = ia32_get_negated_pnc(pnc);
-
        /* TODO: handling of Nans isn't correct yet */
+       if (pnc & ia32_pn_Cmp_float) {
+               switch (pnc & 0x0f) {
+               case pn_Cmp_Uo:
+               case pn_Cmp_Leg:
+               case pn_Cmp_Eq:
+               case pn_Cmp_Lt:
+               case pn_Cmp_Le:
+               case pn_Cmp_Ug:
+               case pn_Cmp_Uge:
+               case pn_Cmp_Ne:
+                       panic("CMov with floatingpoint compare/parity not supported yet");
+               }
+       }
 
        ia32_emitf(node, "\tcmov%P %#AR, %#R\n", pnc, in_true, out);
 }
@@ -1191,12 +1253,12 @@ typedef struct _branch_t {
 
 /* jump table for switch generation */
 typedef struct _jmp_tbl_t {
-       ir_node  *defProj;         /**< default target */
-       long      min_value;       /**< smallest switch case */
-       long      max_value;       /**< largest switch case */
-       long      num_branches;    /**< number of jumps */
-       char     *label;           /**< label of the jump table */
-       branch_t *branches;        /**< jump array */
+       ir_node  *defProj;                 /**< default target */
+       long      min_value;               /**< smallest switch case */
+       long      max_value;               /**< largest switch case */
+       long      num_branches;            /**< number of jumps */
+       char      label[SNPRINTF_BUF_LEN]; /**< label of the jump table */
+       branch_t *branches;                /**< jump array */
 } jmp_tbl_t;
 
 /**
@@ -1213,29 +1275,21 @@ static int ia32_cmp_branch_t(const void *a, const void *b)
                return 1;
 }
 
-/**
- * Emits code for a SwitchJmp (creates a jump table if
- * possible otherwise a cmp-jmp cascade). Port from
- * cggg ia32 backend
- */
-static void emit_ia32_SwitchJmp(const ir_node *node)
+static void generate_jump_table(jmp_tbl_t *tbl, const ir_node *node)
 {
-       unsigned long       interval;
-       int                 last_value, i;
+       int                 i;
        long                pnc;
        long                default_pn;
-       jmp_tbl_t           tbl;
        ir_node            *proj;
        const ir_edge_t    *edge;
 
        /* fill the table structure */
-       tbl.label        = XMALLOCN(char, SNPRINTF_BUF_LEN);
-       tbl.label        = get_unique_label(tbl.label, SNPRINTF_BUF_LEN, ".TBL_");
-       tbl.defProj      = NULL;
-       tbl.num_branches = get_irn_n_edges(node) - 1;
-       tbl.branches     = XMALLOCNZ(branch_t, tbl.num_branches);
-       tbl.min_value    = INT_MAX;
-       tbl.max_value    = INT_MIN;
+       get_unique_label(tbl->label, SNPRINTF_BUF_LEN, ".TBL_");
+       tbl->defProj      = NULL;
+       tbl->num_branches = get_irn_n_edges(node) - 1;
+       tbl->branches     = XMALLOCNZ(branch_t, tbl->num_branches);
+       tbl->min_value    = LONG_MAX;
+       tbl->max_value    = LONG_MIN;
 
        default_pn = get_ia32_condcode(node);
        i = 0;
@@ -1248,23 +1302,38 @@ static void emit_ia32_SwitchJmp(const ir_node *node)
 
                /* check for default proj */
                if (pnc == default_pn) {
-                       assert(tbl.defProj == NULL && "found two default Projs at SwitchJmp");
-                       tbl.defProj = proj;
+                       assert(tbl->defProj == NULL && "found two default Projs at SwitchJmp");
+                       tbl->defProj = proj;
                } else {
-                       tbl.min_value = pnc < tbl.min_value ? pnc : tbl.min_value;
-                       tbl.max_value = pnc > tbl.max_value ? pnc : tbl.max_value;
+                       tbl->min_value = pnc < tbl->min_value ? pnc : tbl->min_value;
+                       tbl->max_value = pnc > tbl->max_value ? pnc : tbl->max_value;
 
                        /* create branch entry */
-                       tbl.branches[i].target = proj;
-                       tbl.branches[i].value  = pnc;
+                       tbl->branches[i].target = proj;
+                       tbl->branches[i].value  = pnc;
                        ++i;
                }
 
        }
-       assert(i == tbl.num_branches);
+       assert(i == tbl->num_branches);
 
        /* sort the branches by their number */
-       qsort(tbl.branches, tbl.num_branches, sizeof(tbl.branches[0]), ia32_cmp_branch_t);
+       qsort(tbl->branches, tbl->num_branches, sizeof(tbl->branches[0]), ia32_cmp_branch_t);
+}
+
+/**
+ * Emits code for a SwitchJmp (creates a jump table if
+ * possible otherwise a cmp-jmp cascade). Port from
+ * cggg ia32 backend
+ */
+static void emit_ia32_SwitchJmp(const ir_node *node)
+{
+       unsigned long       interval;
+       int                 last_value, i;
+       jmp_tbl_t           tbl;
+
+       /* fill the table structure */
+       generate_jump_table(&tbl, node);
 
        /* two-complement's magic make this work without overflow */
        interval = tbl.max_value - tbl.min_value;
@@ -1296,10 +1365,7 @@ static void emit_ia32_SwitchJmp(const ir_node *node)
                ia32_emitf(tbl.branches[0].target, "\tjmp %L\n");
        }
 
-       if (tbl.label)
-               free(tbl.label);
-       if (tbl.branches)
-               free(tbl.branches);
+       free(tbl.branches);
 }
 
 /**
@@ -1867,7 +1933,7 @@ static void ia32_register_emitters(void)
        /* other ia32 emitter functions */
        IA32_EMIT2(Conv_I2I8Bit, Conv_I2I);
        IA32_EMIT(Asm);
-       IA32_EMIT(CMov);
+       IA32_EMIT(CMovcc);
        IA32_EMIT(Call);
        IA32_EMIT(Const);
        IA32_EMIT(Conv_FP2FP);
@@ -1879,6 +1945,7 @@ static void ia32_register_emitters(void)
        IA32_EMIT(GetEIP);
        IA32_EMIT(IMul);
        IA32_EMIT(Jcc);
+       IA32_EMIT(Setcc);
        IA32_EMIT(LdTls);
        IA32_EMIT(Minus64Bit);
        IA32_EMIT(SwitchJmp);
@@ -2176,6 +2243,8 @@ void ia32_gen_routine(ia32_code_gen_t *ia32_cg, ir_graph *irg)
        isa      = cg->isa;
        do_pic   = cg->birg->main_env->options->pic;
 
+       be_gas_elf_type_char = '@';
+
        ia32_register_emitters();
 
        get_unique_label(pic_base_label, sizeof(pic_base_label), ".PIC_BASE");
@@ -2235,8 +2304,10 @@ static const lc_opt_table_entry_t ia32_emitter_options[] = {
 /* ==== Experimental binary emitter ==== */
 
 static unsigned char reg_gp_map[N_ia32_gp_REGS];
-static unsigned char reg_mmx_map[N_ia32_mmx_REGS];
-static unsigned char reg_sse_map[N_ia32_xmm_REGS];
+//static unsigned char reg_mmx_map[N_ia32_mmx_REGS];
+//static unsigned char reg_sse_map[N_ia32_xmm_REGS];
+static unsigned char pnc_map_signed[8];
+static unsigned char pnc_map_unsigned[8];
 
 static void build_reg_map(void)
 {
@@ -2248,8 +2319,43 @@ static void build_reg_map(void)
        reg_gp_map[REG_EBP] = 0x5;
        reg_gp_map[REG_ESI] = 0x6;
        reg_gp_map[REG_EDI] = 0x7;
+
+       pnc_map_signed[pn_Cmp_Eq]    = 0x04;
+       pnc_map_signed[pn_Cmp_Lt]    = 0x0C;
+       pnc_map_signed[pn_Cmp_Le]    = 0x0E;
+       pnc_map_signed[pn_Cmp_Gt]    = 0x0F;
+       pnc_map_signed[pn_Cmp_Ge]    = 0x0D;
+       pnc_map_signed[pn_Cmp_Lg]    = 0x05;
+
+       pnc_map_unsigned[pn_Cmp_Eq]    = 0x04;
+       pnc_map_unsigned[pn_Cmp_Lt]    = 0x02;
+       pnc_map_unsigned[pn_Cmp_Le]    = 0x06;
+       pnc_map_unsigned[pn_Cmp_Gt]    = 0x07;
+       pnc_map_unsigned[pn_Cmp_Ge]    = 0x03;
+       pnc_map_unsigned[pn_Cmp_Lg]    = 0x05;
+}
+
+/**
+ * Returns the condition code encoding for a pnc.
+ *
+ * ia32_pn_Cmp_parity maps to 0x0A. Otherwise the low 3 bits of @p pnc index
+ * pnc_map_unsigned (if the float or unsigned flag is set) or pnc_map_signed.
+ * Both tables are filled in build_reg_map(); a relation without an entry
+ * there yields 0 and trips the assertion.
+ */
+static unsigned char pnc2cc(int pnc)
+{
+       unsigned char cc;
+       if (pnc == ia32_pn_Cmp_parity) {
+               cc = 0x0A;
+       } else if (pnc & ia32_pn_Cmp_float || pnc & ia32_pn_Cmp_unsigned) {
+               cc = pnc_map_unsigned[pnc & 0x07];
+       } else {
+               cc = pnc_map_signed[pnc & 0x07];
+       }
+       assert(cc != 0); /* 0 means: no encoding was registered for this pnc */
+       return cc;
 }
 
+/**
+ * Sign extension bit values for binops. bemit_binop_with_imm() ORs
+ * SIGNEXT_IMM into the opcode byte when it emits the 8bit immediate form.
+ */
+enum SignExt {
+       UNSIGNED_IMM = 0,  /**< unsigned immediate */
+       SIGNEXT_IMM  = 2,  /**< sign extended immediate */
+};
+
 /** The mod encoding of the ModR/M */
 enum Mod {
        MOD_IND          = 0x00, /**< [reg1] */
@@ -2258,25 +2364,13 @@ enum Mod {
        MOD_REG          = 0xC0  /**< reg1 */
 };
 
-#define GET_MODE(code) ((code) & 0xC0)
-
-/** Sign extension bit values for binops */
-enum SignExt {
-       UNSIGNED_IMM = 0,  /**< unsigned immediate */
-       SIGNEXT_IMM  = 2,  /**< sign extended immediate */
-};
-
 /** create R/M encoding for ModR/M */
 #define ENC_RM(x) (x)
 /** create REG encoding for ModR/M */
 #define ENC_REG(x) ((x) << 3)
 
-/** create Base encoding for SIB */
-#define ENC_BASE(x) (x)
-/** create Index encoding for SIB */
-#define ENC_INDEX(x) ((x) << 3)
-/** create Scale encoding for SIB */
-#define ENC_SCALE(x) ((x) << 6)
+/** create encoding for a SIB byte */
+#define ENC_SIB(scale, index, base) ((scale) << 6 | (index) << 3 | (base))
 
 /* Node: The following routines are supposed to append bytes, words, dwords
    to the output stream.
@@ -2291,7 +2385,7 @@ static void bemit8(const unsigned char byte)
        be_emit_write_line();
 }
 
-static void bemit16(const unsigned u16)
+static void bemit16(const unsigned short u16)
 {
        be_emit_irprintf("\t.word 0x%x\n", u16);
        be_emit_write_line();
@@ -2303,6 +2397,10 @@ static void bemit32(const unsigned u32)
        be_emit_write_line();
 }
 
+/**
+ * Emit address of an entity. If @p is_relative is true then a relative
+ * offset from behind the address to the entity is created.
+ */
 static void bemit_entity(ir_entity *entity, bool entity_sign, int offset,
                          bool is_relative)
 {
@@ -2316,11 +2414,19 @@ static void bemit_entity(ir_entity *entity, bool entity_sign, int offset,
        be_emit_cstring("\t.long ");
        if (entity_sign)
                be_emit_char('-');
-       set_entity_backend_marked(entity, 1);
        be_gas_emit_entity(entity);
 
+       if (get_entity_owner(entity) == get_tls_type()) {
+               if (get_entity_linkage(entity) & IR_LINKAGE_EXTERN) {
+                       be_emit_cstring("@INDNTPOFF");
+               } else {
+                       be_emit_cstring("@NTPOFF");
+               }
+       }
+
        if (is_relative) {
                be_emit_cstring("-.");
+               offset -= 4;
        }
 
        if (offset != 0) {
@@ -2330,9 +2436,22 @@ static void bemit_entity(ir_entity *entity, bool entity_sign, int offset,
        be_emit_write_line();
 }
 
+static void bemit_jmp_destination(const ir_node *dest_block)
+{ /* Emits the line "\t.long <name of dest_block> - . - 4" as a jump displacement. */
+       be_emit_cstring("\t.long ");
+       ia32_emit_block_name(dest_block);
+       be_emit_cstring(" - . - 4\n"); /* presumably the 4 accounts for the size of the .long itself, so the value is relative to the position behind it (same convention as bemit_entity() with is_relative) — confirm */
+       be_emit_write_line();
+}
+
 /* end emit routines, all emitters following here should only use the functions
    above. */
 
+typedef enum reg_modifier { /* selects the 8bit part of a register for the ModR/M8 helpers */
+       REG_LOW  = 0, /* low part: the register encoding is used unchanged */
+       REG_HIGH = 1  /* high part: bemit_modrr8()/bemit_modrm8() add 4 to the encoding */
+} reg_modifier_t;
+
 /** Create a ModR/M byte for src1,src2 registers */
 static void bemit_modrr(const arch_register_t *src1,
                         const arch_register_t *src2)
@@ -2343,6 +2462,16 @@ static void bemit_modrr(const arch_register_t *src1,
        bemit8(modrm);
 }
 
+/**
+ * Create a ModR/M8 byte for src1,src2 registers.
+ *
+ * src1 is placed in the R/M field and src2 in the REG field, with the mod
+ * part set to MOD_REG. For each operand REG_HIGH adds 4 to the register
+ * encoding. Unlike bemit_modrm8() this function does not assert that the
+ * base encodings are below 4.
+ */
+static void bemit_modrr8(reg_modifier_t high_part1, const arch_register_t *src1,
+                                                reg_modifier_t high_part2, const arch_register_t *src2)
+{
+       unsigned char modrm = MOD_REG;
+       modrm |= ENC_RM(reg_gp_map[src1->index] +  (high_part1 == REG_HIGH ? 4 : 0));
+       modrm |= ENC_REG(reg_gp_map[src2->index] + (high_part2 == REG_HIGH ? 4 : 0));
+       bemit8(modrm);
+}
+
 /** Create a ModR/M byte for one register and extension */
 static void bemit_modru(const arch_register_t *reg, unsigned ext)
 {
@@ -2353,20 +2482,14 @@ static void bemit_modru(const arch_register_t *reg, unsigned ext)
        bemit8(modrm);
 }
 
-/**
- * Calculate the size of an (unsigned) immediate in bytes.
- *
- * @param offset  an offset
- */
-static unsigned get_unsigned_imm_size(unsigned offset)
+/**
+ * Create a ModR/M8 byte for one register.
+ *
+ * The register goes into the R/M field with the mod part set to MOD_REG;
+ * the REG field is left 0. REG_HIGH adds 4 to the register encoding.
+ */
+static void bemit_modrm8(reg_modifier_t high_part, const arch_register_t *reg)
 {
-       if (offset < 256) {
-               return 1;
-       } else if (offset < 65536) {
-               return 2;
-       } else {
-               return 4;
-       }
+       unsigned char modrm = MOD_REG;
+       assert(reg_gp_map[reg->index] < 4); /* only encodings 0-3 accepted, so adding 4 below stays under 8 */
+       modrm |= ENC_RM(reg_gp_map[reg->index] + (high_part == REG_HIGH ? 4 : 0));
+       modrm |= MOD_REG; /* no effect: modrm was already initialised with MOD_REG */
+       bemit8(modrm);
 }
 
 /**
@@ -2385,52 +2508,6 @@ static unsigned get_signed_imm_size(int offset)
        }
 }
 
-/**
- * Emit a binop with a immediate operand.
- *
- * @param node        the node to emit
- * @param opcode_eax  the opcode for the op eax, imm variant
- * @param opcode      the opcode for the reg, imm variant
- * @param ruval       the opcode extension for opcode
- */
-static void bemit_binop_with_imm(
-       const ir_node *node,
-       unsigned char opcode_ax,
-       unsigned char opcode, unsigned char ruval)
-{
-       const arch_register_t       *reg  = get_out_reg(node, 0);
-       const ir_node               *op   = get_irn_n(node, n_ia32_binary_right);
-       const ia32_immediate_attr_t *attr = get_ia32_immediate_attr_const(op);
-       unsigned                    size;
-
-       if (attr->symconst != NULL)
-               size = 4;
-       else {
-               /* check for sign extension */
-               size = get_signed_imm_size(attr->offset);
-       }
-
-       switch (size) {
-       case 1:
-               bemit8(opcode | SIGNEXT_IMM);
-               bemit_modru(reg, ruval);
-               bemit8((unsigned char)attr->offset);
-               return;
-       case 2:
-       case 4:
-               /* check for eax variant: this variant is shorter for 32bit immediates only */
-               if (reg->index == REG_EAX) {
-                       bemit8(opcode_ax);
-               } else {
-                       bemit8(opcode);
-                       bemit_modru(reg, ruval);
-               }
-               bemit_entity(attr->symconst, attr->sc_sign, attr->offset, false);
-               return;
-       }
-       panic("invalid imm size?!?");
-}
-
 /**
  * Emit an address mode.
  *
@@ -2449,6 +2526,7 @@ static void bemit_mod_am(unsigned reg, const ir_node *node)
        unsigned   sib       = 0;
        unsigned   emitoffs  = 0;
        bool       emitsib   = false;
+       unsigned   base_enc;
 
        /* set the mod part depending on displacement */
        if (ent != NULL) {
@@ -2465,55 +2543,42 @@ static void bemit_mod_am(unsigned reg, const ir_node *node)
                emitoffs = 32;
        }
 
-       /* determine if we need a SIB byte */
+       if (has_base) {
+               const arch_register_t *base_reg = arch_get_irn_register(base);
+               base_enc = reg_gp_map[base_reg->index];
+       } else {
+               /* Use the EBP encoding + MOD_IND if NO base register. There is
+                * always a 32bit offset present in this case. */
+               modrm    = MOD_IND;
+               base_enc = 0x05;
+               emitoffs = 32;
+       }
+
+       /* Determine if we need a SIB byte. */
        if (has_index) {
-               int scale;
                const arch_register_t *reg_index = arch_get_irn_register(index);
-               assert(reg_index->index != REG_ESP);
-               sib |= ENC_INDEX(reg_gp_map[reg_index->index]);
-
-               if (has_base) {
-                       const arch_register_t *reg = arch_get_irn_register(base);
-                       sib |= ENC_BASE(reg_gp_map[reg->index]);
-               } else {
-                       /* use the EBP encoding if NO base register */
-                       sib |= 0x05;
-               }
-
-               scale = get_ia32_am_scale(node);
+               int                    scale     = get_ia32_am_scale(node);
                assert(scale < 4);
-               sib |= ENC_SCALE(scale);
+               /* R/M set to ESP means SIB in 32bit mode. */
+               modrm   |= ENC_RM(0x04);
+               sib      = ENC_SIB(scale, reg_gp_map[reg_index->index], base_enc);
                emitsib = true;
+       } else if (base_enc == 0x04) {
+               /* for the above reason we are forced to emit a SIB when base is ESP.
+                * Only the base is used, index must be ESP too, which means no index.
+                */
+               modrm   |= ENC_RM(0x04);
+               sib      = ENC_SIB(0, 0x04, 0x04);
+               emitsib  = true;
+       } else {
+               modrm |= ENC_RM(base_enc);
        }
 
-       /* determine modrm byte */
-       if (emitsib) {
-               /* R/M set to ESP means SIB in 32bit mode */
-               modrm |= ENC_RM(0x04);
-       } else if (has_base) {
-               const arch_register_t *reg = arch_get_irn_register(base);
-               if (reg->index == REG_ESP) {
-                       /* for the above reason we are forced to emit a sib when base is
-                        * ESP. Only the base is used, index must be ESP too, which means no
-                        * index. */
-                       sib     = ENC_BASE(0x04) | ENC_INDEX(0x04);
-                       emitsib = true;
-
-               /* we are forced to emit a 8bit offset as EBP base without
-                  offset is a special case for SIB without base register */
-               } else if (reg->index == REG_EBP && emitoffs == 0) {
-                       assert(GET_MODE(modrm) == MOD_IND);
-                       emitoffs  = 8;
-                       modrm    |= MOD_IND_BYTE_OFS;
-               }
-               modrm |= ENC_RM(reg_gp_map[reg->index]);
-       } else {
-               /* only displacement: Use EBP + disp encoding in 32bit mode */
-               if (emitoffs == 0) {
-                       emitoffs = 8;
-                       modrm    = MOD_IND_BYTE_OFS;
-               }
-               modrm |= ENC_RM(0x05);
+       /* We are forced to emit an 8bit offset as EBP base without offset is a
+        * special case for SIB without base register. */
+       if (base_enc == 0x05 && emitoffs == 0) {
+               modrm    |= MOD_IND_BYTE_OFS;
+               emitoffs  = 8;
        }
 
        modrm |= ENC_REG(reg);
@@ -2530,31 +2595,79 @@ static void bemit_mod_am(unsigned reg, const ir_node *node)
        }
 }
 
+/**
+ * Emit a binop with an immediate operand.
+ *
+ * @param node        the node to emit
+ * @param opcode_ax   the opcode for the op eax, imm variant
+ * @param opcode      the opcode for the reg, imm variant
+ * @param ruval       the opcode extension for opcode
+ */
+static void bemit_binop_with_imm(
+       const ir_node *node,
+       unsigned char opcode_ax,
+       unsigned char opcode, unsigned char ruval)
+{
+       /* Use in-reg, because some instructions (cmp, test) have no out-reg. */
+       const ir_node               *op   = get_irn_n(node, n_ia32_binary_right);
+       const ia32_immediate_attr_t *attr = get_ia32_immediate_attr_const(op);
+       unsigned                     size;
+
+       /* Some instructions (test) have no short form with 32bit value + 8bit
+        * immediate. */
+       if (attr->symconst != NULL || opcode & SIGNEXT_IMM) {
+               size = 4;
+       } else {
+               /* check for sign extension */
+               size = get_signed_imm_size(attr->offset);
+       }
+
+       switch (size) {
+       case 1:
+               bemit8(opcode | SIGNEXT_IMM);
+               /* cmp has this special mode */
+               if (get_ia32_op_type(node) == ia32_AddrModeS) {
+                       bemit_mod_am(ruval, node);
+               } else {
+                       const arch_register_t *reg = get_in_reg(node, n_ia32_binary_left);
+                       bemit_modru(reg, ruval);
+               }
+               bemit8((unsigned char)attr->offset);
+               return;
+       case 2:
+       case 4:
+               /* check for eax variant: this variant is shorter for 32bit immediates only */
+               if (get_ia32_op_type(node) == ia32_AddrModeS) {
+                       bemit8(opcode);
+                       bemit_mod_am(ruval, node);
+               } else {
+                       const arch_register_t *reg = get_in_reg(node, n_ia32_binary_left);
+                       if (reg->index == REG_EAX) {
+                               bemit8(opcode_ax);
+                       } else {
+                               bemit8(opcode);
+                               bemit_modru(reg, ruval);
+                       }
+               }
+               bemit_entity(attr->symconst, attr->sc_sign, attr->offset, false);
+               return;
+       }
+       panic("invalid imm size?!?");
+}
+
 /**
  * Emits a binop.
  */
 static void bemit_binop_2(const ir_node *node, unsigned code)
 {
-       const arch_register_t *out    = get_in_reg(node, n_ia32_binary_left);
-       ia32_op_type_t        am_type = get_ia32_op_type(node);
-       unsigned char         d       = 0;
-       const arch_register_t *op2;
-
-       switch (am_type) {
-       case ia32_AddrModeS:
-               d = 2;
-               /* FALLTHROUGH */
-       case ia32_AddrModeD:
-               bemit8(code | d);
+       const arch_register_t *out = get_in_reg(node, n_ia32_binary_left);
+       bemit8(code);
+       if (get_ia32_op_type(node) == ia32_Normal) {
+               const arch_register_t *op2 = get_in_reg(node, n_ia32_binary_right);
+               bemit_modrr(op2, out);
+       } else {
                bemit_mod_am(reg_gp_map[out->index], node);
-               return;
-       case ia32_Normal:
-               bemit8(code);
-               op2 = get_in_reg(node, n_ia32_binary_right);
-               bemit_modrr(out, op2);
-               return;
        }
-       panic("invalid address mode");
 }
 
 /**
@@ -2564,10 +2677,6 @@ static void bemit_binop(const ir_node *node, const unsigned char opcodes[4])
 {
        ir_node *right = get_irn_n(node, n_ia32_binary_right);
        if (is_ia32_Immediate(right)) {
-               /* there's a shorter variant with DEST=EAX */
-               const arch_register_t *reg = get_out_reg(node, 0);
-               if (reg->index == REG_EAX)
-
                bemit_binop_with_imm(node, opcodes[1], opcodes[2], opcodes[3]);
        } else {
                bemit_binop_2(node, opcodes[0]);
@@ -2579,19 +2688,29 @@ static void bemit_binop(const ir_node *node, const unsigned char opcodes[4])
  */
 static void bemit_unop(const ir_node *node, unsigned char code, unsigned char ext, int input)
 {
-       ia32_op_type_t am_type = get_ia32_op_type(node);
-
        bemit8(code);
-       if (am_type == ia32_AddrModeD) {
-               bemit8(code);
-               bemit_mod_am(ext, node);
-       } else {
+       if (get_ia32_op_type(node) == ia32_Normal) {
                const arch_register_t *in = get_in_reg(node, input);
-               assert(am_type == ia32_Normal);
                bemit_modru(in, ext);
-       }
+       } else {
+               bemit_mod_am(ext, node);
+       }
+}
+
+static void bemit_unop_reg(const ir_node *node, unsigned char code, int input)
+{
+       const arch_register_t *out = get_out_reg(node, 0);
+       bemit_unop(node, code, reg_gp_map[out->index], input);
 }
 
+static void bemit_unop_mem(const ir_node *node, unsigned char code, unsigned char ext)
+{
+       unsigned size = get_mode_size_bits(get_ia32_ls_mode(node));
+       if (size == 16)
+               bemit8(0x66);
+       bemit8(size == 8 ? code : code + 1);
+       bemit_mod_am(ext, node);
+}
 
 static void bemit_immediate(const ir_node *node, bool relative)
 {
@@ -2601,9 +2720,8 @@ static void bemit_immediate(const ir_node *node, bool relative)
 
 static void bemit_copy(const ir_node *copy)
 {
-       const ir_node *op = be_get_Copy_op(copy);
-       const arch_register_t *in  = arch_get_irn_register(op);
-       const arch_register_t *out = arch_get_irn_register(copy);
+       const arch_register_t *in  = get_in_reg(copy, 0);
+       const arch_register_t *out = get_out_reg(copy, 0);
 
        if (in == out || is_unknown_reg(in))
                return;
@@ -2615,8 +2733,39 @@ static void bemit_copy(const ir_node *copy)
                panic("NIY");
        } else {
                assert(arch_register_get_class(in) == &ia32_reg_classes[CLASS_ia32_gp]);
-               bemit8(0x89);
-               bemit_modrr(out, in);
+               bemit8(0x8B);
+               bemit_modrr(in, out);
+       }
+}
+
+static void bemit_perm(const ir_node *node)
+{
+       const arch_register_t       *in0  = arch_get_irn_register(get_irn_n(node, 0));
+       const arch_register_t       *in1  = arch_get_irn_register(get_irn_n(node, 1));
+       const arch_register_class_t *cls0 = arch_register_get_class(in0);
+
+       assert(cls0 == arch_register_get_class(in1) && "Register class mismatch at Perm");
+
+       if (cls0 == &ia32_reg_classes[CLASS_ia32_gp]) {
+               if (in0->index == REG_EAX) {
+                       bemit8(0x90 + reg_gp_map[in1->index]);
+               } else if (in1->index == REG_EAX) {
+                       bemit8(0x90 + reg_gp_map[in0->index]);
+               } else {
+                       bemit8(0x87);
+                       bemit_modrr(in0, in1);
+               }
+       } else if (cls0 == &ia32_reg_classes[CLASS_ia32_xmm]) {
+               panic("unimplemented"); // TODO implement
+               //ia32_emitf(NULL, "\txorpd %R, %R\n", in1, in0);
+               //ia32_emitf(NULL, "\txorpd %R, %R\n", in0, in1);
+               //ia32_emitf(node, "\txorpd %R, %R\n", in1, in0);
+       } else if (cls0 == &ia32_reg_classes[CLASS_ia32_vfp]) {
+               /* is a NOP */
+       } else if (cls0 == &ia32_reg_classes[CLASS_ia32_st]) {
+               /* is a NOP */
+       } else {
+               panic("unexpected register class in be_Perm (%+F)", node);
        }
 }
 
@@ -2643,15 +2792,65 @@ static void bemit_ ## op(const ir_node *node) {                           \
        bemit_binop(node, op ## _codes);                                      \
 }
 
-/*   insn  def  eax,imm   imm  */
-BINOP(add, 0x01, 0x05, 0x81, 0 )
-BINOP(or,  0x09, 0x0D, 0x81, 1 )
-BINOP(adc, 0x11, 0x15, 0x81, 2 )
-BINOP(sbb, 0x19, 0x1D, 0x81, 3 )
-BINOP(and, 0x21, 0x25, 0x81, 4 )
-BINOP(sub, 0x29, 0x2D, 0x81, 5 )
-BINOP(xor, 0x31, 0x35, 0x81, 6 )
-BINOP(cmp, 0x39, 0x3D, 0x81, 7 )
+/*    insn  def  eax,imm   imm */
+BINOP(add,  0x03, 0x05, 0x81, 0)
+BINOP(or,   0x0B, 0x0D, 0x81, 1)
+BINOP(adc,  0x13, 0x15, 0x81, 2)
+BINOP(sbb,  0x1B, 0x1D, 0x81, 3)
+BINOP(and,  0x23, 0x25, 0x81, 4)
+BINOP(sub,  0x2B, 0x2D, 0x81, 5)
+BINOP(xor,  0x33, 0x35, 0x81, 6)
+BINOP(test, 0x85, 0xA9, 0xF7, 0)
+
+#define BINOPMEM(op, ext) \
+static void bemit_##op(const ir_node *node) \
+{ \
+       ir_node *val; \
+       unsigned size = get_mode_size_bits(get_ia32_ls_mode(node)); \
+       if (size == 16) \
+               bemit8(0x66); \
+       val = get_irn_n(node, n_ia32_unary_op); \
+       if (is_ia32_Immediate(val)) { \
+               const ia32_immediate_attr_t *attr   = get_ia32_immediate_attr_const(val); \
+               int                          offset = attr->offset; \
+               if (attr->symconst == NULL && get_signed_imm_size(offset) == 1) { \
+                       bemit8(0x83); \
+                       bemit_mod_am(ext, node); \
+                       bemit8(offset); \
+               } else { \
+                       bemit8(0x81); \
+                       bemit_mod_am(ext, node); \
+                       if (size == 16) { \
+                               bemit16(offset); \
+                       } else { \
+                               bemit_entity(attr->symconst, attr->sc_sign, offset, false); \
+                       } \
+               } \
+       } else { \
+               bemit8(ext << 3 | 1); \
+               bemit_mod_am(reg_gp_map[get_out_reg(val, 0)->index], node); \
+       } \
+} \
+ \
+static void bemit_##op##8bit(const ir_node *node) \
+{ \
+       ir_node *val = get_irn_n(node, n_ia32_unary_op); \
+       if (is_ia32_Immediate(val)) { \
+               bemit8(0x80); \
+               bemit_mod_am(ext, node); \
+               bemit8(get_ia32_immediate_attr_const(val)->offset); \
+       } else { \
+               bemit8(ext << 3); \
+               bemit_mod_am(reg_gp_map[get_out_reg(val, 0)->index], node); \
+       } \
+}
+
+BINOPMEM(addmem,  0)
+BINOPMEM(ormem,   1)
+BINOPMEM(andmem,  4)
+BINOPMEM(submem,  5)
+BINOPMEM(xormem,  6)
+
 
 /**
  * Creates a function for an Unop with code /ext encoding.
@@ -2661,14 +2860,392 @@ static void bemit_ ## op(const ir_node *node) { \
        bemit_unop(node, code, ext, input);         \
 }
 
-UNOP(not,     0xF7, 2, n_ia32_unary_op)
-UNOP(neg,     0xF7, 3, n_ia32_unary_op)
-UNOP(mul,     0xF7, 4, n_ia32_binary_right)
-UNOP(imul1op, 0xF7, 5, n_ia32_binary_right)
-UNOP(div,     0xF7, 6, n_ia32_unary_op)
-UNOP(idiv,    0xF7, 7, n_ia32_unary_op)
+UNOP(not,     0xF7, 2, n_ia32_Not_val)
+UNOP(neg,     0xF7, 3, n_ia32_Neg_val)
+UNOP(mul,     0xF7, 4, n_ia32_Mul_right)
+UNOP(imul1op, 0xF7, 5, n_ia32_IMul1OP_right)
+UNOP(div,     0xF7, 6, n_ia32_Div_divisor)
+UNOP(idiv,    0xF7, 7, n_ia32_IDiv_divisor)
+
+/* TODO: am support for IJmp */
+UNOP(ijmp,    0xFF, 4, n_ia32_IJmp_target)
+
+#define SHIFT(op, ext) \
+static void bemit_##op(const ir_node *node) \
+{ \
+       const arch_register_t *out   = get_out_reg(node, 0); \
+       ir_node               *count = get_irn_n(node, 1); \
+       if (is_ia32_Immediate(count)) { \
+               int offset = get_ia32_immediate_attr_const(count)->offset; \
+               if (offset == 1) { \
+                       bemit8(0xD1); \
+                       bemit_modru(out, ext); \
+               } else { \
+                       bemit8(0xC1); \
+                       bemit_modru(out, ext); \
+                       bemit8(offset); \
+               } \
+       } else { \
+               bemit8(0xD3); \
+               bemit_modru(out, ext); \
+       } \
+} \
+ \
+static void bemit_##op##mem(const ir_node *node) \
+{ \
+       ir_node *count; \
+       unsigned size = get_mode_size_bits(get_ia32_ls_mode(node)); \
+       if (size == 16) \
+               bemit8(0x66); \
+       count = get_irn_n(node, 1); \
+       if (is_ia32_Immediate(count)) { \
+               int offset = get_ia32_immediate_attr_const(count)->offset; \
+               if (offset == 1) { \
+                       bemit8(size == 8 ? 0xD0 : 0xD1); \
+                       bemit_mod_am(ext, node); \
+               } else { \
+                       bemit8(size == 8 ? 0xC0 : 0xC1); \
+                       bemit_mod_am(ext, node); \
+                       bemit8(offset); \
+               } \
+       } else { \
+               bemit8(size == 8 ? 0xD2 : 0xD3); \
+               bemit_mod_am(ext, node); \
+       } \
+}
+
+SHIFT(rol, 0)
+SHIFT(ror, 1)
+SHIFT(shl, 4)
+SHIFT(shr, 5)
+SHIFT(sar, 7)
+
+static void bemit_shld(const ir_node *node)
+{
+       const arch_register_t *in  = get_in_reg(node, n_ia32_ShlD_val_low);
+       const arch_register_t *out = get_out_reg(node, pn_ia32_ShlD_res);
+       ir_node *count = get_irn_n(node, n_ia32_ShlD_count);
+       bemit8(0x0F);
+       if (is_ia32_Immediate(count)) {
+               bemit8(0xA4);
+               bemit_modrr(out, in);
+               bemit8(get_ia32_immediate_attr_const(count)->offset);
+       } else {
+               bemit8(0xA5);
+               bemit_modrr(out, in);
+       }
+}
+
+static void bemit_shrd(const ir_node *node)
+{
+       const arch_register_t *in  = get_in_reg(node, n_ia32_ShrD_val_low);
+       const arch_register_t *out = get_out_reg(node, pn_ia32_ShrD_res);
+       ir_node *count = get_irn_n(node, n_ia32_ShrD_count);
+       bemit8(0x0F);
+       if (is_ia32_Immediate(count)) {
+               bemit8(0xAC);
+               bemit_modrr(out, in);
+               bemit8(get_ia32_immediate_attr_const(count)->offset);
+       } else {
+               bemit8(0xAD);
+               bemit_modrr(out, in);
+       }
+}
+
+/**
+ * binary emitter for setcc.
+ */
+static void bemit_setcc(const ir_node *node)
+{
+       const arch_register_t *dreg = get_out_reg(node, pn_ia32_Setcc_res);
+
+       pn_Cmp pnc = get_ia32_condcode(node);
+       pnc        = determine_final_pnc(node, n_ia32_Setcc_eflags, pnc);
+       if (pnc & ia32_pn_Cmp_float) {
+               switch (pnc & 0x0f) {
+               case pn_Cmp_Uo:
+                        /* setp <dreg */
+                       bemit8(0x0F);
+                       bemit8(0x9A);
+                       bemit_modrm8(REG_LOW, dreg);
+                       return;
+
+               case pn_Cmp_Leg:
+                        /* setnp <dreg*/
+                       bemit8(0x0F);
+                       bemit8(0x9B);
+                       bemit_modrm8(REG_LOW, dreg);
+                       return;
+
+               case pn_Cmp_Eq:
+               case pn_Cmp_Lt:
+               case pn_Cmp_Le:
+                        /* set%PNC <dreg */
+                       bemit8(0x0F);
+                       bemit8(0x90 | pnc2cc(pnc));
+                       bemit_modrm8(REG_LOW, dreg);
+
+                       /* setnp >dreg */
+                       bemit8(0x0F);
+                       bemit8(0x9B);
+                       bemit_modrm8(REG_HIGH, dreg);
+
+                       /* andb %>dreg, %<dreg */
+                       bemit8(0x20);
+                       bemit_modrr8(REG_LOW, dreg, REG_HIGH, dreg);
+                       return;
+
+               case pn_Cmp_Ug:
+               case pn_Cmp_Uge:
+               case pn_Cmp_Ne:
+                       /* set%PNC <dreg */
+                       bemit8(0x0F);
+                       bemit8(0x90 | pnc2cc(pnc));
+                       bemit_modrm8(REG_LOW, dreg);
+
+                       /* setp >dreg */
+                       bemit8(0x0F);
+                       bemit8(0x9A);
+                       bemit_modrm8(REG_HIGH, dreg);
+
+                       /* orb %>dreg, %<dreg */
+                       bemit8(0x08);
+                       bemit_modrr8(REG_LOW, dreg, REG_HIGH, dreg);
+                       return;
+
+               default:
+                       break;
+               }
+       }
+       /* set%PNC <dreg */
+       bemit8(0x0F);
+       bemit8(0x90 | pnc2cc(pnc));
+       bemit_modrm8(REG_LOW, dreg);
+}
+
+static void bemit_cmovcc(const ir_node *node)
+{
+       const ia32_attr_t     *attr         = get_ia32_attr_const(node);
+       int                    ins_permuted = attr->data.ins_permuted;
+       const arch_register_t *out          = arch_irn_get_register(node, pn_ia32_res);
+       pn_Cmp                 pnc          = get_ia32_condcode(node);
+       const arch_register_t *in_true;
+       const arch_register_t *in_false;
+
+       pnc = determine_final_pnc(node, n_ia32_CMovcc_eflags, pnc);
+
+       in_true  = arch_get_irn_register(get_irn_n(node, n_ia32_CMovcc_val_true));
+       in_false = arch_get_irn_register(get_irn_n(node, n_ia32_CMovcc_val_false));
+
+       /* is the should-be-same constraint fulfilled? */
+       if (out == in_false) {
+               /* yes -> nothing to do */
+       } else if (out == in_true) {
+               assert(get_ia32_op_type(node) == ia32_Normal);
+               ins_permuted = !ins_permuted;
+               in_true      = in_false;
+       } else {
+               /* we need a mov */
+               bemit8(0x8B); // mov %in_false, %out
+               bemit_modrr(in_false, out);
+       }
+
+       if (ins_permuted)
+               pnc = ia32_get_negated_pnc(pnc);
+
+       /* TODO: handling of NaNs isn't correct yet */
+
+       bemit8(0x0F);
+       bemit8(0x40 | pnc2cc(pnc));
+       if (get_ia32_op_type(node) == ia32_Normal) {
+               bemit_modrr(in_true, out);
+       } else {
+               bemit_mod_am(reg_gp_map[out->index], node);
+       }
+}
+
+static void bemit_cmp(const ir_node *node)
+{
+       unsigned  ls_size = get_mode_size_bits(get_ia32_ls_mode(node));
+       ir_node  *right;
+
+       if (ls_size == 16)
+               bemit8(0x66);
+
+       right = get_irn_n(node, n_ia32_binary_right);
+       if (is_ia32_Immediate(right)) {
+               /* Use in-reg, because some instructions (cmp, test) have no out-reg. */
+               const ir_node               *op   = get_irn_n(node, n_ia32_binary_right);
+               const ia32_immediate_attr_t *attr = get_ia32_immediate_attr_const(op);
+               unsigned                     size;
+
+               if (attr->symconst != NULL) {
+                       size = 4;
+               } else {
+                       /* check for sign extension */
+                       size = get_signed_imm_size(attr->offset);
+               }
+
+               switch (size) {
+                       case 1:
+                               bemit8(0x81 | SIGNEXT_IMM);
+                               /* cmp has this special mode */
+                               if (get_ia32_op_type(node) == ia32_AddrModeS) {
+                                       bemit_mod_am(7, node);
+                               } else {
+                                       const arch_register_t *reg = get_in_reg(node, n_ia32_binary_left);
+                                       bemit_modru(reg, 7);
+                               }
+                               bemit8((unsigned char)attr->offset);
+                               return;
+                       case 2:
+                       case 4:
+                               /* check for eax variant: this variant is shorter for 32bit immediates only */
+                               if (get_ia32_op_type(node) == ia32_AddrModeS) {
+                                       bemit8(0x81);
+                                       bemit_mod_am(7, node);
+                               } else {
+                                       const arch_register_t *reg = get_in_reg(node, n_ia32_binary_left);
+                                       if (reg->index == REG_EAX) {
+                                               bemit8(0x3D);
+                                       } else {
+                                               bemit8(0x81);
+                                               bemit_modru(reg, 7);
+                                       }
+                               }
+                               if (ls_size == 16) {
+                                       bemit16(attr->offset);
+                               } else {
+                                       bemit_entity(attr->symconst, attr->sc_sign, attr->offset, false);
+                               }
+                               return;
+               }
+               panic("invalid imm size?!?");
+       } else {
+               const arch_register_t *out = get_in_reg(node, n_ia32_binary_left);
+               bemit8(0x3B);
+               if (get_ia32_op_type(node) == ia32_Normal) {
+                       const arch_register_t *op2 = get_in_reg(node, n_ia32_binary_right);
+                       bemit_modrr(op2, out);
+               } else {
+                       bemit_mod_am(reg_gp_map[out->index], node);
+               }
+       }
+}
+
+static void bemit_cmp8bit(const ir_node *node)
+{
+       ir_node *right = get_irn_n(node, n_ia32_binary_right);
+       if (is_ia32_Immediate(right)) {
+               if (get_ia32_op_type(node) == ia32_Normal) {
+                       const arch_register_t *out = get_in_reg(node, n_ia32_Cmp_left);
+                       if (out->index == REG_EAX) {
+                               bemit8(0x3C);
+                       } else {
+                               bemit8(0x80);
+                               bemit_modru(out, 7);
+                       }
+               } else {
+                       bemit8(0x80);
+                       bemit_mod_am(7, node);
+               }
+               bemit8(get_ia32_immediate_attr_const(right)->offset);
+       } else {
+               const arch_register_t *out = get_in_reg(node, n_ia32_Cmp_left);
+               bemit8(0x3A);
+               if (get_ia32_op_type(node) == ia32_Normal) {
+                       const arch_register_t *in = get_in_reg(node, n_ia32_Cmp_right);
+                       bemit_modrr(out, in);
+               } else {
+                       bemit_mod_am(reg_gp_map[out->index], node);
+               }
+       }
+}
+
+static void bemit_test8bit(const ir_node *node)
+{
+       ir_node *right = get_irn_n(node, n_ia32_Test8Bit_right);
+       if (is_ia32_Immediate(right)) {
+               if (get_ia32_op_type(node) == ia32_Normal) {
+                       const arch_register_t *out = get_in_reg(node, n_ia32_Test8Bit_left);
+                       if (out->index == REG_EAX) {
+                               bemit8(0xA8);
+                       } else {
+                               bemit8(0xF6);
+                               bemit_modru(out, 0);
+                       }
+               } else {
+                       bemit8(0xF6);
+                       bemit_mod_am(0, node);
+               }
+               bemit8(get_ia32_immediate_attr_const(right)->offset);
+       } else {
+               const arch_register_t *out = get_in_reg(node, n_ia32_Test8Bit_left);
+               bemit8(0x84);
+               if (get_ia32_op_type(node) == ia32_Normal) {
+                       const arch_register_t *in = get_in_reg(node, n_ia32_Test8Bit_right);
+                       bemit_modrr(out, in);
+               } else {
+                       bemit_mod_am(reg_gp_map[out->index], node);
+               }
+       }
+}
+
+static void bemit_imul(const ir_node *node)
+{
+       ir_node *right = get_irn_n(node, n_ia32_IMul_right);
+       /* Do we need the immediate form? */
+       if (is_ia32_Immediate(right)) {
+               int imm = get_ia32_immediate_attr_const(right)->offset;
+               if (get_signed_imm_size(imm) == 1) {
+                       bemit_unop_reg(node, 0x6B, n_ia32_IMul_left);
+                       bemit8(imm);
+               } else {
+                       bemit_unop_reg(node, 0x69, n_ia32_IMul_left);
+                       bemit32(imm);
+               }
+       } else {
+               bemit8(0x0F);
+               bemit_unop_reg(node, 0xAF, n_ia32_IMul_right);
+       }
+}
+
+static void bemit_dec(const ir_node *node)
+{
+       const arch_register_t *out = get_out_reg(node, pn_ia32_Dec_res);
+       bemit8(0x48 + reg_gp_map[out->index]);
+}
+
+static void bemit_inc(const ir_node *node)
+{
+       const arch_register_t *out = get_out_reg(node, pn_ia32_Inc_res);
+       bemit8(0x40 + reg_gp_map[out->index]);
+}
+
+#define UNOPMEM(op, code, ext) \
+static void bemit_##op(const ir_node *node) \
+{ \
+       bemit_unop_mem(node, code, ext); \
+}
+
+UNOPMEM(notmem, 0xF6, 2)
+UNOPMEM(negmem, 0xF6, 3)
+UNOPMEM(incmem, 0xFE, 0)
+UNOPMEM(decmem, 0xFE, 1)
+
+static void bemit_ldtls(const ir_node *node)
+{
+       const arch_register_t *out = get_out_reg(node, 0);
 
-UNOP(ijmp,    0xFF, 4, n_ia32_unary_op)
+       bemit8(0x65); // gs:
+       if (out->index == REG_EAX) {
+               bemit8(0xA1); // movl 0, %eax
+       } else {
+               bemit8(0x8B); // movl 0, %reg
+               bemit8(MOD_IND | ENC_REG(reg_gp_map[out->index]) | ENC_RM(0x05));
+       }
+       bemit32(0);
+}
 
 /**
  * Emit a Lea.
@@ -2680,8 +3257,111 @@ static void bemit_lea(const ir_node *node)
        bemit_mod_am(reg_gp_map[out->index], node);
 }
 
+/* helper function for bemit_minus64bit */
+static void bemit_helper_mov(const arch_register_t *src, const arch_register_t *dst)
+{
+       bemit8(0x8B); // movl %src, %dst
+       bemit_modrr(src, dst);
+}
+
+/* helper function for bemit_minus64bit */
+static void bemit_helper_neg(const arch_register_t *reg)
+{
+       bemit8(0xF7); // negl %reg
+       bemit_modru(reg, 3);
+}
+
+/* helper function for bemit_minus64bit */
+static void bemit_helper_sbb0(const arch_register_t *reg)
+{
+       bemit8(0x83); // sbbl $0, %reg
+       bemit_modru(reg, 3);
+       bemit8(0);
+}
+
+/* helper function for bemit_minus64bit */
+static void bemit_helper_sbb(const arch_register_t *src, const arch_register_t *dst)
+{
+       bemit8(0x1B); // sbbl %src, %dst
+       bemit_modrr(src, dst);
+}
+
+/* helper function for bemit_minus64bit */
+static void bemit_helper_xchg(const arch_register_t *src, const arch_register_t *dst)
+{
+       if (src->index == REG_EAX) {
+               bemit8(0x90 + reg_gp_map[dst->index]); // xchgl %eax, %dst
+       } else if (dst->index == REG_EAX) {
+               bemit8(0x90 + reg_gp_map[src->index]); // xchgl %src, %eax
+       } else {
+               bemit8(0x87); // xchgl %src, %dst
+               bemit_modrr(src, dst);
+       }
+}
+
+/* helper function for bemit_minus64bit */
+static void bemit_helper_zero(const arch_register_t *reg)
+{
+       bemit8(0x33); // xorl %reg, %reg
+       bemit_modrr(reg, reg);
+}
+
+static void bemit_minus64bit(const ir_node *node)
+{
+       const arch_register_t *in_lo  = get_in_reg(node, 0);
+       const arch_register_t *in_hi  = get_in_reg(node, 1);
+       const arch_register_t *out_lo = get_out_reg(node, 0);
+       const arch_register_t *out_hi = get_out_reg(node, 1);
+
+       if (out_lo == in_lo) {
+               if (out_hi != in_hi) {
+                       /* a -> a, b -> d */
+                       goto zero_neg;
+               } else {
+                       /* a -> a, b -> b */
+                       goto normal_neg;
+               }
+       } else if (out_lo == in_hi) {
+               if (out_hi == in_lo) {
+                       /* a -> b, b -> a */
+                       bemit_helper_xchg(in_lo, in_hi);
+                       goto normal_neg;
+               } else {
+                       /* a -> b, b -> d */
+                       bemit_helper_mov(in_hi, out_hi);
+                       bemit_helper_mov(in_lo, out_lo);
+                       goto normal_neg;
+               }
+       } else {
+               if (out_hi == in_lo) {
+                       /* a -> c, b -> a */
+                       bemit_helper_mov(in_lo, out_lo);
+                       goto zero_neg;
+               } else if (out_hi == in_hi) {
+                       /* a -> c, b -> b */
+                       bemit_helper_mov(in_lo, out_lo);
+                       goto normal_neg;
+               } else {
+                       /* a -> c, b -> d */
+                       bemit_helper_mov(in_lo, out_lo);
+                       goto zero_neg;
+               }
+       }
+
+normal_neg:
+       bemit_helper_neg( out_hi);
+       bemit_helper_neg( out_lo);
+       bemit_helper_sbb0(out_hi);
+       return;
+
+zero_neg:
+       bemit_helper_zero(out_hi);
+       bemit_helper_neg( out_lo);
+       bemit_helper_sbb( in_hi, out_hi);
+}
+
 /**
- * Emit a single optcode.
+ * Emit a single opcode.
  */
 #define EMIT_SINGLEOP(op, code)                 \
 static void bemit_ ## op(const ir_node *node) { \
@@ -2694,19 +3374,20 @@ static void bemit_ ## op(const ir_node *node) { \
 //EMIT_SINGLEOP(aaa,  0x37)
 //EMIT_SINGLEOP(aas,  0x3F)
 //EMIT_SINGLEOP(nop,  0x90)
-EMIT_SINGLEOP(cwde, 0x98)
-EMIT_SINGLEOP(cltd, 0x99)
+EMIT_SINGLEOP(cwtl,  0x98)
+EMIT_SINGLEOP(cltd,  0x99)
 //EMIT_SINGLEOP(fwait, 0x9B)
-EMIT_SINGLEOP(sahf, 0x9E)
+EMIT_SINGLEOP(sahf,  0x9E)
 //EMIT_SINGLEOP(popf, 0x9D)
-EMIT_SINGLEOP(int3, 0xCC)
+EMIT_SINGLEOP(leave, 0xC9)
+EMIT_SINGLEOP(int3,  0xCC)
 //EMIT_SINGLEOP(iret, 0xCF)
 //EMIT_SINGLEOP(xlat, 0xD7)
 //EMIT_SINGLEOP(lock, 0xF0)
-EMIT_SINGLEOP(rep,  0xF3)
+EMIT_SINGLEOP(rep,   0xF3)
 //EMIT_SINGLEOP(halt, 0xF4)
-EMIT_SINGLEOP(cmc,  0xF5)
-EMIT_SINGLEOP(stc,  0xF9)
+EMIT_SINGLEOP(cmc,   0xF5)
+EMIT_SINGLEOP(stc,   0xF9)
 //EMIT_SINGLEOP(cli,  0xFA)
 //EMIT_SINGLEOP(sti,  0xFB)
 //EMIT_SINGLEOP(std,  0xFD)
@@ -2719,18 +3400,17 @@ static void bemit_load(const ir_node *node)
        const arch_register_t *out = get_out_reg(node, 0);
 
        if (out->index == REG_EAX) {
-               ir_entity *ent       = get_ia32_am_sc(node);
-               int        offs      = get_ia32_am_offs_int(node);
                ir_node   *base      = get_irn_n(node, n_ia32_base);
                int        has_base  = !is_ia32_NoReg_GP(base);
                ir_node   *index     = get_irn_n(node, n_ia32_index);
                int        has_index = !is_ia32_NoReg_GP(index);
-
-               if (ent == NULL && !has_base && !has_index) {
+               if (!has_base && !has_index) {
+                       ir_entity *ent  = get_ia32_am_sc(node);
+                       int        offs = get_ia32_am_offs_int(node);
                        /* load from constant address to EAX can be encoded
                           as 0xA1 [offset] */
                        bemit8(0xA1);
-                       bemit_entity(NULL, 0, offs, false);
+                       bemit_entity(ent, 0, offs, false);
                        return;
                }
        }
@@ -2744,35 +3424,75 @@ static void bemit_load(const ir_node *node)
 static void bemit_store(const ir_node *node)
 {
        const ir_node *value = get_irn_n(node, n_ia32_Store_val);
+       unsigned       size  = get_mode_size_bits(get_ia32_ls_mode(node));
 
        if (is_ia32_Immediate(value)) {
-               bemit8(0xC7);
-               bemit_mod_am(0, node);
-               bemit_immediate(value, false);
+               if (size == 8) {
+                       bemit8(0xC6);
+                       bemit_mod_am(0, node);
+                       bemit8(get_ia32_immediate_attr_const(value)->offset);
+               } else if (size == 16) {
+                       bemit8(0x66);
+                       bemit8(0xC7);
+                       bemit_mod_am(0, node);
+                       bemit16(get_ia32_immediate_attr_const(value)->offset);
+               } else {
+                       bemit8(0xC7);
+                       bemit_mod_am(0, node);
+                       bemit_immediate(value, false);
+               }
        } else {
                const arch_register_t *in = get_in_reg(node, n_ia32_Store_val);
 
                if (in->index == REG_EAX) {
-                       ir_entity *ent       = get_ia32_am_sc(node);
-                       int        offs      = get_ia32_am_offs_int(node);
                        ir_node   *base      = get_irn_n(node, n_ia32_base);
                        int        has_base  = !is_ia32_NoReg_GP(base);
                        ir_node   *index     = get_irn_n(node, n_ia32_index);
                        int        has_index = !is_ia32_NoReg_GP(index);
-
-                       if (ent == NULL && !has_base && !has_index) {
+                       if (!has_base && !has_index) {
+                               ir_entity *ent  = get_ia32_am_sc(node);
+                               int        offs = get_ia32_am_offs_int(node);
                                /* store to constant address from EAX can be encoded as
-                                  0xA3 [offset]*/
-                               bemit8(0xA3);
-                               bemit_entity(NULL, 0, offs, false);
+                                * 0xA2/0xA3 [offset]*/
+                               if (size == 8) {
+                                       bemit8(0xA2);
+                               } else {
+                                       if (size == 16)
+                                               bemit8(0x66);
+                                       bemit8(0xA3);
+                               }
+                               bemit_entity(ent, 0, offs, false);
                                return;
                        }
                }
-               bemit8(0x89);
+
+               if (size == 8) {
+                       bemit8(0x88);
+               } else {
+                       if (size == 16)
+                               bemit8(0x66);
+                       bemit8(0x89);
+               }
                bemit_mod_am(reg_gp_map[in->index], node);
        }
 }
 
+static void bemit_conv_i2i(const ir_node *node)
+{
+       ir_mode  *smaller_mode = get_ia32_ls_mode(node);
+       unsigned  opcode;
+
+       bemit8(0x0F);
+       /*        8 16 bit source
+        * movzx B6 B7
+        * movsx BE BF
+        */
+       opcode = 0xB6;
+       if (mode_is_signed(smaller_mode))           opcode |= 0x08;
+       if (get_mode_size_bits(smaller_mode) == 16) opcode |= 0x01;
+       bemit_unop_reg(node, opcode, n_ia32_Conv_I2I_val);
+}
+
 /**
  * Emit a Push.
  */
@@ -2783,7 +3503,7 @@ static void bemit_push(const ir_node *node)
        if (is_ia32_Immediate(value)) {
                const ia32_immediate_attr_t *attr
                        = get_ia32_immediate_attr_const(value);
-               unsigned size = get_unsigned_imm_size(attr->offset);
+               unsigned size = get_signed_imm_size(attr->offset);
                if (attr->symconst)
                        size = 4;
                switch (size) {
@@ -2797,9 +3517,12 @@ static void bemit_push(const ir_node *node)
                        bemit_immediate(value, false);
                        break;
                }
-       } else {
+       } else if (is_ia32_NoReg_GP(value)) {
                bemit8(0xFF);
                bemit_mod_am(6, node);
+       } else {
+               const arch_register_t *reg = get_in_reg(node, n_ia32_Push_val);
+               bemit8(0x50 + reg_gp_map[reg->index]);
        }
 }
 
@@ -2809,12 +3532,13 @@ static void bemit_push(const ir_node *node)
 static void bemit_pop(const ir_node *node)
 {
        const arch_register_t *reg = get_out_reg(node, pn_ia32_Pop_res);
-       if (get_ia32_op_type(node) == ia32_Normal)
-               bemit8(0x58 + reg_gp_map[reg->index]);
-       else {
-               bemit8(0x8F);
-               bemit_mod_am(0, node);
-       }
+       bemit8(0x58 + reg_gp_map[reg->index]);
+}
+
+static void bemit_popmem(const ir_node *node)
+{
+       bemit8(0x8F);
+       bemit_mod_am(0, node);
 }
 
 static void bemit_call(const ir_node *node)
@@ -2825,16 +3549,186 @@ static void bemit_call(const ir_node *node)
                bemit8(0xE8);
                bemit_immediate(proc, true);
        } else {
-               panic("indirect call NIY");
+               bemit_unop(node, 0xFF, 2, n_ia32_Call_addr);
        }
 }
 
-/**
- * Emits a return.
- */
-static void bemit_return(const ir_node *node)
+static void bemit_jmp(const ir_node *dest_block)
 {
-       unsigned pop = be_Return_get_pop(node);
+       bemit8(0xE9);
+       bemit_jmp_destination(dest_block);
+}
+
+static void bemit_jump(const ir_node *node)
+{
+       if (can_be_fallthrough(node))
+               return;
+
+       bemit_jmp(get_cfop_target_block(node));
+}
+
+static void bemit_jcc(int pnc, const ir_node *dest_block)
+{
+       unsigned char cc = pnc2cc(pnc);
+       bemit8(0x0F);
+       bemit8(0x80 + cc);
+       bemit_jmp_destination(dest_block);
+}
+
+static void bemit_jp(bool odd, const ir_node *dest_block)
+{
+       bemit8(0x0F);
+       bemit8(0x8A + odd);
+       bemit_jmp_destination(dest_block);
+}
+
+static void bemit_ia32_jcc(const ir_node *node)
+{
+       int            pnc = get_ia32_condcode(node);
+       const ir_node *proj_true;
+       const ir_node *proj_false;
+       const ir_node *dest_true;
+       const ir_node *dest_false;
+       const ir_node *block;
+
+       pnc = determine_final_pnc(node, 0, pnc);
+
+       /* get both Projs */
+       proj_true = get_proj(node, pn_ia32_Jcc_true);
+       assert(proj_true && "Jcc without true Proj");
+
+       proj_false = get_proj(node, pn_ia32_Jcc_false);
+       assert(proj_false && "Jcc without false Proj");
+
+       block = get_nodes_block(node);
+
+       if (can_be_fallthrough(proj_true)) {
+               /* exchange both proj's so the second one can be omitted */
+               const ir_node *t = proj_true;
+
+               proj_true  = proj_false;
+               proj_false = t;
+               pnc        = ia32_get_negated_pnc(pnc);
+       }
+
+       dest_true  = get_cfop_target_block(proj_true);
+       dest_false = get_cfop_target_block(proj_false);
+
+       if (pnc & ia32_pn_Cmp_float) {
+               /* Some floating point comparisons require a test of the parity flag,
+                * which indicates that the result is unordered */
+               switch (pnc & 15) {
+                       case pn_Cmp_Uo: {
+                               bemit_jp(false, dest_true);
+                               break;
+                       }
+
+                       case pn_Cmp_Leg:
+                               bemit_jp(true, dest_true);
+                               break;
+
+                       case pn_Cmp_Eq:
+                       case pn_Cmp_Lt:
+                       case pn_Cmp_Le:
+                               /* if the false proj is a fallthrough its block might have no
+                                * label emitted, so skip the following jcc by a fixed offset */
+                               if (can_be_fallthrough(proj_false)) {
+                                       bemit8(0x7A);
+                                       bemit8(0x06);  // jp + 6
+                               } else {
+                                       bemit_jp(false, dest_false);
+                               }
+                               goto emit_jcc;
+
+                       case pn_Cmp_Ug:
+                       case pn_Cmp_Uge:
+                       case pn_Cmp_Ne:
+                               bemit_jp(false, dest_true);
+                               goto emit_jcc;
+
+                       default:
+                               goto emit_jcc;
+               }
+       } else {
+emit_jcc:
+               bemit_jcc(pnc, dest_true);
+       }
+
+       /* the second Proj might be a fallthrough */
+       if (can_be_fallthrough(proj_false)) {
+               /* it's a fallthrough */
+       } else {
+               bemit_jmp(dest_false);
+       }
+}
+
+static void bemit_switchjmp(const ir_node *node)
+{
+       unsigned long          interval;
+       int                    last_value;
+       int                    i;
+       jmp_tbl_t              tbl;
+       const arch_register_t *in;
+
+       /* fill the table structure */
+       generate_jump_table(&tbl, node);
+
+       /* two's complement arithmetic makes this work without overflow */
+       interval = tbl.max_value - tbl.min_value;
+
+       in = get_in_reg(node, 0);
+       /* emit the range check: compare the value against the table interval */
+       if (get_signed_imm_size(interval) == 1) {
+               bemit8(0x83); // cmpl $imm8, %in
+               bemit_modru(in, 7);
+               bemit8(interval);
+       } else {
+               bemit8(0x81); // cmpl $imm32, %in
+               bemit_modru(in, 7);
+               bemit32(interval);
+       }
+       bemit8(0x0F); // ja tbl.defProj
+       bemit8(0x87);
+       ia32_emitf(tbl.defProj, ".long %L - . - 4\n");
+
+       if (tbl.num_branches > 1) {
+               /* create table */
+               bemit8(0xFF); // jmp *tbl.label(,%in,4)
+               bemit8(MOD_IND | ENC_REG(4) | ENC_RM(0x04));
+               bemit8(ENC_SIB(2, reg_gp_map[in->index], 0x05));
+               be_emit_irprintf("\t.long %s\n", tbl.label);
+
+               be_gas_emit_switch_section(GAS_SECTION_RODATA);
+               be_emit_cstring(".align 4\n");
+               be_emit_irprintf("%s:\n", tbl.label);
+
+               last_value = tbl.branches[0].value;
+               for (i = 0; i != tbl.num_branches; ++i) {
+                       while (last_value != tbl.branches[i].value) {
+                               ia32_emitf(tbl.defProj, ".long %L\n");
+                               ++last_value;
+                       }
+                       ia32_emitf(tbl.branches[i].target, ".long %L\n");
+                       ++last_value;
+               }
+               be_gas_emit_switch_section(GAS_SECTION_TEXT);
+       } else {
+               /* one jump is enough */
+               panic("switch only has one case");
+               //ia32_emitf(tbl.branches[0].target, "\tjmp %L\n");
+       }
+
+       be_emit_write_line();
+
+       free(tbl.branches);
+}
+
+/**
+ * Emits a return.
+ */
+static void bemit_return(const ir_node *node)
+{
+       unsigned pop = be_Return_get_pop(node);
        if (pop > 0 || be_Return_get_emit_pop(node)) {
                bemit8(0xC2);
                assert(pop <= 0xffff);
@@ -2844,6 +3738,17 @@ static void bemit_return(const ir_node *node)
        }
 }
 
+static void bemit_subsp(const ir_node *node)
+{
+       const arch_register_t *out;
+       /* sub %in, %esp */
+       bemit_sub(node);
+       /* mov %esp, %out */
+       bemit8(0x8B);
+       out = get_out_reg(node, 1);
+       bemit8(MOD_REG | ENC_REG(reg_gp_map[out->index]) | ENC_RM(0x04));
+}
+
 static void bemit_incsp(const ir_node *node)
 {
        int                    offs;
@@ -2875,6 +3780,372 @@ static void bemit_incsp(const ir_node *node)
        }
 }
 
+static void bemit_copybi(const ir_node *node)
+{
+       unsigned size = get_ia32_copyb_size(node);
+       if (size & 1)
+               bemit8(0xA4); // movsb
+       if (size & 2) {
+               bemit8(0x66);
+               bemit8(0xA5); // movsw
+       }
+       size >>= 2;
+       while (size--) {
+               bemit8(0xA5); // movsl
+       }
+}
+
+static void bemit_fbinop(const ir_node *node, unsigned code, unsigned code_to)
+{
+       if (get_ia32_op_type(node) == ia32_Normal) {
+               const ia32_x87_attr_t *x87_attr = get_ia32_x87_attr_const(node);
+               const arch_register_t *in1      = x87_attr->x87[0];
+               const arch_register_t *in       = x87_attr->x87[1];
+               const arch_register_t *out      = x87_attr->x87[2];
+
+               if (out == NULL) {
+                       out = in1;
+               } else if (out == in) {
+                       in = in1;
+               }
+
+               if (out->index == 0) {
+                       bemit8(0xD8);
+                       bemit8(MOD_REG | ENC_REG(code) | ENC_RM(in->index));
+               } else {
+                       bemit8(0xDC);
+                       bemit8(MOD_REG | ENC_REG(code_to) | ENC_RM(out->index));
+               }
+       } else {
+               if (get_mode_size_bits(get_ia32_ls_mode(node)) == 32) {
+                       bemit8(0xD8);
+               } else {
+                       bemit8(0xDC);
+               }
+               bemit_mod_am(code, node);
+       }
+}
+
+static void bemit_fbinopp(const ir_node *node, unsigned const code)
+{
+       const ia32_x87_attr_t *x87_attr = get_ia32_x87_attr_const(node);
+       const arch_register_t *out      = x87_attr->x87[2];
+       bemit8(0xDE);
+       bemit8(code + out->index);
+}
+
+static void bemit_fabs(const ir_node *node)
+{
+       (void)node;
+
+       bemit8(0xD9);
+       bemit8(0xE1);
+}
+
+static void bemit_fadd(const ir_node *node)
+{
+       bemit_fbinop(node, 0, 0);
+}
+
+static void bemit_faddp(const ir_node *node)
+{
+       bemit_fbinopp(node, 0xC0);
+}
+
+static void bemit_fchs(const ir_node *node)
+{
+       (void)node;
+
+       bemit8(0xD9);
+       bemit8(0xE0);
+}
+
+static void bemit_fdiv(const ir_node *node)
+{
+       bemit_fbinop(node, 6, 7);
+}
+
+static void bemit_fdivp(const ir_node *node)
+{
+       bemit_fbinopp(node, 0xF8);
+}
+
+static void bemit_fdivr(const ir_node *node)
+{
+       bemit_fbinop(node, 7, 6);
+}
+
+static void bemit_fdivrp(const ir_node *node)
+{
+       bemit_fbinopp(node, 0xF0);
+}
+
+static void bemit_fild(const ir_node *node)
+{
+       switch (get_mode_size_bits(get_ia32_ls_mode(node))) {
+               case 16:
+                       bemit8(0xDF); // filds
+                       bemit_mod_am(0, node);
+                       return;
+
+               case 32:
+                       bemit8(0xDB); // fildl
+                       bemit_mod_am(0, node);
+                       return;
+
+               case 64:
+                       bemit8(0xDF); // fildll
+                       bemit_mod_am(5, node);
+                       return;
+
+               default:
+                       panic("invalid mode size");
+       }
+}
+
+static void bemit_fist(const ir_node *node)
+{
+       switch (get_mode_size_bits(get_ia32_ls_mode(node))) {
+               case 16:
+                       bemit8(0xDF); // fists
+                       break;
+
+               case 32:
+                       bemit8(0xDB); // fistl
+                       break;
+
+               default:
+                       panic("invalid mode size");
+       }
+       bemit_mod_am(2, node);
+}
+
+static void bemit_fistp(const ir_node *node)
+{
+       switch (get_mode_size_bits(get_ia32_ls_mode(node))) {
+               case 16:
+                       bemit8(0xDF); // fistps
+                       bemit_mod_am(3, node);
+                       return;
+
+               case 32:
+                       bemit8(0xDB); // fistpl
+                       bemit_mod_am(3, node);
+                       return;
+
+               case 64:
+                       bemit8(0xDF); // fistpll
+                       bemit_mod_am(7, node);
+                       return;
+
+               default:
+                       panic("invalid mode size");
+       }
+}
+
+static void bemit_fld(const ir_node *node)
+{
+       switch (get_mode_size_bits(get_ia32_ls_mode(node))) {
+               case 32:
+                       bemit8(0xD9); // flds
+                       bemit_mod_am(0, node);
+                       return;
+
+               case 64:
+                       bemit8(0xDD); // fldl
+                       bemit_mod_am(0, node);
+                       return;
+
+               case 80:
+               case 96:
+                       bemit8(0xDB); // fldt
+                       bemit_mod_am(5, node);
+                       return;
+
+               default:
+                       panic("invalid mode size");
+       }
+}
+
+static void bemit_fld1(const ir_node *node)
+{
+       (void)node;
+       bemit8(0xD9);
+       bemit8(0xE8); // fld1
+}
+
+static void bemit_fldcw(const ir_node *node)
+{
+       bemit8(0xD9); // fldcw
+       bemit_mod_am(5, node);
+}
+
+static void bemit_fldz(const ir_node *node)
+{
+       (void)node;
+       bemit8(0xD9);
+       bemit8(0xEE); // fldz
+}
+
+static void bemit_fmul(const ir_node *node)
+{
+       bemit_fbinop(node, 1, 1);
+}
+
+static void bemit_fmulp(const ir_node *node)
+{
+       bemit_fbinopp(node, 0xC8);
+}
+
+static void bemit_fpop(const ir_node *node)
+{
+       const ia32_x87_attr_t *attr = get_ia32_x87_attr_const(node);
+       bemit8(0xDD);
+       bemit8(0xD8 + attr->x87[0]->index);
+}
+
+static void bemit_fpush(const ir_node *node)
+{
+       const ia32_x87_attr_t *attr = get_ia32_x87_attr_const(node);
+       bemit8(0xD9);
+       bemit8(0xC0 + attr->x87[0]->index);
+}
+
+static void bemit_fpushcopy(const ir_node *node)
+{
+       const ia32_x87_attr_t *attr = get_ia32_x87_attr_const(node);
+       bemit8(0xD9);
+       bemit8(0xC0 + attr->x87[0]->index);
+}
+
+static void bemit_fst(const ir_node *node)
+{
+       switch (get_mode_size_bits(get_ia32_ls_mode(node))) {
+               case 32:
+                       bemit8(0xD9); // fsts
+                       break;
+
+               case 64:
+                       bemit8(0xDD); // fstl
+                       break;
+
+               default:
+                       panic("invalid mode size");
+       }
+       bemit_mod_am(2, node);
+}
+
+static void bemit_fstp(const ir_node *node)
+{
+       switch (get_mode_size_bits(get_ia32_ls_mode(node))) {
+               case 32:
+                       bemit8(0xD9); // fstps
+                       bemit_mod_am(3, node);
+                       return;
+
+               case 64:
+                       bemit8(0xDD); // fstpl
+                       bemit_mod_am(3, node);
+                       return;
+
+               case 80:
+               case 96:
+                       bemit8(0xDB); // fstpt
+                       bemit_mod_am(7, node);
+                       return;
+
+               default:
+                       panic("invalid mode size");
+       }
+}
+
+static void bemit_fsub(const ir_node *node)
+{
+       bemit_fbinop(node, 4, 5);
+}
+
+static void bemit_fsubp(const ir_node *node)
+{
+       bemit_fbinopp(node, 0xE8);
+}
+
+static void bemit_fsubr(const ir_node *node)
+{
+       bemit_fbinop(node, 5, 4);
+}
+
+static void bemit_fsubrp(const ir_node *node)
+{
+       bemit_fbinopp(node, 0xE0);
+}
+
+static void bemit_fnstcw(const ir_node *node)
+{
+       bemit8(0xD9); // fnstcw
+       bemit_mod_am(7, node);
+}
+
+static void bemit_fnstsw(void)
+{
+       bemit8(0xDF); // fnstsw %ax
+       bemit8(0xE0);
+}
+
+static void bemit_ftstfnstsw(const ir_node *node)
+{
+       (void)node;
+
+       bemit8(0xD9); // ftst
+       bemit8(0xE4);
+       bemit_fnstsw();
+}
+
+static void bemit_fucomi(const ir_node *node)
+{
+       const ia32_x87_attr_t *attr = get_ia32_x87_attr_const(node);
+       bemit8(0xDB); // fucomi
+       bemit8(0xE8 + attr->x87[1]->index);
+}
+
+static void bemit_fucomip(const ir_node *node)
+{
+       const ia32_x87_attr_t *attr = get_ia32_x87_attr_const(node);
+       bemit8(0xDF); // fucomip
+       bemit8(0xE8 + attr->x87[1]->index);
+}
+
+static void bemit_fucomfnstsw(const ir_node *node)
+{
+       const ia32_x87_attr_t *attr = get_ia32_x87_attr_const(node);
+       bemit8(0xDD); // fucom
+       bemit8(0xE0 + attr->x87[1]->index);
+       bemit_fnstsw();
+}
+
+static void bemit_fucompfnstsw(const ir_node *node)
+{
+       const ia32_x87_attr_t *attr = get_ia32_x87_attr_const(node);
+       bemit8(0xDD); // fucomp
+       bemit8(0xE8 + attr->x87[1]->index);
+       bemit_fnstsw();
+}
+
+static void bemit_fucomppfnstsw(const ir_node *node)
+{
+       (void)node;
+
+       bemit8(0xDA); // fucompp
+       bemit8(0xE9);
+       bemit_fnstsw();
+}
+
+static void bemit_fxch(const ir_node *node)
+{
+       const ia32_x87_attr_t *attr = get_ia32_x87_attr_const(node);
+       bemit8(0xD9);
+       bemit8(0xC8 + attr->x87[0]->index);
+}
+
 /**
  * The type of a emitter function.
  */
@@ -2894,47 +4165,130 @@ static void ia32_register_binary_emitters(void)
        clear_irp_opcodes_generic_func();
 
        /* benode emitter */
-       register_emitter(op_be_Copy, bemit_copy);
-       register_emitter(op_be_Return, bemit_return);
-       register_emitter(op_be_IncSP, bemit_incsp);
-       register_emitter(op_ia32_Add, bemit_add);
-       register_emitter(op_ia32_Adc, bemit_adc);
-       register_emitter(op_ia32_And, bemit_and);
-       register_emitter(op_ia32_Or, bemit_or);
-       register_emitter(op_ia32_Cmp, bemit_cmp);
-       register_emitter(op_ia32_Call, bemit_call);
-       register_emitter(op_ia32_Cltd, bemit_cltd);
-       register_emitter(op_ia32_Cmc, bemit_cmc);
-       register_emitter(op_ia32_Stc, bemit_stc);
-       register_emitter(op_ia32_RepPrefix, bemit_rep);
-       register_emitter(op_ia32_Breakpoint, bemit_int3);
-       register_emitter(op_ia32_Sahf, bemit_sahf);
-       register_emitter(op_ia32_Cltd, bemit_cwde);
-       register_emitter(op_ia32_Sub, bemit_sub);
-       register_emitter(op_ia32_Sbb, bemit_sbb);
-       register_emitter(op_ia32_Xor0, bemit_xor0);
-       register_emitter(op_ia32_Xor, bemit_xor);
-       register_emitter(op_ia32_Const, bemit_mov_const);
-       register_emitter(op_ia32_Lea, bemit_lea);
-       register_emitter(op_ia32_Load, bemit_load);
-       register_emitter(op_ia32_Not, bemit_not);
-       register_emitter(op_ia32_Neg, bemit_neg);
-       register_emitter(op_ia32_Push, bemit_push);
-       register_emitter(op_ia32_Pop, bemit_pop);
-       register_emitter(op_ia32_Store, bemit_store);
-       register_emitter(op_ia32_Mul, bemit_mul);
-       register_emitter(op_ia32_IMul1OP, bemit_imul1op);
-       register_emitter(op_ia32_Div, bemit_div);
-       register_emitter(op_ia32_IDiv, bemit_idiv);
-       register_emitter(op_ia32_IJmp, bemit_ijmp);
+       register_emitter(op_be_Copy,            bemit_copy);
+       register_emitter(op_be_CopyKeep,        bemit_copy);
+       register_emitter(op_be_IncSP,           bemit_incsp);
+       register_emitter(op_be_Perm,            bemit_perm);
+       register_emitter(op_be_Return,          bemit_return);
+       register_emitter(op_ia32_Adc,           bemit_adc);
+       register_emitter(op_ia32_Add,           bemit_add);
+       register_emitter(op_ia32_AddMem,        bemit_addmem);
+       register_emitter(op_ia32_AddMem8Bit,    bemit_addmem8bit);
+       register_emitter(op_ia32_And,           bemit_and);
+       register_emitter(op_ia32_AndMem,        bemit_andmem);
+       register_emitter(op_ia32_AndMem8Bit,    bemit_andmem8bit);
+       register_emitter(op_ia32_Breakpoint,    bemit_int3);
+       register_emitter(op_ia32_CMovcc,        bemit_cmovcc);
+       register_emitter(op_ia32_Call,          bemit_call);
+       register_emitter(op_ia32_Cltd,          bemit_cltd);
+       register_emitter(op_ia32_Cmc,           bemit_cmc);
+       register_emitter(op_ia32_Cmp,           bemit_cmp);
+       register_emitter(op_ia32_Cmp8Bit,       bemit_cmp8bit);
+       register_emitter(op_ia32_Const,         bemit_mov_const);
+       register_emitter(op_ia32_Conv_I2I,      bemit_conv_i2i);
+       register_emitter(op_ia32_Conv_I2I8Bit,  bemit_conv_i2i);
+       register_emitter(op_ia32_CopyB_i,       bemit_copybi);
+       register_emitter(op_ia32_Cwtl,          bemit_cwtl);
+       register_emitter(op_ia32_Dec,           bemit_dec);
+       register_emitter(op_ia32_DecMem,        bemit_decmem);
+       register_emitter(op_ia32_Div,           bemit_div);
+       register_emitter(op_ia32_FldCW,         bemit_fldcw);
+       register_emitter(op_ia32_FnstCW,        bemit_fnstcw);
+       register_emitter(op_ia32_FtstFnstsw,    bemit_ftstfnstsw);
+       register_emitter(op_ia32_FucomFnstsw,   bemit_fucomfnstsw);
+       register_emitter(op_ia32_Fucomi,        bemit_fucomi);
+       register_emitter(op_ia32_FucompFnstsw,  bemit_fucompfnstsw);
+       register_emitter(op_ia32_Fucompi,       bemit_fucomip);
+       register_emitter(op_ia32_FucomppFnstsw, bemit_fucomppfnstsw);
+       register_emitter(op_ia32_IDiv,          bemit_idiv);
+       register_emitter(op_ia32_IJmp,          bemit_ijmp);
+       register_emitter(op_ia32_IMul,          bemit_imul);
+       register_emitter(op_ia32_IMul1OP,       bemit_imul1op);
+       register_emitter(op_ia32_Inc,           bemit_inc);
+       register_emitter(op_ia32_IncMem,        bemit_incmem);
+       register_emitter(op_ia32_Jcc,           bemit_ia32_jcc);
+       register_emitter(op_ia32_Jmp,           bemit_jump);
+       register_emitter(op_ia32_LdTls,         bemit_ldtls);
+       register_emitter(op_ia32_Lea,           bemit_lea);
+       register_emitter(op_ia32_Leave,         bemit_leave);
+       register_emitter(op_ia32_Load,          bemit_load);
+       register_emitter(op_ia32_Minus64Bit,    bemit_minus64bit);
+       register_emitter(op_ia32_Mul,           bemit_mul);
+       register_emitter(op_ia32_Neg,           bemit_neg);
+       register_emitter(op_ia32_NegMem,        bemit_negmem);
+       register_emitter(op_ia32_Not,           bemit_not);
+       register_emitter(op_ia32_NotMem,        bemit_notmem);
+       register_emitter(op_ia32_Or,            bemit_or);
+       register_emitter(op_ia32_OrMem,         bemit_ormem);
+       register_emitter(op_ia32_OrMem8Bit,     bemit_ormem8bit);
+       register_emitter(op_ia32_Pop,           bemit_pop);
+       register_emitter(op_ia32_PopEbp,        bemit_pop);
+       register_emitter(op_ia32_PopMem,        bemit_popmem);
+       register_emitter(op_ia32_Push,          bemit_push);
+       register_emitter(op_ia32_RepPrefix,     bemit_rep);
+       register_emitter(op_ia32_Rol,           bemit_rol);
+       register_emitter(op_ia32_RolMem,        bemit_rolmem);
+       register_emitter(op_ia32_Ror,           bemit_ror);
+       register_emitter(op_ia32_RorMem,        bemit_rormem);
+       register_emitter(op_ia32_Sahf,          bemit_sahf);
+       register_emitter(op_ia32_Sar,           bemit_sar);
+       register_emitter(op_ia32_SarMem,        bemit_sarmem);
+       register_emitter(op_ia32_Sbb,           bemit_sbb);
+       register_emitter(op_ia32_Setcc,         bemit_setcc);
+       register_emitter(op_ia32_Shl,           bemit_shl);
+       register_emitter(op_ia32_ShlD,          bemit_shld);
+       register_emitter(op_ia32_ShlMem,        bemit_shlmem);
+       register_emitter(op_ia32_Shr,           bemit_shr);
+       register_emitter(op_ia32_ShrD,          bemit_shrd);
+       register_emitter(op_ia32_ShrMem,        bemit_shrmem);
+       register_emitter(op_ia32_Stc,           bemit_stc);
+       register_emitter(op_ia32_Store,         bemit_store);
+       register_emitter(op_ia32_Store8Bit,     bemit_store);
+       register_emitter(op_ia32_Sub,           bemit_sub);
+       register_emitter(op_ia32_SubMem,        bemit_submem);
+       register_emitter(op_ia32_SubMem8Bit,    bemit_submem8bit);
+       register_emitter(op_ia32_SubSP,         bemit_subsp);
+       register_emitter(op_ia32_SwitchJmp,     bemit_switchjmp);
+       register_emitter(op_ia32_Test,          bemit_test);
+       register_emitter(op_ia32_Test8Bit,      bemit_test8bit);
+       register_emitter(op_ia32_Xor,           bemit_xor);
+       register_emitter(op_ia32_Xor0,          bemit_xor0);
+       register_emitter(op_ia32_XorMem,        bemit_xormem);
+       register_emitter(op_ia32_XorMem8Bit,    bemit_xormem8bit);
+       register_emitter(op_ia32_fabs,          bemit_fabs);
+       register_emitter(op_ia32_fadd,          bemit_fadd);
+       register_emitter(op_ia32_faddp,         bemit_faddp);
+       register_emitter(op_ia32_fchs,          bemit_fchs);
+       register_emitter(op_ia32_fdiv,          bemit_fdiv);
+       register_emitter(op_ia32_fdivp,         bemit_fdivp);
+       register_emitter(op_ia32_fdivr,         bemit_fdivr);
+       register_emitter(op_ia32_fdivrp,        bemit_fdivrp);
+       register_emitter(op_ia32_fild,          bemit_fild);
+       register_emitter(op_ia32_fist,          bemit_fist);
+       register_emitter(op_ia32_fistp,         bemit_fistp);
+       register_emitter(op_ia32_fld,           bemit_fld);
+       register_emitter(op_ia32_fld1,          bemit_fld1);
+       register_emitter(op_ia32_fldz,          bemit_fldz);
+       register_emitter(op_ia32_fmul,          bemit_fmul);
+       register_emitter(op_ia32_fmulp,         bemit_fmulp);
+       register_emitter(op_ia32_fpop,          bemit_fpop);
+       register_emitter(op_ia32_fpush,         bemit_fpush);
+       register_emitter(op_ia32_fpushCopy,     bemit_fpushcopy);
+       register_emitter(op_ia32_fst,           bemit_fst);
+       register_emitter(op_ia32_fstp,          bemit_fstp);
+       register_emitter(op_ia32_fsub,          bemit_fsub);
+       register_emitter(op_ia32_fsubp,         bemit_fsubp);
+       register_emitter(op_ia32_fsubr,         bemit_fsubr);
+       register_emitter(op_ia32_fsubrp,        bemit_fsubrp);
+       register_emitter(op_ia32_fxch,          bemit_fxch);
 
        /* ignore the following nodes */
-       register_emitter(op_ia32_ProduceVal, emit_Nothing);
-       register_emitter(op_be_Barrier, emit_Nothing);
-       register_emitter(op_be_Keep, emit_Nothing);
-       register_emitter(op_be_Start, emit_Nothing);
-       register_emitter(op_Phi, emit_Nothing);
-       register_emitter(op_Start, emit_Nothing);
+       register_emitter(op_ia32_ProduceVal,   emit_Nothing);
+       register_emitter(op_be_Barrier,        emit_Nothing);
+       register_emitter(op_be_Keep,           emit_Nothing);
+       register_emitter(op_be_Start,          emit_Nothing);
+       register_emitter(op_Phi,               emit_Nothing);
+       register_emitter(op_Start,             emit_Nothing);
 }
 
 static void gen_binary_block(ir_node *block)