Implement binary emitter for LdTls.
[libfirm] / ir / be / ia32 / ia32_emitter.c
index 632c40e..9fd9439 100644 (file)
@@ -2251,7 +2251,6 @@ static void build_reg_map(void)
        reg_gp_map[REG_ESI] = 0x6;
        reg_gp_map[REG_EDI] = 0x7;
 
-       pnc_map_signed[pn_Cmp_False] = 0xFF;
        pnc_map_signed[pn_Cmp_Eq]    = 0x04;
        pnc_map_signed[pn_Cmp_Lt]    = 0x0C;
        pnc_map_signed[pn_Cmp_Le]    = 0x0E;
@@ -2259,7 +2258,6 @@ static void build_reg_map(void)
        pnc_map_signed[pn_Cmp_Ge]    = 0x0D;
        pnc_map_signed[pn_Cmp_Lg]    = 0x05;
 
-       pnc_map_unsigned[pn_Cmp_False] = 0xFF;
        pnc_map_unsigned[pn_Cmp_Eq]    = 0x04;
        pnc_map_unsigned[pn_Cmp_Lt]    = 0x02;
        pnc_map_unsigned[pn_Cmp_Le]    = 0x06;
@@ -2268,7 +2266,19 @@ static void build_reg_map(void)
        pnc_map_unsigned[pn_Cmp_Lg]    = 0x05;
 }
 
-#define GET_MODE(code) ((code) & 0xC0)
+static unsigned char pnc2cc(int pnc)
+{
+       /* Yields the condition-code value callers add to their opcode base.
+        * A result of 0 means the relation has no table entry (assert). */
+       const int     idx          = pnc & 0x07;
+       const int     use_unsigned = pnc & (ia32_pn_Cmp_float | ia32_pn_Cmp_unsigned);
+       unsigned char result       = 0x0A; /* value used for the parity test */
+
+       if (pnc != ia32_pn_Cmp_parity)
+               result = use_unsigned ? pnc_map_unsigned[idx] : pnc_map_signed[idx];
+       assert(result != 0);
+       return result;
+}
 
 /** Sign extension bit values for binops */
 enum SignExt {
@@ -2305,7 +2315,7 @@ static void bemit8(const unsigned char byte)
        be_emit_write_line();
 }
 
-static void bemit16(const unsigned u16)
+static void bemit16(const unsigned short u16)
 {
        be_emit_irprintf("\t.word 0x%x\n", u16);
        be_emit_write_line();
@@ -2383,22 +2393,6 @@ static void bemit_modru(const arch_register_t *reg, unsigned ext)
        bemit8(modrm);
 }
 
-/**
- * Calculate the size of an (unsigned) immediate in bytes.
- *
- * @param offset  an offset
- */
-static unsigned get_unsigned_imm_size(unsigned offset)
-{
-       if (offset < 256) {
-               return 1;
-       } else if (offset < 65536) {
-               return 2;
-       } else {
-               return 4;
-       }
-}
-
 /**
  * Calculate the size of an signed immediate in bytes.
  *
@@ -2479,12 +2473,13 @@ static void bemit_mod_am(unsigned reg, const ir_node *node)
                emitsib  = true;
        } else {
                modrm |= ENC_RM(base_enc);
-               /* We are forced to emit an 8bit offset as EBP base without offset is a
-                * special case for SIB without base register. */
-               if (base_enc == 0x05 && emitoffs == 0) {
-                       modrm    |= MOD_IND_BYTE_OFS;
-                       emitoffs  = 8;
-               }
+       }
+
+       /* We are forced to emit an 8bit offset as EBP base without offset is a
+        * special case for SIB without base register. */
+       if (base_enc == 0x05 && emitoffs == 0) {
+               modrm    |= MOD_IND_BYTE_OFS;
+               emitoffs  = 8;
        }
 
        modrm |= ENC_REG(reg);
@@ -2566,26 +2561,14 @@ static void bemit_binop_with_imm(
  */
 static void bemit_binop_2(const ir_node *node, unsigned code)
 {
-       const arch_register_t *out    = get_in_reg(node, n_ia32_binary_left);
-       ia32_op_type_t        am_type = get_ia32_op_type(node);
-       unsigned char         d       = 0;
-       const arch_register_t *op2;
-
-       switch (am_type) {
-       case ia32_AddrModeS:
-               d = 2;
-               /* FALLTHROUGH */
-       case ia32_AddrModeD:
-               bemit8(code | d);
+       const arch_register_t *out = get_in_reg(node, n_ia32_binary_left);
+       bemit8(code);
+       if (get_ia32_op_type(node) == ia32_Normal) {
+               const arch_register_t *op2 = get_in_reg(node, n_ia32_binary_right);
+               bemit_modrr(op2, out);
+       } else {
                bemit_mod_am(reg_gp_map[out->index], node);
-               return;
-       case ia32_Normal:
-               bemit8(code);
-               op2 = get_in_reg(node, n_ia32_binary_right);
-               bemit_modrr(out, op2);
-               return;
        }
-       panic("invalid address mode");
 }
 
 /**
@@ -2615,6 +2598,20 @@ static void bemit_unop(const ir_node *node, unsigned char code, unsigned char ex
        }
 }
 
+static void bemit_unop_reg(const ir_node *node, unsigned char code, int input)
+{
+       /* hands the hardware number of result register 0 to bemit_unop() as its third argument */
+       bemit_unop(node, code, reg_gp_map[get_out_reg(node, 0)->index], input);
+}
+
+static void bemit_unop_mem(const ir_node *node, unsigned char code, unsigned char ext)
+{
+       const unsigned bits = get_mode_size_bits(get_ia32_ls_mode(node));
+       if (bits == 16)
+               bemit8(0x66); /* prefix byte for 16 bit operands */
+       bemit8(bits != 8 ? code + 1 : code); /* code itself is the 8 bit form */
+       bemit_mod_am(ext, node);
+}
 
 static void bemit_immediate(const ir_node *node, bool relative)
 {
@@ -2642,6 +2639,37 @@ static void bemit_copy(const ir_node *copy)
        }
 }
 
+static void bemit_perm(const ir_node *node)
+{
+       const arch_register_t       *in0  = arch_get_irn_register(get_irn_n(node, 0));
+       const arch_register_t       *in1  = arch_get_irn_register(get_irn_n(node, 1));
+       const arch_register_class_t *cls0 = arch_register_get_class(in0);
+       /* Exchanges the registers of inputs 0 and 1; both have to be in the same class. */
+       assert(cls0 == arch_register_get_class(in1) && "Register class mismatch at Perm");
+
+       if (cls0 == &ia32_reg_classes[CLASS_ia32_gp]) {
+               if (in0->index == REG_EAX) {
+                       bemit8(0x90 + reg_gp_map[in1->index]); /* one byte xchg %eax, %reg */
+               } else if (in1->index == REG_EAX) {
+                       bemit8(0x90 + reg_gp_map[in0->index]); /* same form, operands swapped */
+               } else {
+                       bemit8(0x87); /* general xchg, followed by a ModRM byte */
+                       bemit_modrr(in0, in1);
+               }
+       } else if (cls0 == &ia32_reg_classes[CLASS_ia32_xmm]) {
+               panic("unimplemented"); // TODO implement
+               /* Reference: the textual emitter swaps xmm registers with three xorpd: */
+               //ia32_emitf(NULL, "\txorpd %R, %R\n", in1, in0);
+               //ia32_emitf(NULL, "\txorpd %R, %R\n", in0, in1);
+               //ia32_emitf(node, "\txorpd %R, %R\n", in1, in0);
+       } else if (cls0 == &ia32_reg_classes[CLASS_ia32_vfp]) {
+               /* is a NOP */
+       } else if (cls0 == &ia32_reg_classes[CLASS_ia32_st]) {
+               /* is a NOP */
+       } else {
+               panic("unexpected register class in be_Perm (%+F)", node);
+       }
+}
+
 static void bemit_xor0(const ir_node *node)
 {
        const arch_register_t *out = get_out_reg(node, 0);
@@ -2666,16 +2694,65 @@ static void bemit_ ## op(const ir_node *node) {                           \
 }
 
 /*    insn  def  eax,imm   imm */
-BINOP(add,  0x01, 0x05, 0x81, 0)
-BINOP(or,   0x09, 0x0D, 0x81, 1)
-BINOP(adc,  0x11, 0x15, 0x81, 2)
-BINOP(sbb,  0x19, 0x1D, 0x81, 3)
-BINOP(and,  0x21, 0x25, 0x81, 4)
-BINOP(sub,  0x29, 0x2D, 0x81, 5)
-BINOP(xor,  0x31, 0x35, 0x81, 6)
-BINOP(cmp,  0x39, 0x3D, 0x81, 7)
+BINOP(add,  0x03, 0x05, 0x81, 0)
+BINOP(or,   0x0B, 0x0D, 0x81, 1)
+BINOP(adc,  0x13, 0x15, 0x81, 2)
+BINOP(sbb,  0x1B, 0x1D, 0x81, 3)
+BINOP(and,  0x23, 0x25, 0x81, 4)
+BINOP(sub,  0x2B, 0x2D, 0x81, 5)
+BINOP(xor,  0x33, 0x35, 0x81, 6)
+BINOP(cmp,  0x3B, 0x3D, 0x81, 7)
 BINOP(test, 0x85, 0xA9, 0xF7, 0)
 
+#define BINOPMEM(op, ext) /* defines bemit_<op> and bemit_<op>8bit for a binop on a memory operand */ \
+static void bemit_##op(const ir_node *node) \
+{ \
+       unsigned size = get_mode_size_bits(get_ia32_ls_mode(node)); \
+       if (size == 16) \
+               bemit8(0x66); /* prefix byte for 16 bit operands */ \
+       ir_node *val = get_irn_n(node, n_ia32_unary_op); \
+       if (is_ia32_Immediate(val)) { \
+               const ia32_immediate_attr_t *attr   = get_ia32_immediate_attr_const(val); \
+               int                          offset = attr->offset; \
+               if (attr->symconst == NULL && get_signed_imm_size(offset) == 1) { \
+                       bemit8(0x83); /* form with a one byte immediate */ \
+                       bemit_mod_am(ext, node); \
+                       bemit8(offset); \
+               } else { \
+                       bemit8(0x81); /* form with a 16 or 32 bit immediate */ \
+                       bemit_mod_am(ext, node); \
+                       if (size == 16) { \
+                               bemit16(offset); /* NOTE(review): attr->symconst is not emitted on this path */ \
+                       } else { \
+                               bemit_entity(attr->symconst, attr->sc_sign, offset, false); \
+                       } \
+               } \
+       } else { \
+               bemit8(ext << 3 | 1); /* register source: opcode is derived from ext */ \
+               bemit_mod_am(reg_gp_map[get_out_reg(val, 0)->index], node); \
+       } \
+} \
+ \
+static void bemit_##op##8bit(const ir_node *node) \
+{ \
+       ir_node *val = get_irn_n(node, n_ia32_unary_op); \
+       if (is_ia32_Immediate(val)) { \
+               bemit8(0x80); /* 8 bit form with a one byte immediate */ \
+               bemit_mod_am(ext, node); \
+               bemit8(get_ia32_immediate_attr_const(val)->offset); \
+       } else { \
+               bemit8(ext << 3); /* 8 bit form with a register source */ \
+               bemit_mod_am(reg_gp_map[get_out_reg(val, 0)->index], node); \
+       } \
+}
+
+BINOPMEM(addmem,  0) /* second argument: value placed in the ModRM reg field */
+BINOPMEM(ormem,   1)
+BINOPMEM(andmem,  4)
+BINOPMEM(submem,  5)
+BINOPMEM(xormem,  6)
+
+
 /**
  * Creates a function for an Unop with code /ext encoding.
  */
@@ -2713,6 +2790,28 @@ static void bemit_##op(const ir_node *node) \
                bemit8(0xD3); \
                bemit_modru(out, ext); \
        } \
+} \
+ \
+static void bemit_##op##mem(const ir_node *node) /* shift/rotate of a memory operand, /ext selects the operation */ \
+{ \
+       unsigned size = get_mode_size_bits(get_ia32_ls_mode(node)); /* operand size in bits */ \
+       if (size == 16) \
+               bemit8(0x66); /* prefix byte for 16 bit operands */ \
+       ir_node *count = get_irn_n(node, 1); \
+       if (is_ia32_Immediate(count)) { \
+               int offset = get_ia32_immediate_attr_const(count)->offset; \
+               if (offset == 1) { \
+                       bemit8(size == 8 ? 0xD0 : 0xD1); /* shift by one, no immediate byte */ \
+                       bemit_mod_am(ext, node); \
+               } else { \
+                       bemit8(size == 8 ? 0xC0 : 0xC1); /* shift by a one byte immediate */ \
+                       bemit_mod_am(ext, node); \
+                       bemit8(offset); \
+               } \
+       } else { \
+               bemit8(size == 8 ? 0xD2 : 0xD3); /* count is not an immediate: shift by %cl */ \
+               bemit_mod_am(ext, node); \
+       } \
+}
 
 SHIFT(rol, 0)
@@ -2721,6 +2820,54 @@ SHIFT(shl, 4)
 SHIFT(shr, 5)
 SHIFT(sar, 7)
 
+static void bemit_cmp8bit(const ir_node *node)
+{
+       ir_node *right = get_irn_n(node, n_ia32_binary_right); /* 8 bit compare of left with right */
+       if (is_ia32_Immediate(right)) {
+               if (get_ia32_op_type(node) == ia32_Normal) {
+                       const arch_register_t *out = get_in_reg(node, n_ia32_Cmp_left);
+                       if (out->index == REG_EAX) {
+                               bemit8(0x3C); /* short form: cmp imm8, %al */
+                       } else {
+                               bemit8(0x80); /* cmp imm8, reg: 0x80 /7 */
+                               bemit_modru(out, 7);
+                       }
+               } else {
+                       bemit8(0x80); /* cmp imm8, mem: 0x80 /7 */
+                       bemit_mod_am(7, node);
+               }
+               bemit8(get_ia32_immediate_attr_const(right)->offset);
+       } else {
+               bemit8(0x3A); /* left operand lives in the ModRM reg field for this opcode */
+               const arch_register_t *out = get_in_reg(node, n_ia32_Cmp_left);
+               if (get_ia32_op_type(node) == ia32_Normal) {
+                       const arch_register_t *in = get_in_reg(node, n_ia32_Cmp_right);
+                       bemit_modrr(in, out); /* (r/m, reg) as in bemit_binop_2; was swapped */
+               } else {
+                       bemit_mod_am(reg_gp_map[out->index], node);
+               }
+       }
+}
+
+static void bemit_imul(const ir_node *node)
+{
+       ir_node *factor = get_irn_n(node, n_ia32_IMul_right);
+       if (!is_ia32_Immediate(factor)) {
+               bemit8(0x0F); /* two byte opcode 0x0F 0xAF */
+               bemit_unop_reg(node, 0xAF, n_ia32_IMul_right);
+               return;
+       }
+       /* immediate form: 0x6B carries a one byte constant, 0x69 a four byte one */
+       const int value = get_ia32_immediate_attr_const(factor)->offset;
+       if (get_signed_imm_size(value) == 1) {
+               bemit_unop_reg(node, 0x6B, n_ia32_IMul_left);
+               bemit8(value);
+       } else {
+               bemit_unop_reg(node, 0x69, n_ia32_IMul_left);
+               bemit32(value);
+       }
+}
+
 static void bemit_dec(const ir_node *node)
 {
        const arch_register_t *out = get_out_reg(node, pn_ia32_Dec_res);
@@ -2733,6 +2880,46 @@ static void bemit_inc(const ir_node *node)
        bemit8(0x40 + reg_gp_map[out->index]);
 }
 
+#define UNOPMEM(op, code, ext) /* defines bemit_<op> for a unary operation on a memory operand */ \
+static void bemit_##op(const ir_node *node) \
+{ \
+       bemit_unop_mem(node, code, ext); /* code: 8 bit opcode, ext: ModRM reg field */ \
+}
+
+UNOPMEM(notmem, 0xF6, 2) /* not: 0xF6/0xF7 /2 */
+UNOPMEM(negmem, 0xF6, 3) /* neg: 0xF6/0xF7 /3 */
+UNOPMEM(incmem, 0xFE, 0) /* inc: 0xFE/0xFF /0 */
+UNOPMEM(decmem, 0xFE, 1) /* dec: 0xFE/0xFF /1 */
+
+static void bemit_set(const ir_node *node)
+{
+       const arch_register_t *dest;
+       pn_Cmp                 cond;
+
+       bemit8(0x0F); /* first byte of the two byte opcode 0x0F 0x90+cc */
+       cond = get_ia32_condcode(node);
+       cond = determine_final_pnc(node, n_ia32_Set_eflags, cond);
+       if (get_ia32_attr_const(node)->data.ins_permuted)
+               cond = ia32_get_negated_pnc(cond);
+       bemit8(0x90 + pnc2cc(cond));
+       dest = get_out_reg(node, pn_ia32_Set_res);
+       bemit_modru(dest, 2);
+}
+
+static void bemit_ldtls(const ir_node *node)
+{
+       const arch_register_t *out = get_out_reg(node, 0);
+       /* movl %gs:0, %out -- read the 32 bit word at offset 0 of the gs segment */
+       bemit8(0x65); // gs: segment override prefix
+       if (out->index == REG_EAX) {
+               bemit8(0xA1); // movl 0, %eax (short form without ModRM byte)
+       } else {
+               bemit8(0x8B); // movl 0, %reg
+               bemit8(MOD_IND | ENC_REG(reg_gp_map[out->index]) | ENC_RM(0x05));
+       }
+       bemit32(0);
+}
+
 /**
  * Emit a Lea.
  */
@@ -2744,7 +2931,7 @@ static void bemit_lea(const ir_node *node)
 }
 
 /**
- * Emit a single optcode.
+ * Emit a single opcode.
  */
 #define EMIT_SINGLEOP(op, code)                 \
 static void bemit_ ## op(const ir_node *node) { \
@@ -2757,7 +2944,7 @@ static void bemit_ ## op(const ir_node *node) { \
 //EMIT_SINGLEOP(aaa,  0x37)
 //EMIT_SINGLEOP(aas,  0x3F)
 //EMIT_SINGLEOP(nop,  0x90)
-//EMIT_SINGLEOP(cwde, 0x98)
+EMIT_SINGLEOP(cwtl, 0x98)
 EMIT_SINGLEOP(cltd, 0x99)
 //EMIT_SINGLEOP(fwait, 0x9B)
 EMIT_SINGLEOP(sahf, 0x9E)
@@ -2782,18 +2969,17 @@ static void bemit_load(const ir_node *node)
        const arch_register_t *out = get_out_reg(node, 0);
 
        if (out->index == REG_EAX) {
-               ir_entity *ent       = get_ia32_am_sc(node);
-               int        offs      = get_ia32_am_offs_int(node);
                ir_node   *base      = get_irn_n(node, n_ia32_base);
                int        has_base  = !is_ia32_NoReg_GP(base);
                ir_node   *index     = get_irn_n(node, n_ia32_index);
                int        has_index = !is_ia32_NoReg_GP(index);
-
-               if (ent == NULL && !has_base && !has_index) {
+               if (!has_base && !has_index) {
+                       ir_entity *ent  = get_ia32_am_sc(node);
+                       int        offs = get_ia32_am_offs_int(node);
                        /* load from constant address to EAX can be encoded
                           as 0xA1 [offset] */
                        bemit8(0xA1);
-                       bemit_entity(NULL, 0, offs, false);
+                       bemit_entity(ent, 0, offs, false);
                        return;
                }
        }
@@ -2807,35 +2993,75 @@ static void bemit_load(const ir_node *node)
 static void bemit_store(const ir_node *node)
 {
        const ir_node *value = get_irn_n(node, n_ia32_Store_val);
+       unsigned       size  = get_mode_size_bits(get_ia32_ls_mode(node));
 
        if (is_ia32_Immediate(value)) {
-               bemit8(0xC7);
-               bemit_mod_am(0, node);
-               bemit_immediate(value, false);
+               if (size == 8) {
+                       bemit8(0xC6);
+                       bemit_mod_am(0, node);
+                       bemit8(get_ia32_immediate_attr_const(value)->offset);
+               } else if (size == 16) {
+                       bemit8(0x66);
+                       bemit8(0xC7);
+                       bemit_mod_am(0, node);
+                       bemit16(get_ia32_immediate_attr_const(value)->offset);
+               } else {
+                       bemit8(0xC7);
+                       bemit_mod_am(0, node);
+                       bemit_immediate(value, false);
+               }
        } else {
                const arch_register_t *in = get_in_reg(node, n_ia32_Store_val);
 
                if (in->index == REG_EAX) {
-                       ir_entity *ent       = get_ia32_am_sc(node);
-                       int        offs      = get_ia32_am_offs_int(node);
                        ir_node   *base      = get_irn_n(node, n_ia32_base);
                        int        has_base  = !is_ia32_NoReg_GP(base);
                        ir_node   *index     = get_irn_n(node, n_ia32_index);
                        int        has_index = !is_ia32_NoReg_GP(index);
-
-                       if (ent == NULL && !has_base && !has_index) {
+                       if (!has_base && !has_index) {
+                               ir_entity *ent  = get_ia32_am_sc(node);
+                               int        offs = get_ia32_am_offs_int(node);
                                /* store to constant address from EAX can be encoded as
-                                  0xA3 [offset]*/
-                               bemit8(0xA3);
-                               bemit_entity(NULL, 0, offs, false);
+                                * 0xA2/0xA3 [offset]*/
+                               if (size == 8) {
+                                       bemit8(0xA2);
+                               } else {
+                                       if (size == 16)
+                                               bemit8(0x66);
+                                       bemit8(0xA3);
+                               }
+                               bemit_entity(ent, 0, offs, false);
                                return;
                        }
                }
-               bemit8(0x89);
+
+               if (size == 8) {
+                       bemit8(0x88);
+               } else {
+                       if (size == 16)
+                               bemit8(0x66);
+                       bemit8(0x89);
+               }
                bemit_mod_am(reg_gp_map[in->index], node);
        }
 }
 
+static void bemit_conv_i2i(const ir_node *node)
+{
+       ir_mode  *src_mode = get_ia32_ls_mode(node);
+       unsigned  second   = 0xB6;
+
+       /* Second opcode byte after the 0x0F escape:
+        *   source    8 bit  16 bit
+        *   movzx      B6     B7
+        *   movsx      BE     BF
+        */
+       bemit8(0x0F);
+       second |= mode_is_signed(src_mode) ? 0x08 : 0x00;
+       second |= get_mode_size_bits(src_mode) == 16 ? 0x01 : 0x00;
+       bemit_unop_reg(node, second, n_ia32_Conv_I2I_val);
+}
+
 /**
  * Emit a Push.
  */
@@ -2846,7 +3072,7 @@ static void bemit_push(const ir_node *node)
        if (is_ia32_Immediate(value)) {
                const ia32_immediate_attr_t *attr
                        = get_ia32_immediate_attr_const(value);
-               unsigned size = get_unsigned_imm_size(attr->offset);
+               unsigned size = get_signed_imm_size(attr->offset);
                if (attr->symconst)
                        size = 4;
                switch (size) {
@@ -2892,12 +3118,7 @@ static void bemit_call(const ir_node *node)
                bemit8(0xE8);
                bemit_immediate(proc, true);
        } else {
-               bemit8(0xFF);
-               if (get_ia32_op_type(node) == ia32_Normal) {
-                       bemit_modru(get_in_reg(node, n_ia32_unary_op), 2);
-               } else {
-                       bemit_mod_am(2, node);
-               }
+               bemit_unop(node, 0xFF, 2, n_ia32_Call_addr);
        }
 }
 
@@ -2917,28 +3138,22 @@ static void bemit_jump(const ir_node *node)
 
 static void bemit_jcc(int pnc, const ir_node *dest_block)
 {
-       unsigned char cc;
-
-       if (pnc == ia32_pn_Cmp_parity) {
-               cc = 0x0A;
-       } else {
-               if (pnc & ia32_pn_Cmp_float || pnc & ia32_pn_Cmp_unsigned) {
-                       cc = pnc_map_unsigned[pnc & 0x07];
-               } else {
-                       cc = pnc_map_signed[pnc & 0x07];
-               }
-       }
-       assert(cc != 0xFF);
-
+       unsigned char cc = pnc2cc(pnc);
        bemit8(0x0F);
        bemit8(0x80 + cc);
        bemit_jmp_destination(dest_block);
 }
 
+static void bemit_jp(bool odd, const ir_node *dest_block)
+{
+       bemit8(0x0F); /* near jump on parity: 0x0F 0x8A (jp) or 0x0F 0x8B (jnp) */
+       bemit8(odd ? 0x8B : 0x8A);
+       bemit_jmp_destination(dest_block);
+}
+
 static void bemit_ia32_jcc(const ir_node *node)
 {
        int            pnc = get_ia32_condcode(node);
-       int            need_parity_label = 0;
        const ir_node *proj_true;
        const ir_node *proj_false;
        const ir_node *dest_true;
@@ -2969,17 +3184,16 @@ static void bemit_ia32_jcc(const ir_node *node)
        dest_false = get_cfop_target_block(proj_false);
 
        if (pnc & ia32_pn_Cmp_float) {
-               panic("Float jump NIY");
                /* Some floating point comparisons require a test of the parity flag,
                 * which indicates that the result is unordered */
                switch (pnc & 15) {
                        case pn_Cmp_Uo: {
-                               ia32_emitf(proj_true, "\tjp %L\n");
+                               bemit_jp(false, dest_true);
                                break;
                        }
 
                        case pn_Cmp_Leg:
-                               ia32_emitf(proj_true, "\tjnp %L\n");
+                               bemit_jp(true, dest_true);
                                break;
 
                        case pn_Cmp_Eq:
@@ -2988,17 +3202,17 @@ static void bemit_ia32_jcc(const ir_node *node)
                                /* we need a local label if the false proj is a fallthrough
                                 * as the falseblock might have no label emitted then */
                                if (can_be_fallthrough(proj_false)) {
-                                       need_parity_label = 1;
-                                       ia32_emitf(proj_false, "\tjp 1f\n");
+                                       bemit8(0x7A);
+                                       bemit8(0x06);  // jp + 6
                                } else {
-                                       ia32_emitf(proj_false, "\tjp %L\n");
+                                       bemit_jp(false, dest_false);
                                }
                                goto emit_jcc;
 
                        case pn_Cmp_Ug:
                        case pn_Cmp_Uge:
                        case pn_Cmp_Ne:
-                               ia32_emitf(proj_true, "\tjp %L\n");
+                               bemit_jp(false, dest_true);
                                goto emit_jcc;
 
                        default:
@@ -3009,10 +3223,6 @@ emit_jcc:
                bemit_jcc(pnc, dest_true);
        }
 
-       if (need_parity_label) {
-               panic("parity label NIY");
-       }
-
        /* the second Proj might be a fallthrough */
        if (can_be_fallthrough(proj_false)) {
                /* it's a fallthrough */
@@ -3036,6 +3246,17 @@ static void bemit_return(const ir_node *node)
        }
 }
 
+static void bemit_subsp(const ir_node *node)
+{
+       unsigned dest_enc;
+       /* first instruction: sub %in, %esp */
+       bemit_sub(node);
+       /* second instruction: mov %esp, %out (0x04 in the r/m field is %esp) */
+       bemit8(0x8B);
+       dest_enc = reg_gp_map[get_out_reg(node, 1)->index];
+       bemit8(MOD_REG | ENC_REG(dest_enc) | ENC_RM(0x04));
+}
+
 static void bemit_incsp(const ir_node *node)
 {
        int                    offs;
@@ -3067,6 +3288,354 @@ static void bemit_incsp(const ir_node *node)
        }
 }
 
+static void bemit_copybi(const ir_node *node)
+{
+       const unsigned bytes = get_ia32_copyb_size(node);
+       unsigned       i;
+       if ((bytes & 1) != 0)
+               bemit8(0xA4); // movsb copies the odd byte
+       if ((bytes & 2) != 0) {
+               bemit8(0x66);
+               bemit8(0xA5); // movsw copies a remaining 16 bit word
+       }
+       for (i = bytes >> 2; i != 0; --i) {
+               bemit8(0xA5); // movsl, once per 32 bit word
+       }
+}
+
+static void bemit_fbinop(const ir_node *node, unsigned code, unsigned code_to)
+{
+       if (get_ia32_op_type(node) == ia32_Normal) {
+               const ia32_x87_attr_t *x87_attr = get_ia32_x87_attr_const(node);
+               const arch_register_t *in1      = x87_attr->x87[0];
+               const arch_register_t *in       = x87_attr->x87[1];
+               const arch_register_t *out      = x87_attr->x87[2];
+               /* Without an explicit result slot the first input is overwritten. */
+               if (out == NULL) {
+                       out = in1;
+               } else if (out == in) {
+                       in = in1;
+               }
+               /* 0xD8 form when the result has index 0, 0xDC form otherwise. */
+               if (out->index == 0) {
+                       bemit8(0xD8);
+                       bemit8(MOD_REG | ENC_REG(code) | ENC_RM(in->index));
+               } else {
+                       bemit8(0xDC);
+                       bemit8(MOD_REG | ENC_REG(code_to) | ENC_RM(out->index));
+               }
+       } else {
+               if (get_mode_size_bits(get_ia32_ls_mode(node)) == 32) {
+                       bemit8(0xD8); /* 32 bit memory operand */
+               } else {
+                       bemit8(0xDC); /* any other size; presumably only 64 bit reaches here */
+               }
+               bemit_mod_am(code, node); /* memory forms always use code, never code_to */
+       }
+}
+
+static void bemit_fbinopp(const ir_node *node, unsigned const code)
+{
+       /* popping forms: 0xDE followed by code plus the index of the result slot x87[2] */
+       const arch_register_t *dest = get_ia32_x87_attr_const(node)->x87[2];
+       bemit8(0xDE);
+       bemit8(code + dest->index);
+}
+
+static void bemit_fadd(const ir_node *node)
+{
+       bemit_fbinop(node, 0, 0); /* fadd: reg field 0 in the 0xD8 and the 0xDC form */
+}
+
+static void bemit_faddp(const ir_node *node)
+{
+       bemit_fbinopp(node, 0xC0); /* faddp: 0xDE 0xC0+i */
+}
+
+static void bemit_fchs(const ir_node *node)
+{
+       (void)node; /* the encoding is fixed, the node is not consulted */
+
+       bemit8(0xD9); // fchs
+       bemit8(0xE0);
+}
+
+static void bemit_fdiv(const ir_node *node)
+{
+       bemit_fbinop(node, 6, 7); /* fdiv: reg field 6 with 0xD8/memory, 7 with the 0xDC register form */
+}
+
+static void bemit_fdivp(const ir_node *node)
+{
+       bemit_fbinopp(node, 0xF8); /* emits 0xDE 0xF8+i */
+}
+
+static void bemit_fdivr(const ir_node *node)
+{
+       bemit_fbinop(node, 7, 6); /* fdivr: reg field 7 with 0xD8/memory, 6 with the 0xDC register form */
+}
+
+static void bemit_fdivrp(const ir_node *node)
+{
+       bemit_fbinopp(node, 0xF0); /* emits 0xDE 0xF0+i */
+}
+
+static void bemit_fild(const ir_node *node)
+{
+       switch (get_mode_size_bits(get_ia32_ls_mode(node))) { /* size of the integer in memory, in bits */
+               case 16:
+                       bemit8(0xDF); // filds
+                       bemit_mod_am(0, node); // reg field /0
+                       return;
+
+               case 32:
+                       bemit8(0xDB); // fildl
+                       bemit_mod_am(0, node); // reg field /0
+                       return;
+
+               case 64:
+                       bemit8(0xDF); // fildll
+                       bemit_mod_am(5, node); // reg field /5 tells it apart from filds
+                       return;
+
+               default:
+                       panic("invalid mode size");
+       }
+}
+
+static void bemit_fist(const ir_node *node)
+{
+       switch (get_mode_size_bits(get_ia32_ls_mode(node))) { /* size of the stored integer, in bits */
+               case 16:
+                       bemit8(0xDF); // fists
+                       break;
+
+               case 32:
+                       bemit8(0xDB); // fistl
+                       break;
+
+               default:
+                       panic("invalid mode size"); /* no 64 bit case is handled for the non-popping store */
+       }
+       bemit_mod_am(2, node); /* both sizes use reg field /2 */
+}
+
+static void bemit_fistp(const ir_node *node)
+{
+       switch (get_mode_size_bits(get_ia32_ls_mode(node))) { /* size of the stored integer, in bits */
+               case 16:
+                       bemit8(0xDF); // fistps
+                       bemit_mod_am(3, node); // reg field /3
+                       return;
+
+               case 32:
+                       bemit8(0xDB); // fistpl
+                       bemit_mod_am(3, node); // reg field /3
+                       return;
+
+               case 64:
+                       bemit8(0xDF); // fistpll
+                       bemit_mod_am(7, node); // reg field /7 tells it apart from fistps
+                       return;
+
+               default:
+                       panic("invalid mode size");
+       }
+}
+
+static void bemit_fld(const ir_node *node)
+{
+       switch (get_mode_size_bits(get_ia32_ls_mode(node))) { /* size of the float in memory, in bits */
+               case 32:
+                       bemit8(0xD9); // flds
+                       bemit_mod_am(0, node); // reg field /0
+                       return;
+
+               case 64:
+                       bemit8(0xDD); // fldl
+                       bemit_mod_am(0, node); // reg field /0
+                       return;
+
+               case 80:
+               case 96: /* both sizes are emitted as the same fldt */
+                       bemit8(0xDB); // fldt
+                       bemit_mod_am(5, node); // reg field /5
+                       return;
+
+               default:
+                       panic("invalid mode size");
+       }
+}
+
+static void bemit_fld1(const ir_node *node)
+{
+       (void)node; /* the encoding is fixed, the node is not consulted */
+       bemit8(0xD9);
+       bemit8(0xE8); // fld1
+}
+
+static void bemit_fldcw(const ir_node *node)
+{
+       bemit8(0xD9); // fldcw
+       bemit_mod_am(5, node); // reg field /5, address taken from the node
+}
+
+static void bemit_fldz(const ir_node *node)
+{
+       (void)node; /* the encoding is fixed, the node is not consulted */
+       bemit8(0xD9);
+       bemit8(0xEE); // fldz
+}
+
+static void bemit_fmul(const ir_node *node)
+{
+       bemit_fbinop(node, 1, 1); /* fmul: reg field 1 in the 0xD8 and the 0xDC form */
+}
+
+static void bemit_fmulp(const ir_node *node)
+{
+       bemit_fbinopp(node, 0xC8); /* fmulp: 0xDE 0xC8+i */
+}
+
+static void bemit_fpop(const ir_node *node)
+{
+       const ia32_x87_attr_t *attr = get_ia32_x87_attr_const(node);
+       bemit8(0xDD); // fstp %st(i), i taken from x87[0]
+       bemit8(0xD8 + attr->x87[0]->index);
+}
+
+static void bemit_fpush(const ir_node *node)
+{
+       const ia32_x87_attr_t *attr = get_ia32_x87_attr_const(node);
+       bemit8(0xD9); // fld %st(i), i taken from x87[0]
+       bemit8(0xC0 + attr->x87[0]->index);
+}
+
+static void bemit_fpushcopy(const ir_node *node)
+{
+       const ia32_x87_attr_t *attr = get_ia32_x87_attr_const(node);
+       bemit8(0xD9); // fld %st(i), same bytes as bemit_fpush()
+       bemit8(0xC0 + attr->x87[0]->index);
+}
+
+static void bemit_fst(const ir_node *node)
+{
+       switch (get_mode_size_bits(get_ia32_ls_mode(node))) { /* size of the stored float, in bits */
+               case 32:
+                       bemit8(0xD9); // fsts
+                       break;
+
+               case 64:
+                       bemit8(0xDD); // fstl
+                       break;
+
+               default:
+                       panic("invalid mode size"); /* no 80/96 bit case is handled for the non-popping store */
+       }
+       bemit_mod_am(2, node); /* both sizes use reg field /2 */
+}
+
+static void bemit_fstp(const ir_node *node)
+{
+       switch (get_mode_size_bits(get_ia32_ls_mode(node))) { /* size of the stored float, in bits */
+               case 32:
+                       bemit8(0xD9); // fstps
+                       bemit_mod_am(3, node); // reg field /3
+                       return;
+
+               case 64:
+                       bemit8(0xDD); // fstpl
+                       bemit_mod_am(3, node); // reg field /3
+                       return;
+
+               case 80:
+               case 96: /* both sizes are emitted as the same fstpt */
+                       bemit8(0xDB); // fstpt
+                       bemit_mod_am(7, node); // reg field /7
+                       return;
+
+               default:
+                       panic("invalid mode size");
+       }
+}
+
+static void bemit_fsub(const ir_node *node)
+{
+       bemit_fbinop(node, 4, 5); /* fsub: reg field 4 with 0xD8/memory, 5 with the 0xDC register form */
+}
+
+static void bemit_fsubp(const ir_node *node)
+{
+       bemit_fbinopp(node, 0xE8); /* emits 0xDE 0xE8+i */
+}
+
+static void bemit_fsubr(const ir_node *node)
+{
+       bemit_fbinop(node, 5, 4); /* fsubr: reg field 5 with 0xD8/memory, 4 with the 0xDC register form */
+}
+
+static void bemit_fsubrp(const ir_node *node)
+{
+       bemit_fbinopp(node, 0xE0); /* emits 0xDE 0xE0+i */
+}
+
+static void bemit_fnstcw(const ir_node *node)
+{
+       bemit8(0xD9); // fnstcw
+       bemit_mod_am(7, node); // reg field /7, address taken from the node
+}
+
+static void bemit_fnstsw(void)
+{
+       bemit8(0xDF); // fnstsw %ax
+       bemit8(0xE0); // second byte of the fixed two byte encoding
+}
+
+static void bemit_ftstfnstsw(const ir_node *node)
+{
+       (void)node; /* both instructions have fixed encodings */
+
+       bemit8(0xD9); // ftst
+       bemit8(0xE4);
+       bemit_fnstsw(); // followed by fnstsw %ax
+}
+
+static void bemit_fucomfnstsw(const ir_node *node)
+{
+       /* Emits fucom against the slot recorded in x87[1], then fnstsw %ax.
+        * The node is needed for the slot index, so it is not marked unused. */
+       const ia32_x87_attr_t *attr = get_ia32_x87_attr_const(node);
+       bemit8(0xDD); // fucom %st(i)
+       bemit8(0xE0 + attr->x87[1]->index);
+       bemit_fnstsw();
+}
+
+static void bemit_fucompfnstsw(const ir_node *node)
+{
+       /* Emits fucomp against the slot recorded in x87[1], then fnstsw %ax.
+        * The node is needed for the slot index, so it is not marked unused. */
+       const ia32_x87_attr_t *attr = get_ia32_x87_attr_const(node);
+       bemit8(0xDD); // fucomp %st(i)
+       bemit8(0xE8 + attr->x87[1]->index);
+       bemit_fnstsw();
+}
+
+static void bemit_fucomppfnstsw(const ir_node *node)
+{
+       (void)node; /* both instructions have fixed encodings */
+
+       bemit8(0xDA); // fucompp
+       bemit8(0xE9);
+       bemit_fnstsw(); // followed by fnstsw %ax
+}
+
+static void bemit_fxch(const ir_node *node)
+{
+       const ia32_x87_attr_t *attr = get_ia32_x87_attr_const(node);
+       bemit8(0xD9); // fxch %st(i), i taken from x87[0]
+       bemit8(0xC8 + attr->x87[0]->index);
+}
+
 /**
  * The type of a emitter function.
  */
@@ -3086,59 +3655,120 @@ static void ia32_register_binary_emitters(void)
        clear_irp_opcodes_generic_func();
 
        /* benode emitter */
-       register_emitter(op_be_Copy, bemit_copy);
-       register_emitter(op_be_CopyKeep, bemit_copy);
-       register_emitter(op_be_IncSP, bemit_incsp);
-       register_emitter(op_be_Return, bemit_return);
-       register_emitter(op_ia32_Adc, bemit_adc);
-       register_emitter(op_ia32_Add, bemit_add);
-       register_emitter(op_ia32_And, bemit_and);
-       register_emitter(op_ia32_Breakpoint, bemit_int3);
-       register_emitter(op_ia32_Call, bemit_call);
-       register_emitter(op_ia32_Cltd, bemit_cltd);
-       register_emitter(op_ia32_Cmc, bemit_cmc);
-       register_emitter(op_ia32_Cmp, bemit_cmp);
-       register_emitter(op_ia32_Const, bemit_mov_const);
-       register_emitter(op_ia32_Dec, bemit_dec);
-       register_emitter(op_ia32_Div, bemit_div);
-       register_emitter(op_ia32_IDiv, bemit_idiv);
-       register_emitter(op_ia32_IJmp, bemit_ijmp);
-       register_emitter(op_ia32_IMul1OP, bemit_imul1op);
-       register_emitter(op_ia32_Inc, bemit_inc);
-       register_emitter(op_ia32_Jcc, bemit_ia32_jcc);
-       register_emitter(op_ia32_Jmp, bemit_jump);
-       register_emitter(op_ia32_Lea, bemit_lea);
-       register_emitter(op_ia32_Load, bemit_load);
-       register_emitter(op_ia32_Mul, bemit_mul);
-       register_emitter(op_ia32_Neg, bemit_neg);
-       register_emitter(op_ia32_Not, bemit_not);
-       register_emitter(op_ia32_Or, bemit_or);
-       register_emitter(op_ia32_Pop, bemit_pop);
-       register_emitter(op_ia32_PopEbp, bemit_pop);
-       register_emitter(op_ia32_PopMem, bemit_popmem);
-       register_emitter(op_ia32_Push, bemit_push);
-       register_emitter(op_ia32_RepPrefix, bemit_rep);
-       register_emitter(op_ia32_Rol, bemit_rol);
-       register_emitter(op_ia32_Ror, bemit_ror);
-       register_emitter(op_ia32_Sahf, bemit_sahf);
-       register_emitter(op_ia32_Sar, bemit_sar);
-       register_emitter(op_ia32_Sbb, bemit_sbb);
-       register_emitter(op_ia32_Shl, bemit_shl);
-       register_emitter(op_ia32_Shr, bemit_shr);
-       register_emitter(op_ia32_Stc, bemit_stc);
-       register_emitter(op_ia32_Store, bemit_store);
-       register_emitter(op_ia32_Sub, bemit_sub);
-       register_emitter(op_ia32_Test, bemit_test);
-       register_emitter(op_ia32_Xor, bemit_xor);
-       register_emitter(op_ia32_Xor0, bemit_xor0);
+       register_emitter(op_be_Copy,            bemit_copy);
+       register_emitter(op_be_CopyKeep,        bemit_copy);
+       register_emitter(op_be_IncSP,           bemit_incsp);
+       register_emitter(op_be_Perm,            bemit_perm);
+       register_emitter(op_be_Return,          bemit_return);
+       register_emitter(op_ia32_Adc,           bemit_adc);
+       register_emitter(op_ia32_Add,           bemit_add);
+       register_emitter(op_ia32_AddMem,        bemit_addmem);
+       register_emitter(op_ia32_AddMem8Bit,    bemit_addmem8bit);
+       register_emitter(op_ia32_And,           bemit_and);
+       register_emitter(op_ia32_AndMem,        bemit_andmem);
+       register_emitter(op_ia32_AndMem8Bit,    bemit_andmem8bit);
+       register_emitter(op_ia32_Breakpoint,    bemit_int3);
+       register_emitter(op_ia32_Call,          bemit_call);
+       register_emitter(op_ia32_Cltd,          bemit_cltd);
+       register_emitter(op_ia32_Cmc,           bemit_cmc);
+       register_emitter(op_ia32_Cmp,           bemit_cmp);
+       register_emitter(op_ia32_Cmp8Bit,       bemit_cmp8bit);
+       register_emitter(op_ia32_Const,         bemit_mov_const);
+       register_emitter(op_ia32_Conv_I2I,      bemit_conv_i2i);
+       register_emitter(op_ia32_Conv_I2I8Bit,  bemit_conv_i2i);
+       register_emitter(op_ia32_CopyB_i,       bemit_copybi);
+       register_emitter(op_ia32_Cwtl,          bemit_cwtl);
+       register_emitter(op_ia32_Dec,           bemit_dec);
+       register_emitter(op_ia32_DecMem,        bemit_decmem);
+       register_emitter(op_ia32_Div,           bemit_div);
+       register_emitter(op_ia32_FldCW,         bemit_fldcw);
+       register_emitter(op_ia32_FnstCW,        bemit_fnstcw);
+       register_emitter(op_ia32_FtstFnstsw,    bemit_ftstfnstsw);
+       register_emitter(op_ia32_FucomFnstsw,   bemit_fucomfnstsw);
+       register_emitter(op_ia32_FucompFnstsw,  bemit_fucompfnstsw);
+       register_emitter(op_ia32_FucomppFnstsw, bemit_fucomppfnstsw);
+       register_emitter(op_ia32_IDiv,          bemit_idiv);
+       register_emitter(op_ia32_IJmp,          bemit_ijmp);
+       register_emitter(op_ia32_IMul,          bemit_imul);
+       register_emitter(op_ia32_IMul1OP,       bemit_imul1op);
+       register_emitter(op_ia32_Inc,           bemit_inc);
+       register_emitter(op_ia32_IncMem,        bemit_incmem);
+       register_emitter(op_ia32_Jcc,           bemit_ia32_jcc);
+       register_emitter(op_ia32_Jmp,           bemit_jump);
+       register_emitter(op_ia32_LdTls,         bemit_ldtls);
+       register_emitter(op_ia32_Lea,           bemit_lea);
+       register_emitter(op_ia32_Load,          bemit_load);
+       register_emitter(op_ia32_Mul,           bemit_mul);
+       register_emitter(op_ia32_Neg,           bemit_neg);
+       register_emitter(op_ia32_NegMem,        bemit_negmem);
+       register_emitter(op_ia32_Not,           bemit_not);
+       register_emitter(op_ia32_NotMem,        bemit_notmem);
+       register_emitter(op_ia32_Or,            bemit_or);
+       register_emitter(op_ia32_OrMem,         bemit_ormem);
+       register_emitter(op_ia32_OrMem8Bit,     bemit_ormem8bit);
+       register_emitter(op_ia32_Pop,           bemit_pop);
+       register_emitter(op_ia32_PopEbp,        bemit_pop);
+       register_emitter(op_ia32_PopMem,        bemit_popmem);
+       register_emitter(op_ia32_Push,          bemit_push);
+       register_emitter(op_ia32_RepPrefix,     bemit_rep);
+       register_emitter(op_ia32_Rol,           bemit_rol);
+       register_emitter(op_ia32_RolMem,        bemit_rolmem);
+       register_emitter(op_ia32_Ror,           bemit_ror);
+       register_emitter(op_ia32_RorMem,        bemit_rormem);
+       register_emitter(op_ia32_Sahf,          bemit_sahf);
+       register_emitter(op_ia32_Sar,           bemit_sar);
+       register_emitter(op_ia32_SarMem,        bemit_sarmem);
+       register_emitter(op_ia32_Sbb,           bemit_sbb);
+       register_emitter(op_ia32_Set,           bemit_set);
+       register_emitter(op_ia32_Shl,           bemit_shl);
+       register_emitter(op_ia32_ShlMem,        bemit_shlmem);
+       register_emitter(op_ia32_Shr,           bemit_shr);
+       register_emitter(op_ia32_ShrMem,        bemit_shrmem);
+       register_emitter(op_ia32_Stc,           bemit_stc);
+       register_emitter(op_ia32_Store,         bemit_store);
+       register_emitter(op_ia32_Store8Bit,     bemit_store);
+       register_emitter(op_ia32_Sub,           bemit_sub);
+       register_emitter(op_ia32_SubMem,        bemit_submem);
+       register_emitter(op_ia32_SubMem8Bit,    bemit_submem8bit);
+       register_emitter(op_ia32_SubSP,         bemit_subsp);
+       register_emitter(op_ia32_Test,          bemit_test);
+       register_emitter(op_ia32_Xor,           bemit_xor);
+       register_emitter(op_ia32_Xor0,          bemit_xor0);
+       register_emitter(op_ia32_XorMem,        bemit_xormem);
+       register_emitter(op_ia32_XorMem8Bit,    bemit_xormem8bit);
+       register_emitter(op_ia32_fadd,          bemit_fadd);
+       register_emitter(op_ia32_faddp,         bemit_faddp);
+       register_emitter(op_ia32_fchs,          bemit_fchs);
+       register_emitter(op_ia32_fdiv,          bemit_fdiv);
+       register_emitter(op_ia32_fdivp,         bemit_fdivp);
+       register_emitter(op_ia32_fdivr,         bemit_fdivr);
+       register_emitter(op_ia32_fdivrp,        bemit_fdivrp);
+       register_emitter(op_ia32_fild,          bemit_fild);
+       register_emitter(op_ia32_fist,          bemit_fist);
+       register_emitter(op_ia32_fistp,         bemit_fistp);
+       register_emitter(op_ia32_fld,           bemit_fld);
+       register_emitter(op_ia32_fld1,          bemit_fld1);
+       register_emitter(op_ia32_fldz,          bemit_fldz);
+       register_emitter(op_ia32_fmul,          bemit_fmul);
+       register_emitter(op_ia32_fmulp,         bemit_fmulp);
+       register_emitter(op_ia32_fpop,          bemit_fpop);
+       register_emitter(op_ia32_fpush,         bemit_fpush);
+       register_emitter(op_ia32_fpushCopy,     bemit_fpushcopy);
+       register_emitter(op_ia32_fst,           bemit_fst);
+       register_emitter(op_ia32_fstp,          bemit_fstp);
+       register_emitter(op_ia32_fsub,          bemit_fsub);
+       register_emitter(op_ia32_fsubp,         bemit_fsubp);
+       register_emitter(op_ia32_fsubr,         bemit_fsubr);
+       register_emitter(op_ia32_fsubrp,        bemit_fsubrp);
+       register_emitter(op_ia32_fxch,          bemit_fxch);
 
        /* ignore the following nodes */
-       register_emitter(op_ia32_ProduceVal, emit_Nothing);
-       register_emitter(op_be_Barrier, emit_Nothing);
-       register_emitter(op_be_Keep, emit_Nothing);
-       register_emitter(op_be_Start, emit_Nothing);
-       register_emitter(op_Phi, emit_Nothing);
-       register_emitter(op_Start, emit_Nothing);
+       register_emitter(op_ia32_ProduceVal,   emit_Nothing);
+       register_emitter(op_be_Barrier,        emit_Nothing);
+       register_emitter(op_be_Keep,           emit_Nothing);
+       register_emitter(op_be_Start,          emit_Nothing);
+       register_emitter(op_Phi,               emit_Nothing);
+       register_emitter(op_Start,             emit_Nothing);
 }
 
 static void gen_binary_block(ir_node *block)