Handle 8-bit and 16-bit locations for DecMem, IncMem, NegMem and NotMem.
[libfirm] / ir / be / ia32 / ia32_emitter.c
index 1619043..59f31e5 100644 (file)
@@ -2237,6 +2237,8 @@ static const lc_opt_table_entry_t ia32_emitter_options[] = {
 static unsigned char reg_gp_map[N_ia32_gp_REGS];
 static unsigned char reg_mmx_map[N_ia32_mmx_REGS];
 static unsigned char reg_sse_map[N_ia32_xmm_REGS];
+static unsigned char pnc_map_signed[8];
+static unsigned char pnc_map_unsigned[8];
 
 static void build_reg_map(void)
 {
@@ -2248,8 +2250,42 @@ static void build_reg_map(void)
        reg_gp_map[REG_EBP] = 0x5;
        reg_gp_map[REG_ESI] = 0x6;
        reg_gp_map[REG_EDI] = 0x7;
+
+       pnc_map_signed[pn_Cmp_Eq]    = 0x04;
+       pnc_map_signed[pn_Cmp_Lt]    = 0x0C;
+       pnc_map_signed[pn_Cmp_Le]    = 0x0E;
+       pnc_map_signed[pn_Cmp_Gt]    = 0x0F;
+       pnc_map_signed[pn_Cmp_Ge]    = 0x0D;
+       pnc_map_signed[pn_Cmp_Lg]    = 0x05;
+
+       pnc_map_unsigned[pn_Cmp_Eq]    = 0x04;
+       pnc_map_unsigned[pn_Cmp_Lt]    = 0x02;
+       pnc_map_unsigned[pn_Cmp_Le]    = 0x06;
+       pnc_map_unsigned[pn_Cmp_Gt]    = 0x07;
+       pnc_map_unsigned[pn_Cmp_Ge]    = 0x03;
+       pnc_map_unsigned[pn_Cmp_Lg]    = 0x05;
 }
 
+static unsigned char pnc2cc(int pnc)
+{
+       unsigned char cc; /* value the callers add to their base opcode */
+       if (pnc == ia32_pn_Cmp_parity) {
+               cc = 0x0A; /* parity is not in the maps, it has a fixed code */
+       } else if (pnc & ia32_pn_Cmp_float || pnc & ia32_pn_Cmp_unsigned) {
+               cc = pnc_map_unsigned[pnc & 0x07]; /* float compares use the unsigned table, too */
+       } else {
+               cc = pnc_map_signed[pnc & 0x07]; /* low three bits index the 8 entry table */
+       }
+       assert(cc != 0); /* a zero entry is treated as "no mapping for this pnc" */
+       return cc;
+}
+
+/** Sign extension bit values for binops */
+enum SignExt {
+       UNSIGNED_IMM = 0,  /**< unsigned immediate */
+       SIGNEXT_IMM  = 2,  /**< sign extended immediate */
+};
+
 /** The mod encoding of the ModR/M */
 enum Mod {
        MOD_IND          = 0x00, /**< [reg1] */
@@ -2258,25 +2294,13 @@ enum Mod {
        MOD_REG          = 0xC0  /**< reg1 */
 };
 
-#define GET_MODE(code) ((code) & 0xC0)
-
-/** Sign extension bit values for binops */
-enum SignExt {
-       UNSIGNED_IMM = 0,  /**< unsigned immediate */
-       SIGNEXT_IMM  = 2,  /**< sign extended immediate */
-};
-
 /** create R/M encoding for ModR/M */
 #define ENC_RM(x) (x)
 /** create REG encoding for ModR/M */
 #define ENC_REG(x) ((x) << 3)
 
-/** create Base encoding for SIB */
-#define ENC_BASE(x) (x)
-/** create Index encoding for SIB */
-#define ENC_INDEX(x) ((x) << 3)
-/** create Scale encoding for SIB */
-#define ENC_SCALE(x) ((x) << 6)
+/** create encoding for a SIB byte */
+#define ENC_SIB(scale, index, base) ((scale) << 6 | (index) << 3 | (base))
 
 /* Node: The following routines are supposed to append bytes, words, dwords
    to the output stream.
@@ -2291,7 +2315,7 @@ static void bemit8(const unsigned char byte)
        be_emit_write_line();
 }
 
-static void bemit16(const unsigned u16)
+static void bemit16(const unsigned short u16)
 {
        be_emit_irprintf("\t.word 0x%x\n", u16);
        be_emit_write_line();
@@ -2303,6 +2327,10 @@ static void bemit32(const unsigned u32)
        be_emit_write_line();
 }
 
+/**
+ * Emit the address of an entity. If @p is_relative is true, the emitted value
+ * is the entity's offset relative to the end of the emitted address instead.
+ */
 static void bemit_entity(ir_entity *entity, bool entity_sign, int offset,
                          bool is_relative)
 {
@@ -2311,6 +2339,10 @@ static void bemit_entity(ir_entity *entity, bool entity_sign, int offset,
                return;
        }
 
+       if (is_relative) {
+               offset -= 4;
+       }
+
        /* the final version should remember the position in the bytestream
           and patch it with the correct address at linktime... */
        be_emit_cstring("\t.long ");
@@ -2330,6 +2362,14 @@ static void bemit_entity(ir_entity *entity, bool entity_sign, int offset,
        be_emit_write_line();
 }
 
+static void bemit_jmp_destination(const ir_node *dest_block)
+{
+       be_emit_cstring("\t.long "); /* 4 byte displacement, computed by the assembler */
+       ia32_emit_block_name(dest_block);
+       be_emit_cstring(" - . - 4\n"); /* '.' is the position of this .long, - 4 makes it relative to its end */
+       be_emit_write_line();
+}
+
 /* end emit routines, all emitters following here should only use the functions
    above. */
 
@@ -2376,61 +2416,15 @@ static unsigned get_unsigned_imm_size(unsigned offset)
  */
 static unsigned get_signed_imm_size(int offset)
 {
-       if (offset >= -127 && offset < 128) {
+       if (-128 <= offset && offset < 128) {
                return 1;
-       } else if (offset >= -32768 && offset < 32767) {
+       } else if (-32768 <= offset && offset < 32768) {
                return 2;
        } else {
                return 4;
        }
 }
 
-/**
- * Emit a binop with a immediate operand.
- *
- * @param node        the node to emit
- * @param opcode_eax  the opcode for the op eax, imm variant
- * @param opcode      the opcode for the reg, imm variant
- * @param ruval       the opcode extension for opcode
- */
-static void bemit_binop_with_imm(
-       const ir_node *node,
-       unsigned char opcode_ax,
-       unsigned char opcode, unsigned char ruval)
-{
-       const arch_register_t       *reg  = get_out_reg(node, 0);
-       const ir_node               *op   = get_irn_n(node, n_ia32_binary_right);
-       const ia32_immediate_attr_t *attr = get_ia32_immediate_attr_const(op);
-       unsigned                    size;
-
-       if (attr->symconst != NULL)
-               size = 4;
-       else {
-               /* check for sign extension */
-               size = get_signed_imm_size(attr->offset);
-       }
-
-       switch (size) {
-       case 1:
-               bemit8(opcode | SIGNEXT_IMM);
-               bemit_modru(reg, ruval);
-               bemit8((unsigned char)attr->offset);
-               return;
-       case 2:
-       case 4:
-               /* check for eax variant: this variant is shorter for 32bit immediates only */
-               if (reg->index == REG_EAX) {
-                       bemit8(opcode_ax);
-               } else {
-                       bemit8(opcode);
-                       bemit_modru(reg, ruval);
-               }
-               bemit_entity(attr->symconst, attr->sc_sign, attr->offset, false);
-               return;
-       }
-       panic("invalid imm size?!?");
-}
-
 /**
  * Emit an address mode.
  *
@@ -2449,6 +2443,7 @@ static void bemit_mod_am(unsigned reg, const ir_node *node)
        unsigned   sib       = 0;
        unsigned   emitoffs  = 0;
        bool       emitsib   = false;
+       unsigned   base_enc;
 
        /* set the mod part depending on displacement */
        if (ent != NULL) {
@@ -2457,7 +2452,7 @@ static void bemit_mod_am(unsigned reg, const ir_node *node)
        } else if (offs == 0) {
                modrm |= MOD_IND;
                emitoffs = 0;
-       } else if (offs >= -127 && offs <= 128) {
+       } else if (-128 <= offs && offs < 128) {
                modrm |= MOD_IND_BYTE_OFS;
                emitoffs = 8;
        } else {
@@ -2465,54 +2460,42 @@ static void bemit_mod_am(unsigned reg, const ir_node *node)
                emitoffs = 32;
        }
 
-       /* determine if we need a SIB byte */
+       if (has_base) {
+               const arch_register_t *base_reg = arch_get_irn_register(base);
+               base_enc = reg_gp_map[base_reg->index];
+       } else {
+               /* Use the EBP encoding + MOD_IND if NO base register. There is
+                * always a 32bit offset present in this case. */
+               modrm    = MOD_IND;
+               base_enc = 0x05;
+               emitoffs = 32;
+       }
+
+       /* Determine if we need a SIB byte. */
        if (has_index) {
-               int scale;
                const arch_register_t *reg_index = arch_get_irn_register(index);
-               assert(reg_index->index != REG_ESP);
-               sib |= ENC_INDEX(reg_gp_map[reg_index->index]);
-
-               if (has_base) {
-                       const arch_register_t *reg = arch_get_irn_register(base);
-                       sib |= ENC_BASE(reg_gp_map[reg->index]);
-               } else {
-                       /* use the EBP encoding if NO base register */
-                       sib |= 0x05;
-               }
-
-               scale = get_ia32_am_scale(node);
+               int                    scale     = get_ia32_am_scale(node);
                assert(scale < 4);
-               sib |= ENC_SCALE(scale);
+               /* R/M set to ESP means SIB in 32bit mode. */
+               modrm   |= ENC_RM(0x04);
+               sib      = ENC_SIB(scale, reg_gp_map[reg_index->index], base_enc);
                emitsib = true;
+       } else if (base_enc == 0x04) {
+               /* for the above reason we are forced to emit a SIB when base is ESP.
+                * Only the base is used, index must be ESP too, which means no index.
+                */
+               modrm   |= ENC_RM(0x04);
+               sib      = ENC_SIB(0, 0x04, 0x04);
+               emitsib  = true;
+       } else {
+               modrm |= ENC_RM(base_enc);
        }
 
-       /* determine modrm byte */
-       if (emitsib) {
-               /* R/M set to ESP means SIB in 32bit mode */
-               modrm |= ENC_RM(0x04);
-       } else if (has_base) {
-               const arch_register_t *reg = arch_get_irn_register(base);
-               if (reg->index == REG_ESP) {
-                       /* for the above reason we are forced to emit a sib
-                          when base is ESP. Only the base is used */
-                       sib     = ENC_BASE(0x04);
-                       emitsib = true;
-
-               /* we are forced to emit a 8bit offset as EBP base without
-                  offset is a special case for SIB without base register */
-               } else if (reg->index == REG_EBP && emitoffs == 0) {
-                       assert(GET_MODE(modrm) == MOD_IND);
-                       emitoffs  = 8;
-                       modrm    |= MOD_IND_BYTE_OFS;
-               }
-               modrm |= ENC_RM(reg_gp_map[reg->index]);
-       } else {
-               /* only displacement: Use EBP + disp encoding in 32bit mode */
-               if (emitoffs == 0) {
-                       emitoffs = 8;
-                       modrm    = MOD_IND_BYTE_OFS;
-               }
-               modrm |= ENC_RM(0x05);
+       /* We are forced to emit an 8bit offset as EBP base without offset is a
+        * special case for SIB without base register. */
+       if (base_enc == 0x05 && emitoffs == 0) {
+               modrm    |= MOD_IND_BYTE_OFS;
+               emitoffs  = 8;
        }
 
        modrm |= ENC_REG(reg);
@@ -2529,6 +2512,66 @@ static void bemit_mod_am(unsigned reg, const ir_node *node)
        }
 }
 
+/**
+ * Emit a binop with an immediate operand.
+ *
+ * @param node        the node to emit
+ * @param opcode_ax   the opcode for the op eax, imm variant
+ * @param opcode      the opcode for the reg, imm variant
+ * @param ruval       the opcode extension for opcode
+ */
+static void bemit_binop_with_imm(
+       const ir_node *node,
+       unsigned char opcode_ax,
+       unsigned char opcode, unsigned char ruval)
+{
+       /* Use in-reg, because some instructions (cmp, test) have no out-reg. */
+       const ir_node               *op   = get_irn_n(node, n_ia32_binary_right);
+       const ia32_immediate_attr_t *attr = get_ia32_immediate_attr_const(op);
+       unsigned                     size;
+
+       /* Some instructions (test) have no short form with 32bit value + 8bit
+        * immediate; their opcode already has the SIGNEXT_IMM bit set. */
+       if (attr->symconst != NULL || opcode & SIGNEXT_IMM) {
+               size = 4;
+       } else {
+               /* check for sign extension */
+               size = get_signed_imm_size(attr->offset);
+       }
+
+       switch (size) {
+       case 1:
+               bemit8(opcode | SIGNEXT_IMM);
+               /* cmp has this special mode */
+               if (get_ia32_op_type(node) == ia32_AddrModeS) {
+                       bemit_mod_am(ruval, node);
+               } else {
+                       const arch_register_t *reg = get_in_reg(node, n_ia32_binary_left);
+                       bemit_modru(reg, ruval);
+               }
+               bemit8((unsigned char)attr->offset);
+               return;
+       case 2: /* handled like case 4: the immediate is emitted via bemit_entity() */
+       case 4:
+               /* the shorter eax variant (32bit immediates only) needs a register operand */
+               if (get_ia32_op_type(node) == ia32_AddrModeS) {
+                       bemit8(opcode);
+                       bemit_mod_am(ruval, node);
+               } else {
+                       const arch_register_t *reg = get_in_reg(node, n_ia32_binary_left);
+                       if (reg->index == REG_EAX) {
+                               bemit8(opcode_ax);
+                       } else {
+                               bemit8(opcode);
+                               bemit_modru(reg, ruval);
+                       }
+               }
+               bemit_entity(attr->symconst, attr->sc_sign, attr->offset, false);
+               return;
+       }
+       panic("invalid imm size?!?");
+}
+
 /**
  * Emits a binop.
  */
@@ -2542,7 +2585,7 @@ static void bemit_binop_2(const ir_node *node, unsigned code)
        switch (am_type) {
        case ia32_AddrModeS:
                d = 2;
-               /*fallthrough*/
+               /* FALLTHROUGH */
        case ia32_AddrModeD:
                bemit8(code | d);
                bemit_mod_am(reg_gp_map[out->index], node);
@@ -2563,10 +2606,6 @@ static void bemit_binop(const ir_node *node, const unsigned char opcodes[4])
 {
        ir_node *right = get_irn_n(node, n_ia32_binary_right);
        if (is_ia32_Immediate(right)) {
-               /* there's a shorter variant with DEST=EAX */
-               const arch_register_t *reg = get_out_reg(node, 0);
-               if (reg->index == REG_EAX)
-
                bemit_binop_with_imm(node, opcodes[1], opcodes[2], opcodes[3]);
        } else {
                bemit_binop_2(node, opcodes[0]);
@@ -2576,21 +2615,32 @@ static void bemit_binop(const ir_node *node, const unsigned char opcodes[4])
 /**
  * Emit an unop.
  */
-static void bemit_unop(const ir_node *node, unsigned char code, unsigned char ext)
+static void bemit_unop(const ir_node *node, unsigned char code, unsigned char ext, int input)
 {
-       ia32_op_type_t am_type = get_ia32_op_type(node);
-
        bemit8(code);
-       if (am_type == ia32_AddrModeD) {
-               bemit8(code);
-               bemit_mod_am(ext, node);
+       if (get_ia32_op_type(node) == ia32_Normal) {
+               const arch_register_t *in = get_in_reg(node, input);
+               bemit_modru(in, ext);
        } else {
-               const arch_register_t *out = get_out_reg(node, 0);
-               assert(am_type == ia32_Normal);
-               bemit_modru(out, ext);
+               bemit_mod_am(ext, node);
        }
 }
 
+static void bemit_unop_reg(const ir_node *node, unsigned char code, int input)
+{
+       const arch_register_t *out = get_out_reg(node, 0);
+       bemit_unop(node, code, reg_gp_map[out->index], input);
+}
+
+static void bemit_unop_mem(const ir_node *node, unsigned char code, unsigned char ext)
+{
+       unsigned size = get_mode_size_bits(get_ia32_ls_mode(node)); /* size of the node's ls_mode in bits */
+       if (size == 16)
+               bemit8(0x66); /* prefix byte emitted in front of the opcode for 16 bit */
+       bemit8(size == 8 ? code : code + 1); /* code is the 8 bit form, code + 1 the wider one */
+       bemit_mod_am(ext, node); /* ext ends up in the reg field of the ModR/M byte */
+}
+
 static void bemit_immediate(const ir_node *node, bool relative)
 {
        const ia32_immediate_attr_t *attr = get_ia32_immediate_attr_const(node);
@@ -2599,9 +2649,8 @@ static void bemit_immediate(const ir_node *node, bool relative)
 
 static void bemit_copy(const ir_node *copy)
 {
-       const ir_node *op = be_get_Copy_op(copy);
-       const arch_register_t *in  = arch_get_irn_register(op);
-       const arch_register_t *out = arch_get_irn_register(copy);
+       const arch_register_t *in  = get_in_reg(copy, 0);
+       const arch_register_t *out = get_out_reg(copy, 0);
 
        if (in == out || is_unknown_reg(in))
                return;
@@ -2618,6 +2667,37 @@ static void bemit_copy(const ir_node *copy)
        }
 }
 
+static void bemit_perm(const ir_node *node)
+{
+       const arch_register_t       *in0  = arch_get_irn_register(get_irn_n(node, 0));
+       const arch_register_t       *in1  = arch_get_irn_register(get_irn_n(node, 1));
+       const arch_register_class_t *cls0 = arch_register_get_class(in0);
+
+       assert(cls0 == arch_register_get_class(in1) && "Register class mismatch at Perm");
+
+       if (cls0 == &ia32_reg_classes[CLASS_ia32_gp]) {
+               if (in0->index == REG_EAX) {
+                       bemit8(0x90 + reg_gp_map[in1->index]);
+               } else if (in1->index == REG_EAX) {
+                       bemit8(0x90 + reg_gp_map[in0->index]);
+               } else {
+                       bemit8(0x87);
+                       bemit_modrr(in0, in1);
+               }
+       } else if (cls0 == &ia32_reg_classes[CLASS_ia32_xmm]) {
+               panic("unimplemented"); // TODO implement
+               //ia32_emitf(NULL, "\txorpd %R, %R\n", in1, in0);
+               //ia32_emitf(NULL, "\txorpd %R, %R\n", in0, in1);
+               //ia32_emitf(node, "\txorpd %R, %R\n", in1, in0);
+       } else if (cls0 == &ia32_reg_classes[CLASS_ia32_vfp]) {
+               /* is a NOP */
+       } else if (cls0 == &ia32_reg_classes[CLASS_ia32_st]) {
+               /* is a NOP */
+       } else {
+               panic("unexpected register class in be_Perm (%+F)", node);
+       }
+}
+
 static void bemit_xor0(const ir_node *node)
 {
        const arch_register_t *out = get_out_reg(node, 0);
@@ -2632,30 +2712,226 @@ static void bemit_mov_const(const ir_node *node)
        bemit_immediate(node, false);
 }
 
-#define BINOP(op, op0, op1, op2, op3)                                 \
-static void bemit_ ## op(const ir_node *node) {                       \
-       static const unsigned char op ## _codes[] = {op0, op1, op2, op3}; \
-       bemit_binop(node, op ## _codes);                                  \
-}
+/**
+ * Creates a function for a Binop with 3 possible encodings.
+ */
+#define BINOP(op, op0, op1, op2, op2_ext)                                 \
+static void bemit_ ## op(const ir_node *node) {                           \
+       static const unsigned char op ## _codes[] = {op0, op1, op2, op2_ext}; \
+       bemit_binop(node, op ## _codes);                                      \
+}
+
+/*    insn  def   eax   imm   ext */
+BINOP(add,  0x01, 0x05, 0x81, 0)
+BINOP(or,   0x09, 0x0D, 0x81, 1)
+BINOP(adc,  0x11, 0x15, 0x81, 2)
+BINOP(sbb,  0x19, 0x1D, 0x81, 3)
+BINOP(and,  0x21, 0x25, 0x81, 4)
+BINOP(sub,  0x29, 0x2D, 0x81, 5)
+BINOP(xor,  0x31, 0x35, 0x81, 6)
+BINOP(cmp,  0x39, 0x3D, 0x81, 7)
+BINOP(test, 0x85, 0xA9, 0xF7, 0)
+
+#define BINOPMEM(op, ext) /* defines bemit_<op> (16/32 bit) and bemit_<op>8bit */ \
+static void bemit_##op(const ir_node *node) \
+{ \
+       if (get_mode_size_bits(get_ia32_ls_mode(node)) == 16) \
+               bemit8(0x66); /* prefix byte emitted in front of the opcode for 16 bit */ \
+       ir_node *val = get_irn_n(node, n_ia32_unary_op); \
+       if (is_ia32_Immediate(val)) { \
+               const ia32_immediate_attr_t *attr   = get_ia32_immediate_attr_const(val); \
+               int                          offset = attr->offset; \
+               if (attr->symconst == NULL && get_signed_imm_size(offset) == 1) { /* plain value fitting a signed byte */ \
+                       bemit8(0x83); \
+                       bemit_mod_am(ext, node); \
+                       bemit8(offset); \
+               } else { \
+                       bemit8(0x81); /* NOTE(review): bemit_entity() below emits a .long even for 16 bit - confirm */ \
+                       bemit_mod_am(ext, node); \
+                       bemit_entity(attr->symconst, attr->sc_sign, offset, false); \
+               } \
+       } else { \
+               bemit8(ext << 3 | 1); /* register operand: opcode is derived from ext */ \
+               bemit_mod_am(reg_gp_map[get_out_reg(val, 0)->index], node); \
+       } \
+} \
+ \
+static void bemit_##op##8bit(const ir_node *node) \
+{ \
+       ir_node *val = get_irn_n(node, n_ia32_unary_op); \
+       if (is_ia32_Immediate(val)) { \
+               bemit8(0x80); \
+               bemit_mod_am(ext, node); \
+               bemit8(get_ia32_immediate_attr_const(val)->offset); /* always a single immediate byte */ \
+       } else { \
+               bemit8(ext << 3); /* same as above without the | 1 */ \
+               bemit_mod_am(reg_gp_map[get_out_reg(val, 0)->index], node); \
+       } \
+}
+
+BINOPMEM(addmem,  0)
+BINOPMEM(ormem,   1)
+BINOPMEM(andmem,  4)
+BINOPMEM(submem,  5)
+BINOPMEM(xormem,  6)
 
-/*   insn  def  eax,imm   imm  */
-BINOP(add, 0x01, 0x05, 0x81, 0 )
-BINOP(or,  0x09, 0x0D, 0x81, 1 )
-BINOP(adc, 0x11, 0x15, 0x81, 2 )
-BINOP(sbb, 0x19, 0x1D, 0x81, 3 )
-BINOP(and, 0x21, 0x25, 0x81, 4 )
-BINOP(sub, 0x29, 0x2D, 0x81, 5 )
-BINOP(xor, 0x31, 0x35, 0x81, 6 )
-BINOP(cmp, 0x39, 0x3D, 0x81, 7 )
 
-#define UNOP(op, code, ext)                     \
+/**
+ * Creates a function for an Unop with code /ext encoding.
+ */
+#define UNOP(op, code, ext, input)              \
 static void bemit_ ## op(const ir_node *node) { \
-       bemit_unop(node, code, ext);                \
+       bemit_unop(node, code, ext, input);         \
+}
+
+UNOP(not,     0xF7, 2, n_ia32_Not_val)
+UNOP(neg,     0xF7, 3, n_ia32_Neg_val)
+UNOP(mul,     0xF7, 4, n_ia32_Mul_right)
+UNOP(imul1op, 0xF7, 5, n_ia32_IMul1OP_right)
+UNOP(div,     0xF7, 6, n_ia32_Div_divisor)
+UNOP(idiv,    0xF7, 7, n_ia32_IDiv_divisor)
+
+/* TODO: am support for IJmp */
+UNOP(ijmp,    0xFF, 4, n_ia32_IJmp_target)
+
+#define SHIFT(op, ext) /* defines bemit_<op> (register operand) and bemit_<op>mem (memory operand) */ \
+static void bemit_##op(const ir_node *node) \
+{ \
+       const arch_register_t *out   = get_out_reg(node, 0); \
+       ir_node               *count = get_irn_n(node, 1); \
+       if (is_ia32_Immediate(count)) { \
+               int offset = get_ia32_immediate_attr_const(count)->offset; \
+               if (offset == 1) { /* a count of 1 has a form without immediate byte */ \
+                       bemit8(0xD1); \
+                       bemit_modru(out, ext); \
+               } else { \
+                       bemit8(0xC1); \
+                       bemit_modru(out, ext); \
+                       bemit8(offset); \
+               } \
+       } else { /* count is not an immediate */ \
+               bemit8(0xD3); \
+               bemit_modru(out, ext); \
+       } \
+} \
+ \
+static void bemit_##op##mem(const ir_node *node) \
+{ \
+       unsigned size = get_mode_size_bits(get_ia32_ls_mode(node)); /* in bits, so the byte form is size == 8 */ \
+       if (size == 16) \
+               bemit8(0x66); /* prefix byte emitted in front of the opcode for 16 bit */ \
+       ir_node *count = get_irn_n(node, 1); \
+       if (is_ia32_Immediate(count)) { \
+               int offset = get_ia32_immediate_attr_const(count)->offset; \
+               if (offset == 1) { \
+                       bemit8(size == 8 ? 0xD0 : 0xD1); /* byte form is the wider opcode - 1 */ \
+                       bemit_mod_am(ext, node); \
+               } else { \
+                       bemit8(size == 8 ? 0xC0 : 0xC1); \
+                       bemit_mod_am(ext, node); \
+                       bemit8(offset); \
+               } \
+       } else { \
+               bemit8(size == 8 ? 0xD2 : 0xD3); \
+               bemit_mod_am(ext, node); \
+       } \
+}
+
+SHIFT(rol, 0)
+SHIFT(ror, 1)
+SHIFT(shl, 4)
+SHIFT(shr, 5)
+SHIFT(sar, 7)
+
+static void bemit_cmp8bit(const ir_node *node)
+{
+       ir_node *right = get_irn_n(node, n_ia32_binary_right);
+       if (is_ia32_Immediate(right)) {
+               if (get_ia32_op_type(node) == ia32_Normal) {
+                       const arch_register_t *out = get_in_reg(node, n_ia32_Cmp_left);
+                       if (out->index == REG_EAX) {
+                               bemit8(0x3C);
+                       } else {
+                               bemit8(0x80);
+                               bemit_modru(out, 7);
+                       }
+               } else {
+                       bemit8(0x80);
+                       bemit_mod_am(7, node);
+               }
+               bemit8(get_ia32_immediate_attr_const(right)->offset);
+       } else {
+               bemit8(0x3A);
+               const arch_register_t *out = get_in_reg(node, n_ia32_Cmp_left);
+               if (get_ia32_op_type(node) == ia32_Normal) {
+                       const arch_register_t *in = get_in_reg(node, n_ia32_Cmp_right);
+                       bemit_modrr(out, in);
+               } else {
+                       bemit_mod_am(reg_gp_map[out->index], node);
+               }
+       }
+}
+
+static void bemit_imul(const ir_node *node)
+{
+       ir_node *right = get_irn_n(node, n_ia32_IMul_right);
+       /* Do we need the immediate form? */
+       if (is_ia32_Immediate(right)) {
+               int imm = get_ia32_immediate_attr_const(right)->offset;
+               if (get_signed_imm_size(imm) == 1) {
+                       bemit_unop_reg(node, 0x6B, n_ia32_IMul_left);
+                       bemit8(imm);
+               } else {
+                       bemit_unop_reg(node, 0x69, n_ia32_IMul_left);
+                       bemit32(imm);
+               }
+       } else {
+               bemit8(0x0F);
+               bemit_unop_reg(node, 0xAF, n_ia32_IMul_right);
+       }
 }
 
-UNOP(not, 0xF7, 2)
-UNOP(neg, 0xF7, 3)
+static void bemit_dec(const ir_node *node)
+{
+       const arch_register_t *out = get_out_reg(node, pn_ia32_Dec_res);
+       bemit8(0x48 + reg_gp_map[out->index]);
+}
+
+static void bemit_inc(const ir_node *node)
+{
+       const arch_register_t *out = get_out_reg(node, pn_ia32_Inc_res);
+       bemit8(0x40 + reg_gp_map[out->index]);
+}
+
+#define UNOPMEM(op, code, ext) \
+static void bemit_##op(const ir_node *node) \
+{ \
+       bemit_unop_mem(node, code, ext); \
+}
+
+UNOPMEM(notmem, 0xF6, 2)
+UNOPMEM(negmem, 0xF6, 3)
+UNOPMEM(incmem, 0xFE, 0)
+UNOPMEM(decmem, 0xFE, 1)
+
+static void bemit_set(const ir_node *node)
+{
+       pn_Cmp pnc;
+
+       bemit8(0x0F);
+
+       pnc = get_ia32_condcode(node);
+       pnc = determine_final_pnc(node, n_ia32_Set_eflags, pnc);
+       if (get_ia32_attr_const(node)->data.ins_permuted)
+               pnc = ia32_get_negated_pnc(pnc);
+
+       bemit8(0x90 + pnc2cc(pnc));
+       bemit_modru(get_out_reg(node, pn_ia32_Set_res), 2);
+}
 
+/**
+ * Emit a Lea.
+ */
 static void bemit_lea(const ir_node *node)
 {
        const arch_register_t *out = get_out_reg(node, 0);
@@ -2663,39 +2939,138 @@ static void bemit_lea(const ir_node *node)
        bemit_mod_am(reg_gp_map[out->index], node);
 }
 
-static void bemit_cltd(const ir_node *node)
-{
-       (void) node;
-       bemit8(0x99);
-}
+/**
+ * Emit a single opcode.
+ */
+#define EMIT_SINGLEOP(op, code)                 \
+static void bemit_ ## op(const ir_node *node) { \
+       (void) node;                                \
+       bemit8(code);                               \
+}
+
+//EMIT_SINGLEOP(daa,  0x27)
+//EMIT_SINGLEOP(das,  0x2F)
+//EMIT_SINGLEOP(aaa,  0x37)
+//EMIT_SINGLEOP(aas,  0x3F)
+//EMIT_SINGLEOP(nop,  0x90)
+EMIT_SINGLEOP(cwtl, 0x98)
+EMIT_SINGLEOP(cltd, 0x99)
+//EMIT_SINGLEOP(fwait, 0x9B)
+EMIT_SINGLEOP(sahf, 0x9E)
+//EMIT_SINGLEOP(popf, 0x9D)
+EMIT_SINGLEOP(int3, 0xCC)
+//EMIT_SINGLEOP(iret, 0xCF)
+//EMIT_SINGLEOP(xlat, 0xD7)
+//EMIT_SINGLEOP(lock, 0xF0)
+EMIT_SINGLEOP(rep,  0xF3)
+//EMIT_SINGLEOP(halt, 0xF4)
+EMIT_SINGLEOP(cmc,  0xF5)
+EMIT_SINGLEOP(stc,  0xF9)
+//EMIT_SINGLEOP(cli,  0xFA)
+//EMIT_SINGLEOP(sti,  0xFB)
+//EMIT_SINGLEOP(std,  0xFD)
 
+/**
+ * Emits a MOV out, [MEM].
+ */
 static void bemit_load(const ir_node *node)
 {
        const arch_register_t *out = get_out_reg(node, 0);
 
-       /* TODO: load from constant address to EAX can be encoded
-          as 0xA1 [offset] */
+       if (out->index == REG_EAX) {
+               ir_node   *base      = get_irn_n(node, n_ia32_base);
+               int        has_base  = !is_ia32_NoReg_GP(base);
+               ir_node   *index     = get_irn_n(node, n_ia32_index);
+               int        has_index = !is_ia32_NoReg_GP(index);
+               if (!has_base && !has_index) {
+                       ir_entity *ent  = get_ia32_am_sc(node);
+                       int        offs = get_ia32_am_offs_int(node);
+                       /* load from constant address to EAX can be encoded
+                          as 0xA1 [offset] */
+                       bemit8(0xA1);
+                       bemit_entity(ent, 0, offs, false);
+                       return;
+               }
+       }
        bemit8(0x8B);
        bemit_mod_am(reg_gp_map[out->index], node);
 }
 
+/**
+ * Emits a MOV [mem], in.
+ */
 static void bemit_store(const ir_node *node)
 {
        const ir_node *value = get_irn_n(node, n_ia32_Store_val);
+       unsigned       size  = get_mode_size_bits(get_ia32_ls_mode(node));
 
        if (is_ia32_Immediate(value)) {
-               bemit8(0xC7);
-               bemit_mod_am(0, node);
-               bemit_immediate(value, false);
+               if (size == 8) {
+                       bemit8(0xC6);
+                       bemit_mod_am(0, node);
+                       bemit8(get_ia32_immediate_attr_const(value)->offset);
+               } else if (size == 16) {
+                       bemit8(0x66);
+                       bemit8(0xC7);
+                       bemit_mod_am(0, node);
+                       bemit16(get_ia32_immediate_attr_const(value)->offset);
+               } else {
+                       bemit8(0xC7);
+                       bemit_mod_am(0, node);
+                       bemit_immediate(value, false);
+               }
        } else {
-               /* TODO: store to constant address from EAX can be encoded as
-                  0xA3 [offset]*/
                const arch_register_t *in = get_in_reg(node, n_ia32_Store_val);
-               bemit8(0x89);
+
+               if (in->index == REG_EAX) {
+                       ir_node   *base      = get_irn_n(node, n_ia32_base);
+                       int        has_base  = !is_ia32_NoReg_GP(base);
+                       ir_node   *index     = get_irn_n(node, n_ia32_index);
+                       int        has_index = !is_ia32_NoReg_GP(index);
+                       if (!has_base && !has_index) {
+                               ir_entity *ent  = get_ia32_am_sc(node);
+                               int        offs = get_ia32_am_offs_int(node);
+                               /* store to constant address from EAX can be encoded as
+                                * 0xA2/0xA3 [offset]*/
+                               if (size == 8) {
+                                       bemit8(0xA2);
+                               } else {
+                                       if (size == 16)
+                                               bemit8(0x66);
+                                       bemit8(0xA3);
+                               }
+                               bemit_entity(ent, 0, offs, false);
+                               return;
+                       }
+               }
+
+               if (size == 8) {
+                       bemit8(0x88);
+               } else {
+                       if (size == 16)
+                               bemit8(0x66);
+                       bemit8(0x89);
+               }
                bemit_mod_am(reg_gp_map[in->index], node);
        }
 }
 
+static void bemit_conv_i2i(const ir_node *node)
+{
+       ir_mode  *smaller_mode = get_ia32_ls_mode(node);
+       unsigned  opcode;
+
+       bemit8(0x0F);
+       /*        8 16 bit source
+        * movzx B6 B7
+        * movsx BE BF
+        */
+       opcode = 0xB6;
+       if (mode_is_signed(smaller_mode))           opcode |= 0x08;
+       if (get_mode_size_bits(smaller_mode) == 16) opcode |= 0x01;
+       bemit_unop_reg(node, opcode, n_ia32_Conv_I2I_val);
+}
+
 /**
  * Emit a Push.
  */
@@ -2720,9 +3095,12 @@ static void bemit_push(const ir_node *node)
                        bemit_immediate(value, false);
                        break;
                }
-       } else {
+       } else if (is_ia32_NoReg_GP(value)) {
                bemit8(0xFF);
                bemit_mod_am(6, node);
+       } else {
+               const arch_register_t *reg = get_in_reg(node, n_ia32_Push_val);
+               bemit8(0x50 + reg_gp_map[reg->index]);
        }
 }
 
@@ -2732,12 +3110,13 @@ static void bemit_push(const ir_node *node)
 static void bemit_pop(const ir_node *node)
 {
        const arch_register_t *reg = get_out_reg(node, pn_ia32_Pop_res);
-       if (get_ia32_op_type(node) == ia32_Normal)
-               bemit8(0x58 + reg_gp_map[reg->index]);
-       else {
-               bemit8(0x8F);
-               bemit_mod_am(0, node);
-       }
+       bemit8(0x58 + reg_gp_map[reg->index]); /* register form only: one byte, 0x58 plus the mapped register number; the 0x8F form is emitted by bemit_popmem */
+}
+
+static void bemit_popmem(const ir_node *node) /* emit a pop whose destination is described by the node's address mode */
+{
+       bemit8(0x8F); /* opcode byte */
+       bemit_mod_am(0, node); /* presumably 0 goes into the reg field of the ModR/M byte; bemit_mod_am is defined outside this chunk */
 }
 
 static void bemit_call(const ir_node *node)
@@ -2748,10 +3127,121 @@ static void bemit_call(const ir_node *node)
                bemit8(0xE8);
                bemit_immediate(proc, true);
        } else {
-               panic("indirect call NIY");
+               bemit_unop(node, 0xFF, 2, n_ia32_Call_addr);
+       }
+}
+
+static void bemit_jmp(const ir_node *dest_block) /* emit an unconditional jump to dest_block */
+{
+       bemit8(0xE9); /* opcode byte, followed by the destination */
+       bemit_jmp_destination(dest_block);
+}
+
+static void bemit_jump(const ir_node *node) /* emitter registered for ia32 Jmp nodes */
+{
+       if (can_be_fallthrough(node))
+               return; /* nothing is emitted for a fallthrough */
+
+       bemit_jmp(get_cfop_target_block(node)); /* otherwise jump to the node's control flow target block */
+}
+
+static void bemit_jcc(int pnc, const ir_node *dest_block) /* emit a conditional jump to dest_block for compare result pnc */
+{
+       unsigned char cc = pnc2cc(pnc); /* map the pnc to the condition code nibble */
+       bemit8(0x0F); /* first opcode byte */
+       bemit8(0x80 + cc); /* second opcode byte carries the condition code */
+       bemit_jmp_destination(dest_block);
+}
+
+static void bemit_ia32_jcc(const ir_node *node) /* emitter registered for ia32 Jcc nodes: conditional jump to the true target, then a jump to the false target unless it falls through */
+{
+       int            pnc = get_ia32_condcode(node);
+       int            need_parity_label = 0; /* only set inside the float switch below */
+       const ir_node *proj_true;
+       const ir_node *proj_false;
+       const ir_node *dest_true;
+       const ir_node *dest_false;
+       const ir_node *block; /* NOTE(review): assigned below but never read in this function */
+
+       pnc = determine_final_pnc(node, 0, pnc);
+
+       /* get both Projs; each one is required to exist */
+       proj_true = get_proj(node, pn_ia32_Jcc_true);
+       assert(proj_true && "Jcc without true Proj");
+
+       proj_false = get_proj(node, pn_ia32_Jcc_false);
+       assert(proj_false && "Jcc without false Proj");
+
+       block = get_nodes_block(node);
+
+       if (can_be_fallthrough(proj_true)) {
+               /* exchange both Projs so the second one can be omitted */
+               const ir_node *t = proj_true;
+
+               proj_true  = proj_false;
+               proj_false = t;
+               pnc        = ia32_get_negated_pnc(pnc); /* the targets were swapped, so the condition is negated as well */
+       }
+
+       dest_true  = get_cfop_target_block(proj_true);
+       dest_false = get_cfop_target_block(proj_false);
+
+       if (pnc & ia32_pn_Cmp_float) {
+               panic("Float jump NIY"); /* NOTE(review): if panic() does not return, the rest of this branch is dead code — confirm */
+               /* Some floating point comparisons require a test of the parity flag,
+                * which indicates that the result is unordered */
+               switch (pnc & 15) { /* NOTE(review): the cases below still call the textual ia32_emitf() with assembler strings */
+                       case pn_Cmp_Uo: {
+                               ia32_emitf(proj_true, "\tjp %L\n");
+                               break;
+                       }
+
+                       case pn_Cmp_Leg:
+                               ia32_emitf(proj_true, "\tjnp %L\n");
+                               break;
+
+                       case pn_Cmp_Eq:
+                       case pn_Cmp_Lt:
+                       case pn_Cmp_Le:
+                               /* we need a local label if the false proj is a fallthrough
+                                * as the falseblock might have no label emitted then */
+                               if (can_be_fallthrough(proj_false)) {
+                                       need_parity_label = 1;
+                                       ia32_emitf(proj_false, "\tjp 1f\n");
+                               } else {
+                                       ia32_emitf(proj_false, "\tjp %L\n");
+                               }
+                               goto emit_jcc;
+
+                       case pn_Cmp_Ug:
+                       case pn_Cmp_Uge:
+                       case pn_Cmp_Ne:
+                               ia32_emitf(proj_true, "\tjp %L\n");
+                               goto emit_jcc;
+
+                       default:
+                               goto emit_jcc;
+               }
+       } else {
+emit_jcc: /* reached directly for non-float compares and via goto from the float cases above */
+               bemit_jcc(pnc, dest_true);
+       }
+
+       if (need_parity_label) {
+               panic("parity label NIY"); /* the local label for the parity jump is not implemented in this emitter */
+       }
+
+       /* the second Proj might be a fallthrough */
+       if (can_be_fallthrough(proj_false)) {
+               /* it's a fallthrough: no jump is emitted */
+       } else {
+               bemit_jmp(dest_false);
        }
 }
 
+/**
+ * Emits a return.
+ */
 static void bemit_return(const ir_node *node)
 {
        unsigned pop = be_Return_get_pop(node);
@@ -2766,27 +3256,32 @@ static void bemit_return(const ir_node *node)
 
 static void bemit_incsp(const ir_node *node)
 {
-       const arch_register_t *reg  = get_out_reg(node, 0);
-       int                    offs = be_get_IncSP_offset(node);
-       unsigned               size = get_signed_imm_size(offs);
-       unsigned char          w    = size == 1 ? 2 : 0;
+       int                    offs;
+       const arch_register_t *reg;
+       unsigned               size;
+       unsigned               ext;
+
+       offs = be_get_IncSP_offset(node);
+       if (offs == 0)
+               return; /* a zero adjustment emits nothing */
 
-       bemit8(0x81 | w);
        if (offs > 0) {
+               ext = 5; /* sub */
+       } else {
+               ext = 0; /* add */
+               offs = -offs; /* continue with the magnitude; the direction is carried by ext */
+       }
 
-               bemit_modru(reg, 5); /* sub */
-               if (size == 8) {
-                       bemit8(offs);
-               } else {
-                       bemit32(offs);
-               }
-       } else if (offs < 0) {
-               bemit_modru(reg, 0); /* add */
-               if (size == 8) {
-                       bemit8(-offs);
-               } else {
-                       bemit32(-offs);
-               }
+       size = get_signed_imm_size(offs); /* measured on the now positive offset */
+       bemit8(size == 1 ? 0x83 : 0x81); /* 0x83 is followed by an 8 bit immediate, 0x81 by a 32 bit one (see below) */
+
+       reg  = get_out_reg(node, 0);
+       bemit_modru(reg, ext); /* ext selects sub (5) or add (0) on the output register */
+
+       if (size == 1) {
+               bemit8(offs);
+       } else {
+               bemit32(offs);
        }
 }
 
@@ -2809,36 +3304,86 @@ static void ia32_register_binary_emitters(void)
        clear_irp_opcodes_generic_func();
 
        /* benode emitter */
-       register_emitter(op_be_Copy, bemit_copy);
-       register_emitter(op_be_Return, bemit_return);
-       register_emitter(op_be_IncSP, bemit_incsp);
-       register_emitter(op_ia32_Add, bemit_add);
-       register_emitter(op_ia32_Adc, bemit_adc);
-       register_emitter(op_ia32_And, bemit_and);
-       register_emitter(op_ia32_Or, bemit_or);
-       register_emitter(op_ia32_Cmp, bemit_cmp);
-       register_emitter(op_ia32_Call, bemit_call);
-       register_emitter(op_ia32_Cltd, bemit_cltd);
-       register_emitter(op_ia32_Sub, bemit_sub);
-       register_emitter(op_ia32_Sbb, bemit_sbb);
-       register_emitter(op_ia32_Xor0, bemit_xor0);
-       register_emitter(op_ia32_Xor, bemit_xor);
-       register_emitter(op_ia32_Const, bemit_mov_const);
-       register_emitter(op_ia32_Lea, bemit_lea);
-       register_emitter(op_ia32_Load, bemit_load);
-       register_emitter(op_ia32_Not, bemit_not);
-       register_emitter(op_ia32_Neg, bemit_neg);
-       register_emitter(op_ia32_Push, bemit_push);
-       register_emitter(op_ia32_Pop, bemit_pop);
-       register_emitter(op_ia32_Store, bemit_store);
+       register_emitter(op_be_Copy,           bemit_copy);
+       register_emitter(op_be_CopyKeep,       bemit_copy);
+       register_emitter(op_be_IncSP,          bemit_incsp);
+       register_emitter(op_be_Perm,           bemit_perm);
+       register_emitter(op_be_Return,         bemit_return);
+       register_emitter(op_ia32_Adc,          bemit_adc);
+       register_emitter(op_ia32_Add,          bemit_add);
+       register_emitter(op_ia32_AddMem,       bemit_addmem);
+       register_emitter(op_ia32_AddMem8Bit,   bemit_addmem8bit);
+       register_emitter(op_ia32_And,          bemit_and);
+       register_emitter(op_ia32_AndMem,       bemit_andmem);
+       register_emitter(op_ia32_AndMem8Bit,   bemit_andmem8bit);
+       register_emitter(op_ia32_Breakpoint,   bemit_int3);
+       register_emitter(op_ia32_Call,         bemit_call);
+       register_emitter(op_ia32_Cltd,         bemit_cltd);
+       register_emitter(op_ia32_Cmc,          bemit_cmc);
+       register_emitter(op_ia32_Cmp,          bemit_cmp);
+       register_emitter(op_ia32_Cmp8Bit,      bemit_cmp8bit);
+       register_emitter(op_ia32_Const,        bemit_mov_const);
+       register_emitter(op_ia32_Conv_I2I,     bemit_conv_i2i);
+       register_emitter(op_ia32_Conv_I2I8Bit, bemit_conv_i2i);
+       register_emitter(op_ia32_Cwtl,         bemit_cwtl);
+       register_emitter(op_ia32_Dec,          bemit_dec);
+       register_emitter(op_ia32_DecMem,       bemit_decmem);
+       register_emitter(op_ia32_Div,          bemit_div);
+       register_emitter(op_ia32_IDiv,         bemit_idiv);
+       register_emitter(op_ia32_IJmp,         bemit_ijmp);
+       register_emitter(op_ia32_IMul,         bemit_imul);
+       register_emitter(op_ia32_IMul1OP,      bemit_imul1op);
+       register_emitter(op_ia32_Inc,          bemit_inc);
+       register_emitter(op_ia32_IncMem,       bemit_incmem);
+       register_emitter(op_ia32_Jcc,          bemit_ia32_jcc);
+       register_emitter(op_ia32_Jmp,          bemit_jump);
+       register_emitter(op_ia32_Lea,          bemit_lea);
+       register_emitter(op_ia32_Load,         bemit_load);
+       register_emitter(op_ia32_Mul,          bemit_mul);
+       register_emitter(op_ia32_Neg,          bemit_neg);
+       register_emitter(op_ia32_NegMem,       bemit_negmem);
+       register_emitter(op_ia32_Not,          bemit_not);
+       register_emitter(op_ia32_NotMem,       bemit_notmem);
+       register_emitter(op_ia32_Or,           bemit_or);
+       register_emitter(op_ia32_OrMem,        bemit_ormem);
+       register_emitter(op_ia32_OrMem8Bit,    bemit_ormem8bit);
+       register_emitter(op_ia32_Pop,          bemit_pop);
+       register_emitter(op_ia32_PopEbp,       bemit_pop);
+       register_emitter(op_ia32_PopMem,       bemit_popmem);
+       register_emitter(op_ia32_Push,         bemit_push);
+       register_emitter(op_ia32_RepPrefix,    bemit_rep);
+       register_emitter(op_ia32_Rol,          bemit_rol);
+       register_emitter(op_ia32_RolMem,       bemit_rolmem);
+       register_emitter(op_ia32_Ror,          bemit_ror);
+       register_emitter(op_ia32_RorMem,       bemit_rormem);
+       register_emitter(op_ia32_Sahf,         bemit_sahf);
+       register_emitter(op_ia32_Sar,          bemit_sar);
+       register_emitter(op_ia32_SarMem,       bemit_sarmem);
+       register_emitter(op_ia32_Sbb,          bemit_sbb);
+       register_emitter(op_ia32_Set,          bemit_set);
+       register_emitter(op_ia32_Shl,          bemit_shl);
+       register_emitter(op_ia32_ShlMem,       bemit_shlmem);
+       register_emitter(op_ia32_Shr,          bemit_shr);
+       register_emitter(op_ia32_ShrMem,       bemit_shrmem);
+       register_emitter(op_ia32_Stc,          bemit_stc);
+       register_emitter(op_ia32_Store,        bemit_store);
+       register_emitter(op_ia32_Store8Bit,    bemit_store);
+       register_emitter(op_ia32_Sub,          bemit_sub);
+       register_emitter(op_ia32_SubMem,       bemit_submem);
+       register_emitter(op_ia32_SubMem8Bit,   bemit_submem8bit);
+       register_emitter(op_ia32_Test,         bemit_test);
+       register_emitter(op_ia32_Xor,          bemit_xor);
+       register_emitter(op_ia32_Xor0,         bemit_xor0);
+       register_emitter(op_ia32_XorMem,       bemit_xormem);
+       register_emitter(op_ia32_XorMem8Bit,   bemit_xormem8bit);
 
        /* ignore the following nodes */
-       register_emitter(op_ia32_ProduceVal, emit_Nothing);
-       register_emitter(op_be_Barrier, emit_Nothing);
-       register_emitter(op_be_Keep, emit_Nothing);
-       register_emitter(op_be_Start, emit_Nothing);
-       register_emitter(op_Phi, emit_Nothing);
-       register_emitter(op_Start, emit_Nothing);
+       register_emitter(op_ia32_ProduceVal,   emit_Nothing);
+       register_emitter(op_be_Barrier,        emit_Nothing);
+       register_emitter(op_be_Keep,           emit_Nothing);
+       register_emitter(op_be_Start,          emit_Nothing);
+       register_emitter(op_Phi,               emit_Nothing);
+       register_emitter(op_Start,             emit_Nothing);
 }
 
 static void gen_binary_block(ir_node *block)