besched: Add and use sched_replace().
[libfirm] / ir / be / ia32 / ia32_x87.c
index 41f67cd..9694819 100644 (file)
 #include "ia32_x87.h"
 #include "ia32_architecture.h"
 
+#define N_FLOAT_REGS  (N_ia32_fp_REGS-1)  // exclude NOREG
+
 /** the debug handle */
 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
 
 /* Forward declaration. */
 typedef struct x87_simulator x87_simulator;
 
-/**
- * An exchange template.
- * Note that our virtual functions have the same inputs
- * and attributes as the real ones, so we can simple exchange
- * their opcodes!
- * Further, x87 supports inverse instructions, so we can handle them.
- */
-typedef struct exchange_tmpl {
-       ir_op *normal_op;       /**< the normal one */
-       ir_op *reverse_op;      /**< the reverse one if exists */
-       ir_op *normal_pop_op;   /**< the normal one with tos pop */
-       ir_op *reverse_pop_op;  /**< the reverse one with tos pop */
-} exchange_tmpl;
-
 /**
  * An entry on the simulated x87 stack.
  */
@@ -84,9 +72,9 @@ typedef struct st_entry {
  * The x87 state.
  */
 typedef struct x87_state {
-       st_entry       st[N_ia32_st_REGS]; /**< the register stack */
-       int            depth;              /**< the current stack depth */
-       x87_simulator *sim;                /**< The simulator. */
+       st_entry       st[N_FLOAT_REGS]; /**< the register stack */
+       int            depth;            /**< the current stack depth */
+       x87_simulator *sim;              /**< The simulator. */
 } x87_state;
 
 /** An empty state, used for blocks without fp instructions. */
@@ -121,8 +109,8 @@ typedef struct blk_state {
        x87_state *end;     /**< state at the end or NULL if not assigned */
 } blk_state;
 
-/** liveness bitset for vfp registers. */
-typedef unsigned char vfp_liveness;
+/** liveness bitset for fp registers. */
+typedef unsigned char fp_liveness;
 
 /**
  * The x87 simulator.
@@ -131,7 +119,7 @@ struct x87_simulator {
        struct obstack obst;       /**< An obstack for fast allocating. */
        pmap          *blk_states; /**< Map blocks to states. */
        be_lv_t       *lv;         /**< intrablock liveness. */
-       vfp_liveness  *live;       /**< Liveness information. */
+       fp_liveness   *live;       /**< Liveness information. */
        unsigned       n_idx;      /**< The cached get_irg_last_idx() result. */
        waitq         *worklist;   /**< Worklist of blocks that must be processed. */
 };
@@ -151,7 +139,7 @@ static int x87_get_depth(const x87_state *state)
 static st_entry *x87_get_entry(x87_state *const state, int const pos)
 {
        assert(0 <= pos && pos < state->depth);
-       return &state->st[N_ia32_st_REGS - state->depth + pos];
+       return &state->st[N_FLOAT_REGS - state->depth + pos];
 }
 
 /**
@@ -160,7 +148,7 @@ static st_entry *x87_get_entry(x87_state *const state, int const pos)
  * @param state  the x87 state
  * @param pos    a stack position
  *
- * @return the vfp register index that produced the value at st(pos)
+ * @return the fp register index that produced the value at st(pos)
  */
 static int x87_get_st_reg(const x87_state *state, int pos)
 {
@@ -187,8 +175,8 @@ static void x87_dump_stack(const x87_state *state)
  * Set a virtual register to st(pos).
  *
  * @param state    the x87 state
- * @param reg_idx  the vfp register index that should be set
- * @param node     the IR node that produces the value of the vfp register
+ * @param reg_idx  the fp register index that should be set
+ * @param node     the IR node that produces the value of the fp register
  * @param pos      the stack position where the new value should be entered
  */
 static void x87_set_st(x87_state *state, int reg_idx, ir_node *node, int pos)
@@ -201,18 +189,6 @@ static void x87_set_st(x87_state *state, int reg_idx, ir_node *node, int pos)
        DEBUG_ONLY(x87_dump_stack(state);)
 }
 
-/**
- * Set the tos virtual register.
- *
- * @param state    the x87 state
- * @param reg_idx  the vfp register index that should be set
- * @param node     the IR node that produces the value of the vfp register
- */
-static void x87_set_tos(x87_state *state, int reg_idx, ir_node *node)
-{
-       x87_set_st(state, reg_idx, node, 0);
-}
-
 /**
  * Swap st(0) with st(pos).
  *
@@ -235,7 +211,7 @@ static void x87_fxch(x87_state *state, int pos)
  * Convert a virtual register to the stack index.
  *
  * @param state    the x87 state
- * @param reg_idx  the register vfp index
+ * @param reg_idx  the register fp index
  *
  * @return the stack position where the register is stacked
  *         or -1 if the virtual register was not found
@@ -253,13 +229,13 @@ static int x87_on_stack(const x87_state *state, int reg_idx)
  * Push a virtual Register onto the stack, double pushes are NOT allowed.
  *
  * @param state     the x87 state
- * @param reg_idx   the register vfp index
- * @param node      the node that produces the value of the vfp register
+ * @param reg_idx   the register fp index
+ * @param node      the node that produces the value of the fp register
  */
 static void x87_push(x87_state *state, int reg_idx, ir_node *node)
 {
        assert(x87_on_stack(state, reg_idx) == -1 && "double push");
-       assert(state->depth < N_ia32_st_REGS && "stack overrun");
+       assert(state->depth < N_FLOAT_REGS && "stack overrun");
 
        ++state->depth;
        st_entry *const entry = x87_get_entry(state, 0);
@@ -331,37 +307,6 @@ static x87_state *x87_clone_state(x87_simulator *sim, const x87_state *src)
        return res;
 }
 
-/**
- * Patch a virtual instruction into a x87 one and return
- * the node representing the result value.
- *
- * @param n   the IR node to patch
- * @param op  the x87 opcode to patch in
- */
-static ir_node *x87_patch_insn(ir_node *n, ir_op *op)
-{
-       ir_mode *mode = get_irn_mode(n);
-       ir_node *res = n;
-
-       set_irn_op(n, op);
-
-       if (mode == mode_T) {
-               /* patch all Proj's */
-               foreach_out_edge(n, edge) {
-                       ir_node *proj = get_edge_src_irn(edge);
-                       if (is_Proj(proj)) {
-                               mode = get_irn_mode(proj);
-                               if (mode_is_float(mode)) {
-                                       res = proj;
-                                       set_irn_mode(proj, ia32_reg_classes[CLASS_ia32_st].mode);
-                               }
-                       }
-               }
-       } else if (mode_is_float(mode))
-               set_irn_mode(n, ia32_reg_classes[CLASS_ia32_st].mode);
-       return res;
-}
-
 /**
  * Returns the first Proj of a mode_T node having a given mode.
  *
@@ -389,7 +334,7 @@ static inline const arch_register_t *x87_get_irn_register(const ir_node *irn)
 {
        const arch_register_t *res = arch_get_irn_register(irn);
 
-       assert(res->reg_class == &ia32_reg_classes[CLASS_ia32_vfp]);
+       assert(res->reg_class == &ia32_reg_classes[CLASS_ia32_fp]);
        return res;
 }
 
@@ -398,7 +343,7 @@ static inline const arch_register_t *x87_irn_get_register(const ir_node *irn,
 {
        const arch_register_t *res = arch_get_irn_register_out(irn, pos);
 
-       assert(res->reg_class == &ia32_reg_classes[CLASS_ia32_vfp]);
+       assert(res->reg_class == &ia32_reg_classes[CLASS_ia32_fp]);
        return res;
 }
 
@@ -421,13 +366,12 @@ static void x87_create_fxch(x87_state *state, ir_node *n, int pos)
        ir_node         *const block = get_nodes_block(n);
        ir_node         *const fxch  = new_bd_ia32_fxch(NULL, block);
        ia32_x87_attr_t *const attr  = get_ia32_x87_attr(fxch);
-       attr->x87[0] = get_st_reg(pos);
-       attr->x87[2] = get_st_reg(0);
+       attr->reg = get_st_reg(pos);
 
        keep_alive(fxch);
 
        sched_add_before(n, fxch);
-       DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fxch), attr->x87[0]->name, attr->x87[2]->name));
+       DB((dbg, LEVEL_1, "<<< %s %s\n", get_irn_opname(fxch), attr->reg->name));
 }
 
 /* -------------- x87 perm --------------- */
@@ -566,45 +510,42 @@ static void x87_create_fpush(x87_state *state, ir_node *n, int pos, int const ou
 
        ir_node         *const fpush = new_bd_ia32_fpush(NULL, get_nodes_block(n));
        ia32_x87_attr_t *const attr  = get_ia32_x87_attr(fpush);
-       attr->x87[0] = get_st_reg(pos);
-       attr->x87[2] = get_st_reg(0);
+       attr->reg = get_st_reg(pos);
 
        keep_alive(fpush);
        sched_add_before(n, fpush);
 
-       DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fpush), attr->x87[0]->name, attr->x87[2]->name));
+       DB((dbg, LEVEL_1, "<<< %s %s\n", get_irn_opname(fpush), attr->reg->name));
 }
 
 /**
  * Create a fpop before node n.
+ * This overwrites st(pos) with st(0) and pops st(0).
  *
  * @param state   the x87 state
  * @param n       the node after the fpop
- * @param num     pop 1 or 2 values
+ * @param pos     the index of the entry to remove from the register stack
  *
  * @return the fpop node
  */
-static ir_node *x87_create_fpop(x87_state *state, ir_node *n, int num)
+static ir_node *x87_create_fpop(x87_state *const state, ir_node *const n, int const pos)
 {
-       ir_node         *fpop = NULL;
-       ia32_x87_attr_t *attr;
-
-       assert(num > 0);
-       do {
-               x87_pop(state);
-               if (ia32_cg_config.use_ffreep)
-                       fpop = new_bd_ia32_ffreep(NULL, get_nodes_block(n));
-               else
-                       fpop = new_bd_ia32_fpop(NULL, get_nodes_block(n));
-               attr = get_ia32_x87_attr(fpop);
-               attr->x87[0] = get_st_reg(0);
-               attr->x87[1] = get_st_reg(0);
-               attr->x87[2] = get_st_reg(0);
-
-               keep_alive(fpop);
-               sched_add_before(n, fpop);
-               DB((dbg, LEVEL_1, "<<< %s %s\n", get_irn_opname(fpop), attr->x87[0]->name));
-       } while (--num > 0);
+       if (pos != 0) {
+               st_entry *const dst = x87_get_entry(state, pos);
+               st_entry *const src = x87_get_entry(state, 0);
+               *dst = *src;
+       }
+       x87_pop(state);
+       ir_node *const block = get_nodes_block(n);
+       ir_node *const fpop  = pos == 0 && ia32_cg_config.use_ffreep ?
+               new_bd_ia32_ffreep(NULL, block) :
+               new_bd_ia32_fpop(  NULL, block);
+       ia32_x87_attr_t *const attr = get_ia32_x87_attr(fpop);
+       attr->reg = get_st_reg(pos);
+
+       keep_alive(fpop);
+       sched_add_before(n, fpop);
+       DB((dbg, LEVEL_1, "<<< %s %s\n", get_irn_opname(fpop), attr->reg->name));
        return fpop;
 }
 
@@ -621,34 +562,18 @@ static ir_node *x87_create_fpop(x87_state *state, ir_node *n, int num)
  *
  * @return The live bitset.
  */
-static vfp_liveness vfp_liveness_transfer(ir_node *irn, vfp_liveness live)
+static fp_liveness fp_liveness_transfer(ir_node *irn, fp_liveness live)
 {
-       int i, n;
-       const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
-
-       if (get_irn_mode(irn) == mode_T) {
-               foreach_out_edge(irn, edge) {
-                       ir_node *proj = get_edge_src_irn(edge);
-
-                       if (arch_irn_consider_in_reg_alloc(cls, proj)) {
-                               const arch_register_t *reg = x87_get_irn_register(proj);
-                               live &= ~(1 << arch_register_get_index(reg));
-                       }
-               }
-       } else if (arch_irn_consider_in_reg_alloc(cls, irn)) {
-               const arch_register_t *reg = x87_get_irn_register(irn);
-               live &= ~(1 << arch_register_get_index(reg));
-       }
-
-       for (i = 0, n = get_irn_arity(irn); i < n; ++i) {
-               ir_node *op = get_irn_n(irn, i);
-
-               if (mode_is_float(get_irn_mode(op)) &&
-                               arch_irn_consider_in_reg_alloc(cls, op)) {
-                       const arch_register_t *reg = x87_get_irn_register(op);
-                       live |= 1 << arch_register_get_index(reg);
-               }
-       }
+       const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_fp];
+
+       be_foreach_definition(irn, cls, def, req,
+               const arch_register_t *reg = x87_get_irn_register(def);
+               live &= ~(1 << reg->index);
+       );
+       be_foreach_use(irn, cls, in_req_, op, op_req_,
+               const arch_register_t *reg = x87_get_irn_register(op);
+               live |= 1 << reg->index;
+       );
        return live;
 }
 
@@ -660,26 +585,22 @@ static vfp_liveness vfp_liveness_transfer(ir_node *irn, vfp_liveness live)
  *
  * @return The live bitset at the end of this block
  */
-static vfp_liveness vfp_liveness_end_of_block(x87_simulator *sim, const ir_node *block)
+static fp_liveness fp_liveness_end_of_block(x87_simulator *sim, const ir_node *block)
 {
-       vfp_liveness live = 0;
-       const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
+       fp_liveness live = 0;
+       const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_fp];
        const be_lv_t *lv = sim->lv;
 
-       be_lv_foreach(lv, block, be_lv_state_end, node) {
-               const arch_register_t *reg;
-               if (!arch_irn_consider_in_reg_alloc(cls, node))
-                       continue;
-
-               reg = x87_get_irn_register(node);
-               live |= 1 << arch_register_get_index(reg);
+       be_lv_foreach_cls(lv, block, be_lv_state_end, cls, node) {
+               const arch_register_t *reg = x87_get_irn_register(node);
+               live |= 1 << reg->index;
        }
 
        return live;
 }
 
 /** get the register mask from an arch_register */
-#define REGMASK(reg)    (1 << (arch_register_get_index(reg)))
+#define REGMASK(reg)    (1 << (reg->index))
 
 /**
  * Return a bitset of argument registers which are live at the end of a node.
@@ -690,7 +611,7 @@ static vfp_liveness vfp_liveness_end_of_block(x87_simulator *sim, const ir_node
  *
  * @return The live bitset.
  */
-static unsigned vfp_live_args_after(x87_simulator *sim, const ir_node *pos, unsigned kill)
+static unsigned fp_live_args_after(x87_simulator *sim, const ir_node *pos, unsigned kill)
 {
        unsigned idx = get_irn_idx(pos);
 
@@ -706,7 +627,7 @@ static unsigned vfp_live_args_after(x87_simulator *sim, const ir_node *pos, unsi
  */
 static void update_liveness(x87_simulator *sim, ir_node *block)
 {
-       vfp_liveness live = vfp_liveness_end_of_block(sim, block);
+       fp_liveness live = fp_liveness_end_of_block(sim, block);
        unsigned idx;
 
        /* now iterate through the block backward and cache the results */
@@ -718,7 +639,7 @@ static void update_liveness(x87_simulator *sim, ir_node *block)
                idx = get_irn_idx(irn);
                sim->live[idx] = live;
 
-               live = vfp_liveness_transfer(irn, live);
+               live = fp_liveness_transfer(irn, live);
        }
        idx = get_irn_idx(block);
        sim->live[idx] = live;
@@ -727,10 +648,10 @@ static void update_liveness(x87_simulator *sim, ir_node *block)
 /**
  * Returns true if a register is live in a set.
  *
- * @param reg_idx  the vfp register index
+ * @param reg_idx  the fp register index
  * @param live     a live bitset
  */
-#define is_vfp_live(reg_idx, live) ((live) & (1 << (reg_idx)))
+#define is_fp_live(reg_idx, live) ((live) & (1 << (reg_idx)))
 
 #ifdef DEBUG_libfirm
 /**
@@ -738,7 +659,7 @@ static void update_liveness(x87_simulator *sim, ir_node *block)
  *
  * @param live  the live bitset
  */
-static void vfp_dump_live(vfp_liveness live)
+static void fp_dump_live(fp_liveness live)
 {
        int i;
 
@@ -759,129 +680,99 @@ static void vfp_dump_live(vfp_liveness live)
  *
  * @param state  the x87 state
  * @param n      the node that should be simulated (and patched)
- * @param tmpl   the template containing the 4 possible x87 opcodes
  *
  * @return NO_NODE_ADDED
  */
-static int sim_binop(x87_state *state, ir_node *n, const exchange_tmpl *tmpl)
+static int sim_binop(x87_state *const state, ir_node *const n)
 {
-       int op2_idx = 0, op1_idx;
-       int out_idx, do_pop = 0;
-       ia32_x87_attr_t *attr;
-       int permuted;
-       ir_node *patched_insn;
-       ir_op *dst;
        x87_simulator         *sim     = state->sim;
        ir_node               *op1     = get_irn_n(n, n_ia32_binary_left);
        ir_node               *op2     = get_irn_n(n, n_ia32_binary_right);
        const arch_register_t *op1_reg = x87_get_irn_register(op1);
        const arch_register_t *op2_reg = x87_get_irn_register(op2);
        const arch_register_t *out     = x87_irn_get_register(n, pn_ia32_res);
-       int reg_index_1                = arch_register_get_index(op1_reg);
-       int reg_index_2                = arch_register_get_index(op2_reg);
-       vfp_liveness           live    = vfp_live_args_after(sim, n, REGMASK(out));
+       int reg_index_1                = op1_reg->index;
+       int reg_index_2                = op2_reg->index;
+       fp_liveness            live    = fp_live_args_after(sim, n, REGMASK(out));
        int                    op1_live_after;
        int                    op2_live_after;
 
-       DB((dbg, LEVEL_1, ">>> %+F %s, %s -> %s\n", n,
-               arch_register_get_name(op1_reg), arch_register_get_name(op2_reg),
-               arch_register_get_name(out)));
-       DEBUG_ONLY(vfp_dump_live(live);)
+       DB((dbg, LEVEL_1, ">>> %+F %s, %s -> %s\n", n, op1_reg->name, op2_reg->name, out->name));
+       DEBUG_ONLY(fp_dump_live(live);)
        DB((dbg, LEVEL_1, "Stack before: "));
        DEBUG_ONLY(x87_dump_stack(state);)
 
-       op1_idx = x87_on_stack(state, reg_index_1);
+       int op1_idx = x87_on_stack(state, reg_index_1);
        assert(op1_idx >= 0);
-       op1_live_after = is_vfp_live(reg_index_1, live);
-
-       attr     = get_ia32_x87_attr(n);
-       permuted = attr->attr.data.ins_permuted;
-
-       int const out_reg_idx = arch_register_get_index(out);
-       if (reg_index_2 != REG_VFP_VFP_NOREG) {
-               assert(!permuted);
-
-               /* second operand is a vfp register */
+       op1_live_after = is_fp_live(reg_index_1, live);
+
+       int                    op2_idx;
+       int                    out_idx;
+       bool                   pop         = false;
+       int              const out_reg_idx = out->index;
+       ia32_x87_attr_t *const attr        = get_ia32_x87_attr(n);
+       if (reg_index_2 != REG_FP_FP_NOREG) {
+               /* second operand is a fp register */
                op2_idx = x87_on_stack(state, reg_index_2);
                assert(op2_idx >= 0);
-               op2_live_after = is_vfp_live(reg_index_2, live);
+               op2_live_after = is_fp_live(reg_index_2, live);
 
                if (op2_live_after) {
                        /* Second operand is live. */
 
                        if (op1_live_after) {
                                /* Both operands are live: push the first one.
-                                  This works even for op1 == op2. */
-                               x87_create_fpush(state, n, op1_idx, out_reg_idx, op2);
+                                * This works even for op1 == op2. */
+                               x87_create_fpush(state, n, op1_idx, out_reg_idx, op1);
                                /* now do fxxx (tos=tos X op) */
                                op1_idx = 0;
                                op2_idx += 1;
                                out_idx = 0;
-                               dst = tmpl->normal_op;
                        } else {
-                               /* Second live, first operand is dead here, bring it to tos. */
-                               if (op1_idx != 0) {
+                               /* Second live, first operand is dead: Overwrite first. */
+                               if (op1_idx != 0 && op2_idx != 0) {
+                                       /* Bring one operand to tos. */
                                        x87_create_fxch(state, n, op1_idx);
-                                       if (op2_idx == 0)
-                                               op2_idx = op1_idx;
                                        op1_idx = 0;
                                }
-                               /* now do fxxx (tos=tos X op) */
-                               out_idx = 0;
-                               dst = tmpl->normal_op;
+                               out_idx = op1_idx;
                        }
                } else {
                        /* Second operand is dead. */
                        if (op1_live_after) {
-                               /* First operand is live: bring second to tos. */
-                               if (op2_idx != 0) {
+                               /* First operand is live, second is dead: Overwrite second. */
+                               if (op1_idx != 0 && op2_idx != 0) {
+                                       /* Bring one operand to tos. */
                                        x87_create_fxch(state, n, op2_idx);
-                                       if (op1_idx == 0)
-                                               op1_idx = op2_idx;
                                        op2_idx = 0;
                                }
-                               /* now do fxxxr (tos = op X tos) */
-                               out_idx = 0;
-                               dst = tmpl->reverse_op;
+                               out_idx = op2_idx;
                        } else {
-                               /* Both operands are dead here, pop them from the stack. */
-                               if (op2_idx == 0) {
-                                       if (op1_idx == 0) {
-                                               /* Both are identically and on tos, no pop needed. */
-                                               /* here fxxx (tos = tos X tos) */
-                                               dst = tmpl->normal_op;
-                                               out_idx = 0;
-                                       } else {
-                                               /* now do fxxxp (op = op X tos, pop) */
-                                               dst = tmpl->normal_pop_op;
-                                               do_pop = 1;
-                                               out_idx = op1_idx;
-                                       }
-                               } else if (op1_idx == 0) {
-                                       assert(op1_idx != op2_idx);
-                                       /* now do fxxxrp (op = tos X op, pop) */
-                                       dst = tmpl->reverse_pop_op;
-                                       do_pop = 1;
-                                       out_idx = op2_idx;
-                               } else {
-                                       /* Bring the second on top. */
-                                       x87_create_fxch(state, n, op2_idx);
-                                       if (op1_idx == op2_idx) {
-                                               /* Both are identically and on tos now, no pop needed. */
+                               /* Both operands are dead. */
+                               if (op1_idx == op2_idx) {
+                                       /* Operands are identical: no pop. */
+                                       if (op1_idx != 0) {
+                                               x87_create_fxch(state, n, op1_idx);
                                                op1_idx = 0;
                                                op2_idx = 0;
-                                               /* use fxxx (tos = tos X tos) */
-                                               dst = tmpl->normal_op;
-                                               out_idx = 0;
-                                       } else {
-                                               /* op2 is on tos now */
-                                               op2_idx = 0;
-                                               /* use fxxxp (op = op X tos, pop) */
-                                               dst = tmpl->normal_pop_op;
-                                               out_idx = op1_idx;
-                                               do_pop = 1;
                                        }
+                               } else {
+                                       if (op1_idx != 0 && op2_idx != 0) {
+                                               /* Bring one operand to tos. Heuristically swap the operand not at
+                                                * st(1) to tos. This way, if any operand was at st(1), the result
+                                                * will end up in the new st(0) after the implicit pop. If the next
+                                                * operation uses the result, then no fxch will be necessary. */
+                                               if (op1_idx != 1) {
+                                                       x87_create_fxch(state, n, op1_idx);
+                                                       op1_idx = 0;
+                                               } else {
+                                                       x87_create_fxch(state, n, op2_idx);
+                                                       op2_idx = 0;
+                                               }
+                                       }
+                                       pop = true;
                                }
+                               out_idx = op1_idx != 0 ? op1_idx : op2_idx;
                        }
                }
        } else {
@@ -889,42 +780,36 @@ static int sim_binop(x87_state *state, ir_node *n, const exchange_tmpl *tmpl)
                if (op1_live_after) {
                        /* first operand is live: push it here */
                        x87_create_fpush(state, n, op1_idx, out_reg_idx, op1);
-                       op1_idx = 0;
                } else {
                        /* first operand is dead: bring it to tos */
-                       if (op1_idx != 0) {
+                       if (op1_idx != 0)
                                x87_create_fxch(state, n, op1_idx);
-                               op1_idx = 0;
-                       }
                }
 
-               /* use fxxx (tos = tos X mem) */
-               dst = permuted ? tmpl->reverse_op : tmpl->normal_op;
+               op1_idx = attr->attr.data.ins_permuted ? -1 :  0;
+               op2_idx = attr->attr.data.ins_permuted ?  0 : -1;
                out_idx = 0;
        }
+       assert(op1_idx == 0       || op2_idx == 0);
+       assert(out_idx == op1_idx || out_idx == op2_idx);
 
-       patched_insn = x87_patch_insn(n, dst);
-       x87_set_st(state, out_reg_idx, patched_insn, out_idx);
-       if (do_pop) {
+       x87_set_st(state, out_reg_idx, n, out_idx);
+       if (pop)
                x87_pop(state);
-       }
 
        /* patch the operation */
-       attr->x87[0] = op1_reg = get_st_reg(op1_idx);
-       if (reg_index_2 != REG_VFP_VFP_NOREG) {
-               attr->x87[1] = op2_reg = get_st_reg(op2_idx);
-       }
-       attr->x87[2] = out = get_st_reg(out_idx);
-
-       if (reg_index_2 != REG_VFP_VFP_NOREG) {
-               DB((dbg, LEVEL_1, "<<< %s %s, %s -> %s\n", get_irn_opname(n),
-                       arch_register_get_name(op1_reg), arch_register_get_name(op2_reg),
-                       arch_register_get_name(out)));
-       } else {
-               DB((dbg, LEVEL_1, "<<< %s %s, [AM] -> %s\n", get_irn_opname(n),
-                       arch_register_get_name(op1_reg),
-                       arch_register_get_name(out)));
-       }
+       int const reg_idx = op1_idx != 0 ? op1_idx : op2_idx;
+       attr->reg                    = reg_idx >= 0 ? get_st_reg(reg_idx) : NULL;
+       attr->attr.data.ins_permuted = op1_idx != 0;
+       attr->res_in_reg             = out_idx != 0;
+       attr->pop                    = pop;
+
+       DEBUG_ONLY(
+               char const *const l = op1_idx >= 0 ? get_st_reg(op1_idx)->name : "[AM]";
+               char const *const r = op2_idx >= 0 ? get_st_reg(op2_idx)->name : "[AM]";
+               char const *const o = get_st_reg(out_idx)->name;
+               DB((dbg, LEVEL_1, "<<< %s %s, %s -> %s\n", get_irn_opname(n), l, r, o));
+       )
 
        return NO_NODE_ADDED;
 }
@@ -934,23 +819,22 @@ static int sim_binop(x87_state *state, ir_node *n, const exchange_tmpl *tmpl)
  *
  * @param state  the x87 state
  * @param n      the node that should be simulated (and patched)
- * @param op     the x87 opcode that will replace n's opcode
  *
  * @return NO_NODE_ADDED
  */
-static int sim_unop(x87_state *state, ir_node *n, ir_op *op)
+static int sim_unop(x87_state *state, ir_node *n)
 {
        arch_register_t const *const out  = x87_get_irn_register(n);
-       unsigned               const live = vfp_live_args_after(state->sim, n, REGMASK(out));
+       unsigned               const live = fp_live_args_after(state->sim, n, REGMASK(out));
        DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, out->name));
-       DEBUG_ONLY(vfp_dump_live(live);)
+       DEBUG_ONLY(fp_dump_live(live);)
 
        ir_node               *const op1         = get_irn_n(n, 0);
        arch_register_t const *const op1_reg     = x87_get_irn_register(op1);
-       int                    const op1_reg_idx = arch_register_get_index(op1_reg);
+       int                    const op1_reg_idx = op1_reg->index;
        int                    const op1_idx     = x87_on_stack(state, op1_reg_idx);
-       int                    const out_reg_idx = arch_register_get_index(out);
-       if (is_vfp_live(op1_reg_idx, live)) {
+       int                    const out_reg_idx = out->index;
+       if (is_fp_live(op1_reg_idx, live)) {
                /* push the operand here */
                x87_create_fpush(state, n, op1_idx, out_reg_idx, op1);
        } else {
@@ -960,10 +844,8 @@ static int sim_unop(x87_state *state, ir_node *n, ir_op *op)
                }
        }
 
-       x87_set_tos(state, out_reg_idx, x87_patch_insn(n, op));
-       ia32_x87_attr_t *const attr = get_ia32_x87_attr(n);
-       attr->x87[2] = attr->x87[0] = get_st_reg(0);
-       DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), attr->x87[2]->name));
+       x87_set_st(state, out_reg_idx, n, 0);
+       DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), get_st_reg(0)->name));
 
        return NO_NODE_ADDED;
 }
@@ -973,21 +855,20 @@ static int sim_unop(x87_state *state, ir_node *n, ir_op *op)
  *
  * @param state  the x87 state
  * @param n      the node that should be simulated (and patched)
- * @param op     the x87 opcode that will replace n's opcode
  *
  * @return NO_NODE_ADDED
  */
-static int sim_load(x87_state *state, ir_node *n, ir_op *op, int res_pos)
+static int sim_load(x87_state *state, ir_node *n)
 {
-       const arch_register_t *out = x87_irn_get_register(n, res_pos);
-       ia32_x87_attr_t *attr;
+       assert((int)pn_ia32_fld_res == (int)pn_ia32_fild_res
+           && (int)pn_ia32_fld_res == (int)pn_ia32_fld1_res
+           && (int)pn_ia32_fld_res == (int)pn_ia32_fldz_res);
+       const arch_register_t *out = x87_irn_get_register(n, pn_ia32_fld_res);
 
-       DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, arch_register_get_name(out)));
-       x87_push(state, arch_register_get_index(out), x87_patch_insn(n, op));
-       assert(out == x87_irn_get_register(n, res_pos));
-       attr = get_ia32_x87_attr(n);
-       attr->x87[2] = out = get_st_reg(0);
-       DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), arch_register_get_name(out)));
+       DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, out->name));
+       x87_push(state, out->index, n);
+       assert(out == x87_irn_get_register(n, pn_ia32_fld_res));
+       DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), get_st_reg(0)->name));
 
        return NO_NODE_ADDED;
 }
@@ -1015,20 +896,19 @@ static void collect_and_rewire_users(ir_node *store, ir_node *old_val, ir_node *
  *
  * @param state  the x87 state
  * @param n      the node that should be simulated (and patched)
- * @param op     the x87 store opcode
- * @param op_p   the x87 store and pop opcode
  */
-static int sim_store(x87_state *state, ir_node *n, ir_op *op, ir_op *op_p)
+static int sim_store(x87_state *state, ir_node *n)
 {
-       ir_node               *const val = get_irn_n(n, n_ia32_vfst_val);
+       ir_node               *const val = get_irn_n(n, n_ia32_fst_val);
        arch_register_t const *const op2 = x87_get_irn_register(val);
-       DB((dbg, LEVEL_1, ">>> %+F %s ->\n", n, arch_register_get_name(op2)));
+       DB((dbg, LEVEL_1, ">>> %+F %s ->\n", n, op2->name));
 
+       bool           do_pop          = false;
        int            insn            = NO_NODE_ADDED;
-       int      const op2_reg_idx     = arch_register_get_index(op2);
+       int      const op2_reg_idx     = op2->index;
        int      const op2_idx         = x87_on_stack(state, op2_reg_idx);
-       unsigned const live            = vfp_live_args_after(state->sim, n, 0);
-       int      const live_after_node = is_vfp_live(op2_reg_idx, live);
+       unsigned const live            = fp_live_args_after(state->sim, n, 0);
+       int      const live_after_node = is_fp_live(op2_reg_idx, live);
        assert(op2_idx >= 0);
        if (live_after_node) {
                /* Problem: fst doesn't support 80bit modes (spills), only fstp does
@@ -1038,20 +918,18 @@ static int sim_store(x87_state *state, ir_node *n, ir_op *op, ir_op *op_p)
                 *   - stack full: fstp value and load again
                 * Note that we cannot test on mode_E, because floats might be 80bit ... */
                ir_mode *const mode = get_ia32_ls_mode(n);
-               if (get_mode_size_bits(mode) > (mode_is_int(mode) ? 32 : 64)) {
-                       if (x87_get_depth(state) < N_ia32_st_REGS) {
+               if (get_mode_size_bits(mode) > (mode_is_int(mode) ? 32U : 64U)) {
+                       if (x87_get_depth(state) < N_FLOAT_REGS) {
                                /* ok, we have a free register: push + fstp */
-                               x87_create_fpush(state, n, op2_idx, REG_VFP_VFP_NOREG, val);
-                               x87_pop(state);
-                               x87_patch_insn(n, op_p);
+                               x87_create_fpush(state, n, op2_idx, REG_FP_FP_NOREG, val);
+                               do_pop = true;
                        } else {
                                /* stack full here: need fstp + load */
-                               x87_pop(state);
-                               x87_patch_insn(n, op_p);
+                               do_pop = true;
 
                                ir_node *const block = get_nodes_block(n);
                                ir_node *const mem   = get_irn_Proj_for_mode(n, mode_M);
-                               ir_node *const vfld  = new_bd_ia32_vfld(NULL, block, get_irn_n(n, 0), get_irn_n(n, 1), mem, mode);
+                               ir_node *const vfld  = new_bd_ia32_fld(NULL, block, get_irn_n(n, 0), get_irn_n(n, 1), mem, mode);
 
                                /* copy all attributes */
                                set_ia32_frame_ent(vfld, get_ia32_frame_ent(n));
@@ -1062,8 +940,8 @@ static int sim_store(x87_state *state, ir_node *n, ir_op *op, ir_op *op_p)
                                set_ia32_am_sc(vfld, get_ia32_am_sc(n));
                                set_ia32_ls_mode(vfld, mode);
 
-                               ir_node *const rproj = new_r_Proj(vfld, mode, pn_ia32_vfld_res);
-                               ir_node *const mproj = new_r_Proj(vfld, mode_M, pn_ia32_vfld_M);
+                               ir_node *const rproj = new_r_Proj(vfld, mode, pn_ia32_fld_res);
+                               ir_node *const mproj = new_r_Proj(vfld, mode_M, pn_ia32_fld_M);
 
                                arch_set_irn_register(rproj, op2);
 
@@ -1081,68 +959,32 @@ static int sim_store(x87_state *state, ir_node *n, ir_op *op, ir_op *op_p)
                        /* we can only store the tos to memory */
                        if (op2_idx != 0)
                                x87_create_fxch(state, n, op2_idx);
-
-                       /* mode size 64 or smaller -> use normal fst */
-                       x87_patch_insn(n, op);
                }
        } else {
                /* we can only store the tos to memory */
                if (op2_idx != 0)
                        x87_create_fxch(state, n, op2_idx);
 
-               x87_pop(state);
-               x87_patch_insn(n, op_p);
+               do_pop = true;
        }
 
+       if (do_pop)
+               x87_pop(state);
+
        ia32_x87_attr_t *const attr = get_ia32_x87_attr(n);
-       attr->x87[1] = get_st_reg(0);
-       DB((dbg, LEVEL_1, "<<< %s %s ->\n", get_irn_opname(n), arch_register_get_name(attr->x87[1])));
+       attr->pop = do_pop;
+       DB((dbg, LEVEL_1, "<<< %s %s ->\n", get_irn_opname(n), get_st_reg(0)->name));
 
        return insn;
 }
 
-#define _GEN_BINOP(op, rev) \
-static int sim_##op(x87_state *state, ir_node *n) { \
-       exchange_tmpl tmpl = { op_ia32_##op, op_ia32_##rev, op_ia32_##op##p, op_ia32_##rev##p }; \
-       return sim_binop(state, n, &tmpl); \
-}
-
-#define GEN_BINOP(op)   _GEN_BINOP(op, op)
-#define GEN_BINOPR(op)  _GEN_BINOP(op, op##r)
-
-#define GEN_LOAD(op)                                              \
-static int sim_##op(x87_state *state, ir_node *n) {               \
-       return sim_load(state, n, op_ia32_##op, pn_ia32_v##op##_res); \
-}
-
-#define GEN_UNOP(op) \
-static int sim_##op(x87_state *state, ir_node *n) { \
-       return sim_unop(state, n, op_ia32_##op); \
-}
-
-#define GEN_STORE(op) \
-static int sim_##op(x87_state *state, ir_node *n) { \
-       return sim_store(state, n, op_ia32_##op, op_ia32_##op##p); \
+static int sim_fprem(x87_state *const state, ir_node *const n)
+{
+       (void)state;
+       (void)n;
+       panic("TODO implement");
 }
 
-/* all stubs */
-GEN_BINOP(fadd)
-GEN_BINOPR(fsub)
-GEN_BINOP(fmul)
-GEN_BINOPR(fdiv)
-GEN_BINOP(fprem)
-
-GEN_UNOP(fabs)
-GEN_UNOP(fchs)
-
-GEN_LOAD(fld)
-GEN_LOAD(fild)
-GEN_LOAD(fldz)
-GEN_LOAD(fld1)
-
-GEN_STORE(fst)
-GEN_STORE(fist)
-
 /**
  * Simulate a virtual fisttp.
  *
@@ -1153,14 +995,11 @@ GEN_STORE(fist)
  */
 static int sim_fisttp(x87_state *state, ir_node *n)
 {
-       ir_node               *val = get_irn_n(n, n_ia32_vfst_val);
+       ir_node               *val = get_irn_n(n, n_ia32_fst_val);
        const arch_register_t *op2 = x87_get_irn_register(val);
-       ia32_x87_attr_t *attr;
-       int op2_reg_idx, op2_idx;
 
-       op2_reg_idx = arch_register_get_index(op2);
-       op2_idx     = x87_on_stack(state, op2_reg_idx);
-       DB((dbg, LEVEL_1, ">>> %+F %s ->\n", n, arch_register_get_name(op2)));
+       int const op2_idx = x87_on_stack(state, op2->index);
+       DB((dbg, LEVEL_1, ">>> %+F %s ->\n", n, op2->name));
        assert(op2_idx >= 0);
 
        /* Note: although the value is still live here, it is destroyed because
@@ -1172,11 +1011,8 @@ static int sim_fisttp(x87_state *state, ir_node *n)
                x87_create_fxch(state, n, op2_idx);
 
        x87_pop(state);
-       x87_patch_insn(n, op_ia32_fisttp);
 
-       attr = get_ia32_x87_attr(n);
-       attr->x87[1] = op2 = get_st_reg(0);
-       DB((dbg, LEVEL_1, "<<< %s %s ->\n", get_irn_opname(n), arch_register_get_name(op2)));
+       DB((dbg, LEVEL_1, "<<< %s %s ->\n", get_irn_opname(n), get_st_reg(0)->name));
 
        return NO_NODE_ADDED;
 }
@@ -1192,15 +1028,14 @@ static int sim_fisttp(x87_state *state, ir_node *n)
 static int sim_FtstFnstsw(x87_state *state, ir_node *n)
 {
        x87_simulator         *sim         = state->sim;
-       ia32_x87_attr_t       *attr        = get_ia32_x87_attr(n);
-       ir_node               *op1_node    = get_irn_n(n, n_ia32_vFtstFnstsw_left);
+       ir_node               *op1_node    = get_irn_n(n, n_ia32_FtstFnstsw_left);
        const arch_register_t *reg1        = x87_get_irn_register(op1_node);
-       int                    reg_index_1 = arch_register_get_index(reg1);
+       int                    reg_index_1 = reg1->index;
        int                    op1_idx     = x87_on_stack(state, reg_index_1);
-       unsigned               live        = vfp_live_args_after(sim, n, 0);
+       unsigned               live        = fp_live_args_after(sim, n, 0);
 
-       DB((dbg, LEVEL_1, ">>> %+F %s\n", n, arch_register_get_name(reg1)));
-       DEBUG_ONLY(vfp_dump_live(live);)
+       DB((dbg, LEVEL_1, ">>> %+F %s\n", n, reg1->name));
+       DEBUG_ONLY(fp_dump_live(live);)
        DB((dbg, LEVEL_1, "Stack before: "));
        DEBUG_ONLY(x87_dump_stack(state);)
        assert(op1_idx >= 0);
@@ -1208,18 +1043,10 @@ static int sim_FtstFnstsw(x87_state *state, ir_node *n)
        if (op1_idx != 0) {
                /* bring the value to tos */
                x87_create_fxch(state, n, op1_idx);
-               op1_idx = 0;
        }
 
-       /* patch the operation */
-       x87_patch_insn(n, op_ia32_FtstFnstsw);
-       reg1 = get_st_reg(op1_idx);
-       attr->x87[0] = reg1;
-       attr->x87[1] = NULL;
-       attr->x87[2] = NULL;
-
-       if (!is_vfp_live(reg_index_1, live))
-               x87_create_fpop(state, sched_next(n), 1);
+       if (!is_fp_live(reg_index_1, live))
+               x87_create_fpop(state, sched_next(n), 0);
 
        return NO_NODE_ADDED;
 }
@@ -1234,57 +1061,42 @@ static int sim_FtstFnstsw(x87_state *state, ir_node *n)
  */
 static int sim_Fucom(x87_state *state, ir_node *n)
 {
-       int op1_idx;
-       int op2_idx = -1;
-       ia32_x87_attr_t *attr = get_ia32_x87_attr(n);
-       ir_op *dst;
-       x87_simulator         *sim        = state->sim;
-       ir_node               *op1_node   = get_irn_n(n, n_ia32_vFucomFnstsw_left);
-       ir_node               *op2_node   = get_irn_n(n, n_ia32_vFucomFnstsw_right);
-       const arch_register_t *op1        = x87_get_irn_register(op1_node);
-       const arch_register_t *op2        = x87_get_irn_register(op2_node);
-       int reg_index_1 = arch_register_get_index(op1);
-       int                    reg_index_2 = arch_register_get_index(op2);
-       unsigned               live       = vfp_live_args_after(sim, n, 0);
-       bool                   permuted   = attr->attr.data.ins_permuted;
-       bool                   xchg       = false;
-       int                    pops       = 0;
-
-       DB((dbg, LEVEL_1, ">>> %+F %s, %s\n", n,
-               arch_register_get_name(op1), arch_register_get_name(op2)));
-       DEBUG_ONLY(vfp_dump_live(live);)
+       ia32_x87_attr_t       *attr        = get_ia32_x87_attr(n);
+       x87_simulator         *sim         = state->sim;
+       ir_node               *op1_node    = get_irn_n(n, n_ia32_FucomFnstsw_left);
+       ir_node               *op2_node    = get_irn_n(n, n_ia32_FucomFnstsw_right);
+       const arch_register_t *op1         = x87_get_irn_register(op1_node);
+       const arch_register_t *op2         = x87_get_irn_register(op2_node);
+       int                    reg_index_1 = op1->index;
+       int                    reg_index_2 = op2->index;
+       unsigned               live        = fp_live_args_after(sim, n, 0);
+
+       DB((dbg, LEVEL_1, ">>> %+F %s, %s\n", n, op1->name, op2->name));
+       DEBUG_ONLY(fp_dump_live(live);)
        DB((dbg, LEVEL_1, "Stack before: "));
        DEBUG_ONLY(x87_dump_stack(state);)
 
-       op1_idx = x87_on_stack(state, reg_index_1);
+       int op1_idx = x87_on_stack(state, reg_index_1);
        assert(op1_idx >= 0);
 
+       int op2_idx;
+       int pops = 0;
        /* BEWARE: check for comp a,a cases, they might happen */
-       if (reg_index_2 != REG_VFP_VFP_NOREG) {
-               /* second operand is a vfp register */
+       if (reg_index_2 != REG_FP_FP_NOREG) {
+               /* second operand is a fp register */
                op2_idx = x87_on_stack(state, reg_index_2);
                assert(op2_idx >= 0);
 
-               if (is_vfp_live(reg_index_2, live)) {
+               if (is_fp_live(reg_index_2, live)) {
                        /* second operand is live */
 
-                       if (is_vfp_live(reg_index_1, live)) {
+                       if (is_fp_live(reg_index_1, live)) {
                                /* both operands are live */
-
-                               if (op1_idx == 0) {
-                                       /* res = tos X op */
-                               } else if (op2_idx == 0) {
-                                       /* res = op X tos */
-                                       permuted = !permuted;
-                                       xchg     = true;
-                               } else {
+                               if (op1_idx != 0 && op2_idx != 0) {
                                        /* bring the first one to tos */
                                        x87_create_fxch(state, n, op1_idx);
-                                       if (op1_idx == op2_idx) {
+                                       if (op1_idx == op2_idx)
                                                op2_idx = 0;
-                                       } else if (op2_idx == 0) {
-                                               op2_idx = op1_idx;
-                                       }
                                        op1_idx = 0;
                                        /* res = tos X op */
                                }
@@ -1303,7 +1115,7 @@ static int sim_Fucom(x87_state *state, ir_node *n)
                        }
                } else {
                        /* second operand is dead */
-                       if (is_vfp_live(reg_index_1, live)) {
+                       if (is_fp_live(reg_index_1, live)) {
                                /* first operand is live: bring second to tos.
                                   This means further, op1_idx != op2_idx. */
                                assert(op1_idx != op2_idx);
@@ -1314,9 +1126,7 @@ static int sim_Fucom(x87_state *state, ir_node *n)
                                        op2_idx = 0;
                                }
                                /* res = op X tos, pop */
-                               pops     = 1;
-                               permuted = !permuted;
-                               xchg     = true;
+                               pops = 1;
                        } else {
                                /* both operands are dead here, check first for identity. */
                                if (op1_idx == op2_idx) {
@@ -1328,137 +1138,60 @@ static int sim_Fucom(x87_state *state, ir_node *n)
                                        }
                                        /* res = tos X op, pop */
                                        pops    = 1;
-                               }
-                               /* different, move them to st and st(1) and pop both.
-                                  The tricky part is to get one into st(1).*/
-                               else if (op2_idx == 1) {
-                                       /* good, second operand is already in the right place, move the first */
-                                       if (op1_idx != 0) {
-                                               /* bring the first on top */
-                                               x87_create_fxch(state, n, op1_idx);
-                                               assert(op2_idx != 0);
-                                               op1_idx = 0;
-                                       }
-                                       /* res = tos X op, pop, pop */
-                                       pops = 2;
-                               } else if (op1_idx == 1) {
-                                       /* good, first operand is already in the right place, move the second */
-                                       if (op2_idx != 0) {
-                                               /* bring the first on top */
-                                               x87_create_fxch(state, n, op2_idx);
-                                               assert(op1_idx != 0);
-                                               op2_idx = 0;
-                                       }
-                                       /* res = op X tos, pop, pop */
-                                       permuted = !permuted;
-                                       xchg     = true;
-                                       pops     = 2;
                                } else {
-                                       /* if one is already the TOS, we need two fxch */
-                                       if (op1_idx == 0) {
-                                               /* first one is TOS, move to st(1) */
-                                               x87_create_fxch(state, n, 1);
-                                               assert(op2_idx != 1);
-                                               op1_idx = 1;
-                                               x87_create_fxch(state, n, op2_idx);
-                                               op2_idx = 0;
-                                               /* res = op X tos, pop, pop */
-                                               pops     = 2;
-                                               permuted = !permuted;
-                                               xchg     = true;
-                                       } else if (op2_idx == 0) {
-                                               /* second one is TOS, move to st(1) */
-                                               x87_create_fxch(state, n, 1);
-                                               assert(op1_idx != 1);
-                                               op2_idx = 1;
-                                               x87_create_fxch(state, n, op1_idx);
-                                               op1_idx = 0;
-                                               /* res = tos X op, pop, pop */
-                                               pops    = 2;
-                                       } else {
-                                               /* none of them is either TOS or st(1), 3 fxch needed */
-                                               x87_create_fxch(state, n, op2_idx);
-                                               assert(op1_idx != 0);
-                                               x87_create_fxch(state, n, 1);
-                                               op2_idx = 1;
-                                               x87_create_fxch(state, n, op1_idx);
-                                               op1_idx = 0;
-                                               /* res = tos X op, pop, pop */
-                                               pops    = 2;
+                                       if (op1_idx != 0 && op2_idx != 0) {
+                                               /* Both not at tos: Move one operand to tos. Move the one not at
+                                                * pos 1, so we get a chance to use fucompp. */
+                                               if (op1_idx != 1) {
+                                                       x87_create_fxch(state, n, op1_idx);
+                                                       op1_idx = 0;
+                                               } else {
+                                                       x87_create_fxch(state, n, op2_idx);
+                                                       op2_idx = 0;
+                                               }
                                        }
+                                       pops = 2;
                                }
                        }
                }
        } else {
                /* second operand is an address mode */
-               if (is_vfp_live(reg_index_1, live)) {
-                       /* first operand is live: bring it to TOS */
-                       if (op1_idx != 0) {
-                               x87_create_fxch(state, n, op1_idx);
-                               op1_idx = 0;
-                       }
-               } else {
-                       /* first operand is dead: bring it to tos */
-                       if (op1_idx != 0) {
-                               x87_create_fxch(state, n, op1_idx);
-                               op1_idx = 0;
-                       }
+               if (op1_idx != 0)
+                       x87_create_fxch(state, n, op1_idx);
+               /* Pop first operand, if it is dead. */
+               if (!is_fp_live(reg_index_1, live))
                        pops = 1;
-               }
+
+               op1_idx = attr->attr.data.ins_permuted ? -1 :  0;
+               op2_idx = attr->attr.data.ins_permuted ?  0 : -1;
        }
+       assert(op1_idx == 0 || op2_idx == 0);
 
        /* patch the operation */
-       if (is_ia32_vFucomFnstsw(n)) {
-               int i;
-
-               switch (pops) {
-               case 0: dst = op_ia32_FucomFnstsw;   break;
-               case 1: dst = op_ia32_FucompFnstsw;  break;
-               case 2: dst = op_ia32_FucomppFnstsw; break;
-               default: panic("invalid popcount");
-               }
-
-               for (i = 0; i < pops; ++i) {
-                       x87_pop(state);
-               }
-       } else if (is_ia32_vFucomi(n)) {
-               switch (pops) {
-               case 0: dst = op_ia32_Fucomi;                  break;
-               case 1: dst = op_ia32_Fucompi; x87_pop(state); break;
-               case 2:
-                       dst = op_ia32_Fucompi;
+       if (is_ia32_FucomFnstsw(n) && pops == 2
+           && (op1_idx == 1 || op2_idx == 1)) {
+               set_irn_op(n, op_ia32_FucomppFnstsw);
+               x87_pop(state);
+               x87_pop(state);
+       } else {
+               if (pops != 0)
                        x87_pop(state);
-                       x87_create_fpop(state, sched_next(n), 1);
-                       break;
-               default: panic("invalid popcount");
+               if (pops == 2) {
+                       int const idx = (op1_idx != 0 ? op1_idx : op2_idx) - 1 /* Due to prior pop. */;
+                       x87_create_fpop(state, sched_next(n), idx);
                }
-       } else {
-               panic("invalid operation %+F", n);
        }
 
-       x87_patch_insn(n, dst);
-       if (xchg) {
-               int tmp = op1_idx;
-               op1_idx = op2_idx;
-               op2_idx = tmp;
-       }
-
-       op1 = get_st_reg(op1_idx);
-       attr->x87[0] = op1;
-       if (op2_idx >= 0) {
-               op2 = get_st_reg(op2_idx);
-               attr->x87[1] = op2;
-       }
-       attr->x87[2] = NULL;
-       attr->attr.data.ins_permuted = permuted;
+       int const reg_idx = op1_idx != 0 ? op1_idx : op2_idx;
+       attr->reg                    = reg_idx >= 0 ? get_st_reg(reg_idx) : NULL;
+       attr->attr.data.ins_permuted = op1_idx != 0;
+       attr->pop                    = pops != 0;
 
-       if (op2_idx >= 0) {
-               DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(n),
-                       arch_register_get_name(op1), arch_register_get_name(op2)));
-       } else {
-               DB((dbg, LEVEL_1, "<<< %s %s, [AM]\n", get_irn_opname(n),
-                       arch_register_get_name(op1)));
-       }
+       DEBUG_ONLY(
+               char const *const l = op1_idx >= 0 ? get_st_reg(op1_idx)->name : "[AM]";
+               char const *const r = op2_idx >= 0 ? get_st_reg(op2_idx)->name : "[AM]";
+               DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(n), l, r));
+       )
 
        return NO_NODE_ADDED;
 }
@@ -1486,15 +1219,15 @@ static int sim_Keep(x87_state *state, ir_node *node)
        for (i = 0; i < arity; ++i) {
                op      = get_irn_n(node, i);
                op_reg  = arch_get_irn_register(op);
-               if (arch_register_get_class(op_reg) != &ia32_reg_classes[CLASS_ia32_vfp])
+               if (op_reg->reg_class != &ia32_reg_classes[CLASS_ia32_fp])
                        continue;
 
-               reg_id = arch_register_get_index(op_reg);
-               live   = vfp_live_args_after(state->sim, node, 0);
+               reg_id = op_reg->index;
+               live   = fp_live_args_after(state->sim, node, 0);
 
                op_stack_idx = x87_on_stack(state, reg_id);
-               if (op_stack_idx >= 0 && !is_vfp_live(reg_id, live))
-                       x87_create_fpop(state, sched_next(node), 1);
+               if (op_stack_idx >= 0 && !is_fp_live(reg_id, live))
+                       x87_create_fpop(state, sched_next(node), 0);
        }
 
        DB((dbg, LEVEL_1, "Stack after: "));
@@ -1512,8 +1245,6 @@ static void keep_float_node_alive(ir_node *node)
 {
        ir_node *block = get_nodes_block(node);
        ir_node *keep  = be_new_Keep(block, 1, &node);
-
-       assert(sched_is_scheduled(node));
        sched_add_after(node, keep);
 }
 
@@ -1531,11 +1262,10 @@ static ir_node *create_Copy(x87_state *state, ir_node *n)
        ir_mode *mode = get_irn_mode(n);
        ir_node *block = get_nodes_block(n);
        ir_node *pred = get_irn_n(n, 0);
-       ir_node *(*cnstr)(dbg_info *, ir_node *, ir_mode *) = NULL;
+       ir_node *(*cnstr)(dbg_info *, ir_node *) = NULL;
        ir_node *res;
        const arch_register_t *out;
        const arch_register_t *op1;
-       ia32_x87_attr_t *attr;
 
        /* Do not copy constants, recreate them. */
        switch (get_ia32_irn_opcode(pred)) {
@@ -1569,22 +1299,18 @@ static ir_node *create_Copy(x87_state *state, ir_node *n)
 
        if (cnstr != NULL) {
                /* copy a constant */
-               res = (*cnstr)(n_dbg, block, mode);
-
-               x87_push(state, arch_register_get_index(out), res);
+               res = (*cnstr)(n_dbg, block);
 
-               attr = get_ia32_x87_attr(res);
-               attr->x87[2] = get_st_reg(0);
+               x87_push(state, out->index, res);
        } else {
-               int op1_idx = x87_on_stack(state, arch_register_get_index(op1));
+               int op1_idx = x87_on_stack(state, op1->index);
 
                res = new_bd_ia32_fpushCopy(n_dbg, block, pred, mode);
 
-               x87_push(state, arch_register_get_index(out), res);
+               x87_push(state, out->index, res);
 
-               attr = get_ia32_x87_attr(res);
-               attr->x87[0] = get_st_reg(op1_idx);
-               attr->x87[2] = get_st_reg(0);
+               ia32_x87_attr_t *const attr = get_ia32_x87_attr(res);
+               attr->reg = get_st_reg(op1_idx);
        }
        arch_set_irn_register(res, out);
 
@@ -1602,19 +1328,18 @@ static ir_node *create_Copy(x87_state *state, ir_node *n)
 static int sim_Copy(x87_state *state, ir_node *n)
 {
        arch_register_class_t const *const cls = arch_get_irn_reg_class(n);
-       if (cls != &ia32_reg_classes[CLASS_ia32_vfp])
+       if (cls != &ia32_reg_classes[CLASS_ia32_fp])
                return NO_NODE_ADDED;
 
        ir_node               *const pred = be_get_Copy_op(n);
        arch_register_t const *const op1  = x87_get_irn_register(pred);
        arch_register_t const *const out  = x87_get_irn_register(n);
-       unsigned               const live = vfp_live_args_after(state->sim, n, REGMASK(out));
+       unsigned               const live = fp_live_args_after(state->sim, n, REGMASK(out));
 
-       DB((dbg, LEVEL_1, ">>> %+F %s -> %s\n", n,
-               arch_register_get_name(op1), arch_register_get_name(out)));
-       DEBUG_ONLY(vfp_dump_live(live);)
+       DB((dbg, LEVEL_1, ">>> %+F %s -> %s\n", n, op1->name, out->name));
+       DEBUG_ONLY(fp_dump_live(live);)
 
-       if (is_vfp_live(arch_register_get_index(op1), live)) {
+       if (is_fp_live(op1->index, live)) {
                /* Operand is still live, a real copy. We need here an fpush that can
                   hold a register, so use the fpushCopy or recreate constants */
                ir_node *const node = create_Copy(state, n);
@@ -1625,10 +1350,8 @@ static int sim_Copy(x87_state *state, ir_node *n)
                 * instruction, but we would have to rerun all the simulation to get
                 * this correct...
                 */
-               ir_node *const next = sched_next(n);
-               sched_remove(n);
+               sched_replace(n, node);
                exchange(n, node);
-               sched_add_before(next, node);
 
                if (get_irn_n_edges(pred) == 0) {
                        keep_float_node_alive(pred);
@@ -1636,50 +1359,9 @@ static int sim_Copy(x87_state *state, ir_node *n)
 
                DB((dbg, LEVEL_1, "<<< %+F %s -> ?\n", node, op1->name));
        } else {
-               int const op1_idx = x87_on_stack(state, arch_register_get_index(op1));
-               int const out_idx = x87_on_stack(state, arch_register_get_index(out));
-               if (out_idx >= 0 && out_idx != op1_idx) {
-                       /* Matze: out already on stack? how can this happen? */
-                       panic("invalid stack state");
-
-#if 0
-                       /* op1 must be killed and placed where out is */
-                       if (out_idx == 0) {
-                               ia32_x87_attr_t *attr;
-                               /* best case, simple remove and rename */
-                               x87_patch_insn(n, op_ia32_Pop);
-                               attr = get_ia32_x87_attr(n);
-                               attr->x87[0] = op1 = get_st_reg(0);
-
-                               x87_pop(state);
-                               x87_set_st(state, arch_register_get_index(out), n, op1_idx - 1);
-                       } else {
-                               ia32_x87_attr_t *attr;
-                               /* move op1 to tos, store and pop it */
-                               if (op1_idx != 0) {
-                                       x87_create_fxch(state, n, op1_idx);
-                                       op1_idx = 0;
-                               }
-                               x87_patch_insn(n, op_ia32_Pop);
-                               attr = get_ia32_x87_attr(n);
-                               attr->x87[0] = op1 = get_st_reg(out_idx);
-
-                               x87_pop(state);
-                               x87_set_st(state, arch_register_get_index(out), n, out_idx - 1);
-                       }
-                       DB((dbg, LEVEL_1, "<<< %+F %s\n", n, op1->name));
-#endif
-               } else {
-                       /* just a virtual copy */
-                       x87_set_st(state, arch_register_get_index(out), pred, op1_idx);
-                       /* don't remove the node to keep the verifier quiet :),
-                          the emitter won't emit any code for the node */
-#if 0
-                       sched_remove(n);
-                       DB((dbg, LEVEL_1, "<<< KILLED %s\n", get_irn_opname(n)));
-                       exchange(n, pred);
-#endif
-               }
+               /* Just a virtual copy. */
+               int const op1_idx = x87_on_stack(state, op1->index);
+               x87_set_st(state, out->index, n, op1_idx);
        }
        return NO_NODE_ADDED;
 }
@@ -1696,7 +1378,7 @@ static ir_node *get_call_result_proj(ir_node *call)
                ir_node *proj = get_edge_src_irn(edge);
                long pn = get_Proj_proj(proj);
 
-               if (pn == pn_ia32_Call_vf0)
+               if (pn == pn_ia32_Call_st0)
                        return proj;
        }
 
@@ -1707,15 +1389,13 @@ static int sim_Asm(x87_state *const state, ir_node *const n)
 {
        (void)state;
 
-       for (size_t i = get_irn_arity(n); i-- != 0;) {
-               arch_register_req_t const *const req = arch_get_irn_register_req_in(n, i);
-               if (req->cls == &ia32_reg_classes[CLASS_ia32_vfp])
-                       panic("cannot handle %+F with x87 constraints", n);
-       }
+       be_foreach_use(n, &ia32_reg_classes[CLASS_ia32_fp], in_req, value, value_req,
+               panic("cannot handle %+F with x87 constraints", n);
+       );
 
-       for (size_t i = arch_get_irn_n_outs(n); i-- != 0;) {
+       be_foreach_out(n, i) {
                arch_register_req_t const *const req = arch_get_irn_register_req_out(n, i);
-               if (req->cls == &ia32_reg_classes[CLASS_ia32_vfp])
+               if (req->cls == &ia32_reg_classes[CLASS_ia32_fp])
                        panic("cannot handle %+F with x87 constraints", n);
        }
 
@@ -1747,7 +1427,7 @@ static int sim_Call(x87_state *state, ir_node *n)
                if (mode && mode_is_float(mode)) {
                        ir_node               *const resproj = get_call_result_proj(n);
                        arch_register_t const *const reg     = x87_get_irn_register(resproj);
-                       x87_push(state, arch_register_get_index(reg), resproj);
+                       x87_push(state, reg->index, resproj);
                }
        }
        DB((dbg, LEVEL_1, "Stack after: "));
@@ -1813,7 +1493,7 @@ static int sim_Perm(x87_state *state, ir_node *irn)
        /* collect old stack positions */
        for (i = 0; i < n; ++i) {
                const arch_register_t *inreg = x87_get_irn_register(get_irn_n(irn, i));
-               int idx = x87_on_stack(state, arch_register_get_index(inreg));
+               int idx = x87_on_stack(state, inreg->index);
 
                assert(idx >= 0 && "Perm argument not on x87 stack");
 
@@ -1826,7 +1506,7 @@ static int sim_Perm(x87_state *state, ir_node *irn)
                long                  num   = get_Proj_proj(proj);
 
                assert(0 <= num && num < n && "More Proj's than Perm inputs");
-               x87_set_st(state, arch_register_get_index(out), proj, stack_pos[(unsigned)num]);
+               x87_set_st(state, out->index, proj, stack_pos[(unsigned)num]);
        }
        DB((dbg, LEVEL_1, "<<< %+F\n", irn));
 
@@ -1844,22 +1524,22 @@ static void x87_kill_deads(x87_simulator *const sim, ir_node *const block, x87_s
 {
        ir_node *first_insn = sched_first(block);
        ir_node *keep = NULL;
-       unsigned live = vfp_live_args_after(sim, block, 0);
+       unsigned live = fp_live_args_after(sim, block, 0);
        unsigned kill_mask;
-       int i, depth, num_pop;
+       int i, depth;
 
        kill_mask = 0;
        depth = x87_get_depth(state);
        for (i = depth - 1; i >= 0; --i) {
                int reg = x87_get_st_reg(state, i);
 
-               if (! is_vfp_live(reg, live))
+               if (! is_fp_live(reg, live))
                        kill_mask |= (1 << i);
        }
 
        if (kill_mask) {
                DB((dbg, LEVEL_1, "Killing deads:\n"));
-               DEBUG_ONLY(vfp_dump_live(live);)
+               DEBUG_ONLY(fp_dump_live(live);)
                DEBUG_ONLY(x87_dump_stack(state);)
 
                if (kill_mask != 0 && live == 0) {
@@ -1896,18 +1576,9 @@ static void x87_kill_deads(x87_simulator *const sim, ir_node *const block, x87_s
                                x87_create_fxch(state, first_insn, i);
                        }
 
-                       if ((kill_mask & 3) == 3) {
-                               /* we can do a double-pop */
-                               num_pop = 2;
-                       }
-                       else {
-                               /* only a single pop */
-                               num_pop = 1;
-                       }
-
-                       depth -= num_pop;
-                       kill_mask >>= num_pop;
-                       keep = x87_create_fpop(state, first_insn, num_pop);
+                       depth      -= 1;
+                       kill_mask >>= 1;
+                       keep        = x87_create_fpop(state, first_insn, 0);
                }
                keep_alive(keep);
        }
@@ -2025,7 +1696,7 @@ static void x87_init_simulator(x87_simulator *sim, ir_graph *irg)
        obstack_init(&sim->obst);
        sim->blk_states = pmap_create();
        sim->n_idx      = get_irg_last_idx(irg);
-       sim->live       = OALLOCN(&sim->obst, vfp_liveness, sim->n_idx);
+       sim->live       = OALLOCN(&sim->obst, fp_liveness, sim->n_idx);
 
        DB((dbg, LEVEL_1, "--------------------------------\n"
                "x87 Simulator started for %+F\n", irg));
@@ -2035,23 +1706,23 @@ static void x87_init_simulator(x87_simulator *sim, ir_graph *irg)
 
        register_sim(op_ia32_Asm,          sim_Asm);
        register_sim(op_ia32_Call,         sim_Call);
-       register_sim(op_ia32_vfld,         sim_fld);
-       register_sim(op_ia32_vfild,        sim_fild);
-       register_sim(op_ia32_vfld1,        sim_fld1);
-       register_sim(op_ia32_vfldz,        sim_fldz);
-       register_sim(op_ia32_vfadd,        sim_fadd);
-       register_sim(op_ia32_vfsub,        sim_fsub);
-       register_sim(op_ia32_vfmul,        sim_fmul);
-       register_sim(op_ia32_vfdiv,        sim_fdiv);
-       register_sim(op_ia32_vfprem,       sim_fprem);
-       register_sim(op_ia32_vfabs,        sim_fabs);
-       register_sim(op_ia32_vfchs,        sim_fchs);
-       register_sim(op_ia32_vfist,        sim_fist);
-       register_sim(op_ia32_vfisttp,      sim_fisttp);
-       register_sim(op_ia32_vfst,         sim_fst);
-       register_sim(op_ia32_vFtstFnstsw,  sim_FtstFnstsw);
-       register_sim(op_ia32_vFucomFnstsw, sim_Fucom);
-       register_sim(op_ia32_vFucomi,      sim_Fucom);
+       register_sim(op_ia32_fld,          sim_load);
+       register_sim(op_ia32_fild,         sim_load);
+       register_sim(op_ia32_fld1,         sim_load);
+       register_sim(op_ia32_fldz,         sim_load);
+       register_sim(op_ia32_fadd,         sim_binop);
+       register_sim(op_ia32_fsub,         sim_binop);
+       register_sim(op_ia32_fmul,         sim_binop);
+       register_sim(op_ia32_fdiv,         sim_binop);
+       register_sim(op_ia32_fprem,        sim_fprem);
+       register_sim(op_ia32_fabs,         sim_unop);
+       register_sim(op_ia32_fchs,         sim_unop);
+       register_sim(op_ia32_fist,         sim_store);
+       register_sim(op_ia32_fisttp,       sim_fisttp);
+       register_sim(op_ia32_fst,          sim_store);
+       register_sim(op_ia32_FtstFnstsw,   sim_FtstFnstsw);
+       register_sim(op_ia32_FucomFnstsw,  sim_Fucom);
+       register_sim(op_ia32_Fucomi,       sim_Fucom);
        register_sim(op_be_Copy,           sim_Copy);
        register_sim(op_be_Return,         sim_Return);
        register_sim(op_be_Perm,           sim_Perm);