2 * This file implements the x87 support and virtual to stack
3 * register translation for the ia32 backend.
5 * @author: Michael Beck
14 #include "iredges_t.h"
23 #include "../belive_t.h"
24 #include "../besched.h"
25 #include "../benode_t.h"
26 #include "ia32_new_nodes.h"
27 #include "gen_ia32_new_nodes.h"
28 #include "gen_ia32_regalloc_if.h"
33 /* first and second binop index */
40 /* the store val index */
/* input position of the value operand on virtual store nodes (see sim_fst) */
41 #define STORE_VAL_IDX 2
/* Wrap a stack position into the ring buffer range; requires N_x87_REGS to be a power of two. */
43 #define MASK_TOS(x) ((x) & (N_x87_REGS - 1))
45 /** the debug handle */
46 static firm_dbg_module_t *dbg = NULL;
49 * An exchange template.
50 * Note that our virtual functions have the same inputs
51 * and attributes as the real ones, so we can simply exchange
53 * Further, x87 supports inverse instructions, so we can handle them.
/* Op quadruple consumed by sim_binop when patching a virtual binop into a concrete x87 op. */
55 typedef struct _exchange_tmpl {
56 ir_op *normal_op; /**< the normal one */
57 ir_op *reverse_op; /**< the reverse one if exists */
58 ir_op *normal_pop_op; /**< the normal one with tos pop */
59 ir_op *reverse_pop_op; /**< the reverse one with tos pop */
63 * An entry on the simulated x87 stack.
65 typedef struct _st_entry {
66 int reg_idx; /**< the virtual register index of this stack value */
67 ir_node *node; /**< the node that produced this value */
/* The simulated x87 stack: a ring buffer over st[], addressed via MASK_TOS(tos + pos). */
73 typedef struct _x87_state {
74 st_entry st[N_x87_REGS]; /**< the register stack */
75 int depth; /**< the current stack depth */
76 int tos; /**< position of the tos */
79 /** An empty state, used for blocks without fp instructions. */
80 static const x87_state _empty = { {0, NULL}, 0, 0 };
/* NOTE(review): const is cast away here; 'empty' must only ever be read, never written through. */
81 static x87_state *empty = (x87_state *)&_empty;
83 /** The type of an instruction simulator */
84 typedef void (*sim_func)(x87_state *state, ir_node *n, const arch_env_t *env);
87 * A block state: Every block has a x87 state at the beginning and at the end.
89 typedef struct _blk_state {
90 x87_state *begin; /**< state at the begin or NULL if not assigned */
91 x87_state *end; /**< state at the end or NULL if not assigned */
/* Convert an untyped pmap value back into a blk_state pointer. */
94 #define PTR_TO_BLKSTATE(p) ((blk_state *)(p))
99 typedef struct _x87_simulator {
100 struct obstack obst; /**< an obstack for fast allocating */
101 pmap *blk_states; /**< map blocks to states */
102 const arch_env_t *env; /**< architecture environment */
106 * Check if the state is empty.
108 static int x87_state_is_empty(const x87_state *state) {
109 return state->depth == 0;
113 * Return the virtual register index at st(pos).
115 static int x87_get_st_reg(const x87_state *state, int pos) {
116 assert(pos < state->depth);
117 return state->st[MASK_TOS(state->tos + pos)].reg_idx;
121 * Return the node at st(pos).
123 static ir_node *x87_get_st_node(const x87_state *state, int pos) {
124 assert(pos < state->depth);
125 return state->st[MASK_TOS(state->tos + pos)].node;
130 * Dump the stack for debugging.
132 static void x87_dump_stack(const x87_state *state) {
/* print from the deepest slot up so st(0) appears last, next to the TOS marker */
135 for (i = state->depth - 1; i >= 0; --i) {
136 DB((dbg, LEVEL_2, "vf%d ", x87_get_st_reg(state, i)));
138 DB((dbg, LEVEL_2, "<-- TOS\n"));
140 #endif /* DEBUG_libfirm */
143 * Set a virtual register to st(pos)
145 static void x87_set_st(x87_state *state, int reg_idx, ir_node *node, int pos) {
146 assert(0 < state->depth);
147 state->st[MASK_TOS(state->tos + pos)].reg_idx = reg_idx;
148 state->st[MASK_TOS(state->tos + pos)].node = node;
150 DB((dbg, LEVEL_2, "After SET_REG:\n ")); DEBUG_ONLY(x87_dump_stack(state));
154 * Set the tos virtual register
156 static void x87_set_tos(x87_state *state, int reg_idx, ir_node *node) {
157 x87_set_st(state, reg_idx, node, 0);
161 * Flush the x87 stack.
/* NOTE(review): presumably resets the simulated stack (depth/tos) to empty; body not fully visible here -- confirm against full source. */
163 static void x87_flush(x87_state *state) {
169 * Swap st(0) with st(pos).
171 static void x87_fxch(x87_state *state, int pos) {
173 assert(pos < state->depth);
175 entry = state->st[MASK_TOS(state->tos + pos)];
176 state->st[MASK_TOS(state->tos + pos)] = state->st[MASK_TOS(state->tos)];
177 state->st[MASK_TOS(state->tos)] = entry;
179 DB((dbg, LEVEL_2, "After FXCH:\n ")); DEBUG_ONLY(x87_dump_stack(state));
183 * Convert a virtual register to the stack index.
184 * Return -1 if the virtual register was not found.
186 static int x87_on_stack(const x87_state *state, int reg_idx) {
187 int i, tos = state->tos;
189 for (i = 0; i < state->depth; ++i)
190 if (state->st[MASK_TOS(tos + i)].reg_idx == reg_idx)
196 * Push a virtual Register onto the stack.
198 static void x87_push(x87_state *state, int reg_idx, ir_node *node) {
199 // assert(x87_on_stack(state, reg_idx) == -1 && "double push");
200 assert(state->depth < N_x87_REGS && "stack overrun");
203 state->tos = MASK_TOS(state->tos - 1);
204 state->st[state->tos].reg_idx = reg_idx;
205 state->st[state->tos].node = node;
207 DB((dbg, LEVEL_2, "After PUSH:\n ")); DEBUG_ONLY(x87_dump_stack(state));
211 * Pop a virtual Register from the stack.
213 static void x87_pop(x87_state *state) {
214 assert(state->depth > 0 && "stack underrun");
217 state->tos = MASK_TOS(state->tos + 1);
219 DB((dbg, LEVEL_2, "After POP:\n ")); DEBUG_ONLY(x87_dump_stack(state));
223 * Returns the block state of a block.
225 static blk_state *x87_get_bl_state(x87_simulator *sim, ir_node *block) {
226 pmap_entry *entry = pmap_find(sim->blk_states, block);
/* not yet present: lazily create an unassigned state and register it in the map */
229 blk_state *bl_state = obstack_alloc(&sim->obst, sizeof(*bl_state));
230 bl_state->begin = NULL;
231 bl_state->end = NULL;
233 pmap_insert(sim->blk_states, block, bl_state);
/* NOTE(review): the freshly inserted state is presumably returned on the missing path above -- confirm against full source */
237 return entry ? PTR_TO_BLKSTATE(entry->value) : NULL;
241 * Create a new x87 state.
243 static x87_state *x87_alloc_state(x87_simulator *sim) {
244 x87_state *res = obstack_alloc(&sim->obst, sizeof(*res));
249 * Create a new empty x87 state.
251 static x87_state *x87_alloc_empty_state(x87_simulator *sim) {
252 x87_state *res = x87_alloc_state(sim);
/* presumably clears depth/tos of the fresh state and returns it -- remainder not visible here */
261 static x87_state *x87_clone_state(x87_simulator *sim, const x87_state *src) {
262 x87_state *res = x87_alloc_state(sim);
264 memcpy(res, src, sizeof(*res));
269 * Patch a virtual instruction into a x87 one and return
272 static ir_node *x87_patch_insn(ir_node *n, ir_op *op) {
273 ir_mode *mode = get_irn_mode(n);
/* tuple result: the float value lives on a Proj, so retarget the Proj modes */
278 if (mode == mode_T) {
279 /* patch all Proj's */
280 const ir_edge_t *edge;
282 foreach_out_edge(n, edge) {
283 ir_node *proj = get_edge_src_irn(edge);
285 mode = get_irn_mode(proj);
286 if (mode_is_float(mode)) {
288 set_irn_mode(proj, mode_E);
/* non-tuple float result: fix the mode of the node itself */
293 else if (mode_is_float(mode))
294 set_irn_mode(n, mode_E);
298 /* -------------- x87 perm --------------- */
301 * Creates a fxch for shuffle.
303 * @param state the x87 state
304 * @param pos parameter for fxch
305 * @param dst_block the block of the user
307 * Creates a new fxch node and reroute the user of the old node
310 * @return the fxch node
312 static ir_node *x87_fxch_shuffle(x87_state *state, int pos, ir_node *block, ir_node *dst_block)
314 const ir_edge_t *edge;
315 ir_node *n = x87_get_st_node(state, pos);
316 ir_node *user = NULL;
/* find the Phi in dst_block that consumes n, so it can be rerouted through the fxch */
321 if (block == get_nodes_block(n)) {
322 /* this is a node from our block: change its user */
323 foreach_out_edge(n, edge) {
324 ir_node *succ = get_edge_src_irn(edge);
326 if (is_Phi(succ) && get_nodes_block(succ) == dst_block) {
328 node_idx = get_edge_src_pos(edge);
/* build the concrete fxch exchanging st(pos) with st(0) */
335 fxch = new_rd_ia32_fxch(NULL, get_irn_irg(block), block, n, get_irn_mode(n));
336 attr = get_ia32_attr(fxch);
337 attr->x87[0] = &ia32_st_regs[pos];
338 attr->x87[2] = &ia32_st_regs[0];
340 DB((dbg, LEVEL_2, "%+F replaced input %d of %+F\n", fxch, node_idx, user));
343 set_irn_n(user, node_idx, fxch);
345 /* This is a node from another block. Changing its user might be wrong,
346 so just keep it alive.
347 The "right" solution would require a new Phi, but we don't care here.
352 x87_fxch(state, pos);
357 * Calculate the necessary permutations to reach dst_state.
359 * These permutations are done with fxch instructions and placed
360 * at the end of the block.
362 * Note that critical edges are removed here, so we need only
363 * a shuffle if the current block has only one successor.
365 * @param sim the simulator handle
366 * @param block the current block
367 * @param state the current x87 stack state, might be modified
368 * @param dst_block the destination block
369 * @param dst_state destination state
373 static x87_state *x87_shuffle(x87_simulator *sim, ir_node *block, x87_state *state, ir_node *dst_block, const x87_state *dst_state) {
374 int i, n_rings, k, ri;
375 unsigned rings[4], all_mask;
378 ir_node *before, *after;
380 assert(state->depth == dst_state->depth);
382 /* Some mathematics here:
383 If we have a ring of length n that includes the tos,
384 we need n-1 exchange operations.
385 We can always add the tos and restore it, so we need
386 n+1 exchange operations for a ring not containing the tos.
387 So, the maximum of needed operations is for a ring of 7
388 not including the tos == 8.
389 This is so same number of ops we would need for store,
390 so exchange is cheaper (we save the loads).
391 On the other hand, we might need an additional exchange
392 in the next block to bring one operand on top, so the
393 number of ops in the first case is identical.
394 Further, no more than 4 rings can exists.
/* all_mask: bitmask of stack slots not yet assigned to a permutation ring */
396 all_mask = (1 << (state->depth)) - 1;
398 for (n_rings = 0; all_mask; ++n_rings) {
399 int src_idx, dst_idx;
401 /* find the first free slot */
402 for (i = 0; i < state->depth; ++i) {
403 if (all_mask & (1 << i)) {
404 all_mask &= ~(1 << i);
406 /* check if there are differences here */
407 if (x87_get_st_reg(state, i) != x87_get_st_reg(dst_state, i))
413 /* no more rings found */
/* follow the permutation cycle starting at slot i until it closes */
418 rings[n_rings] = (1 << i);
419 ring_idx[n_rings][k++] = i;
420 for (src_idx = i; ; src_idx = dst_idx) {
421 dst_idx = x87_on_stack(dst_state, x87_get_st_reg(state, src_idx));
423 if ((all_mask & (1 << dst_idx)) == 0)
426 ring_idx[n_rings][k++] = dst_idx;
427 rings[n_rings] |= (1 << dst_idx);
428 all_mask &= ~(1 << dst_idx);
430 ring_idx[n_rings][k] = -1;
434 /* no permutation needed */
438 /* Hmm: permutation needed */
439 DB((dbg, LEVEL_2, "\n%+F needs permutation: from\n", block));
440 DEBUG_ONLY(x87_dump_stack(state));
441 DB((dbg, LEVEL_2, " to\n"));
442 DEBUG_ONLY(x87_dump_stack(dst_state));
446 DB((dbg, LEVEL_2, "Need %d rings\n", n_rings));
447 for (ri = 0; ri < n_rings; ++ri) {
448 DB((dbg, LEVEL_2, " Ring %d:\n ", ri));
449 for (k = 0; ring_idx[ri][k] != -1; ++k)
450 DB((dbg, LEVEL_2, " st%d ->", ring_idx[ri][k]));
451 DB((dbg, LEVEL_2, "\n"));
458 * Find the place node must be insert.
459 * We have only one successor block, so the last instruction should
462 before = sched_last(block);
463 assert(is_cfop(before));
465 /* now do the permutations */
466 for (ri = 0; ri < n_rings; ++ri) {
/* a ring without the tos needs an extra fxch to bring its first member to st(0) first ... */
467 if ((rings[ri] & 1) == 0) {
468 /* this ring does not include the tos */
469 fxch = x87_fxch_shuffle(state, ring_idx[ri][0], block, dst_block);
471 sched_add_after(after, fxch);
473 sched_add_before(before, fxch);
476 for (k = 1; ring_idx[ri][k] != -1; ++k) {
477 fxch = x87_fxch_shuffle(state, ring_idx[ri][k], block, dst_block);
479 sched_add_after(after, fxch);
481 sched_add_before(before, fxch);
/* ... and a closing fxch to restore the tos afterwards */
484 if ((rings[ri] & 1) == 0) {
485 /* this ring does not include the tos */
486 fxch = x87_fxch_shuffle(state, ring_idx[ri][0], block, dst_block);
487 sched_add_after(after, fxch);
494 * Create a fxch before node n.
496 static void x87_create_fxch(x87_state *state, ir_node *n, int pos, int op_idx) {
497 ir_node *fxch, *pred;
500 x87_fxch(state, pos);
502 pred = get_irn_n(n, op_idx);
503 fxch = new_rd_ia32_fxch(NULL, get_irn_irg(n), get_nodes_block(n), pred, get_irn_mode(pred));
504 attr = get_ia32_attr(fxch);
505 attr->x87[0] = &ia32_st_regs[pos];
506 attr->x87[2] = &ia32_st_regs[0];
507 set_irn_n(n, op_idx, fxch);
509 sched_add_before(n, fxch);
510 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fxch), attr->x87[0]->name, attr->x87[2]->name));
514 * Create a fpush before node n.
516 static void x87_create_fpush(const arch_env_t *env, x87_state *state, ir_node *n, int pos, int op_idx) {
517 ir_node *fpush, *pred;
519 const arch_register_t *out = arch_get_irn_register(env, n);
521 x87_push(state, arch_register_get_index(out), n);
523 pred = get_irn_n(n, op_idx);
524 fpush = new_rd_ia32_fpush(NULL, get_irn_irg(n), get_nodes_block(n), pred, get_irn_mode(pred));
525 attr = get_ia32_attr(fpush);
526 attr->x87[0] = &ia32_st_regs[pos];
527 attr->x87[2] = &ia32_st_regs[0];
528 set_irn_n(n, op_idx, fpush);
530 sched_add_before(n, fpush);
531 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fpush), attr->x87[0]->name, attr->x87[2]->name));
534 /* --------------------------------- liveness ------------------------------------------ */
537 * The liveness transfer function.
538 * Updates a live set over a single step from a given node to its predecessor.
539 * Everything defined at the node is removed from the set, the uses of the node get inserted.
540 * @param arch_env The architecture environment.
541 * @param irn The node at which liveness should be computed.
542 * @param live The bitset of registers live before @p irn. This set gets modified by updating it to
543 * the registers live after irn.
546 static unsigned vfp_liveness_transfer(const arch_env_t *arch_env, ir_node *irn, unsigned live)
549 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
551 if(arch_irn_consider_in_reg_alloc(arch_env, cls, irn)) {
552 const arch_register_t *reg = arch_get_irn_register(arch_env, irn);
553 live &= ~(1 << reg->index);
556 for(i = 0, n = get_irn_arity(irn); i < n; ++i) {
557 ir_node *op = get_irn_n(irn, i);
559 if (mode_is_float(get_irn_mode(op)) && arch_irn_consider_in_reg_alloc(arch_env, cls, op)) {
560 const arch_register_t *reg = arch_get_irn_register(arch_env, op);
561 live |= 1 << reg->index;
569 * Put all live virtual registers at the end of a block into a bitset.
570 * @param arch_env The architecture environment.
571 * @param bl The block.
572 * @return The live bitset.
574 static unsigned vfp_liveness_end_of_block(const arch_env_t *arch_env, const ir_node *bl)
578 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
580 live_foreach(bl, li) {
581 ir_node *irn = (ir_node *) li->irn;
582 if (live_is_end(li) && arch_irn_consider_in_reg_alloc(arch_env, cls, irn)) {
583 const arch_register_t *reg = arch_get_irn_register(arch_env, irn);
584 live |= 1 << reg->index;
592 * Compute a bitset of registers which are live at another node.
593 * @param arch_env The architecture environment.
594 * @param pos The node.
595 * @return The live bitset.
597 static unsigned vfp_liveness_nodes_live_at(const arch_env_t *arch_env, const ir_node *pos)
599 const ir_node *bl = is_Block(pos) ? pos : get_nodes_block(pos);
600 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
604 live = vfp_liveness_end_of_block(arch_env, bl);
606 sched_foreach_reverse(bl, irn) {
608 * If we encounter the node we want to insert the Perm after,
609 * exit immediately, so that this node is still live
614 live = vfp_liveness_transfer(arch_env, irn, live);
621 * Returns true if a register is live in a set.
623 static unsigned is_vfp_live(const arch_register_t *reg, unsigned live) {
624 return live & (1 << reg->index);
629 * dump liveness info.
631 static vfp_dump_live(unsigned live) {
634 DB((dbg, LEVEL_2, "Live registers here: \n"));
635 for (i = 0; i < 8; ++i) {
636 if (live & (1 << i)) {
637 DB((dbg, LEVEL_2, " vf%d", i));
640 DB((dbg, LEVEL_2, "\n"));
642 #endif /* DEBUG_libfirm */
644 /* --------------------------------- simulators ---------------------------------------- */
/* Exchange the values of two int lvalues (used to swap operand indices in sim_binop). */
646 #define XCHG(a, b) do { int t =(a); (a) = (b); (b) = t; } while (0)
649 * Simulate a virtual binop
/* Chooses among the four ops in tmpl (normal/reverse, with/without pop) depending on
   where the operands sit on the stack and whether they are still live afterwards. */
651 static void sim_binop(x87_state *state, ir_node *n, const arch_env_t *env, const exchange_tmpl *tmpl) {
652 int op2_idx, op1_idx = -1;
653 int out_idx, do_pop =0;
656 const arch_register_t *op1 = arch_get_irn_register(env, get_irn_n(n, BINOP_IDX_1));
657 const arch_register_t *op2 = arch_get_irn_register(env, get_irn_n(n, BINOP_IDX_2));
658 const arch_register_t *out = arch_get_irn_register(env, n);
659 unsigned live = vfp_liveness_nodes_live_at(env, n);
661 DB((dbg, LEVEL_1, ">>> %s %s, %s -> %s\n", get_irn_opname(n),
662 arch_register_get_name(op2), arch_register_get_name(op1),
663 arch_register_get_name(out)));
664 DEBUG_ONLY(vfp_dump_live(live));
666 op2_idx = x87_on_stack(state, arch_register_get_index(op2));
668 if (op1->reg_class == &ia32_reg_classes[CLASS_ia32_vfp]) {
669 /* first operand is a vfp register */
670 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
672 if (is_vfp_live(op2, live)) {
673 /* second operand is live */
675 if (is_vfp_live(op1, live)) {
676 /* both operands are live: push the first one */
677 x87_create_fpush(env, state, n, op2_idx, BINOP_IDX_2);
678 out_idx = op2_idx = 0;
680 dst = tmpl->normal_op;
684 /* second live, first operand is dead here, bring it to tos */
686 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
690 op1_idx = out_idx = 0;
691 dst = tmpl->normal_op;
696 /* second operand is dead */
697 if (is_vfp_live(op1, live)) {
698 /* first operand is live: bring second to tos */
700 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
704 op2_idx = out_idx = 0;
705 dst = tmpl->normal_op;
709 /* both operands are dead here, pop them from the stack */
/* operand order is swapped, so the reverse op with pop is used */
712 XCHG(op2_idx, op1_idx);
713 dst = tmpl->reverse_pop_op;
716 else if (op1_idx == 0) {
718 dst = tmpl->normal_pop_op;
722 /* bring the first on top */
723 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
726 dst = tmpl->normal_pop_op;
733 /* first operand is an address mode */
734 if (is_vfp_live(op2, live)) {
735 /* second operand is live: push it here */
736 x87_create_fpush(env, state, n, op2_idx, BINOP_IDX_2);
739 /* second operand is dead: bring it to tos */
741 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
743 op2_idx = out_idx = 0;
744 dst = tmpl->normal_op;
/* record the result on the simulated stack and patch n into the selected real op */
748 x87_set_st(state, arch_register_get_index(out), x87_patch_insn(n, dst), out_idx);
752 /* patch the operation */
753 attr = get_ia32_attr(n);
755 attr->x87[0] = op1 = &ia32_st_regs[op1_idx];
756 attr->x87[1] = op2 = &ia32_st_regs[op2_idx];
757 attr->x87[2] = out = &ia32_st_regs[out_idx];
759 DB((dbg, LEVEL_1, "<<< %s %s, %s -> %s\n", get_irn_opname(n),
760 arch_register_get_name(op2), arch_register_get_name(op1),
761 arch_register_get_name(out)));
765 * Simulate a virtual Unop
/* Unops always operate on (and produce) st(0): the operand is pushed if still live,
   or exchanged to the tos if dead, then the node is patched to the real x87 op. */
767 static void sim_unop(x87_state *state, ir_node *n, const arch_env_t *env, ir_op *op) {
768 int op1_idx, out_idx;
769 const arch_register_t *op1 = arch_get_irn_register(env, get_irn_n(n, UNOP_IDX));
770 const arch_register_t *out = arch_get_irn_register(env, n);
772 unsigned live = vfp_liveness_nodes_live_at(env, n);
774 DB((dbg, LEVEL_1, ">>> %s -> %s\n", get_irn_opname(n), out->name));
775 DEBUG_ONLY(vfp_dump_live(live));
777 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
779 if (is_vfp_live(op1, live)) {
780 /* push the operand here */
781 x87_create_fpush(env, state, n, op1_idx, UNOP_IDX);
784 /* operand is dead, bring it to tos */
786 x87_create_fxch(state, n, op1_idx, UNOP_IDX);
789 x87_set_tos(state, arch_register_get_index(out), x87_patch_insn(n, op));
790 op1_idx = out_idx = 0;
791 attr = get_ia32_attr(n);
792 attr->x87[0] = op1 = &ia32_st_regs[0];
793 attr->x87[2] = out = &ia32_st_regs[0];
794 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), out->name));
798 * Simulate a virtual Load instructions
800 static void sim_load(x87_state *state, ir_node *n, const arch_env_t *env, ir_op *op) {
801 const arch_register_t *out = arch_get_irn_register(env, n);
804 DB((dbg, LEVEL_1, ">>> %s -> %s\n", get_irn_opname(n), arch_register_get_name(out)));
805 x87_push(state, arch_register_get_index(out), x87_patch_insn(n, op));
806 attr = get_ia32_attr(n);
807 attr->x87[2] = out = &ia32_st_regs[0];
808 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), arch_register_get_name(out)));
812 * Simulate a virtual Store
814 static void sim_fst(x87_state *state, ir_node *n, const arch_env_t *env) {
816 const arch_register_t *op2 = arch_get_irn_register(env, get_irn_n(n, STORE_VAL_IDX));
818 unsigned live = vfp_liveness_nodes_live_at(env, n);
820 op2_idx = x87_on_stack(state, arch_register_get_index(op2));
822 DB((dbg, LEVEL_1, ">>> %s %s ->\n", get_irn_opname(n), arch_register_get_name(op2)));
824 /* we can only store the tos to memory */
826 x87_create_fxch(state, n, op2_idx, STORE_VAL_IDX);
/* value still live after the store: fst keeps it on the stack, else fstp pops it */
828 if (is_vfp_live(op2, live))
829 x87_patch_insn(n, op_ia32_fst);
832 x87_patch_insn(n, op_ia32_fstp);
835 attr = get_ia32_attr(n);
836 attr->x87[1] = op2 = &ia32_st_regs[0];
837 DB((dbg, LEVEL_1, "<<< %s %s ->\n", get_irn_opname(n), arch_register_get_name(op2)));
841 * Simulate a virtual Phi.
842 * Just for cosmetic reasons change the mode of Phi nodes to mode_E.
844 static void sim_Phi(x87_state *state, ir_node *n, const arch_env_t *env) {
845 ir_mode *mode = get_irn_mode(n);
847 if (mode_is_float(mode))
848 set_irn_mode(n, mode_E);
/* Generator macros: expand to one sim_<op> wrapper per virtual x87 instruction. */
852 #define _GEN_BINOP(op, rev) \
853 static void sim_##op(x87_state *state, ir_node *n, const arch_env_t *env) { \
854 exchange_tmpl tmpl = { op_ia32_##op, op_ia32_##rev, op_ia32_##op##p, op_ia32_##rev##p }; \
855 sim_binop(state, n, env, &tmpl); \
858 #define GEN_BINOP(op) _GEN_BINOP(op, op)
859 #define GEN_BINOPR(op) _GEN_BINOP(op, op##r)
861 #define GEN_LOAD2(op, nop) \
862 static void sim_##op(x87_state *state, ir_node *n, const arch_env_t *env) { \
863 sim_load(state, n, env, op_ia32_##nop); \
866 #define GEN_LOAD(op) GEN_LOAD2(op, op)
868 #define GEN_UNOP(op) \
869 static void sim_##op(x87_state *state, ir_node *n, const arch_env_t *env) { \
870 sim_unop(state, n, env, op_ia32_##op); \
882 GEN_LOAD2(fConst, fldConst)
891 * Simulate a virtual Copy
893 static void sim_Copy(x87_state *state, ir_node *n, const arch_env_t *env) {
894 ir_mode *mode = get_irn_mode(n);
896 if (mode_is_float(mode)) {
897 const arch_register_t *op1 = arch_get_irn_register(env, get_irn_n(n, 0));
898 const arch_register_t *out = arch_get_irn_register(env, n);
899 ir_node *node, *next;
902 unsigned live = vfp_liveness_nodes_live_at(env, n);
904 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
906 DB((dbg, LEVEL_1, ">>> %s %s -> %s\n", get_irn_opname(n),
907 arch_register_get_name(op1), arch_register_get_name(out)));
908 DEBUG_ONLY(vfp_dump_live(live));
910 if (is_vfp_live(op1, live)) {
911 /* operand is still live, a real copy: materialize it with an fpush */
912 node = new_rd_ia32_fpush(get_irn_dbg_info(n), get_irn_irg(n), get_nodes_block(n), get_irn_n(n, 0), mode);
913 arch_set_irn_register(env, node, out);
915 x87_push(state, arch_register_get_index(out), node);
917 attr = get_ia32_attr(node);
918 attr->x87[0] = op1 = &ia32_st_regs[op1_idx];
919 attr->x87[2] = out = &ia32_st_regs[0];
921 next = sched_next(n);
924 sched_add_before(next, node);
925 DB((dbg, LEVEL_1, ">>> %s %s -> %s\n", get_irn_opname(node), op1->name, out->name));
/* operand dies here: no instruction needed, just rename the stack slot and drop n */
928 /* just a virtual copy */
929 x87_set_st(state, arch_register_get_index(out), get_unop_op(n), op1_idx);
931 DB((dbg, LEVEL_1, ">>> KILLED %s\n", get_irn_opname(n)));
932 exchange(n, get_unop_op(n));
938 * Run a simulation and fix all virtual instructions for a block.
940 * @return non-zero if simulation is complete,
941 * zero if the simulation must be rerun
943 static int x87_simulate_block(x87_simulator *sim, ir_node *block) {
945 blk_state *bl_state = x87_get_bl_state(sim, block);
946 x87_state *state = bl_state->begin;
947 const ir_edge_t *edge;
948 ir_node *start_block;
950 /* if we have no assigned start state, we must wait ... */
954 assert(bl_state->end == NULL);
956 DB((dbg, LEVEL_1, "Simulate %+F\n", block));
958 /* beware, n might change */
959 for (n = sched_first(block); !sched_is_end(n); n = next) {
960 ir_op *op = get_irn_op(n);
962 next = sched_next(n);
/* only ops with a registered sim_func (see x87_init_simulator) need fixing */
963 if (op->ops.generic) {
964 sim_func func = (sim_func)op->ops.generic;
966 /* have work to do */
967 if (state == bl_state->begin) {
968 /* create a new state, will be changed (copy-on-write keeps begin intact) */
969 state = x87_clone_state(sim, state);
973 (*func)(state, n, sim->env);
977 start_block = get_irg_start_block(get_irn_irg(block));
979 /* check if the state must be shuffled */
980 foreach_block_succ(block, edge) {
981 ir_node *succ = get_edge_src_irn(edge);
982 blk_state *succ_state = x87_get_bl_state(sim, succ);
984 if (succ_state->begin && succ != start_block) {
985 /* There is already a begin state for this block, bad.
986 Do the necessary permutations.
987 Note that critical edges are removed, so this is always possible. */
988 x87_shuffle(sim, block, state, succ, succ_state->begin);
990 /* Note further, that there can be only one such situation,
991 so we can break here. */
995 bl_state->end = state;
997 /* now propagate the state to all successor blocks */
998 foreach_block_succ(block, edge) {
999 ir_node *succ = get_edge_src_irn(edge);
1000 blk_state *succ_state = x87_get_bl_state(sim, succ);
1002 if (! succ_state->begin)
1003 succ_state->begin = state;
1010 * Create a new x87 simulator.
1012 static void x87_init_simulator(x87_simulator *sim, const arch_env_t *env) {
1013 obstack_init(&sim->obst);
1014 sim->blk_states = pmap_create();
1017 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.x87");
1018 firm_dbg_set_mask(dbg, SET_LEVEL_2);
1020 DB((dbg, LEVEL_1, "x87 Simulator started\n"));
1022 /* set the generic function pointer of instructions we must simulate */
1023 clear_irp_opcodes_generic_func();
/* The ASSOC* macros bind a sim_<op> handler to each opcode's generic function pointer;
   the list of ASSOC invocations is not visible in this excerpt. */
1025 #define ASSOC(op) (op_ ## op)->ops.generic = (op_func)(sim_##op)
1026 #define ASSOC_IA32(op) (op_ia32_v ## op)->ops.generic = (op_func)(sim_##op)
1027 #define ASSOC_BE(op) (op_be_ ## op)->ops.generic = (op_func)(sim_##op)
1051 * Destroy a x87 simulator.
1053 static void x87_destroy_simulator(x87_simulator *sim) {
1054 pmap_destroy(sim->blk_states);
1055 obstack_free(&sim->obst, NULL);
1056 DB((dbg, LEVEL_1, "x87 Simulator stopped\n"));
1060 * Run a simulation and fix all virtual instructions for a graph.
1062 * Needs a block-schedule.
1064 void x87_simulate_graph(const arch_env_t *env, ir_graph *irg, ir_node **blk_list) {
1065 ir_node *block, *start_block;
1067 blk_state *bl_state;
1071 /* we need liveness info for the current graph */
1074 /* create the simulator */
1075 x87_init_simulator(&sim, env);
1077 start_block = get_irg_start_block(irg);
1078 bl_state = x87_get_bl_state(&sim, start_block);
1080 /* start with the empty state */
1081 bl_state->begin = empty;
1083 worklist = new_pdeq();
1085 /* create the worklist for the schedule. */
1086 for (i = 0; i < ARR_LEN(blk_list); ++i)
1087 pdeq_putr(worklist, blk_list[i]);
/* fixpoint iteration: blocks whose begin state is not yet known are re-queued
   (x87_simulate_block returns zero in that case) */
1091 block = pdeq_getl(worklist);
1092 if (! x87_simulate_block(&sim, block)) {
1093 pdeq_putr(worklist, block);
1096 } while (! pdeq_empty(worklist));
1099 x87_destroy_simulator(&sim);