2 * This file implements the x87 support and virtual to stack
3 * register translation for the ia32 backend.
5 * @author: Michael Beck
12 #endif /* HAVE_CONFIG_H */
19 #include "iredges_t.h"
28 #include "../belive_t.h"
29 #include "../besched.h"
30 #include "../benode_t.h"
31 #include "ia32_new_nodes.h"
32 #include "gen_ia32_new_nodes.h"
33 #include "gen_ia32_regalloc_if.h"
37 #define DEBUG_ONLY(x) x
44 /* first and second binop index */
51 /* the store val index */
52 #define STORE_VAL_IDX 2
54 #define MASK_TOS(x) ((x) & (N_x87_REGS - 1))
56 /** the debug handle */
57 static firm_dbg_module_t *dbg = NULL;
60 * An exchange template.
61 * Note that our virtual functions have the same inputs
62 * and attributes as the real ones, so we can simple exchange
64 * Further, x87 supports inverse instructions, so we can handle them.
66 typedef struct _exchange_tmpl {
67 ir_op *normal_op; /**< the normal one */
68 ir_op *reverse_op; /**< the reverse one if exists */
69 ir_op *normal_pop_op; /**< the normal one with tos pop */
70 ir_op *reverse_pop_op; /**< the reverse one with tos pop */
74 * An entry on the simulated x87 stack.
76 typedef struct _st_entry {
77 int reg_idx; /**< the virtual register index of this stack value */
78 ir_node *node; /**< the node that produced this value */
84 typedef struct _x87_state {
85 st_entry st[N_x87_REGS]; /**< the register stack */
86 int depth; /**< the current stack depth */
87 int tos; /**< position of the tos */
90 /** An empty state, used for blocks without fp instructions. */
91 static const x87_state _empty = { {0, NULL}, 0, 0 };
92 static x87_state *empty = (x87_state *)&_empty;
94 /** The type of an instruction simulator */
95 typedef void (*sim_func)(x87_state *state, ir_node *n, const arch_env_t *env);
98 * A block state: Every block has a x87 state at the beginning and at the end.
100 typedef struct _blk_state {
101 x87_state *begin; /**< state at the begin or NULL if not assigned */
102 x87_state *end; /**< state at the end or NULL if not assigned */
105 #define PTR_TO_BLKSTATE(p) ((blk_state *)(p))
110 typedef struct _x87_simulator {
111 struct obstack obst; /**< an obstack for fast allocating */
112 pmap *blk_states; /**< map blocks to states */
113 const arch_env_t *env; /**< architecture environment */
117 * Check if the state is empty.
119 static int x87_state_is_empty(const x87_state *state) {
120 return state->depth == 0;
124 * Return the virtual register index at st(pos).
126 static int x87_get_st_reg(const x87_state *state, int pos) {
127 assert(pos < state->depth);
128 return state->st[MASK_TOS(state->tos + pos)].reg_idx;
132 * Return the node at st(pos).
134 static ir_node *x87_get_st_node(const x87_state *state, int pos) {
135 assert(pos < state->depth);
136 return state->st[MASK_TOS(state->tos + pos)].node;
141 * Dump the stack for debugging.
143 static void x87_dump_stack(const x87_state *state) {
146 for (i = state->depth - 1; i >= 0; --i) {
147 DB((dbg, LEVEL_2, "vf%d ", x87_get_st_reg(state, i)));
149 DB((dbg, LEVEL_2, "<-- TOS\n"));
151 #endif /* DEBUG_libfirm */
154 * Set a virtual register to st(pos)
156 static void x87_set_st(x87_state *state, int reg_idx, ir_node *node, int pos) {
157 assert(0 < state->depth);
158 state->st[MASK_TOS(state->tos + pos)].reg_idx = reg_idx;
159 state->st[MASK_TOS(state->tos + pos)].node = node;
161 DB((dbg, LEVEL_2, "After SET_REG:\n ")); DEBUG_ONLY(x87_dump_stack(state));
165 * Set the tos virtual register
167 static void x87_set_tos(x87_state *state, int reg_idx, ir_node *node) {
168 x87_set_st(state, reg_idx, node, 0);
172 * Flush the x87 stack.
174 static void x87_flush(x87_state *state) {
180 * Swap st(0) with st(pos).
182 static void x87_fxch(x87_state *state, int pos) {
184 assert(pos < state->depth);
186 entry = state->st[MASK_TOS(state->tos + pos)];
187 state->st[MASK_TOS(state->tos + pos)] = state->st[MASK_TOS(state->tos)];
188 state->st[MASK_TOS(state->tos)] = entry;
190 DB((dbg, LEVEL_2, "After FXCH:\n ")); DEBUG_ONLY(x87_dump_stack(state));
194 * Convert a virtual register to the stack index.
195 * Return -1 if the virtual register was not found.
197 static int x87_on_stack(const x87_state *state, int reg_idx) {
198 int i, tos = state->tos;
200 for (i = 0; i < state->depth; ++i)
201 if (state->st[MASK_TOS(tos + i)].reg_idx == reg_idx)
207 * Push a virtual Register onto the stack.
209 static void x87_push(x87_state *state, int reg_idx, ir_node *node) {
210 assert(x87_on_stack(state, reg_idx) == -1 && "double push");
211 assert(state->depth < N_x87_REGS && "stack overrun");
214 state->tos = MASK_TOS(state->tos - 1);
215 state->st[state->tos].reg_idx = reg_idx;
216 state->st[state->tos].node = node;
218 DB((dbg, LEVEL_2, "After PUSH:\n ")); DEBUG_ONLY(x87_dump_stack(state));
222 * Pop a virtual Register from the stack.
224 static void x87_pop(x87_state *state) {
225 assert(state->depth > 0 && "stack underrun");
228 state->tos = MASK_TOS(state->tos + 1);
230 DB((dbg, LEVEL_2, "After POP:\n ")); DEBUG_ONLY(x87_dump_stack(state));
234 * Returns the block state of a block.
236 static blk_state *x87_get_bl_state(x87_simulator *sim, ir_node *block) {
237 pmap_entry *entry = pmap_find(sim->blk_states, block);
240 blk_state *bl_state = obstack_alloc(&sim->obst, sizeof(*bl_state));
241 bl_state->begin = NULL;
242 bl_state->end = NULL;
244 pmap_insert(sim->blk_states, block, bl_state);
248 return entry ? PTR_TO_BLKSTATE(entry->value) : NULL;
252 * Create a new x87 state.
254 static x87_state *x87_alloc_state(x87_simulator *sim) {
255 x87_state *res = obstack_alloc(&sim->obst, sizeof(*res));
260 * Create a new empty x87 state.
262 static x87_state *x87_alloc_empty_state(x87_simulator *sim) {
263 x87_state *res = x87_alloc_state(sim);
272 static x87_state *x87_clone_state(x87_simulator *sim, const x87_state *src) {
273 x87_state *res = x87_alloc_state(sim);
275 memcpy(res, src, sizeof(*res));
280 * Patch a virtual instruction into a x87 one and return
283 static ir_node *x87_patch_insn(ir_node *n, ir_op *op) {
284 ir_mode *mode = get_irn_mode(n);
289 if (mode == mode_T) {
290 /* patch all Proj's */
291 const ir_edge_t *edge;
293 foreach_out_edge(n, edge) {
294 ir_node *proj = get_edge_src_irn(edge);
296 mode = get_irn_mode(proj);
297 if (mode_is_float(mode)) {
299 set_irn_mode(proj, mode_E);
304 else if (mode_is_float(mode))
305 set_irn_mode(n, mode_E);
309 /* -------------- x87 perm --------------- */
312 * Creates a fxch for shuffle.
314 * @param state the x87 state
315 * @param pos parameter for fxch
316 * @param dst_block the block of the user
318 * Creates a new fxch node and reroute the user of the old node
321 * @return the fxch node
323 static ir_node *x87_fxch_shuffle(x87_state *state, int pos, ir_node *block, ir_node *dst_block)
325 const ir_edge_t *edge;
326 ir_node *n = x87_get_st_node(state, pos);
327 ir_node *user = NULL;
332 if (block == get_nodes_block(n)) {
333 /* this is a node from out block: change it's user */
334 foreach_out_edge(n, edge) {
335 ir_node *succ = get_edge_src_irn(edge);
337 if (is_Phi(succ) && get_nodes_block(succ) == dst_block) {
339 node_idx = get_edge_src_pos(edge);
346 fxch = new_rd_ia32_fxch(NULL, get_irn_irg(block), block, n, get_irn_mode(n));
347 attr = get_ia32_attr(fxch);
348 attr->x87[0] = &ia32_st_regs[pos];
349 attr->x87[2] = &ia32_st_regs[0];
352 DB((dbg, LEVEL_2, "%+F replaced input %d of %+F\n", fxch, node_idx, user));
353 set_irn_n(user, node_idx, fxch);
357 * This is a node from a dominator block. Changing it's user might be wrong,
358 * so just keep it alive.
359 * The "right" solution would require a new Phi, but we don't care here.
364 x87_fxch(state, pos);
369 * Calculate the necessary permutations to reach dst_state.
371 * These permutations are done with fxch instructions and placed
372 * at the end of the block.
374 * Note that critical edges are removed here, so we need only
375 * a shuffle if the current block has only one successor.
377 * @param sim the simulator handle
378 * @param block the current block
379 * @param state the current x87 stack state, might be modified
380 * @param dst_block the destination block
381 * @param dst_state destination state
385 static x87_state *x87_shuffle(x87_simulator *sim, ir_node *block, x87_state *state, ir_node *dst_block, const x87_state *dst_state) {
386 int i, n_rings, k, ri;
387 unsigned rings[4], all_mask;
390 ir_node *before, *after;
392 assert(state->depth == dst_state->depth);
394 /* Some mathematics here:
395 If we have a ring of lenght n that includes the tos,
396 we need n-1 exchange operations.
397 We can always add the tos and restore it, so we need
398 n+1 exchange operations for a ring not containing the tos.
399 So, the maximum of needed operations is for a ring of 7
400 not including the tos == 8.
401 This is so same number of ops we would need for store,
402 so exchange is cheaper (we save the loads).
403 On the other hand, we might need an additional exchange
404 in the next block to bring one operand on top, so the
405 number of ops in the first case is identical.
406 Further, no more than 4 rings can exists.
408 all_mask = (1 << (state->depth)) - 1;
410 for (n_rings = 0; all_mask; ++n_rings) {
411 int src_idx, dst_idx;
413 /* find the first free slot */
414 for (i = 0; i < state->depth; ++i) {
415 if (all_mask & (1 << i)) {
416 all_mask &= ~(1 << i);
418 /* check if there are differences here */
419 if (x87_get_st_reg(state, i) != x87_get_st_reg(dst_state, i))
425 /* no more rings found */
430 rings[n_rings] = (1 << i);
431 ring_idx[n_rings][k++] = i;
432 for (src_idx = i; ; src_idx = dst_idx) {
433 dst_idx = x87_on_stack(dst_state, x87_get_st_reg(state, src_idx));
435 if ((all_mask & (1 << dst_idx)) == 0)
438 ring_idx[n_rings][k++] = dst_idx;
439 rings[n_rings] |= (1 << dst_idx);
440 all_mask &= ~(1 << dst_idx);
442 ring_idx[n_rings][k] = -1;
446 /* no permutation needed */
450 /* Hmm: permutation needed */
451 DB((dbg, LEVEL_2, "\n%+F needs permutation: from\n", block));
452 DEBUG_ONLY(x87_dump_stack(state));
453 DB((dbg, LEVEL_2, " to\n"));
454 DEBUG_ONLY(x87_dump_stack(dst_state));
458 DB((dbg, LEVEL_2, "Need %d rings\n", n_rings));
459 for (ri = 0; ri < n_rings; ++ri) {
460 DB((dbg, LEVEL_2, " Ring %d:\n ", ri));
461 for (k = 0; ring_idx[ri][k] != -1; ++k)
462 DB((dbg, LEVEL_2, " st%d ->", ring_idx[ri][k]));
463 DB((dbg, LEVEL_2, "\n"));
470 * Find the place node must be insert.
471 * We have only one successor block, so the last instruction should
474 before = sched_last(block);
475 assert(is_cfop(before));
477 /* now do the permutations */
478 for (ri = 0; ri < n_rings; ++ri) {
479 if ((rings[ri] & 1) == 0) {
480 /* this ring does not include the tos */
481 fxch = x87_fxch_shuffle(state, ring_idx[ri][0], block, dst_block);
483 sched_add_after(after, fxch);
485 sched_add_before(before, fxch);
488 for (k = 1; ring_idx[ri][k] != -1; ++k) {
489 fxch = x87_fxch_shuffle(state, ring_idx[ri][k], block, dst_block);
491 sched_add_after(after, fxch);
493 sched_add_before(before, fxch);
496 if ((rings[ri] & 1) == 0) {
497 /* this ring does not include the tos */
498 fxch = x87_fxch_shuffle(state, ring_idx[ri][0], block, dst_block);
499 sched_add_after(after, fxch);
506 * Create a fxch before node n.
508 static void x87_create_fxch(x87_state *state, ir_node *n, int pos, int op_idx) {
509 ir_node *fxch, *pred;
512 x87_fxch(state, pos);
514 pred = get_irn_n(n, op_idx);
515 fxch = new_rd_ia32_fxch(NULL, get_irn_irg(n), get_nodes_block(n), pred, get_irn_mode(pred));
516 attr = get_ia32_attr(fxch);
517 attr->x87[0] = &ia32_st_regs[pos];
518 attr->x87[2] = &ia32_st_regs[0];
519 set_irn_n(n, op_idx, fxch);
521 sched_add_before(n, fxch);
522 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fxch), attr->x87[0]->name, attr->x87[2]->name));
526 * Create a fpush before node n.
528 static void x87_create_fpush(const arch_env_t *env, x87_state *state, ir_node *n, int pos, int op_idx) {
529 ir_node *fpush, *pred;
531 const arch_register_t *out = arch_get_irn_register(env, n);
533 x87_push(state, arch_register_get_index(out), n);
535 pred = get_irn_n(n, op_idx);
536 fpush = new_rd_ia32_fpush(NULL, get_irn_irg(n), get_nodes_block(n), pred, get_irn_mode(pred));
537 attr = get_ia32_attr(fpush);
538 attr->x87[0] = &ia32_st_regs[pos];
539 attr->x87[2] = &ia32_st_regs[0];
540 set_irn_n(n, op_idx, fpush);
542 sched_add_before(n, fpush);
543 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fpush), attr->x87[0]->name, attr->x87[2]->name));
546 /* --------------------------------- liveness ------------------------------------------ */
549 * The liveness transfer function.
550 * Updates a live set over a single step from a given node to its predecessor.
551 * Everything defined at the node is removed from the set, the uses of the node get inserted.
552 * @param arch_env The architecture environment.
553 * @param irn The node at which liveness should be computed.
554 * @param live The bitset of registers live before @p irn. This set gets modified by updating it to
555 * the registers live after irn.
558 static unsigned vfp_liveness_transfer(const arch_env_t *arch_env, ir_node *irn, unsigned live)
561 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
563 if(arch_irn_consider_in_reg_alloc(arch_env, cls, irn)) {
564 const arch_register_t *reg = arch_get_irn_register(arch_env, irn);
565 live &= ~(1 << reg->index);
568 for(i = 0, n = get_irn_arity(irn); i < n; ++i) {
569 ir_node *op = get_irn_n(irn, i);
571 if (mode_is_float(get_irn_mode(op)) && arch_irn_consider_in_reg_alloc(arch_env, cls, op)) {
572 const arch_register_t *reg = arch_get_irn_register(arch_env, op);
573 live |= 1 << reg->index;
581 * Put all live virtual registers at the end of a block into a bitset.
582 * @param arch_env The architecture environment.
583 * @param bl The block.
584 * @return The live bitset.
586 static unsigned vfp_liveness_end_of_block(const arch_env_t *arch_env, const ir_node *bl)
590 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
592 live_foreach(bl, li) {
593 ir_node *irn = (ir_node *) li->irn;
594 if (live_is_end(li) && arch_irn_consider_in_reg_alloc(arch_env, cls, irn)) {
595 const arch_register_t *reg = arch_get_irn_register(arch_env, irn);
596 live |= 1 << reg->index;
604 * Compute a bitset of registers which are live at another node.
605 * @param arch_env The architecture environment.
606 * @param pos The node.
607 * @return The live bitset.
609 static unsigned vfp_liveness_nodes_live_at(const arch_env_t *arch_env, const ir_node *pos)
611 const ir_node *bl = is_Block(pos) ? pos : get_nodes_block(pos);
612 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
616 live = vfp_liveness_end_of_block(arch_env, bl);
618 sched_foreach_reverse(bl, irn) {
620 * If we encounter the node we want to insert the Perm after,
621 * exit immediately, so that this node is still live
626 live = vfp_liveness_transfer(arch_env, irn, live);
633 * Returns true if a register is live in a set.
635 static unsigned is_vfp_live(const arch_register_t *reg, unsigned live) {
636 return live & (1 << reg->index);
641 * dump liveness info.
643 static void vfp_dump_live(unsigned live) {
646 DB((dbg, LEVEL_2, "Live registers here: \n"));
647 for (i = 0; i < 8; ++i) {
648 if (live & (1 << i)) {
649 DB((dbg, LEVEL_2, " vf%d", i));
652 DB((dbg, LEVEL_2, "\n"));
654 #endif /* DEBUG_libfirm */
656 /* --------------------------------- simulators ---------------------------------------- */
658 #define XCHG(a, b) do { int t =(a); (a) = (b); (b) = t; } while (0)
661 * Simulate a virtual binop
663 static void sim_binop(x87_state *state, ir_node *n, const arch_env_t *env, const exchange_tmpl *tmpl) {
664 int op2_idx, op1_idx = -1;
665 int out_idx, do_pop =0;
668 const arch_register_t *op1 = arch_get_irn_register(env, get_irn_n(n, BINOP_IDX_1));
669 const arch_register_t *op2 = arch_get_irn_register(env, get_irn_n(n, BINOP_IDX_2));
670 const arch_register_t *out = arch_get_irn_register(env, n);
671 unsigned live = vfp_liveness_nodes_live_at(env, n);
673 DB((dbg, LEVEL_1, ">>> %s %s, %s -> %s\n", get_irn_opname(n),
674 arch_register_get_name(op2), arch_register_get_name(op1),
675 arch_register_get_name(out)));
676 DEBUG_ONLY(vfp_dump_live(live));
678 op2_idx = x87_on_stack(state, arch_register_get_index(op2));
680 if (op1->reg_class == &ia32_reg_classes[CLASS_ia32_vfp]) {
681 /* first operand is a vfp register */
682 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
684 if (is_vfp_live(op2, live)) {
685 /* second operand is live */
687 if (is_vfp_live(op1, live)) {
688 /* both operands are live: push the first one */
689 x87_create_fpush(env, state, n, op2_idx, BINOP_IDX_2);
690 out_idx = op2_idx = 0;
692 dst = tmpl->normal_op;
696 /* second live, first operand is dead here, bring it to tos */
698 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
702 op1_idx = out_idx = 0;
703 dst = tmpl->normal_op;
708 /* second operand is dead */
709 if (is_vfp_live(op1, live)) {
710 /* first operand is live: bring second to tos */
712 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
716 op2_idx = out_idx = 0;
717 dst = tmpl->normal_op;
721 /* both operands are dead here, pop them from the stack */
724 XCHG(op2_idx, op1_idx);
725 dst = tmpl->reverse_pop_op;
728 else if (op1_idx == 0) {
730 dst = tmpl->normal_pop_op;
734 /* bring the first on top */
735 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
738 dst = tmpl->normal_pop_op;
745 /* first operand is an address mode */
746 if (is_vfp_live(op2, live)) {
747 /* second operand is live: push it here */
748 x87_create_fpush(env, state, n, op2_idx, BINOP_IDX_2);
751 /* second operand is dead: bring it to tos */
753 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
755 op2_idx = out_idx = 0;
756 dst = tmpl->normal_op;
760 x87_set_st(state, arch_register_get_index(out), x87_patch_insn(n, dst), out_idx);
764 /* patch the operation */
765 attr = get_ia32_attr(n);
767 attr->x87[0] = op1 = &ia32_st_regs[op1_idx];
768 attr->x87[1] = op2 = &ia32_st_regs[op2_idx];
769 attr->x87[2] = out = &ia32_st_regs[out_idx];
771 DB((dbg, LEVEL_1, "<<< %s %s, %s -> %s\n", get_irn_opname(n),
772 arch_register_get_name(op2), arch_register_get_name(op1),
773 arch_register_get_name(out)));
777 * Simulate a virtual Unop
779 static void sim_unop(x87_state *state, ir_node *n, const arch_env_t *env, ir_op *op) {
780 int op1_idx, out_idx;
781 const arch_register_t *op1 = arch_get_irn_register(env, get_irn_n(n, UNOP_IDX));
782 const arch_register_t *out = arch_get_irn_register(env, n);
784 unsigned live = vfp_liveness_nodes_live_at(env, n);
786 DB((dbg, LEVEL_1, ">>> %s -> %s\n", get_irn_opname(n), out->name));
787 DEBUG_ONLY(vfp_dump_live(live));
789 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
791 if (is_vfp_live(op1, live)) {
792 /* push the operand here */
793 x87_create_fpush(env, state, n, op1_idx, UNOP_IDX);
796 /* operand is dead, bring it to tos */
798 x87_create_fxch(state, n, op1_idx, UNOP_IDX);
801 x87_set_tos(state, arch_register_get_index(out), x87_patch_insn(n, op));
802 op1_idx = out_idx = 0;
803 attr = get_ia32_attr(n);
804 attr->x87[0] = op1 = &ia32_st_regs[0];
805 attr->x87[2] = out = &ia32_st_regs[0];
806 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), out->name));
810 * Simulate a virtual Load instructions
812 static void sim_load(x87_state *state, ir_node *n, const arch_env_t *env, ir_op *op) {
813 const arch_register_t *out = arch_get_irn_register(env, n);
816 DB((dbg, LEVEL_1, ">>> %s -> %s\n", get_irn_opname(n), arch_register_get_name(out)));
817 x87_push(state, arch_register_get_index(out), x87_patch_insn(n, op));
818 attr = get_ia32_attr(n);
819 attr->x87[2] = out = &ia32_st_regs[0];
820 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), arch_register_get_name(out)));
824 * Simulate a virtual Store
826 static void sim_fst(x87_state *state, ir_node *n, const arch_env_t *env) {
828 const arch_register_t *op2 = arch_get_irn_register(env, get_irn_n(n, STORE_VAL_IDX));
830 unsigned live = vfp_liveness_nodes_live_at(env, n);
832 op2_idx = x87_on_stack(state, arch_register_get_index(op2));
834 DB((dbg, LEVEL_1, ">>> %s %s ->\n", get_irn_opname(n), arch_register_get_name(op2)));
836 /* we can only store the tos to memory */
838 x87_create_fxch(state, n, op2_idx, STORE_VAL_IDX);
840 if (is_vfp_live(op2, live))
841 x87_patch_insn(n, op_ia32_fst);
844 x87_patch_insn(n, op_ia32_fstp);
847 attr = get_ia32_attr(n);
848 attr->x87[1] = op2 = &ia32_st_regs[0];
849 DB((dbg, LEVEL_1, "<<< %s %s ->\n", get_irn_opname(n), arch_register_get_name(op2)));
853 * Simulate a virtual Phi.
854 * Just for cosmetic reasons change the mode of Phi nodes to mode_E.
856 static void sim_Phi(x87_state *state, ir_node *n, const arch_env_t *env) {
857 ir_mode *mode = get_irn_mode(n);
859 if (mode_is_float(mode))
860 set_irn_mode(n, mode_E);
864 #define _GEN_BINOP(op, rev) \
865 static void sim_##op(x87_state *state, ir_node *n, const arch_env_t *env) { \
866 exchange_tmpl tmpl = { op_ia32_##op, op_ia32_##rev, op_ia32_##op##p, op_ia32_##rev##p }; \
867 sim_binop(state, n, env, &tmpl); \
870 #define GEN_BINOP(op) _GEN_BINOP(op, op)
871 #define GEN_BINOPR(op) _GEN_BINOP(op, op##r)
873 #define GEN_LOAD2(op, nop) \
874 static void sim_##op(x87_state *state, ir_node *n, const arch_env_t *env) { \
875 sim_load(state, n, env, op_ia32_##nop); \
878 #define GEN_LOAD(op) GEN_LOAD2(op, op)
880 #define GEN_UNOP(op) \
881 static void sim_##op(x87_state *state, ir_node *n, const arch_env_t *env) { \
882 sim_unop(state, n, env, op_ia32_##op); \
894 GEN_LOAD2(fConst, fldConst)
903 * Simulate a be_Copy.
905 static void sim_Copy(x87_state *state, ir_node *n, const arch_env_t *env) {
906 ir_mode *mode = get_irn_mode(n);
908 if (mode_is_float(mode)) {
909 const arch_register_t *op1 = arch_get_irn_register(env, get_irn_n(n, 0));
910 const arch_register_t *out = arch_get_irn_register(env, n);
911 ir_node *node, *next;
914 unsigned live = vfp_liveness_nodes_live_at(env, n);
916 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
918 DB((dbg, LEVEL_1, ">>> %s %s -> %s\n", get_irn_opname(n),
919 arch_register_get_name(op1), arch_register_get_name(out)));
920 DEBUG_ONLY(vfp_dump_live(live));
922 if (is_vfp_live(op1, live)) {
923 /* operand is still live,a real copy */
924 node = new_rd_ia32_fpush(get_irn_dbg_info(n), get_irn_irg(n), get_nodes_block(n), get_irn_n(n, 0), mode);
925 arch_set_irn_register(env, node, out);
927 x87_push(state, arch_register_get_index(out), node);
929 attr = get_ia32_attr(node);
930 attr->x87[0] = op1 = &ia32_st_regs[op1_idx];
931 attr->x87[2] = out = &ia32_st_regs[0];
933 next = sched_next(n);
936 sched_add_before(next, node);
937 DB((dbg, LEVEL_1, ">>> %s %s -> %s\n", get_irn_opname(node), op1->name, out->name));
940 /* just a virtual copy */
941 x87_set_st(state, arch_register_get_index(out), get_unop_op(n), op1_idx);
943 DB((dbg, LEVEL_1, ">>> KILLED %s\n", get_irn_opname(n)));
944 exchange(n, get_unop_op(n));
952 static void sim_Call(x87_state *state, ir_node *n, const arch_env_t *env) {
953 ir_type *call_tp = be_Call_get_type(n);
955 /* at the begin of a call the x87 state should be empty */
956 assert(state->depth == 0 && "stack not empty before call");
959 * If the called function returns a float, it is returned in st(0).
960 * This even happens if the return value is NOT used.
961 * Moreover, only one return result is supported.
963 if (get_method_n_ress(call_tp) > 0) {
964 ir_type *res_type = get_method_res_type(call_tp, 0);
965 ir_mode *mode = get_type_mode(res_type);
967 if (mode && mode_is_float(mode)) {
969 * TODO: what to push here? The result might be unused and currently
970 * we have no possibility to detect this :-(
972 x87_push(state, 0, n);
978 * Simulate a be_Spill.
980 static void sim_Spill(x87_state *state, ir_node *n, const arch_env_t *env) {
981 assert(0 && "Spill not lowered");
982 sim_fst(state, n, env);
986 * Simulate a be_Reload
988 static void sim_Reload(x87_state *state, ir_node *n, const arch_env_t *env) {
989 assert(0 && "Reload not lowered");
990 sim_fld(state, n, env);
994 * Run a simulation and fix all virtual instructions for a block.
996 * @return non-zero if simulation is complete,
997 * zero if the simulation must be rerun
999 static int x87_simulate_block(x87_simulator *sim, ir_node *block) {
1001 blk_state *bl_state = x87_get_bl_state(sim, block);
1002 x87_state *state = bl_state->begin;
1003 const ir_edge_t *edge;
1004 ir_node *start_block;
1006 /* if we have no assigned start state, we must wait ... */
1010 assert(bl_state->end == NULL);
1012 DB((dbg, LEVEL_1, "Simulate %+F\n", block));
1014 /* beware, n might changed */
1015 for (n = sched_first(block); !sched_is_end(n); n = next) {
1016 ir_op *op = get_irn_op(n);
1018 next = sched_next(n);
1019 if (op->ops.generic) {
1020 sim_func func = (sim_func)op->ops.generic;
1022 /* have work to do */
1023 if (state == bl_state->begin) {
1024 /* create a new state, will be changed */
1025 state = x87_clone_state(sim, state);
1029 (*func)(state, n, sim->env);
1033 start_block = get_irg_start_block(get_irn_irg(block));
1035 /* check if the state must be shuffled */
1036 foreach_block_succ(block, edge) {
1037 ir_node *succ = get_edge_src_irn(edge);
1038 blk_state *succ_state = x87_get_bl_state(sim, succ);
1040 if (succ_state->begin && succ != start_block) {
1041 /* There is already a begin state for this block, bad.
1042 Do the necessary permutations.
1043 Note that critical edges are removed, so this is always possible. */
1044 x87_shuffle(sim, block, state, succ, succ_state->begin);
1046 /* Note further, that there can be only one such situation,
1047 so we can break here. */
1051 bl_state->end = state;
1053 /* now propagate the state to all successor blocks */
1054 foreach_block_succ(block, edge) {
1055 ir_node *succ = get_edge_src_irn(edge);
1056 blk_state *succ_state = x87_get_bl_state(sim, succ);
1058 if (! succ_state->begin)
1059 succ_state->begin = state;
1066 * Create a new x87 simulator.
1068 static void x87_init_simulator(x87_simulator *sim, ir_graph *irg, const arch_env_t *env) {
1069 obstack_init(&sim->obst);
1070 sim->blk_states = pmap_create();
1073 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.x87");
1074 firm_dbg_set_mask(dbg, SET_LEVEL_2);
1076 DB((dbg, LEVEL_1, "--------------------------------\n"
1077 "x87 Simulator started for %+F\n", irg));
1079 /* set the generic function pointer of instruction we must simulate */
1080 clear_irp_opcodes_generic_func();
1082 #define ASSOC(op) (op_ ## op)->ops.generic = (op_func)(sim_##op)
1083 #define ASSOC_IA32(op) (op_ia32_v ## op)->ops.generic = (op_func)(sim_##op)
1084 #define ASSOC_BE(op) (op_be_ ## op)->ops.generic = (op_func)(sim_##op)
1111 * Destroy a x87 simulator.
1113 static void x87_destroy_simulator(x87_simulator *sim) {
1114 pmap_destroy(sim->blk_states);
1115 obstack_free(&sim->obst, NULL);
1116 DB((dbg, LEVEL_1, "x87 Simulator stopped\n\n"));
1120 * Run a simulation and fix all virtual instructions for a graph.
1122 * Needs a block-schedule.
1124 void x87_simulate_graph(const arch_env_t *env, ir_graph *irg, ir_node **blk_list) {
1125 ir_node *block, *start_block;
1127 blk_state *bl_state;
1131 /* we need liveness info for the current graph */
1134 /* create the simulator */
1135 x87_init_simulator(&sim, irg, env);
1137 start_block = get_irg_start_block(irg);
1138 bl_state = x87_get_bl_state(&sim, start_block);
1140 /* start with the empty state */
1141 bl_state->begin = empty;
1143 worklist = new_pdeq();
1145 /* create the worklist for the schedule. */
1146 for (i = 0; i < ARR_LEN(blk_list); ++i)
1147 pdeq_putr(worklist, blk_list[i]);
1151 block = pdeq_getl(worklist);
1152 if (! x87_simulate_block(&sim, block)) {
1153 pdeq_putr(worklist, block);
1156 } while (! pdeq_empty(worklist));
1159 x87_destroy_simulator(&sim);