 * This file implements the x87 support and virtual to stack
 * register translation for the ia32 backend.
 *
 * @author Michael Beck
#include "iredges_t.h"
#include "../belive_t.h"
#include "../besched.h"
#include "../benode_t.h"
#include "ia32_new_nodes.h"
#include "gen_ia32_new_nodes.h"
#include "gen_ia32_regalloc_if.h"
33 /* first and second binop index */
40 /* the store val index */
41 #define STORE_VAL_IDX 2
43 #define MASK_TOS(x) ((x) & (N_x87_REGS - 1))
45 /** the debug handle */
46 static firm_dbg_module_t *dbg = NULL;
49 * An exchange template.
50 * Note that our virtual functions have the same inputs
51 * and attributes as the real ones, so we can simple exchange
53 * Further, x87 supports inverse instructions, so we can handle them.
55 typedef struct _exchange_tmpl {
56 ir_op *normal_op; /**< the normal one */
57 ir_op *reverse_op; /**< the reverse one if exists */
58 ir_op *normal_pop_op; /**< the normal one with tos pop */
59 ir_op *reverse_pop_op; /**< the reverse one with tos pop */
63 * An entry on the simulated x87 stack.
65 typedef struct _st_entry {
66 int reg_idx; /**< the virtual register index of this stack value */
67 ir_node *node; /**< the node that produced this value */
73 typedef struct _x87_state {
74 st_entry st[N_x87_REGS]; /**< the register stack */
75 int depth; /**< the current stack depth */
76 int tos; /**< position of the tos */
79 /** An empty state, used for blocks without fp instructions. */
80 static const x87_state _empty = { {0, NULL}, 0, 0 };
81 static x87_state *empty = (x87_state *)&_empty;
83 /** The type of an instruction simulator */
84 typedef void (*sim_func)(x87_state *state, ir_node *n, const arch_env_t *env);
87 * A block state: Every block has a x87 state at the beginning and at the end.
89 typedef struct _blk_state {
90 x87_state *begin; /**< state at the begin or NULL if not assigned */
91 x87_state *end; /**< state at the end or NULL if not assigned */
94 #define PTR_TO_BLKSTATE(p) ((blk_state *)(p))
99 typedef struct _x87_simulator {
100 struct obstack obst; /**< an obstack for fast allocating */
101 pmap *blk_states; /**< map blocks to states */
102 const arch_env_t *env; /**< architecture environment */
106 * Check if the state is empty.
108 static int x87_state_is_empty(const x87_state *state) {
109 return state->depth == 0;
113 * Return the virtual register index at st(pos).
115 static int x87_get_st_reg(const x87_state *state, int pos) {
116 assert(pos < state->depth);
117 return state->st[MASK_TOS(state->tos + pos)].reg_idx;
121 * Return the node at st(pos).
123 static ir_node *x87_get_st_node(const x87_state *state, int pos) {
124 assert(pos < state->depth);
125 return state->st[MASK_TOS(state->tos + pos)].node;
#ifdef DEBUG_libfirm
/**
 * Dump the simulated stack for debugging, from the deepest slot
 * up to the tos.
 *
 * @param state  the x87 state to dump
 */
static void x87_dump_stack(const x87_state *state) {
	int i;

	for (i = state->depth - 1; i >= 0; --i) {
		DB((dbg, LEVEL_2, "vf%d ", x87_get_st_reg(state, i)));
	}
	DB((dbg, LEVEL_2, "<-- TOS\n"));
}
#endif /* DEBUG_libfirm */
143 * Set a virtual register to st(pos)
145 static void x87_set_st(x87_state *state, int reg_idx, ir_node *node, int pos) {
146 assert(0 < state->depth);
147 state->st[MASK_TOS(state->tos + pos)].reg_idx = reg_idx;
148 state->st[MASK_TOS(state->tos + pos)].node = node;
150 DB((dbg, LEVEL_2, "After SET_REG:\n ")); DEBUG_ONLY(x87_dump_stack(state));
154 * Set the tos virtual register
156 static void x87_set_tos(x87_state *state, int reg_idx, ir_node *node) {
157 x87_set_st(state, reg_idx, node, 0);
161 * Flush the x87 stack.
163 static void x87_flush(x87_state *state) {
169 * Swap st(0) with st(pos).
171 static void x87_fxch(x87_state *state, int pos) {
173 assert(pos < state->depth);
175 entry = state->st[MASK_TOS(state->tos + pos)];
176 state->st[MASK_TOS(state->tos + pos)] = state->st[MASK_TOS(state->tos)];
177 state->st[MASK_TOS(state->tos)] = entry;
179 DB((dbg, LEVEL_2, "After FXCH:\n ")); DEBUG_ONLY(x87_dump_stack(state));
183 * Convert a virtual register to the stack index.
184 * Return -1 if the virtual register was not found.
186 static int x87_on_stack(const x87_state *state, int reg_idx) {
187 int i, tos = state->tos;
189 for (i = 0; i < state->depth; ++i)
190 if (state->st[MASK_TOS(tos + i)].reg_idx == reg_idx)
196 * Push a virtual Register onto the stack.
198 static void x87_push(x87_state *state, int reg_idx, ir_node *node) {
199 // assert(x87_on_stack(state, reg_idx) == -1 && "double push");
200 assert(state->depth < N_x87_REGS && "stack overrun");
203 state->tos = MASK_TOS(state->tos - 1);
204 state->st[state->tos].reg_idx = reg_idx;
205 state->st[state->tos].node = node;
207 DB((dbg, LEVEL_2, "After PUSH:\n ")); DEBUG_ONLY(x87_dump_stack(state));
211 * Pop a virtual Register from the stack.
213 static void x87_pop(x87_state *state) {
214 assert(state->depth > 0 && "stack underrun");
217 state->tos = MASK_TOS(state->tos + 1);
219 DB((dbg, LEVEL_2, "After POP:\n ")); DEBUG_ONLY(x87_dump_stack(state));
223 * Returns the block state of a block.
225 static blk_state *x87_get_bl_state(x87_simulator *sim, ir_node *block) {
226 pmap_entry *entry = pmap_find(sim->blk_states, block);
229 blk_state *bl_state = obstack_alloc(&sim->obst, sizeof(*bl_state));
230 bl_state->begin = NULL;
231 bl_state->end = NULL;
233 pmap_insert(sim->blk_states, block, bl_state);
237 return entry ? PTR_TO_BLKSTATE(entry->value) : NULL;
241 * Create a new x87 state.
243 static x87_state *x87_alloc_state(x87_simulator *sim) {
244 x87_state *res = obstack_alloc(&sim->obst, sizeof(*res));
249 * Create a new empty x87 state.
251 static x87_state *x87_alloc_empty_state(x87_simulator *sim) {
252 x87_state *res = x87_alloc_state(sim);
261 static x87_state *x87_clone_state(x87_simulator *sim, const x87_state *src) {
262 x87_state *res = x87_alloc_state(sim);
264 memcpy(res, src, sizeof(*res));
269 * Patch a virtual instruction into a x87 one and return
272 static ir_node *x87_patch_insn(ir_node *n, ir_op *op) {
273 ir_mode *mode = get_irn_mode(n);
278 if (mode == mode_T) {
279 /* patch all Proj's */
280 const ir_edge_t *edge;
282 foreach_out_edge(n, edge) {
283 ir_node *proj = get_edge_src_irn(edge);
285 mode = get_irn_mode(proj);
286 if (mode_is_float(mode)) {
288 set_irn_mode(proj, mode_E);
293 else if (mode_is_float(mode))
294 set_irn_mode(n, mode_E);
298 /* -------------- x87 perm --------------- */
/**
 * Creates a fxch for shuffle.
 *
 * @param state      the x87 state
 * @param pos        parameter for fxch
 * @param block      the block the fxch is placed in
 * @param dst_block  the block of the user
 *
 * Creates a new fxch node and reroute the user of the old node
 * to the fxch.
 *
 * @return the fxch node
 *
 * NOTE(review): this excerpt is incomplete -- the declarations of
 * fxch/attr/node_idx, several closing braces, the else-branch body and the
 * final return are on lines not visible here. Verify against the full file.
 */
static ir_node *x87_fxch_shuffle(x87_state *state, int pos, ir_node *block, ir_node *dst_block)
	const ir_edge_t *edge;
	ir_node *n = x87_get_st_node(state, pos);
	ir_node *user = NULL;

	if (block == get_nodes_block(n)) {
		/* this is a node from out block: change it's user */
		foreach_out_edge(n, edge) {
			ir_node *succ = get_edge_src_irn(edge);

			/* the Phi in the destination block consumes the old value */
			if (is_Phi(succ) && get_nodes_block(succ) == dst_block) {
				node_idx = get_edge_src_pos(edge);

		/* build the fxch and annotate the st registers it touches */
		fxch = new_rd_ia32_fxch(NULL, get_irn_irg(block), block, n, get_irn_mode(n));
		attr = get_ia32_attr(fxch);
		attr->x87[0] = &ia32_st_regs[pos];
		attr->x87[2] = &ia32_st_regs[0];

		DB((dbg, LEVEL_2, "%+F replaced input %d of %+F\n", fxch, node_idx, user));
		set_irn_n(user, node_idx, fxch);

	/*
	 * This is a node from a dominator block. Changing it's user might be wrong,
	 * so just keep it alive.
	 * The "right" solution would require a new Phi, but we don't care here.
	 */

	/* mirror the exchange in the simulated stack */
	x87_fxch(state, pos);
/**
 * Calculate the necessary permutations to reach dst_state.
 *
 * These permutations are done with fxch instructions and placed
 * at the end of the block.
 *
 * Note that critical edges are removed here, so we need only
 * a shuffle if the current block has only one successor.
 *
 * @param sim        the simulator handle
 * @param block      the current block
 * @param state      the current x87 stack state, might be modified
 * @param dst_block  the destination block
 * @param dst_state  destination state
 *
 * NOTE(review): this excerpt is incomplete -- the declarations of
 * fxch/ring_idx, several braces, break/continue statements and the final
 * return are on lines not visible here. Verify against the full file.
 */
static x87_state *x87_shuffle(x87_simulator *sim, ir_node *block, x87_state *state, ir_node *dst_block, const x87_state *dst_state) {
	int i, n_rings, k, ri;
	unsigned rings[4], all_mask;
	ir_node *before, *after;

	/* shuffling only permutes; both states must hold the same values */
	assert(state->depth == dst_state->depth);

	/* Some mathematics here:
	   If we have a ring of lenght n that includes the tos,
	   we need n-1 exchange operations.
	   We can always add the tos and restore it, so we need
	   n+1 exchange operations for a ring not containing the tos.
	   So, the maximum of needed operations is for a ring of 7
	   not including the tos == 8.
	   This is so same number of ops we would need for store,
	   so exchange is cheaper (we save the loads).
	   On the other hand, we might need an additional exchange
	   in the next block to bring one operand on top, so the
	   number of ops in the first case is identical.
	   Further, no more than 4 rings can exists. */

	/* bitmask of stack slots not yet assigned to a ring */
	all_mask = (1 << (state->depth)) - 1;

	for (n_rings = 0; all_mask; ++n_rings) {
		int src_idx, dst_idx;

		/* find the first free slot */
		for (i = 0; i < state->depth; ++i) {
			if (all_mask & (1 << i)) {
				all_mask &= ~(1 << i);

				/* check if there are differences here */
				if (x87_get_st_reg(state, i) != x87_get_st_reg(dst_state, i))
		/* no more rings found */

		/* open a new permutation ring starting at slot i */
		rings[n_rings] = (1 << i);
		ring_idx[n_rings][k++] = i;
		/* follow the ring: find where the value at src_idx must go */
		for (src_idx = i; ; src_idx = dst_idx) {
			dst_idx = x87_on_stack(dst_state, x87_get_st_reg(state, src_idx));

			if ((all_mask & (1 << dst_idx)) == 0)
			ring_idx[n_rings][k++] = dst_idx;
			rings[n_rings] |= (1 << dst_idx);
			all_mask &= ~(1 << dst_idx);
		/* -1 terminates the slot list of this ring */
		ring_idx[n_rings][k] = -1;
	/* no permutation needed */

	/* Hmm: permutation needed */
	DB((dbg, LEVEL_2, "\n%+F needs permutation: from\n", block));
	DEBUG_ONLY(x87_dump_stack(state));
	DB((dbg, LEVEL_2, " to\n"));
	DEBUG_ONLY(x87_dump_stack(dst_state));

	DB((dbg, LEVEL_2, "Need %d rings\n", n_rings));
	for (ri = 0; ri < n_rings; ++ri) {
		DB((dbg, LEVEL_2, " Ring %d:\n ", ri));
		for (k = 0; ring_idx[ri][k] != -1; ++k)
			DB((dbg, LEVEL_2, " st%d ->", ring_idx[ri][k]));
		DB((dbg, LEVEL_2, "\n"));

	/*
	 * Find the place node must be insert.
	 * We have only one successor block, so the last instruction should
	 * be a jump.
	 */
	before = sched_last(block);
	assert(is_cfop(before));

	/* now do the permutations */
	for (ri = 0; ri < n_rings; ++ri) {
		if ((rings[ri] & 1) == 0) {
			/* this ring does not include the tos */
			fxch = x87_fxch_shuffle(state, ring_idx[ri][0], block, dst_block);
			sched_add_after(after, fxch);
			sched_add_before(before, fxch);
		for (k = 1; ring_idx[ri][k] != -1; ++k) {
			fxch = x87_fxch_shuffle(state, ring_idx[ri][k], block, dst_block);
			sched_add_after(after, fxch);
			sched_add_before(before, fxch);
		if ((rings[ri] & 1) == 0) {
			/* this ring does not include the tos */
			fxch = x87_fxch_shuffle(state, ring_idx[ri][0], block, dst_block);
			sched_add_after(after, fxch);
495 * Create a fxch before node n.
497 static void x87_create_fxch(x87_state *state, ir_node *n, int pos, int op_idx) {
498 ir_node *fxch, *pred;
501 x87_fxch(state, pos);
503 pred = get_irn_n(n, op_idx);
504 fxch = new_rd_ia32_fxch(NULL, get_irn_irg(n), get_nodes_block(n), pred, get_irn_mode(pred));
505 attr = get_ia32_attr(fxch);
506 attr->x87[0] = &ia32_st_regs[pos];
507 attr->x87[2] = &ia32_st_regs[0];
508 set_irn_n(n, op_idx, fxch);
510 sched_add_before(n, fxch);
511 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fxch), attr->x87[0]->name, attr->x87[2]->name));
515 * Create a fpush before node n.
517 static void x87_create_fpush(const arch_env_t *env, x87_state *state, ir_node *n, int pos, int op_idx) {
518 ir_node *fpush, *pred;
520 const arch_register_t *out = arch_get_irn_register(env, n);
522 x87_push(state, arch_register_get_index(out), n);
524 pred = get_irn_n(n, op_idx);
525 fpush = new_rd_ia32_fpush(NULL, get_irn_irg(n), get_nodes_block(n), pred, get_irn_mode(pred));
526 attr = get_ia32_attr(fpush);
527 attr->x87[0] = &ia32_st_regs[pos];
528 attr->x87[2] = &ia32_st_regs[0];
529 set_irn_n(n, op_idx, fpush);
531 sched_add_before(n, fpush);
532 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fpush), attr->x87[0]->name, attr->x87[2]->name));
535 /* --------------------------------- liveness ------------------------------------------ */
538 * The liveness transfer function.
539 * Updates a live set over a single step from a given node to its predecessor.
540 * Everything defined at the node is removed from the set, the uses of the node get inserted.
541 * @param arch_env The architecture environment.
542 * @param irn The node at which liveness should be computed.
543 * @param live The bitset of registers live before @p irn. This set gets modified by updating it to
544 * the registers live after irn.
547 static unsigned vfp_liveness_transfer(const arch_env_t *arch_env, ir_node *irn, unsigned live)
550 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
552 if(arch_irn_consider_in_reg_alloc(arch_env, cls, irn)) {
553 const arch_register_t *reg = arch_get_irn_register(arch_env, irn);
554 live &= ~(1 << reg->index);
557 for(i = 0, n = get_irn_arity(irn); i < n; ++i) {
558 ir_node *op = get_irn_n(irn, i);
560 if (mode_is_float(get_irn_mode(op)) && arch_irn_consider_in_reg_alloc(arch_env, cls, op)) {
561 const arch_register_t *reg = arch_get_irn_register(arch_env, op);
562 live |= 1 << reg->index;
570 * Put all live virtual registers at the end of a block into a bitset.
571 * @param arch_env The architecture environment.
572 * @param bl The block.
573 * @return The live bitset.
575 static unsigned vfp_liveness_end_of_block(const arch_env_t *arch_env, const ir_node *bl)
579 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
581 live_foreach(bl, li) {
582 ir_node *irn = (ir_node *) li->irn;
583 if (live_is_end(li) && arch_irn_consider_in_reg_alloc(arch_env, cls, irn)) {
584 const arch_register_t *reg = arch_get_irn_register(arch_env, irn);
585 live |= 1 << reg->index;
593 * Compute a bitset of registers which are live at another node.
594 * @param arch_env The architecture environment.
595 * @param pos The node.
596 * @return The live bitset.
598 static unsigned vfp_liveness_nodes_live_at(const arch_env_t *arch_env, const ir_node *pos)
600 const ir_node *bl = is_Block(pos) ? pos : get_nodes_block(pos);
601 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
605 live = vfp_liveness_end_of_block(arch_env, bl);
607 sched_foreach_reverse(bl, irn) {
609 * If we encounter the node we want to insert the Perm after,
610 * exit immediately, so that this node is still live
615 live = vfp_liveness_transfer(arch_env, irn, live);
622 * Returns true if a register is live in a set.
624 static unsigned is_vfp_live(const arch_register_t *reg, unsigned live) {
625 return live & (1 << reg->index);
#ifdef DEBUG_libfirm
/**
 * Dump liveness info to the debug output.
 *
 * The original declaration had no return type (implicit int),
 * which is invalid since C99 -- declare it void.
 *
 * @param live  the bitmask of live vfp registers
 */
static void vfp_dump_live(unsigned live) {
	int i;

	DB((dbg, LEVEL_2, "Live registers here: \n"));
	for (i = 0; i < 8; ++i) {
		if (live & (1 << i)) {
			DB((dbg, LEVEL_2, " vf%d", i));
		}
	}
	DB((dbg, LEVEL_2, "\n"));
}
#endif /* DEBUG_libfirm */
645 /* --------------------------------- simulators ---------------------------------------- */
647 #define XCHG(a, b) do { int t =(a); (a) = (b); (b) = t; } while (0)
/**
 * Simulate a virtual binop.
 *
 * Based on the liveness of the two operands, decides which fxch/fpush
 * fix-up instructions must be created and which real (possibly reversed
 * and/or popping) x87 opcode from the template replaces the virtual one.
 *
 * @param state  the x87 state, updated in place
 * @param n      the virtual binop node to patch
 * @param env    the architecture environment
 * @param tmpl   the exchange template with normal/reverse/pop op variants
 *
 * NOTE(review): this excerpt is incomplete -- the declarations of dst/attr,
 * several else branches, pop handling and closing braces live on lines not
 * visible here. Verify against the full file.
 */
static void sim_binop(x87_state *state, ir_node *n, const arch_env_t *env, const exchange_tmpl *tmpl) {
	int op2_idx, op1_idx = -1;
	/* do_pop records whether the selected opcode pops the stack */
	int out_idx, do_pop =0;
	const arch_register_t *op1 = arch_get_irn_register(env, get_irn_n(n, BINOP_IDX_1));
	const arch_register_t *op2 = arch_get_irn_register(env, get_irn_n(n, BINOP_IDX_2));
	const arch_register_t *out = arch_get_irn_register(env, n);
	unsigned live = vfp_liveness_nodes_live_at(env, n);

	DB((dbg, LEVEL_1, ">>> %s %s, %s -> %s\n", get_irn_opname(n),
		arch_register_get_name(op2), arch_register_get_name(op1),
		arch_register_get_name(out)));
	DEBUG_ONLY(vfp_dump_live(live));

	op2_idx = x87_on_stack(state, arch_register_get_index(op2));

	if (op1->reg_class == &ia32_reg_classes[CLASS_ia32_vfp]) {
		/* first operand is a vfp register */
		op1_idx = x87_on_stack(state, arch_register_get_index(op1));

		if (is_vfp_live(op2, live)) {
			/* second operand is live */

			if (is_vfp_live(op1, live)) {
				/* both operands are live: push the first one */
				x87_create_fpush(env, state, n, op2_idx, BINOP_IDX_2);
				out_idx = op2_idx = 0;
				dst = tmpl->normal_op;
				/* second live, first operand is dead here, bring it to tos */
				x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
				op1_idx = out_idx = 0;
				dst = tmpl->normal_op;
			/* second operand is dead */
			if (is_vfp_live(op1, live)) {
				/* first operand is live: bring second to tos */
				x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
				op2_idx = out_idx = 0;
				dst = tmpl->normal_op;
				/* both operands are dead here, pop them from the stack */
					XCHG(op2_idx, op1_idx);
					dst = tmpl->reverse_pop_op;
				else if (op1_idx == 0) {
					dst = tmpl->normal_pop_op;
					/* bring the first on top */
					x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
					dst = tmpl->normal_pop_op;
		/* first operand is an address mode */
		if (is_vfp_live(op2, live)) {
			/* second operand is live: push it here */
			x87_create_fpush(env, state, n, op2_idx, BINOP_IDX_2);
			/* second operand is dead: bring it to tos */
			x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
			op2_idx = out_idx = 0;
			dst = tmpl->normal_op;
	/* record the result value on the simulated stack */
	x87_set_st(state, arch_register_get_index(out), x87_patch_insn(n, dst), out_idx);

	/* patch the operation */
	attr = get_ia32_attr(n);
	attr->x87[0] = op1 = &ia32_st_regs[op1_idx];
	attr->x87[1] = op2 = &ia32_st_regs[op2_idx];
	attr->x87[2] = out = &ia32_st_regs[out_idx];

	DB((dbg, LEVEL_1, "<<< %s %s, %s -> %s\n", get_irn_opname(n),
		arch_register_get_name(op2), arch_register_get_name(op1),
		arch_register_get_name(out)));
766 * Simulate a virtual Unop
768 static void sim_unop(x87_state *state, ir_node *n, const arch_env_t *env, ir_op *op) {
769 int op1_idx, out_idx;
770 const arch_register_t *op1 = arch_get_irn_register(env, get_irn_n(n, UNOP_IDX));
771 const arch_register_t *out = arch_get_irn_register(env, n);
773 unsigned live = vfp_liveness_nodes_live_at(env, n);
775 DB((dbg, LEVEL_1, ">>> %s -> %s\n", get_irn_opname(n), out->name));
776 DEBUG_ONLY(vfp_dump_live(live));
778 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
780 if (is_vfp_live(op1, live)) {
781 /* push the operand here */
782 x87_create_fpush(env, state, n, op1_idx, UNOP_IDX);
785 /* operand is dead, bring it to tos */
787 x87_create_fxch(state, n, op1_idx, UNOP_IDX);
790 x87_set_tos(state, arch_register_get_index(out), x87_patch_insn(n, op));
791 op1_idx = out_idx = 0;
792 attr = get_ia32_attr(n);
793 attr->x87[0] = op1 = &ia32_st_regs[0];
794 attr->x87[2] = out = &ia32_st_regs[0];
795 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), out->name));
799 * Simulate a virtual Load instructions
801 static void sim_load(x87_state *state, ir_node *n, const arch_env_t *env, ir_op *op) {
802 const arch_register_t *out = arch_get_irn_register(env, n);
805 DB((dbg, LEVEL_1, ">>> %s -> %s\n", get_irn_opname(n), arch_register_get_name(out)));
806 x87_push(state, arch_register_get_index(out), x87_patch_insn(n, op));
807 attr = get_ia32_attr(n);
808 attr->x87[2] = out = &ia32_st_regs[0];
809 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), arch_register_get_name(out)));
813 * Simulate a virtual Store
815 static void sim_fst(x87_state *state, ir_node *n, const arch_env_t *env) {
817 const arch_register_t *op2 = arch_get_irn_register(env, get_irn_n(n, STORE_VAL_IDX));
819 unsigned live = vfp_liveness_nodes_live_at(env, n);
821 op2_idx = x87_on_stack(state, arch_register_get_index(op2));
823 DB((dbg, LEVEL_1, ">>> %s %s ->\n", get_irn_opname(n), arch_register_get_name(op2)));
825 /* we can only store the tos to memory */
827 x87_create_fxch(state, n, op2_idx, STORE_VAL_IDX);
829 if (is_vfp_live(op2, live))
830 x87_patch_insn(n, op_ia32_fst);
833 x87_patch_insn(n, op_ia32_fstp);
836 attr = get_ia32_attr(n);
837 attr->x87[1] = op2 = &ia32_st_regs[0];
838 DB((dbg, LEVEL_1, "<<< %s %s ->\n", get_irn_opname(n), arch_register_get_name(op2)));
842 * Simulate a virtual Phi.
843 * Just for cosmetic reasons change the mode of Phi nodes to mode_E.
845 static void sim_Phi(x87_state *state, ir_node *n, const arch_env_t *env) {
846 ir_mode *mode = get_irn_mode(n);
848 if (mode_is_float(mode))
849 set_irn_mode(n, mode_E);
853 #define _GEN_BINOP(op, rev) \
854 static void sim_##op(x87_state *state, ir_node *n, const arch_env_t *env) { \
855 exchange_tmpl tmpl = { op_ia32_##op, op_ia32_##rev, op_ia32_##op##p, op_ia32_##rev##p }; \
856 sim_binop(state, n, env, &tmpl); \
859 #define GEN_BINOP(op) _GEN_BINOP(op, op)
860 #define GEN_BINOPR(op) _GEN_BINOP(op, op##r)
862 #define GEN_LOAD2(op, nop) \
863 static void sim_##op(x87_state *state, ir_node *n, const arch_env_t *env) { \
864 sim_load(state, n, env, op_ia32_##nop); \
867 #define GEN_LOAD(op) GEN_LOAD2(op, op)
869 #define GEN_UNOP(op) \
870 static void sim_##op(x87_state *state, ir_node *n, const arch_env_t *env) { \
871 sim_unop(state, n, env, op_ia32_##op); \
883 GEN_LOAD2(fConst, fldConst)
/**
 * Simulate a virtual Copy.
 *
 * If the copied operand is still live, a real fpush duplicating it is
 * emitted; otherwise the copy is resolved purely in the simulated stack
 * and the node is killed.
 *
 * @param state  the x87 state, updated in place
 * @param n      the Copy node
 * @param env    the architecture environment
 *
 * NOTE(review): this excerpt is incomplete -- declarations of
 * op1_idx/attr, the else keyword and several braces are on lines not
 * visible here. Verify against the full file.
 */
static void sim_Copy(x87_state *state, ir_node *n, const arch_env_t *env) {
	ir_mode *mode = get_irn_mode(n);

	if (mode_is_float(mode)) {
		const arch_register_t *op1 = arch_get_irn_register(env, get_irn_n(n, 0));
		const arch_register_t *out = arch_get_irn_register(env, n);
		ir_node *node, *next;
		unsigned live = vfp_liveness_nodes_live_at(env, n);

		op1_idx = x87_on_stack(state, arch_register_get_index(op1));

		DB((dbg, LEVEL_1, ">>> %s %s -> %s\n", get_irn_opname(n),
			arch_register_get_name(op1), arch_register_get_name(out)));
		DEBUG_ONLY(vfp_dump_live(live));

		if (is_vfp_live(op1, live)) {
			/* operand is still live,a real copy */
			node = new_rd_ia32_fpush(get_irn_dbg_info(n), get_irn_irg(n), get_nodes_block(n), get_irn_n(n, 0), mode);
			arch_set_irn_register(env, node, out);

			/* the pushed duplicate becomes the new tos */
			x87_push(state, arch_register_get_index(out), node);

			attr = get_ia32_attr(node);
			attr->x87[0] = op1 = &ia32_st_regs[op1_idx];
			attr->x87[2] = out = &ia32_st_regs[0];

			next = sched_next(n);
			sched_add_before(next, node);
			DB((dbg, LEVEL_1, ">>> %s %s -> %s\n", get_irn_opname(node), op1->name, out->name));
			/* just a virtual copy */
			x87_set_st(state, arch_register_get_index(out), get_unop_op(n), op1_idx);
			DB((dbg, LEVEL_1, ">>> KILLED %s\n", get_irn_opname(n)));
			exchange(n, get_unop_op(n));
/**
 * Run a simulation and fix all virtual instructions for a block.
 *
 * @param sim    the simulator handle
 * @param block  the block to simulate
 *
 * @return non-zero if simulation is complete,
 *         zero if the simulation must be rerun
 *
 * NOTE(review): this excerpt is incomplete -- the declarations of n/next,
 * the early return when no begin state is assigned, and several closing
 * braces are on lines not visible here. Verify against the full file.
 */
static int x87_simulate_block(x87_simulator *sim, ir_node *block) {
	blk_state *bl_state = x87_get_bl_state(sim, block);
	x87_state *state = bl_state->begin;
	const ir_edge_t *edge;
	ir_node *start_block;

	/* if we have no assigned start state, we must wait ... */

	assert(bl_state->end == NULL);

	DB((dbg, LEVEL_1, "Simulate %+F\n", block));

	/* beware, n might changed */
	for (n = sched_first(block); !sched_is_end(n); n = next) {
		ir_op *op = get_irn_op(n);

		next = sched_next(n);
		if (op->ops.generic) {
			sim_func func = (sim_func)op->ops.generic;

			/* have work to do */
			if (state == bl_state->begin) {
				/* create a new state, will be changed */
				state = x87_clone_state(sim, state);

			/* simulate the instruction */
			(*func)(state, n, sim->env);

	start_block = get_irg_start_block(get_irn_irg(block));

	/* check if the state must be shuffled */
	foreach_block_succ(block, edge) {
		ir_node *succ = get_edge_src_irn(edge);
		blk_state *succ_state = x87_get_bl_state(sim, succ);

		if (succ_state->begin && succ != start_block) {
			/* There is already a begin state for this block, bad.
			   Do the necessary permutations.
			   Note that critical edges are removed, so this is always possible. */
			x87_shuffle(sim, block, state, succ, succ_state->begin);

			/* Note further, that there can be only one such situation,
			   so we can break here. */

	bl_state->end = state;

	/* now propagate the state to all successor blocks */
	foreach_block_succ(block, edge) {
		ir_node *succ = get_edge_src_irn(edge);
		blk_state *succ_state = x87_get_bl_state(sim, succ);

		if (! succ_state->begin)
			succ_state->begin = state;
/**
 * Create a new x87 simulator: set up the obstack, the block-state map,
 * the debug module and hook the sim_* handlers into the opcodes that
 * must be simulated.
 *
 * @param sim  the simulator to initialize
 * @param irg  the graph that will be simulated
 * @param env  the architecture environment
 *
 * NOTE(review): this excerpt is incomplete -- the sim->env assignment and
 * the actual ASSOC(...)/ASSOC_IA32(...)/ASSOC_BE(...) invocations are on
 * lines not visible here. Verify against the full file.
 */
static void x87_init_simulator(x87_simulator *sim, ir_graph *irg, const arch_env_t *env) {
	obstack_init(&sim->obst);
	sim->blk_states = pmap_create();

	FIRM_DBG_REGISTER(dbg, "firm.be.ia32.x87");
	firm_dbg_set_mask(dbg, SET_LEVEL_2);

	DB((dbg, LEVEL_1, "--------------------------------\n"
		"x87 Simulator started for %+F\n", irg));

	/* set the generic function pointer of instruction we must simulate */
	clear_irp_opcodes_generic_func();

	/* helper macros hooking a sim_* handler into an opcode's generic ops */
#define ASSOC(op) (op_ ## op)->ops.generic = (op_func)(sim_##op)
#define ASSOC_IA32(op) (op_ia32_v ## op)->ops.generic = (op_func)(sim_##op)
#define ASSOC_BE(op) (op_be_ ## op)->ops.generic = (op_func)(sim_##op)
1053 * Destroy a x87 simulator.
1055 static void x87_destroy_simulator(x87_simulator *sim) {
1056 pmap_destroy(sim->blk_states);
1057 obstack_free(&sim->obst, NULL);
1058 DB((dbg, LEVEL_1, "x87 Simulator stopped\n\n"));
/**
 * Run a simulation and fix all virtual instructions for a graph.
 *
 * Needs a block-schedule.
 *
 * @param env       the architecture environment
 * @param irg       the graph to simulate and fix
 * @param blk_list  the scheduled list of blocks
 *
 * NOTE(review): this excerpt is incomplete -- the declarations of
 * worklist/sim/i, the liveness computation call, the do-loop header and
 * several braces are on lines not visible here. Verify against the full
 * file (a del_pdeq(worklist) is also expected before returning).
 */
void x87_simulate_graph(const arch_env_t *env, ir_graph *irg, ir_node **blk_list) {
	ir_node *block, *start_block;
	blk_state *bl_state;

	/* we need liveness info for the current graph */

	/* create the simulator */
	x87_init_simulator(&sim, irg, env);

	start_block = get_irg_start_block(irg);
	bl_state = x87_get_bl_state(&sim, start_block);

	/* start with the empty state */
	bl_state->begin = empty;

	worklist = new_pdeq();

	/* create the worklist for the schedule. */
	for (i = 0; i < ARR_LEN(blk_list); ++i)
		pdeq_putr(worklist, blk_list[i]);

	/* process blocks; re-queue a block whose begin state was not ready yet */
		block = pdeq_getl(worklist);
		if (! x87_simulate_block(&sim, block)) {
			pdeq_putr(worklist, block);
	} while (! pdeq_empty(worklist));

	/* tear down the simulator */
	x87_destroy_simulator(&sim);