2 * This file implements the x87 support and virtual to stack
3 * register translation for the ia32 backend.
5 * @author: Michael Beck
12 #endif /* HAVE_CONFIG_H */
19 #include "iredges_t.h"
28 #include "../belive_t.h"
29 #include "../besched.h"
30 #include "../benode_t.h"
31 #include "ia32_new_nodes.h"
32 #include "gen_ia32_new_nodes.h"
33 #include "gen_ia32_regalloc_if.h"
38 /* first and second binop index */
45 /* the store val index */
46 #define STORE_VAL_IDX 2
/* Wraps a stack position into the circular st[] array; correct only if
   N_x87_REGS is a power of two (presumably 8) -- TODO confirm. */
48 #define MASK_TOS(x) ((x) & (N_x87_REGS - 1))
50 /** the debug handle */
51 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
54 * An exchange template.
55 * Note that our virtual functions have the same inputs
56 * and attributes as the real ones, so we can simply exchange
58 * Further, x87 supports inverse instructions, so we can handle them.
/* Bundles the four concrete x87 opcode variants a virtual binop can be lowered to. */
60 typedef struct _exchange_tmpl {
61 ir_op *normal_op; /**< the normal one */
62 ir_op *reverse_op; /**< the reverse one if it exists */
63 ir_op *normal_pop_op; /**< the normal one with tos pop */
64 ir_op *reverse_pop_op; /**< the reverse one with tos pop */
68 * An entry on the simulated x87 stack.
70 typedef struct _st_entry {
71 int reg_idx; /**< the virtual register index of this stack value */
72 ir_node *node; /**< the node that produced this value */
/* The simulated x87 register stack: a circular buffer of N_x87_REGS entries,
   addressed relative to tos via MASK_TOS(). */
78 typedef struct _x87_state {
79 st_entry st[N_x87_REGS]; /**< the register stack */
80 int depth; /**< the current stack depth */
81 int tos; /**< position of the tos */
84 /** An empty state, used for blocks without fp instructions. */
85 static const x87_state _empty = { {0, NULL}, 0, 0 };
/* NOTE(review): const is cast away here; writing through `empty` would be
   undefined behavior -- verify all users treat it as read-only (the simulator
   clones before mutating, see x87_simulate_block). */
86 static x87_state *empty = (x87_state *)&_empty;
88 /** The type of an instruction simulator: updates `state` in place for node `n`. */
89 typedef void (*sim_func)(x87_state *state, ir_node *n, const arch_env_t *env);
92 * A block state: Every block has a x87 state at the beginning and at the end.
94 typedef struct _blk_state {
95 x87_state *begin; /**< state at the begin or NULL if not assigned */
96 x87_state *end; /**< state at the end or NULL if not assigned */
/* Convenience cast from a pmap value back to a blk_state pointer. */
99 #define PTR_TO_BLKSTATE(p) ((blk_state *)(p))
/* Global simulator context: allocation arena, per-block states and the
   architecture environment used for register queries. */
104 typedef struct _x87_simulator {
105 struct obstack obst; /**< an obstack for fast allocating */
106 pmap *blk_states; /**< map blocks to states */
107 const arch_env_t *env; /**< architecture environment */
111 * Check if the state is empty.
/* Returns non-zero iff no value is on the simulated stack. */
113 static int x87_state_is_empty(const x87_state *state) {
114 return state->depth == 0;
118 * Return the virtual register index at st(pos).
/* pos is relative to the top of stack; caller must ensure pos < depth. */
120 static int x87_get_st_reg(const x87_state *state, int pos) {
121 assert(pos < state->depth);
122 return state->st[MASK_TOS(state->tos + pos)].reg_idx;
126 * Return the node at st(pos).
/* pos is relative to the top of stack; caller must ensure pos < depth. */
128 static ir_node *x87_get_st_node(const x87_state *state, int pos) {
129 assert(pos < state->depth);
130 return state->st[MASK_TOS(state->tos + pos)].node;
135 * Dump the stack for debugging.
/* Prints the stack from the deepest entry up to the tos (debug builds only). */
137 static void x87_dump_stack(const x87_state *state) {
140 for (i = state->depth - 1; i >= 0; --i) {
141 DB((dbg, LEVEL_2, "vf%d ", x87_get_st_reg(state, i)));
143 DB((dbg, LEVEL_2, "<-- TOS\n"));
145 #endif /* DEBUG_libfirm */
148 * Set a virtual register to st(pos)
/* Overwrites the entry at st(pos); does not change depth, so the slot must
   already exist (asserted via depth > 0). */
150 static void x87_set_st(x87_state *state, int reg_idx, ir_node *node, int pos) {
151 assert(0 < state->depth);
152 state->st[MASK_TOS(state->tos + pos)].reg_idx = reg_idx;
153 state->st[MASK_TOS(state->tos + pos)].node = node;
155 DB((dbg, LEVEL_2, "After SET_REG:\n ")); DEBUG_ONLY(x87_dump_stack(state));
159 * Set the tos virtual register
/* Convenience wrapper for x87_set_st() at position 0. */
161 static void x87_set_tos(x87_state *state, int reg_idx, ir_node *node) {
162 x87_set_st(state, reg_idx, node, 0);
166 * Flush the x87 stack.
/* Presumably resets depth/tos to the empty state -- body not visible here, TODO confirm. */
168 static void x87_flush(x87_state *state) {
174 * Swap st(0) with st(pos).
/* Pure bookkeeping: swaps the two stack entries in the simulated state only;
   the actual fxch instruction is emitted elsewhere. */
176 static void x87_fxch(x87_state *state, int pos) {
178 assert(pos < state->depth);
180 entry = state->st[MASK_TOS(state->tos + pos)];
181 state->st[MASK_TOS(state->tos + pos)] = state->st[MASK_TOS(state->tos)];
182 state->st[MASK_TOS(state->tos)] = entry;
184 DB((dbg, LEVEL_2, "After FXCH:\n ")); DEBUG_ONLY(x87_dump_stack(state));
188 * Convert a virtual register to the stack index.
189 * Return -1 if the virtual register was not found.
/* Linear scan from tos downwards; depth is at most N_x87_REGS, so this is cheap. */
191 static int x87_on_stack(const x87_state *state, int reg_idx) {
192 int i, tos = state->tos;
194 for (i = 0; i < state->depth; ++i)
195 if (state->st[MASK_TOS(tos + i)].reg_idx == reg_idx)
201 * Push a virtual Register onto the stack.
/* Decrements tos (the stack grows downwards in the circular buffer) and
   records reg_idx/node in the new tos slot. A register may be on the stack
   only once (asserted). */
203 static void x87_push(x87_state *state, int reg_idx, ir_node *node) {
204 assert(x87_on_stack(state, reg_idx) == -1 && "double push");
205 assert(state->depth < N_x87_REGS && "stack overrun");
208 state->tos = MASK_TOS(state->tos - 1);
209 state->st[state->tos].reg_idx = reg_idx;
210 state->st[state->tos].node = node;
212 DB((dbg, LEVEL_2, "After PUSH:\n ")); DEBUG_ONLY(x87_dump_stack(state));
216 * Pop a virtual Register from the stack.
/* Only moves tos; the stale entry is left in the buffer and becomes unreachable. */
218 static void x87_pop(x87_state *state) {
219 assert(state->depth > 0 && "stack underrun");
222 state->tos = MASK_TOS(state->tos + 1);
224 DB((dbg, LEVEL_2, "After POP:\n ")); DEBUG_ONLY(x87_dump_stack(state));
228 * Returns the block state of a block.
/* Lazily creates and caches a blk_state per block in sim->blk_states. */
230 static blk_state *x87_get_bl_state(x87_simulator *sim, ir_node *block) {
231 pmap_entry *entry = pmap_find(sim->blk_states, block);
234 blk_state *bl_state = obstack_alloc(&sim->obst, sizeof(*bl_state));
235 bl_state->begin = NULL;
236 bl_state->end = NULL;
238 pmap_insert(sim->blk_states, block, bl_state);
/* NOTE(review): on a cache miss the freshly inserted bl_state is presumably
   returned by an elided line above -- this tail only handles the hit case. */
242 return entry ? PTR_TO_BLKSTATE(entry->value) : NULL;
246 * Create a new x87 state.
/* Obstack-allocated; freed wholesale when the simulator is destroyed. */
248 static x87_state *x87_alloc_state(x87_simulator *sim) {
249 x87_state *res = obstack_alloc(&sim->obst, sizeof(*res));
254 * Create a new empty x87 state.
/* Like x87_alloc_state() but presumably zero-initializes depth/tos -- TODO confirm. */
256 static x87_state *x87_alloc_empty_state(x87_simulator *sim) {
257 x87_state *res = x87_alloc_state(sim);
/* Clone an existing x87 state; used to copy a block's begin state before
   simulating instructions mutates it. */
266 static x87_state *x87_clone_state(x87_simulator *sim, const x87_state *src) {
267 x87_state *res = x87_alloc_state(sim);
269 memcpy(res, src, sizeof(*res));
274 * Patch a virtual instruction into a x87 one and return
/* Replaces the node's opcode with the concrete x87 op and normalizes all
   float modes to mode_E (the 80-bit x87 format). For mode_T nodes the float
   Proj's are patched instead of the node itself. */
277 static ir_node *x87_patch_insn(ir_node *n, ir_op *op) {
278 ir_mode *mode = get_irn_mode(n);
283 if (mode == mode_T) {
284 /* patch all Proj's */
285 const ir_edge_t *edge;
287 foreach_out_edge(n, edge) {
288 ir_node *proj = get_edge_src_irn(edge);
290 mode = get_irn_mode(proj);
291 if (mode_is_float(mode)) {
293 set_irn_mode(proj, mode_E);
298 else if (mode_is_float(mode))
299 set_irn_mode(n, mode_E);
303 /* -------------- x87 perm --------------- */
306 * Creates a fxch for shuffle.
308 * @param state the x87 state
309 * @param pos parameter for fxch
310 * @param dst_block the block of the user
312 * Creates a new fxch node and reroute the user of the old node
315 * @return the fxch node
317 static ir_node *x87_fxch_shuffle(x87_state *state, int pos, ir_node *block, ir_node *dst_block)
319 const ir_edge_t *edge;
320 ir_node *n = x87_get_st_node(state, pos);
321 ir_node *user = NULL;
326 if (block == get_nodes_block(n)) {
327 /* this is a node from our block: change its user */
328 foreach_out_edge(n, edge) {
329 ir_node *succ = get_edge_src_irn(edge);
331 if (is_Phi(succ) && get_nodes_block(succ) == dst_block) {
333 node_idx = get_edge_src_pos(edge);
/* Build the concrete fxch: x87[0] is the exchanged slot, x87[2] the tos. */
340 fxch = new_rd_ia32_fxch(NULL, get_irn_irg(block), block, n, get_irn_mode(n));
341 attr = get_ia32_attr(fxch);
342 attr->x87[0] = &ia32_st_regs[pos];
343 attr->x87[2] = &ia32_st_regs[0];
346 DB((dbg, LEVEL_2, "%+F replaced input %d of %+F\n", fxch, node_idx, user));
347 set_irn_n(user, node_idx, fxch);
351 * This is a node from a dominator block. Changing its user might be wrong,
352 * so just keep it alive.
353 * The "right" solution would require a new Phi, but we don't care here.
/* Mirror the exchange in the simulated state. */
358 x87_fxch(state, pos);
363 * Calculate the necessary permutations to reach dst_state.
365 * These permutations are done with fxch instructions and placed
366 * at the end of the block.
368 * Note that critical edges are removed here, so we need only
369 * a shuffle if the current block has only one successor.
371 * @param sim the simulator handle
372 * @param block the current block
373 * @param state the current x87 stack state, might be modified
374 * @param dst_block the destination block
375 * @param dst_state destination state
/* Decomposes the permutation state -> dst_state into cyclic "rings" and emits
   one fxch per ring step; rings not containing the tos need an extra leading
   and trailing fxch to rotate the tos in and out. */
379 static x87_state *x87_shuffle(x87_simulator *sim, ir_node *block, x87_state *state, ir_node *dst_block, const x87_state *dst_state) {
380 int i, n_rings, k, ri;
381 unsigned rings[4], all_mask;
384 ir_node *before, *after;
386 assert(state->depth == dst_state->depth);
388 /* Some mathematics here:
389 If we have a ring of length n that includes the tos,
390 we need n-1 exchange operations.
391 We can always add the tos and restore it, so we need
392 n+1 exchange operations for a ring not containing the tos.
393 So, the maximum of needed operations is for a ring of 7
394 not including the tos == 8.
395 This is the same number of ops we would need for store,
396 so exchange is cheaper (we save the loads).
397 On the other hand, we might need an additional exchange
398 in the next block to bring one operand on top, so the
399 number of ops in the first case is identical.
400 Further, no more than 4 rings can exist.
402 all_mask = (1 << (state->depth)) - 1;
/* Ring detection: every stack slot belongs to exactly one ring; all_mask
   tracks slots not yet assigned to a ring. */
404 for (n_rings = 0; all_mask; ++n_rings) {
405 int src_idx, dst_idx;
407 /* find the first free slot */
408 for (i = 0; i < state->depth; ++i) {
409 if (all_mask & (1 << i)) {
410 all_mask &= ~(1 << i);
412 /* check if there are differences here */
413 if (x87_get_st_reg(state, i) != x87_get_st_reg(dst_state, i))
419 /* no more rings found */
424 rings[n_rings] = (1 << i);
425 ring_idx[n_rings][k++] = i;
/* Follow the cycle: where does the value at src_idx have to go? */
426 for (src_idx = i; ; src_idx = dst_idx) {
427 dst_idx = x87_on_stack(dst_state, x87_get_st_reg(state, src_idx));
429 if ((all_mask & (1 << dst_idx)) == 0)
432 ring_idx[n_rings][k++] = dst_idx;
433 rings[n_rings] |= (1 << dst_idx);
434 all_mask &= ~(1 << dst_idx);
436 ring_idx[n_rings][k] = -1;
440 /* no permutation needed */
444 /* Hmm: permutation needed */
445 DB((dbg, LEVEL_2, "\n%+F needs permutation: from\n", block));
446 DEBUG_ONLY(x87_dump_stack(state));
447 DB((dbg, LEVEL_2, " to\n"));
448 DEBUG_ONLY(x87_dump_stack(dst_state));
452 DB((dbg, LEVEL_2, "Need %d rings\n", n_rings));
453 for (ri = 0; ri < n_rings; ++ri) {
454 DB((dbg, LEVEL_2, " Ring %d:\n ", ri));
455 for (k = 0; ring_idx[ri][k] != -1; ++k)
456 DB((dbg, LEVEL_2, " st%d ->", ring_idx[ri][k]));
457 DB((dbg, LEVEL_2, "\n"));
464 * Find the place the node must be inserted.
465 * We have only one successor block, so the last instruction should
468 before = sched_last(block);
469 assert(is_cfop(before));
471 /* now do the permutations */
472 for (ri = 0; ri < n_rings; ++ri) {
473 if ((rings[ri] & 1) == 0) {
474 /* this ring does not include the tos: rotate the ring head to the tos first */
475 fxch = x87_fxch_shuffle(state, ring_idx[ri][0], block, dst_block);
477 sched_add_after(after, fxch);
479 sched_add_before(before, fxch);
482 for (k = 1; ring_idx[ri][k] != -1; ++k) {
483 fxch = x87_fxch_shuffle(state, ring_idx[ri][k], block, dst_block);
485 sched_add_after(after, fxch);
487 sched_add_before(before, fxch);
490 if ((rings[ri] & 1) == 0) {
491 /* this ring does not include the tos: restore the original tos */
492 fxch = x87_fxch_shuffle(state, ring_idx[ri][0], block, dst_block);
493 sched_add_after(after, fxch);
500 * Create a fxch before node n.
/* Emits a concrete fxch that brings st(pos) to the tos, rewires input op_idx
   of n to the fxch result, and mirrors the swap in the simulated state. */
502 static void x87_create_fxch(x87_state *state, ir_node *n, int pos, int op_idx) {
503 ir_node *fxch, *pred;
506 x87_fxch(state, pos);
508 pred = get_irn_n(n, op_idx);
509 fxch = new_rd_ia32_fxch(NULL, get_irn_irg(n), get_nodes_block(n), pred, get_irn_mode(pred));
510 attr = get_ia32_attr(fxch);
511 attr->x87[0] = &ia32_st_regs[pos];
512 attr->x87[2] = &ia32_st_regs[0];
513 set_irn_n(n, op_idx, fxch);
515 sched_add_before(n, fxch);
516 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fxch), attr->x87[0]->name, attr->x87[2]->name));
520 * Create a fpush before node n.
/* Duplicates st(pos) onto the tos (fld st(pos)) so a live operand survives a
   destructive use; registers the pushed value in the simulated state.
   NOTE(review): x87[0] uses `pos` as it was BEFORE the push -- verify this is
   the intended encoding, since the push shifts all stack positions by one. */
522 static void x87_create_fpush(const arch_env_t *env, x87_state *state, ir_node *n, int pos, int op_idx) {
523 ir_node *fpush, *pred;
525 const arch_register_t *out = arch_get_irn_register(env, n);
527 x87_push(state, arch_register_get_index(out), n);
529 pred = get_irn_n(n, op_idx);
530 fpush = new_rd_ia32_fpush(NULL, get_irn_irg(n), get_nodes_block(n), pred, get_irn_mode(pred));
531 attr = get_ia32_attr(fpush);
532 attr->x87[0] = &ia32_st_regs[pos];
533 attr->x87[2] = &ia32_st_regs[0];
534 set_irn_n(n, op_idx, fpush);
536 sched_add_before(n, fpush);
537 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fpush), attr->x87[0]->name, attr->x87[2]->name));
540 /* --------------------------------- liveness ------------------------------------------ */
543 * The liveness transfer function.
544 * Updates a live set over a single step from a given node to its predecessor.
545 * Everything defined at the node is removed from the set, the uses of the node get inserted.
546 * @param arch_env The architecture environment.
547 * @param irn The node at which liveness should be computed.
548 * @param live The bitset of registers live after @p irn. This function returns
549 * the registers live before irn (defs killed, uses added).
552 static unsigned vfp_liveness_transfer(const arch_env_t *arch_env, ir_node *irn, unsigned live)
555 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
/* Kill the register defined by irn (if it is a vfp value). */
557 if(arch_irn_consider_in_reg_alloc(arch_env, cls, irn)) {
558 const arch_register_t *reg = arch_get_irn_register(arch_env, irn);
559 live &= ~(1 << reg->index);
/* Gen: every vfp operand used by irn becomes live. */
562 for(i = 0, n = get_irn_arity(irn); i < n; ++i) {
563 ir_node *op = get_irn_n(irn, i);
565 if (mode_is_float(get_irn_mode(op)) && arch_irn_consider_in_reg_alloc(arch_env, cls, op)) {
566 const arch_register_t *reg = arch_get_irn_register(arch_env, op);
567 live |= 1 << reg->index;
575 * Put all live virtual registers at the end of a block into a bitset.
576 * @param arch_env The architecture environment.
577 * @param bl The block.
578 * @return The live bitset.
580 static unsigned vfp_liveness_end_of_block(const arch_env_t *arch_env, const ir_node *bl)
584 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
/* Collect all vfp registers that the liveness analysis marks live-at-end. */
586 live_foreach(bl, li) {
587 ir_node *irn = (ir_node *) li->irn;
588 if (live_is_end(li) && arch_irn_consider_in_reg_alloc(arch_env, cls, irn)) {
589 const arch_register_t *reg = arch_get_irn_register(arch_env, irn);
590 live |= 1 << reg->index;
598 * Compute a bitset of registers which are live at another node.
599 * @param arch_env The architecture environment.
600 * @param pos The node.
601 * @return The live bitset.
/* Walks the schedule backwards from the end of pos's block, applying the
   transfer function until pos is reached. */
603 static unsigned vfp_liveness_nodes_live_at(const arch_env_t *arch_env, const ir_node *pos)
605 const ir_node *bl = is_Block(pos) ? pos : get_nodes_block(pos);
606 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
610 live = vfp_liveness_end_of_block(arch_env, bl);
612 sched_foreach_reverse(bl, irn) {
614 * If we encounter the node we want to insert the Perm after,
615 * exit immediately, so that this node is still live
620 live = vfp_liveness_transfer(arch_env, irn, live);
627 * Returns true if a register is live in a set.
/* Non-zero iff reg's bit is set in the liveness bitset. */
629 static unsigned is_vfp_live(const arch_register_t *reg, unsigned live) {
630 return live & (1 << reg->index);
635 * dump liveness info.
/* Debug-only: prints each vfp register whose bit is set. */
637 static void vfp_dump_live(unsigned live) {
640 DB((dbg, LEVEL_2, "Live registers here: \n"));
641 for (i = 0; i < 8; ++i) {
642 if (live & (1 << i)) {
643 DB((dbg, LEVEL_2, " vf%d", i));
646 DB((dbg, LEVEL_2, "\n"));
648 #endif /* DEBUG_libfirm */
650 /* --------------------------------- simulators ---------------------------------------- */
/* Swap two ints; do/while(0) keeps it statement-safe. */
652 #define XCHG(a, b) do { int t =(a); (a) = (b); (b) = t; } while (0)
655 * Simulate a virtual binop
/* Core binop lowering: decides, based on which operands are still live after
   the node and where they sit on the stack, whether to fpush (copy a live
   operand), fxch (bring a dead operand to the tos), use the reverse opcode,
   and/or use the popping variant; then patches n to the chosen concrete op. */
657 static void sim_binop(x87_state *state, ir_node *n, const arch_env_t *env, const exchange_tmpl *tmpl) {
658 int op2_idx, op1_idx = -1;
659 int out_idx, do_pop =0;
662 const arch_register_t *op1 = arch_get_irn_register(env, get_irn_n(n, BINOP_IDX_1));
663 const arch_register_t *op2 = arch_get_irn_register(env, get_irn_n(n, BINOP_IDX_2));
664 const arch_register_t *out = arch_get_irn_register(env, n);
665 unsigned live = vfp_liveness_nodes_live_at(env, n);
667 DB((dbg, LEVEL_1, ">>> %s %s, %s -> %s\n", get_irn_opname(n),
668 arch_register_get_name(op2), arch_register_get_name(op1),
669 arch_register_get_name(out)));
670 DEBUG_ONLY(vfp_dump_live(live));
672 op2_idx = x87_on_stack(state, arch_register_get_index(op2));
674 if (op1->reg_class == &ia32_reg_classes[CLASS_ia32_vfp]) {
675 /* first operand is a vfp register */
676 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
678 if (is_vfp_live(op2, live)) {
679 /* second operand is live */
681 if (is_vfp_live(op1, live)) {
682 /* both operands are live: push the first one */
683 x87_create_fpush(env, state, n, op2_idx, BINOP_IDX_2);
684 out_idx = op2_idx = 0;
686 dst = tmpl->normal_op;
690 /* second live, first operand is dead here, bring it to tos */
692 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
696 op1_idx = out_idx = 0;
697 dst = tmpl->normal_op;
702 /* second operand is dead */
703 if (is_vfp_live(op1, live)) {
704 /* first operand is live: bring second to tos */
706 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
710 op2_idx = out_idx = 0;
711 dst = tmpl->normal_op;
715 /* both operands are dead here, pop them from the stack */
/* Use the reversed popping opcode so the operand order still matches. */
718 XCHG(op2_idx, op1_idx);
719 dst = tmpl->reverse_pop_op;
722 else if (op1_idx == 0) {
724 dst = tmpl->normal_pop_op;
728 /* bring the first on top */
729 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
732 dst = tmpl->normal_pop_op;
739 /* first operand is an address mode */
740 if (is_vfp_live(op2, live)) {
741 /* second operand is live: push it here */
742 x87_create_fpush(env, state, n, op2_idx, BINOP_IDX_2);
745 /* second operand is dead: bring it to tos */
747 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
749 op2_idx = out_idx = 0;
750 dst = tmpl->normal_op;
/* Record the result register in the simulated state and patch the opcode. */
754 x87_set_st(state, arch_register_get_index(out), x87_patch_insn(n, dst), out_idx);
758 /* patch the operation */
759 attr = get_ia32_attr(n);
761 attr->x87[0] = op1 = &ia32_st_regs[op1_idx];
762 attr->x87[1] = op2 = &ia32_st_regs[op2_idx];
763 attr->x87[2] = out = &ia32_st_regs[out_idx];
765 DB((dbg, LEVEL_1, "<<< %s %s, %s -> %s\n", get_irn_opname(n),
766 arch_register_get_name(op2), arch_register_get_name(op1),
767 arch_register_get_name(out)));
771 * Simulate a virtual Unop
/* Unops operate on the tos: push the operand if it is still live (preserving
   it), otherwise fxch it to the tos; the result replaces st(0). */
773 static void sim_unop(x87_state *state, ir_node *n, const arch_env_t *env, ir_op *op) {
774 int op1_idx, out_idx;
775 const arch_register_t *op1 = arch_get_irn_register(env, get_irn_n(n, UNOP_IDX));
776 const arch_register_t *out = arch_get_irn_register(env, n);
778 unsigned live = vfp_liveness_nodes_live_at(env, n);
780 DB((dbg, LEVEL_1, ">>> %s -> %s\n", get_irn_opname(n), out->name));
781 DEBUG_ONLY(vfp_dump_live(live));
783 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
785 if (is_vfp_live(op1, live)) {
786 /* push the operand here */
787 x87_create_fpush(env, state, n, op1_idx, UNOP_IDX);
790 /* operand is dead, bring it to tos */
792 x87_create_fxch(state, n, op1_idx, UNOP_IDX);
795 x87_set_tos(state, arch_register_get_index(out), x87_patch_insn(n, op));
796 op1_idx = out_idx = 0;
797 attr = get_ia32_attr(n);
798 attr->x87[0] = op1 = &ia32_st_regs[0];
799 attr->x87[2] = out = &ia32_st_regs[0];
800 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), out->name));
804 * Simulate a virtual Load instructions
/* A load always pushes its result onto the x87 stack (fld semantics). */
806 static void sim_load(x87_state *state, ir_node *n, const arch_env_t *env, ir_op *op) {
807 const arch_register_t *out = arch_get_irn_register(env, n);
810 DB((dbg, LEVEL_1, ">>> %s -> %s\n", get_irn_opname(n), arch_register_get_name(out)));
811 x87_push(state, arch_register_get_index(out), x87_patch_insn(n, op));
812 attr = get_ia32_attr(n);
813 attr->x87[2] = out = &ia32_st_regs[0];
814 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), arch_register_get_name(out)));
818 * Simulate a virtual Store
/* x87 can only store from st(0): fxch the value to the tos first, then use
   the plain store (op) if the value stays live, or the popping variant (op_p)
   if it is dead afterwards. */
820 static void sim_store(x87_state *state, ir_node *n, const arch_env_t *env, ir_op *op, ir_op *op_p) {
822 const arch_register_t *op2 = arch_get_irn_register(env, get_irn_n(n, STORE_VAL_IDX));
824 unsigned live = vfp_liveness_nodes_live_at(env, n);
826 op2_idx = x87_on_stack(state, arch_register_get_index(op2));
828 DB((dbg, LEVEL_1, ">>> %s %s ->\n", get_irn_opname(n), arch_register_get_name(op2)));
830 /* we can only store the tos to memory */
832 x87_create_fxch(state, n, op2_idx, STORE_VAL_IDX);
834 if (is_vfp_live(op2, live))
835 x87_patch_insn(n, op);
838 x87_patch_insn(n, op_p);
841 attr = get_ia32_attr(n);
842 attr->x87[1] = op2 = &ia32_st_regs[0];
843 DB((dbg, LEVEL_1, "<<< %s %s ->\n", get_irn_opname(n), arch_register_get_name(op2)));
847 * Simulate a virtual Phi.
848 * Just for cosmetic reasons change the mode of Phi nodes to mode_E.
/* No stack effect: Phis carry no x87 instruction, only their mode is normalized. */
850 static void sim_Phi(x87_state *state, ir_node *n, const arch_env_t *env) {
851 ir_mode *mode = get_irn_mode(n);
853 if (mode_is_float(mode))
854 set_irn_mode(n, mode_E);
/* Generator macros: stamp out one sim_<op> wrapper per virtual opcode, each
   delegating to the generic sim_binop/sim_load/sim_unop/sim_store routine
   with the matching concrete ia32 opcodes. */
858 #define _GEN_BINOP(op, rev) \
859 static void sim_##op(x87_state *state, ir_node *n, const arch_env_t *env) { \
860 exchange_tmpl tmpl = { op_ia32_##op, op_ia32_##rev, op_ia32_##op##p, op_ia32_##rev##p }; \
861 sim_binop(state, n, env, &tmpl); \
864 #define GEN_BINOP(op) _GEN_BINOP(op, op)
865 #define GEN_BINOPR(op) _GEN_BINOP(op, op##r)
867 #define GEN_LOAD2(op, nop) \
868 static void sim_##op(x87_state *state, ir_node *n, const arch_env_t *env) { \
869 sim_load(state, n, env, op_ia32_##nop); \
872 #define GEN_LOAD(op) GEN_LOAD2(op, op)
874 #define GEN_UNOP(op) \
875 static void sim_##op(x87_state *state, ir_node *n, const arch_env_t *env) { \
876 sim_unop(state, n, env, op_ia32_##op); \
879 #define GEN_STORE(op) \
880 static void sim_##op(x87_state *state, ir_node *n, const arch_env_t *env) { \
881 sim_store(state, n, env, op_ia32_##op, op_ia32_##op##p); \
900 GEN_LOAD2(fConst, fldConst)
907 * Simulate a be_Copy.
/* Float copies: if the source is still live afterwards, materialize a real
   fpush duplicating it; otherwise the copy is virtual -- just rename the
   stack slot and delete the Copy node. Non-float copies are ignored. */
909 static void sim_Copy(x87_state *state, ir_node *n, const arch_env_t *env) {
910 ir_mode *mode = get_irn_mode(n);
912 if (mode_is_float(mode)) {
913 const arch_register_t *op1 = arch_get_irn_register(env, get_irn_n(n, 0));
914 const arch_register_t *out = arch_get_irn_register(env, n);
915 ir_node *node, *next;
918 unsigned live = vfp_liveness_nodes_live_at(env, n);
920 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
922 DB((dbg, LEVEL_1, ">>> %s %s -> %s\n", get_irn_opname(n),
923 arch_register_get_name(op1), arch_register_get_name(out)));
924 DEBUG_ONLY(vfp_dump_live(live));
926 if (is_vfp_live(op1, live)) {
927 /* operand is still live, a real copy */
928 node = new_rd_ia32_fpush(get_irn_dbg_info(n), get_irn_irg(n), get_nodes_block(n), get_irn_n(n, 0), mode);
929 arch_set_irn_register(env, node, out);
931 x87_push(state, arch_register_get_index(out), node);
933 attr = get_ia32_attr(node);
934 attr->x87[0] = op1 = &ia32_st_regs[op1_idx];
935 attr->x87[2] = out = &ia32_st_regs[0];
937 next = sched_next(n);
940 sched_add_before(next, node);
941 DB((dbg, LEVEL_1, ">>> %s %s -> %s\n", get_irn_opname(node), op1->name, out->name));
944 /* just a virtual copy */
945 x87_set_st(state, arch_register_get_index(out), get_unop_op(n), op1_idx);
947 DB((dbg, LEVEL_1, ">>> KILLED %s\n", get_irn_opname(n)));
948 exchange(n, get_unop_op(n));
/* Simulate a be_Call: the x87 stack must be empty across calls (ABI); if the
   callee returns a float it arrives in st(0), so push a placeholder. */
956 static void sim_Call(x87_state *state, ir_node *n, const arch_env_t *env) {
957 ir_type *call_tp = be_Call_get_type(n);
959 /* at the begin of a call the x87 state should be empty */
960 assert(state->depth == 0 && "stack not empty before call");
963 * If the called function returns a float, it is returned in st(0).
964 * This even happens if the return value is NOT used.
965 * Moreover, only one return result is supported.
967 if (get_method_n_ress(call_tp) > 0) {
968 ir_type *res_type = get_method_res_type(call_tp, 0);
969 ir_mode *mode = get_type_mode(res_type);
971 if (mode && mode_is_float(mode)) {
973 * TODO: what to push here? The result might be unused and currently
974 * we have no possibility to detect this :-(
976 x87_push(state, 0, n);
982 * Simulate a be_Spill.
/* Spills must have been lowered to fst before this pass; the assert documents that. */
984 static void sim_Spill(x87_state *state, ir_node *n, const arch_env_t *env) {
985 assert(0 && "Spill not lowered");
986 sim_fst(state, n, env);
990 * Simulate a be_Reload
/* Reloads must have been lowered to fld before this pass; the assert documents that. */
992 static void sim_Reload(x87_state *state, ir_node *n, const arch_env_t *env) {
993 assert(0 && "Reload not lowered");
994 sim_fld(state, n, env);
998 * Run a simulation and fix all virtual instructions for a block.
1000 * @return non-zero if simulation is complete,
1001 * zero if the simulation must be rerun
/* Walks the block's schedule, dispatching each node to its sim_func (stored
   in op->ops.generic). The begin state is cloned lazily on first mutation so
   predecessor states stay intact; the end state is propagated to successors,
   inserting an fxch shuffle when a successor already has a begin state. */
1003 static int x87_simulate_block(x87_simulator *sim, ir_node *block) {
1005 blk_state *bl_state = x87_get_bl_state(sim, block);
1006 x87_state *state = bl_state->begin;
1007 const ir_edge_t *edge;
1008 ir_node *start_block;
1010 /* if we have no assigned start state, we must wait ... */
1014 assert(bl_state->end == NULL);
1016 DB((dbg, LEVEL_1, "Simulate %+F\n", block));
1018 /* beware, n might change */
1019 for (n = sched_first(block); !sched_is_end(n); n = next) {
1020 ir_op *op = get_irn_op(n);
1022 next = sched_next(n);
1023 if (op->ops.generic) {
1024 sim_func func = (sim_func)op->ops.generic;
1026 /* have work to do */
1027 if (state == bl_state->begin) {
1028 /* create a new state, will be changed */
1029 state = x87_clone_state(sim, state);
1033 (*func)(state, n, sim->env);
1037 start_block = get_irg_start_block(get_irn_irg(block));
1039 /* check if the state must be shuffled */
1040 foreach_block_succ(block, edge) {
1041 ir_node *succ = get_edge_src_irn(edge);
1042 blk_state *succ_state = x87_get_bl_state(sim, succ);
1044 if (succ_state->begin && succ != start_block) {
1045 /* There is already a begin state for this block, bad.
1046 Do the necessary permutations.
1047 Note that critical edges are removed, so this is always possible. */
1048 x87_shuffle(sim, block, state, succ, succ_state->begin);
1050 /* Note further, that there can be only one such situation,
1051 so we can break here. */
1055 bl_state->end = state;
1057 /* now propagate the state to all successor blocks */
1058 foreach_block_succ(block, edge) {
1059 ir_node *succ = get_edge_src_irn(edge);
1060 blk_state *succ_state = x87_get_bl_state(sim, succ);
1062 if (! succ_state->begin)
1063 succ_state->begin = state;
1070 * Create a new x87 simulator.
/* Sets up the obstack and block-state map, registers the debug module, and
   installs the sim_* handlers into the generic function slot of every opcode
   that must be simulated (via the ASSOC* macros below). */
1072 static void x87_init_simulator(x87_simulator *sim, ir_graph *irg, const arch_env_t *env) {
1073 obstack_init(&sim->obst);
1074 sim->blk_states = pmap_create();
1077 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.x87");
1078 firm_dbg_set_mask(dbg, SET_LEVEL_2);
1080 DB((dbg, LEVEL_1, "--------------------------------\n"
1081 "x87 Simulator started for %+F\n", irg));
1083 /* set the generic function pointer of instruction we must simulate */
1084 clear_irp_opcodes_generic_func();
1086 #define ASSOC(op) (op_ ## op)->ops.generic = (op_func)(sim_##op)
1087 #define ASSOC_IA32(op) (op_ia32_v ## op)->ops.generic = (op_func)(sim_##op)
1088 #define ASSOC_BE(op) (op_be_ ## op)->ops.generic = (op_func)(sim_##op)
1117 * Destroy a x87 simulator.
/* Releases the block-state map and all obstack allocations in one sweep. */
1119 static void x87_destroy_simulator(x87_simulator *sim) {
1120 pmap_destroy(sim->blk_states);
1121 obstack_free(&sim->obst, NULL);
1122 DB((dbg, LEVEL_1, "x87 Simulator stopped\n\n"));
1126 * Run a simulation and fix all virtual instructions for a graph.
1128 * Needs a block-schedule.
/* Entry point of the pass: seeds the start block with the empty state, then
   worklist-iterates x87_simulate_block until every block has received a begin
   state (blocks whose begin state is not yet known are re-queued). */
1130 void x87_simulate_graph(const arch_env_t *env, ir_graph *irg, ir_node **blk_list) {
1131 ir_node *block, *start_block;
1133 blk_state *bl_state;
1137 /* we need liveness info for the current graph */
1140 /* create the simulator */
1141 x87_init_simulator(&sim, irg, env);
1143 start_block = get_irg_start_block(irg);
1144 bl_state = x87_get_bl_state(&sim, start_block);
1146 /* start with the empty state */
1147 bl_state->begin = empty;
1149 worklist = new_pdeq();
1151 /* create the worklist for the schedule. */
1152 for (i = 0; i < ARR_LEN(blk_list); ++i)
1153 pdeq_putr(worklist, blk_list[i]);
1157 block = pdeq_getl(worklist);
1158 if (! x87_simulate_block(&sim, block)) {
1159 pdeq_putr(worklist, block);
1162 } while (! pdeq_empty(worklist));
1165 x87_destroy_simulator(&sim);