2 * This file implements the x87 support and virtual to stack
3 * register translation for the ia32 backend.
5 * @author: Michael Beck
12 #endif /* HAVE_CONFIG_H */
19 #include "iredges_t.h"
28 #include "../belive_t.h"
29 #include "../besched.h"
30 #include "../benode_t.h"
31 #include "ia32_new_nodes.h"
32 #include "gen_ia32_new_nodes.h"
33 #include "gen_ia32_regalloc_if.h"
38 /* first and second binop index */
45 /* the store val index */
46 #define STORE_VAL_IDX 2
48 #define MASK_TOS(x) ((x) & (N_x87_REGS - 1))
50 /** the debug handle */
51 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
54 * An exchange template.
55 * Note that our virtual functions have the same inputs
56 * and attributes as the real ones, so we can simply exchange
58 * Further, x87 supports inverse instructions, so we can handle them.
60 typedef struct _exchange_tmpl {
61 ir_op *normal_op; /**< the normal one */
62 ir_op *reverse_op; /**< the reverse one if exists */
63 ir_op *normal_pop_op; /**< the normal one with tos pop */
64 ir_op *reverse_pop_op; /**< the reverse one with tos pop */
68 * An entry on the simulated x87 stack.
70 typedef struct _st_entry {
71 int reg_idx; /**< the virtual register index of this stack value */
72 ir_node *node; /**< the node that produced this value */
/* The simulated x87 stack: a ring buffer of st_entry slots, indexed
 * modulo N_x87_REGS through the MASK_TOS() macro. */
78 typedef struct _x87_state {
79 st_entry st[N_x87_REGS]; /**< the register stack */
80 int depth; /**< the current stack depth */
81 int tos; /**< position of the tos */
84 /** An empty state, used for blocks without fp instructions. */
85 static const x87_state _empty = { {0, NULL}, 0, 0 };
/* NOTE(review): the cast below discards const; *empty must be treated as
 * read-only by all users — verify no caller writes through it. */
86 static x87_state *empty = (x87_state *)&_empty;
88 /** The type of an instruction simulator */
89 typedef void (*sim_func)(x87_state *state, ir_node *n, const arch_env_t *env);
92 * A block state: Every block has a x87 state at the beginning and at the end.
94 typedef struct _blk_state {
95 x87_state *begin; /**< state at the begin or NULL if not assigned */
96 x87_state *end; /**< state at the end or NULL if not assigned */
99 #define PTR_TO_BLKSTATE(p) ((blk_state *)(p))
/* The simulator handle: owns all allocated states via its obstack and maps
 * each block to its blk_state. */
104 typedef struct _x87_simulator {
105 struct obstack obst; /**< an obstack for fast allocating */
106 pmap *blk_states; /**< map blocks to states */
107 const arch_env_t *env; /**< architecture environment */
111 * Returns the stack depth.
113 * @param state the x87 state
115 * @return the x87 stack depth
117 static int x87_get_depth(const x87_state *state) {
122 * Check if the state is empty.
124 * @param state the x87 state
126 * returns non-zero if the x87 stack is empty
128 static int x87_state_is_empty(const x87_state *state) {
129 return state->depth == 0;
133 * Return the virtual register index at st(pos).
135 * @param state the x87 state
136 * @param pos a stack position
138 * @return the vfp register index that produced the value at st(pos)
140 static int x87_get_st_reg(const x87_state *state, int pos) {
141 assert(pos < state->depth);
/* st(pos) lives at offset pos from tos, wrapped by MASK_TOS */
142 return state->st[MASK_TOS(state->tos + pos)].reg_idx;
146 * Return the node at st(pos).
148 * @param state the x87 state
149 * @param pos a stack position
151 * @return the IR node that produced the value at st(pos)
153 static ir_node *x87_get_st_node(const x87_state *state, int pos) {
154 assert(pos < state->depth);
155 return state->st[MASK_TOS(state->tos + pos)].node;
160 * Dump the stack for debugging.
162 * @param state the x87 state
164 static void x87_dump_stack(const x87_state *state) {
/* print from the deepest slot up to st(0) so the TOS appears last */
167 for (i = state->depth - 1; i >= 0; --i) {
168 DB((dbg, LEVEL_2, "vf%d ", x87_get_st_reg(state, i)));
170 DB((dbg, LEVEL_2, "<-- TOS\n"));
172 #endif /* DEBUG_libfirm */
175 * Set a virtual register to st(pos).
177 * @param state the x87 state
178 * @param reg_idx the vfp register index that should be set
179 * @param node the IR node that produces the value of the vfp register
180 * @param pos the stack position where the new value should be entered
182 static void x87_set_st(x87_state *state, int reg_idx, ir_node *node, int pos) {
183 assert(0 < state->depth);
184 state->st[MASK_TOS(state->tos + pos)].reg_idx = reg_idx;
185 state->st[MASK_TOS(state->tos + pos)].node = node;
187 DB((dbg, LEVEL_2, "After SET_REG:\n ")); DEBUG_ONLY(x87_dump_stack(state));
191 * Set the tos virtual register.
193 * @param state the x87 state
194 * @param reg_idx the vfp register index that should be set
195 * @param node the IR node that produces the value of the vfp register
197 static void x87_set_tos(x87_state *state, int reg_idx, ir_node *node) {
/* tos is simply st(0) */
198 x87_set_st(state, reg_idx, node, 0);
202 * Flush the x87 stack.
204 * @param state the x87 state
206 static void x87_flush(x87_state *state) {
212 * Swap st(0) with st(pos).
214 * @param state the x87 state
215 * @param pos the stack position to change the tos with
217 static void x87_fxch(x87_state *state, int pos) {
219 assert(pos < state->depth);
/* plain three-way swap of the two stack slots */
221 entry = state->st[MASK_TOS(state->tos + pos)];
222 state->st[MASK_TOS(state->tos + pos)] = state->st[MASK_TOS(state->tos)];
223 state->st[MASK_TOS(state->tos)] = entry;
225 DB((dbg, LEVEL_2, "After FXCH:\n ")); DEBUG_ONLY(x87_dump_stack(state));
229 * Convert a virtual register to the stack index.
231 * @param state the x87 state
232 * @param reg_idx the register vfp index
234 * @return the stack position where the register is stacked
235 * or -1 if the virtual register was not found
237 static int x87_on_stack(const x87_state *state, int reg_idx) {
238 int i, tos = state->tos;
/* linear scan over the current depth; the stack holds at most N_x87_REGS entries */
240 for (i = 0; i < state->depth; ++i)
241 if (state->st[MASK_TOS(tos + i)].reg_idx == reg_idx)
247 * Push a virtual Register onto the stack.
249 * @param state the x87 state
250 * @param reg_idx the register vfp index
251 * @param node the node that produces the value of the vfp register
253 static void x87_push(x87_state *state, int reg_idx, ir_node *node) {
254 assert(x87_on_stack(state, reg_idx) == -1 && "double push");
255 assert(state->depth < N_x87_REGS && "stack overrun");
/* the stack grows downwards: decrement tos (mod N_x87_REGS), then fill the slot */
258 state->tos = MASK_TOS(state->tos - 1);
259 state->st[state->tos].reg_idx = reg_idx;
260 state->st[state->tos].node = node;
262 DB((dbg, LEVEL_2, "After PUSH:\n ")); DEBUG_ONLY(x87_dump_stack(state));
266 * Pop a virtual Register from the stack.
268 static void x87_pop(x87_state *state) {
269 assert(state->depth > 0 && "stack underrun");
272 state->tos = MASK_TOS(state->tos + 1);
274 DB((dbg, LEVEL_2, "After POP:\n ")); DEBUG_ONLY(x87_dump_stack(state));
278 * Returns the block state of a block.
280 * @param sim the x87 simulator handle
281 * @param block the current block
283 * @return the block state
285 static blk_state *x87_get_bl_state(x87_simulator *sim, ir_node *block) {
286 pmap_entry *entry = pmap_find(sim->blk_states, block);
/* not yet mapped: allocate a fresh, unassigned blk_state and register it */
289 blk_state *bl_state = obstack_alloc(&sim->obst, sizeof(*bl_state));
290 bl_state->begin = NULL;
291 bl_state->end = NULL;
293 pmap_insert(sim->blk_states, block, bl_state);
/* NOTE(review): as visible here, the freshly allocated bl_state does not
 * appear to be returned on the allocation path — confirm against the full
 * source that this branch returns bl_state. */
297 return entry ? PTR_TO_BLKSTATE(entry->value) : NULL;
301 * Creates a new x87 state.
303 * @param sim the x87 simulator handle
304 * @return a new x87 state
306 static x87_state *x87_alloc_state(x87_simulator *sim) {
/* states are obstack-allocated: freed in bulk with the simulator */
307 x87_state *res = obstack_alloc(&sim->obst, sizeof(*res));
312 * Create a new empty x87 state.
314 * @param sim the x87 simulator handle
315 * @return a new empty x87 state
317 static x87_state *x87_alloc_empty_state(x87_simulator *sim) {
318 x87_state *res = x87_alloc_state(sim);
327 * @param sim the x87 simulator handle
328 * @param src the x87 state that will be cloned
330 * @return a cloned copy of the src state
332 static x87_state *x87_clone_state(x87_simulator *sim, const x87_state *src) {
333 x87_state *res = x87_alloc_state(sim);
/* x87_state is plain data, so a shallow memcpy is a full clone */
335 memcpy(res, src, sizeof(*res));
340 * Patch a virtual instruction into a x87 one and return
343 * @param n the IR node to patch
344 * @param op the x87 opcode to patch in
346 static ir_node *x87_patch_insn(ir_node *n, ir_op *op) {
347 ir_mode *mode = get_irn_mode(n);
352 if (mode == mode_T) {
353 /* patch all Proj's */
354 const ir_edge_t *edge;
356 foreach_out_edge(n, edge) {
357 ir_node *proj = get_edge_src_irn(edge);
359 mode = get_irn_mode(proj);
/* every float Proj of the tuple is retyped to the x87 extended mode */
360 if (mode_is_float(mode)) {
362 set_irn_mode(proj, mode_E);
/* non-tuple float result: retype the node itself */
367 else if (mode_is_float(mode))
368 set_irn_mode(n, mode_E);
372 /* -------------- x87 perm --------------- */
375 * Creates a fxch for shuffle.
377 * @param state the x87 state
378 * @param pos parameter for fxch
379 * @param dst_block the block of the user
381 * Creates a new fxch node and reroute the user of the old node
384 * @return the fxch node
386 static ir_node *x87_fxch_shuffle(x87_state *state, int pos, ir_node *block, ir_node *dst_block)
388 const ir_edge_t *edge;
389 ir_node *n = x87_get_st_node(state, pos);
390 ir_node *user = NULL;
395 if (block == get_nodes_block(n)) {
396 /* this is a node from our block: change its user */
397 foreach_out_edge(n, edge) {
398 ir_node *succ = get_edge_src_irn(edge);
/* only the Phi in the destination block consumes the shuffled value */
400 if (is_Phi(succ) && get_nodes_block(succ) == dst_block) {
402 node_idx = get_edge_src_pos(edge);
409 fxch = new_rd_ia32_fxch(NULL, get_irn_irg(block), block, n, get_irn_mode(n));
410 attr = get_ia32_attr(fxch);
/* encode the real x87 registers: exchange st(pos) with st(0) */
411 attr->x87[0] = &ia32_st_regs[pos];
412 attr->x87[2] = &ia32_st_regs[0];
415 DB((dbg, LEVEL_2, "%+F replaced input %d of %+F\n", fxch, node_idx, user));
416 set_irn_n(user, node_idx, fxch);
420 * This is a node from a dominator block. Changing its user might be wrong,
421 * so just keep it alive.
422 * The "right" solution would require a new Phi, but we don't care here.
/* mirror the exchange in the simulated state */
427 x87_fxch(state, pos);
432 * Calculate the necessary permutations to reach dst_state.
434 * These permutations are done with fxch instructions and placed
435 * at the end of the block.
437 * Note that critical edges are removed here, so we need only
438 * a shuffle if the current block has only one successor.
440 * @param sim the simulator handle
441 * @param block the current block
442 * @param state the current x87 stack state, might be modified
443 * @param dst_block the destination block
444 * @param dst_state destination state
448 static x87_state *x87_shuffle(x87_simulator *sim, ir_node *block, x87_state *state, ir_node *dst_block, const x87_state *dst_state) {
449 int i, n_cycles, k, ri;
450 unsigned cycles[4], all_mask;
451 char cycle_idx[4][8];
453 ir_node *before, *after;
455 assert(state->depth == dst_state->depth);
457 /* Some mathematics here:
458 If we have a cycle of length n that includes the tos,
459 we need n-1 exchange operations.
460 We can always add the tos and restore it, so we need
461 n+1 exchange operations for a cycle not containing the tos.
462 So, the maximum of needed operations is for a cycle of 7
463 not including the tos == 8.
464 This is the same number of ops we would need for store,
465 so exchange is cheaper (we save the loads).
466 On the other hand, we might need an additional exchange
467 in the next block to bring one operand on top, so the
468 number of ops in the first case is identical.
469 Further, no more than 4 cycles can exist.
/* one bit per occupied stack slot; bits are cleared as slots are assigned to cycles */
471 all_mask = (1 << (state->depth)) - 1;
473 for (n_cycles = 0; all_mask; ++n_cycles) {
474 int src_idx, dst_idx;
476 /* find the first free slot */
477 for (i = 0; i < state->depth; ++i) {
478 if (all_mask & (1 << i)) {
479 all_mask &= ~(1 << i);
481 /* check if there are differences here */
482 if (x87_get_st_reg(state, i) != x87_get_st_reg(dst_state, i))
488 /* no more cycles found */
/* follow the permutation cycle starting at slot i, recording its members */
493 cycles[n_cycles] = (1 << i);
494 cycle_idx[n_cycles][k++] = i;
495 for (src_idx = i; ; src_idx = dst_idx) {
496 dst_idx = x87_on_stack(dst_state, x87_get_st_reg(state, src_idx));
498 if ((all_mask & (1 << dst_idx)) == 0)
501 cycle_idx[n_cycles][k++] = dst_idx;
502 cycles[n_cycles] |= (1 << dst_idx);
503 all_mask &= ~(1 << dst_idx);
/* -1 terminates the index list of this cycle */
505 cycle_idx[n_cycles][k] = -1;
509 /* no permutation needed */
513 /* Hmm: permutation needed */
514 DB((dbg, LEVEL_2, "\n%+F needs permutation: from\n", block));
515 DEBUG_ONLY(x87_dump_stack(state));
516 DB((dbg, LEVEL_2, " to\n"));
517 DEBUG_ONLY(x87_dump_stack(dst_state));
521 DB((dbg, LEVEL_2, "Need %d cycles\n", n_cycles));
522 for (ri = 0; ri < n_cycles; ++ri) {
523 DB((dbg, LEVEL_2, " Ring %d:\n ", ri));
524 for (k = 0; cycle_idx[ri][k] != -1; ++k)
525 DB((dbg, LEVEL_2, " st%d ->", cycle_idx[ri][k]));
526 DB((dbg, LEVEL_2, "\n"));
533 * Find the place node must be inserted.
534 * We have only one successor block, so the last instruction should
537 before = sched_last(block);
538 assert(is_cfop(before));
540 /* now do the permutations */
541 for (ri = 0; ri < n_cycles; ++ri) {
542 if ((cycles[ri] & 1) == 0) {
543 /* this cycle does not include the tos: bring its first member to tos */
544 fxch = x87_fxch_shuffle(state, cycle_idx[ri][0], block, dst_block);
546 sched_add_after(after, fxch);
548 sched_add_before(before, fxch);
/* rotate the rest of the cycle through the tos */
551 for (k = 1; cycle_idx[ri][k] != -1; ++k) {
552 fxch = x87_fxch_shuffle(state, cycle_idx[ri][k], block, dst_block);
554 sched_add_after(after, fxch);
556 sched_add_before(before, fxch);
559 if ((cycles[ri] & 1) == 0) {
560 /* this cycle does not include the tos: restore the original tos */
561 fxch = x87_fxch_shuffle(state, cycle_idx[ri][0], block, dst_block);
562 sched_add_after(after, fxch);
569 * Create a fxch node before another node.
571 * @param state the x87 state
572 * @param n the node before the fxch
573 * @param pos exchange st(pos) with st(0)
574 * @param op_idx if >= 0, replace input op_idx of n with the fxch result
578 static ir_node *x87_create_fxch(x87_state *state, ir_node *n, int pos, int op_idx) {
579 ir_node *fxch, *pred;
/* update the simulated stack first; the real fxch mirrors it */
582 x87_fxch(state, pos);
585 pred = get_irn_n(n, op_idx);
/* op_idx < 0: no input rerouting, take the producer from the stack */
587 pred = x87_get_st_node(state, pos);
589 fxch = new_rd_ia32_fxch(NULL, get_irn_irg(n), get_nodes_block(n), pred, get_irn_mode(pred));
590 attr = get_ia32_attr(fxch);
591 attr->x87[0] = &ia32_st_regs[pos];
592 attr->x87[2] = &ia32_st_regs[0];
595 set_irn_n(n, op_idx, fxch);
597 sched_add_before(n, fxch);
598 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fxch), attr->x87[0]->name, attr->x87[2]->name));
603 * Create a fpush before node n.
605 * @param state the x87 state
606 * @param n the node before the fpush
607 * @param pos push st(pos) on stack
608 * @param op_idx if >= 0, replace input op_idx of n with the fpush result
610 static void x87_create_fpush(const arch_env_t *env, x87_state *state, ir_node *n, int pos, int op_idx) {
611 ir_node *fpush, *pred;
613 const arch_register_t *out = arch_get_irn_register(env, n);
/* record the duplicated value in the simulated stack under n's register */
615 x87_push(state, arch_register_get_index(out), n);
617 pred = get_irn_n(n, op_idx);
618 fpush = new_rd_ia32_fpush(NULL, get_irn_irg(n), get_nodes_block(n), pred, get_irn_mode(pred));
619 attr = get_ia32_attr(fpush);
620 attr->x87[0] = &ia32_st_regs[pos];
621 attr->x87[2] = &ia32_st_regs[0];
623 set_irn_n(n, op_idx, fpush);
625 sched_add_before(n, fpush);
626 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fpush), attr->x87[0]->name, attr->x87[2]->name));
630 * Create a fpop before node n.
632 * @param state the x87 state
633 * @param n the node before the fpop
634 * @param num pop 1 or 2 values
635 * @param pred node to use as predecessor of the fpop
637 * @return the fpop node
639 static ir_node *x87_create_fpop(const arch_env_t *env, x87_state *state, ir_node *n, int num, ir_node *pred) {
/* the pop always removes st(0); mode_E matches the x87 extended format */
645 fpop = new_rd_ia32_fpop(NULL, get_irn_irg(n), get_nodes_block(n), pred, mode_E);
646 attr = get_ia32_attr(fpop);
647 attr->x87[0] = &ia32_st_regs[0];
648 attr->x87[1] = &ia32_st_regs[0];
649 attr->x87[2] = &ia32_st_regs[0];
651 sched_add_before(n, fpop);
652 DB((dbg, LEVEL_1, "<<< %s %s\n", get_irn_opname(fpop), attr->x87[0]->name));
660 /* --------------------------------- liveness ------------------------------------------ */
663 * The liveness transfer function.
664 * Updates a live set over a single step from a given node to its predecessor.
665 * Everything defined at the node is removed from the set, the uses of the node get inserted.
666 * @param arch_env The architecture environment.
667 * @param irn The node at which liveness should be computed.
668 * @param live The bitset of registers live before @p irn. This set gets modified by updating it to
669 * the registers live after irn.
672 static unsigned vfp_liveness_transfer(const arch_env_t *arch_env, ir_node *irn, unsigned live)
675 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
/* kill: the value defined by irn stops being live above it */
677 if (arch_irn_consider_in_reg_alloc(arch_env, cls, irn)) {
678 const arch_register_t *reg = arch_get_irn_register(arch_env, irn);
679 live &= ~(1 << reg->index);
/* gen: every vfp operand of irn becomes live above it */
682 for (i = 0, n = get_irn_arity(irn); i < n; ++i) {
683 ir_node *op = get_irn_n(irn, i);
685 if (mode_is_float(get_irn_mode(op)) && arch_irn_consider_in_reg_alloc(arch_env, cls, op)) {
686 const arch_register_t *reg = arch_get_irn_register(arch_env, op);
687 live |= 1 << reg->index;
695 * Put all live virtual registers at the end of a block into a bitset.
696 * @param arch_env The architecture environment.
697 * @param bl The block.
698 * @return The live bitset.
700 static unsigned vfp_liveness_end_of_block(const arch_env_t *arch_env, const ir_node *bl)
704 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
706 live_foreach(bl, li) {
707 ir_node *irn = (ir_node *) li->irn;
/* collect only vfp values that are live-out of the block */
708 if (live_is_end(li) && arch_irn_consider_in_reg_alloc(arch_env, cls, irn)) {
709 const arch_register_t *reg = arch_get_irn_register(arch_env, irn);
710 live |= 1 << reg->index;
718 * Compute a bitset of registers which are live at another node.
719 * @param arch_env The architecture environment.
720 * @param pos The node.
721 * @return The live bitset.
723 static unsigned vfp_liveness_nodes_live_at(const arch_env_t *arch_env, const ir_node *pos)
725 const ir_node *bl = is_Block(pos) ? pos : get_nodes_block(pos);
726 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
/* start from the block's live-out set and walk the schedule backwards to pos */
730 live = vfp_liveness_end_of_block(arch_env, bl);
732 sched_foreach_reverse(bl, irn) {
734 * If we encounter the node we want to insert the Perm after,
735 * exit immediately, so that this node is still live
740 live = vfp_liveness_transfer(arch_env, irn, live);
747 * Returns true if a register is live in a set.
749 * @param reg_idx the vfp register index
750 * @param live a live bitset
752 static unsigned is_vfp_live(int reg_idx, unsigned live) {
753 return live & (1 << reg_idx);
758 * Dump liveness info.
760 * @param live the live bitset
762 static void vfp_dump_live(unsigned live) {
765 DB((dbg, LEVEL_2, "Live registers here: \n"));
/* 8 virtual fp registers, one bit each */
766 for (i = 0; i < 8; ++i) {
767 if (live & (1 << i)) {
768 DB((dbg, LEVEL_2, " vf%d", i));
771 DB((dbg, LEVEL_2, "\n"));
773 #endif /* DEBUG_libfirm */
775 /* --------------------------------- simulators ---------------------------------------- */
/* swap two int variables; classic do/while(0) multi-statement macro */
777 #define XCHG(a, b) do { int t = (a); (a) = (b); (b) = t; } while (0)
780 * Simulate a virtual binop.
782 * @param state the x87 state
783 * @param n the node that should be simulated (and patched)
784 * @param env the architecture environment
785 * @param tmpl the template containing the 4 possible x87 opcodes
787 static void sim_binop(x87_state *state, ir_node *n, const arch_env_t *env, const exchange_tmpl *tmpl) {
788 int op2_idx, op1_idx = -1;
789 int out_idx, do_pop =0;
792 const arch_register_t *op1 = arch_get_irn_register(env, get_irn_n(n, BINOP_IDX_1));
793 const arch_register_t *op2 = arch_get_irn_register(env, get_irn_n(n, BINOP_IDX_2));
794 const arch_register_t *out = arch_get_irn_register(env, n);
795 unsigned live = vfp_liveness_nodes_live_at(env, n);
797 DB((dbg, LEVEL_1, ">>> %s %s, %s -> %s\n", get_irn_opname(n),
798 arch_register_get_name(op2), arch_register_get_name(op1),
799 arch_register_get_name(out)));
800 DEBUG_ONLY(vfp_dump_live(live));
802 op2_idx = x87_on_stack(state, arch_register_get_index(op2));
804 if (op1->reg_class == &ia32_reg_classes[CLASS_ia32_vfp]) {
805 /* first operand is a vfp register */
806 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
808 if (is_vfp_live(op2->index, live)) {
809 /* Second operand is live. */
811 if (is_vfp_live(op1->index, live)) {
812 /* Both operands are live: push the first one.
813 This works even for op1 == op2. */
814 x87_create_fpush(env, state, n, op2_idx, BINOP_IDX_2);
/* after the push both the result and op2 sit at st(0) */
815 out_idx = op2_idx = 0;
817 dst = tmpl->normal_op;
821 /* Second live, first operand is dead here, bring it to tos. */
823 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
827 op1_idx = out_idx = 0;
828 dst = tmpl->normal_op;
833 /* Second operand is dead. */
834 if (is_vfp_live(op1->index, live)) {
835 /* First operand is live: bring second to tos. */
837 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
841 op2_idx = out_idx = 0;
842 dst = tmpl->normal_op;
846 /* Both operands are dead here, pop them from the stack. */
/* use the reverse opcode so the operand order matches the swapped indices */
849 XCHG(op2_idx, op1_idx);
850 if (op1_idx == op2_idx) {
851 /* Both are identical, no pop needed. */
852 dst = tmpl->reverse_op;
856 dst = tmpl->reverse_pop_op;
860 else if (op1_idx == 0) {
862 if (op1_idx == op2_idx) {
863 /* Both are identical, no pop needed. */
864 dst = tmpl->normal_op;
868 dst = tmpl->normal_pop_op;
873 /* Bring the first on top. */
874 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
875 if (op1_idx == op2_idx) {
876 /* Both are identical, no pop needed. */
877 out_idx = op1_idx = op2_idx = 0;
878 dst = tmpl->normal_op;
884 dst = tmpl->normal_pop_op;
892 /* first operand is an address mode */
893 if (is_vfp_live(op2->index, live)) {
894 /* second operand is live: push it here */
895 x87_create_fpush(env, state, n, op2_idx, BINOP_IDX_2);
898 /* second operand is dead: bring it to tos */
900 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
902 op2_idx = out_idx = 0;
903 dst = tmpl->normal_op;
/* patch the virtual opcode into the selected x87 one and record the result slot */
907 x87_set_st(state, arch_register_get_index(out), x87_patch_insn(n, dst), out_idx);
911 /* patch the operation */
912 attr = get_ia32_attr(n);
914 attr->x87[0] = op1 = &ia32_st_regs[op1_idx];
915 attr->x87[1] = op2 = &ia32_st_regs[op2_idx];
916 attr->x87[2] = out = &ia32_st_regs[out_idx];
918 DB((dbg, LEVEL_1, "<<< %s %s, %s -> %s\n", get_irn_opname(n),
919 arch_register_get_name(op2), arch_register_get_name(op1),
920 arch_register_get_name(out)));
924 * Simulate a virtual Unop.
926 * @param state the x87 state
927 * @param n the node that should be simulated (and patched)
928 * @param env the architecture environment
929 * @param op the x87 opcode that will replace n's opcode
931 static void sim_unop(x87_state *state, ir_node *n, const arch_env_t *env, ir_op *op) {
932 int op1_idx, out_idx;
933 const arch_register_t *op1 = arch_get_irn_register(env, get_irn_n(n, UNOP_IDX));
934 const arch_register_t *out = arch_get_irn_register(env, n);
936 unsigned live = vfp_liveness_nodes_live_at(env, n);
938 DB((dbg, LEVEL_1, ">>> %s -> %s\n", get_irn_opname(n), out->name));
939 DEBUG_ONLY(vfp_dump_live(live));
941 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
943 if (is_vfp_live(op1->index, live)) {
944 /* push the operand here */
945 x87_create_fpush(env, state, n, op1_idx, UNOP_IDX);
948 /* operand is dead, bring it to tos */
950 x87_create_fxch(state, n, op1_idx, UNOP_IDX);
/* x87 unops operate in place on st(0): operand and result are both the tos */
953 x87_set_tos(state, arch_register_get_index(out), x87_patch_insn(n, op));
954 op1_idx = out_idx = 0;
955 attr = get_ia32_attr(n);
956 attr->x87[0] = op1 = &ia32_st_regs[0];
957 attr->x87[2] = out = &ia32_st_regs[0];
958 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), out->name));
962 * Simulate a virtual Load instruction.
964 * @param state the x87 state
965 * @param n the node that should be simulated (and patched)
966 * @param env the architecture environment
967 * @param op the x87 opcode that will replace n's opcode
969 static void sim_load(x87_state *state, ir_node *n, const arch_env_t *env, ir_op *op) {
970 const arch_register_t *out = arch_get_irn_register(env, n);
973 DB((dbg, LEVEL_1, ">>> %s -> %s\n", get_irn_opname(n), arch_register_get_name(out)));
/* a load always pushes its result onto the x87 stack (becomes st(0)) */
974 x87_push(state, arch_register_get_index(out), x87_patch_insn(n, op));
975 attr = get_ia32_attr(n);
976 attr->x87[2] = out = &ia32_st_regs[0];
977 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), arch_register_get_name(out)));
981 * Simulate a virtual Store.
983 * @param state the x87 state
984 * @param n the node that should be simulated (and patched)
985 * @param env the architecture environment
986 * @param op the x87 store opcode
987 * @param op_p the x87 store and pop opcode
989 static void sim_store(x87_state *state, ir_node *n, const arch_env_t *env, ir_op *op, ir_op *op_p) {
991 const arch_register_t *op2 = arch_get_irn_register(env, get_irn_n(n, STORE_VAL_IDX));
993 unsigned live = vfp_liveness_nodes_live_at(env, n);
995 op2_idx = x87_on_stack(state, arch_register_get_index(op2));
996 assert(op2_idx >= 0);
998 DB((dbg, LEVEL_1, ">>> %s %s ->\n", get_irn_opname(n), arch_register_get_name(op2)));
1000 /* we can only store the tos to memory */
1002 x87_create_fxch(state, n, op2_idx, STORE_VAL_IDX);
/* value still live after the store: keep it on the stack (fst);
 * otherwise use the store-and-pop form (fstp) */
1004 if (is_vfp_live(op2->index, live))
1005 x87_patch_insn(n, op);
1008 x87_patch_insn(n, op_p);
1011 attr = get_ia32_attr(n);
1012 attr->x87[1] = op2 = &ia32_st_regs[0];
1013 DB((dbg, LEVEL_1, "<<< %s %s ->\n", get_irn_opname(n), arch_register_get_name(op2)));
1017 * Simulate a virtual Phi.
1018 * Just for cosmetic reasons change the mode of Phi nodes to mode_E.
1020 * @param state the x87 state
1021 * @param n the node that should be simulated (and patched)
1022 * @param env the architecture environment
1024 static void sim_Phi(x87_state *state, ir_node *n, const arch_env_t *env) {
1025 ir_mode *mode = get_irn_mode(n);
1027 if (mode_is_float(mode))
1028 set_irn_mode(n, mode_E);
/* Generator macros: expand to one sim_<op> wrapper per virtual opcode,
 * dispatching to the generic simulators above with the matching x87 ops. */
1032 #define _GEN_BINOP(op, rev) \
1033 static void sim_##op(x87_state *state, ir_node *n, const arch_env_t *env) { \
1034 exchange_tmpl tmpl = { op_ia32_##op, op_ia32_##rev, op_ia32_##op##p, op_ia32_##rev##p }; \
1035 sim_binop(state, n, env, &tmpl); \
1038 #define GEN_BINOP(op) _GEN_BINOP(op, op)
1039 #define GEN_BINOPR(op) _GEN_BINOP(op, op##r)
1041 #define GEN_LOAD2(op, nop) \
1042 static void sim_##op(x87_state *state, ir_node *n, const arch_env_t *env) { \
1043 sim_load(state, n, env, op_ia32_##nop); \
1046 #define GEN_LOAD(op) GEN_LOAD2(op, op)
1048 #define GEN_UNOP(op) \
1049 static void sim_##op(x87_state *state, ir_node *n, const arch_env_t *env) { \
1050 sim_unop(state, n, env, op_ia32_##op); \
1053 #define GEN_STORE(op) \
1054 static void sim_##op(x87_state *state, ir_node *n, const arch_env_t *env) { \
1055 sim_store(state, n, env, op_ia32_##op, op_ia32_##op##p); \
1074 GEN_LOAD2(fConst, fldConst)
1080 * Simulate a fCondJmp.
1082 * @param state the x87 state
1083 * @param n the node that should be simulated (and patched)
1084 * @param env the architecture environment
1086 static void sim_fCondJmp(x87_state *state, ir_node *n, const arch_env_t *env) {
1087 int op2_idx, op1_idx = -1, pop_cnt = 0;
1090 const arch_register_t *op1 = arch_get_irn_register(env, get_irn_n(n, BINOP_IDX_1));
1091 const arch_register_t *op2 = arch_get_irn_register(env, get_irn_n(n, BINOP_IDX_2));
1092 unsigned live = vfp_liveness_nodes_live_at(env, n);
1094 DB((dbg, LEVEL_1, ">>> %s %s, %s\n", get_irn_opname(n),
1095 arch_register_get_name(op2), arch_register_get_name(op1)));
1096 DEBUG_ONLY(vfp_dump_live(live));
1098 op2_idx = x87_on_stack(state, arch_register_get_index(op2));
1100 /* BEWARE: check for comp a,a cases, they might happen */
1101 if (op1->reg_class == &ia32_reg_classes[CLASS_ia32_vfp]) {
1102 /* first operand is a vfp register */
1103 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1105 if (is_vfp_live(op2->index, live)) {
1106 /* second operand is live */
1108 if (is_vfp_live(op1->index, live)) {
1109 /* both operands are live: move one of them to tos */
/* reversed compare avoids an extra fxch when op2 is already on top */
1111 XCHG(op2_idx, op1_idx);
1112 dst = op_ia32_fcomrJmp;
1114 else if (op1_idx == 0) {
1115 dst = op_ia32_fcomJmp;
1118 /* bring the first on top */
1119 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1120 if (op1_idx == op2_idx)
1123 dst = op_ia32_fcomJmp;
1127 /* second live, first operand is dead here, bring it to tos.
1128 This means further, op1_idx != op2_idx. */
1130 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1135 dst = op_ia32_fcompJmp;
1140 /* second operand is dead */
1141 if (is_vfp_live(op1->index, live)) {
1142 /* first operand is live: bring second to tos.
1143 This means further, op1_idx != op2_idx. */
1145 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1150 dst = op_ia32_fcomrpJmp;
1154 /* both operands are dead here, check first for identity. */
1155 if (op1_idx == op2_idx) {
1156 /* identical, only one needed */
1158 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1159 op1_idx = op2_idx = 0;
1161 dst = op_ia32_fcompJmp;
1164 /* different, move them to st and st(1) and pop both.
1165 The tricky part is to get one into st(1).*/
1166 else if (op2_idx == 1) {
1167 /* good, second operand is already in the right place, move the first */
1169 /* bring the first on top */
1170 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1173 dst = op_ia32_fcomppJmp;
1176 else if (op1_idx == 1) {
1177 /* good, first operand is already in the right place, move the second */
1179 /* bring the first on top */
1180 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1183 dst = op_ia32_fcomrppJmp;
1187 /* if one is already the TOS, we need two fxch */
1189 /* first one is TOS, move to st(1) */
1190 x87_create_fxch(state, n, 1, BINOP_IDX_1);
1192 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1194 dst = op_ia32_fcomrppJmp;
1197 else if (op2_idx == 0) {
1198 /* second one is TOS, move to st(1) */
1199 x87_create_fxch(state, n, 1, BINOP_IDX_2);
1201 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1203 dst = op_ia32_fcomrppJmp;
1207 /* none of them is either TOS or st(1), 3 fxch needed */
1208 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1209 x87_create_fxch(state, n, 1, BINOP_IDX_2);
1210 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1213 dst = op_ia32_fcomppJmp;
1221 /* first operand is an address mode */
1222 if (is_vfp_live(op2->index, live)) {
1223 /* second operand is live: bring it to TOS */
1225 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1228 dst = op_ia32_fcomrJmp;
1231 /* second operand is dead: bring it to tos */
1233 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1237 dst = op_ia32_fcomrpJmp;
/* replace the virtual compare with the selected real x87 compare */
1241 x87_patch_insn(n, dst);
1247 /* patch the operation */
1248 attr = get_ia32_attr(n);
1250 attr->x87[0] = op1 = &ia32_st_regs[op1_idx];
1251 attr->x87[1] = op2 = &ia32_st_regs[op2_idx];
1253 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(n),
1254 arch_register_get_name(op2), arch_register_get_name(op1)));
1258 * Simulate a be_Copy.
1260 * @param state the x87 state
1261 * @param n the node that should be simulated (and patched)
1262 * @param env the architecture environment
1264 static void sim_Copy(x87_state *state, ir_node *n, const arch_env_t *env) {
1265 ir_mode *mode = get_irn_mode(n);
1267 if (mode_is_float(mode)) {
1268 const arch_register_t *op1 = arch_get_irn_register(env, get_irn_n(n, 0));
1269 const arch_register_t *out = arch_get_irn_register(env, n);
1270 ir_node *node, *next;
1272 int op1_idx, out_idx;
1273 unsigned live = vfp_liveness_nodes_live_at(env, n);
1275 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1277 DB((dbg, LEVEL_1, ">>> %s %s -> %s\n", get_irn_opname(n),
1278 arch_register_get_name(op1), arch_register_get_name(out)));
1279 DEBUG_ONLY(vfp_dump_live(live));
1281 if (is_vfp_live(op1->index, live)) {
1282 /* operand is still live, a real copy: duplicate it with an fpush */
1283 node = new_rd_ia32_fpush(get_irn_dbg_info(n), get_irn_irg(n), get_nodes_block(n), get_irn_n(n, 0), mode);
1284 arch_set_irn_register(env, node, out);
1286 x87_push(state, arch_register_get_index(out), node);
1288 attr = get_ia32_attr(node);
1289 attr->x87[0] = op1 = &ia32_st_regs[op1_idx];
1290 attr->x87[2] = out = &ia32_st_regs[0];
1292 next = sched_next(n);
1295 sched_add_before(next, node);
1296 DB((dbg, LEVEL_1, ">>> %s %s -> %s\n", get_irn_opname(node), op1->name, out->name));
/* operand is dead: the copy degenerates to a rename, possibly with a pop */
1299 out_idx = x87_on_stack(state, arch_register_get_index(out));
1301 if (out_idx >= 0 && out_idx != op1_idx) {
1302 /* op1 must be killed and placed where out is */
1304 /* best case, simple remove and rename */
1305 x87_patch_insn(n, op_ia32_Pop);
1306 attr = get_ia32_attr(n);
1307 attr->x87[0] = op1 = &ia32_st_regs[0];
1310 x87_set_st(state, arch_register_get_index(out), n, op1_idx - 1);
1313 /* move op1 to tos, store and pop it */
1315 x87_create_fxch(state, n, op1_idx, 0);
1318 x87_patch_insn(n, op_ia32_Pop);
1319 attr = get_ia32_attr(n);
1320 attr->x87[0] = op1 = &ia32_st_regs[out_idx];
1323 x87_set_st(state, arch_register_get_index(out), n, out_idx - 1);
1325 DB((dbg, LEVEL_1, ">>> %s %s\n", get_irn_opname(n), op1->name));
1328 /* just a virtual copy: rename the stack slot and drop the Copy node */
1329 x87_set_st(state, arch_register_get_index(out), get_unop_op(n), op1_idx);
1331 DB((dbg, LEVEL_1, ">>> KILLED %s\n", get_irn_opname(n)));
1332 exchange(n, get_unop_op(n));
1339 * Simulate a be_Call.
1341 * @param state the x87 state
1342 * @param n the node that should be simulated
1343 * @param env the architecture environment
1345 static void sim_Call(x87_state *state, ir_node *n, const arch_env_t *env) {
1346 ir_type *call_tp = be_Call_get_type(n);
/* the x87 calling convention requires an empty fp stack at call sites */
1348 /* at the beginning of a call the x87 state should be empty */
1349 assert(state->depth == 0 && "stack not empty before call");
1352 * If the called function returns a float, it is returned in st(0).
1353 * This even happens if the return value is NOT used.
1354 * Moreover, only one return result is supported.
1356 if (get_method_n_ress(call_tp) > 0) {
1357 ir_type *res_type = get_method_res_type(call_tp, 0);
1358 ir_mode *mode = get_type_mode(res_type);
/* mode may be NULL for compound result types; only scalar floats land in st(0) */
1360 if (mode && mode_is_float(mode)) {
1362 * TODO: what to push here? The result might be unused and currently
1363 * we have no possibility to detect this :-(
1365 x87_push(state, 0, n);
1371 * Simulate a be_Spill.
1373 * @param state the x87 state
1374 * @param n the node that should be simulated (and patched)
1375 * @param env the architecture environment
1377 * Should not happen, spills are lowered before the x87 simulator sees them.
/* Simulate a be_Spill: must never be reached, because Spills are lowered
 * to fst operations before the simulator runs; the assert documents that
 * invariant, and the sim_fst() call is a best-effort fallback for
 * non-assert builds. */
1379 static void sim_Spill(x87_state *state, ir_node *n, const arch_env_t *env) {
1380 assert(0 && "Spill not lowered");
1381 sim_fst(state, n, env);
1385 * Simulate a be_Reload.
1387 * @param state the x87 state
1388 * @param n the node that should be simulated (and patched)
1389 * @param env the architecture environment
1391 * Should not happen, reloads are lowered before the x87 simulator sees them.
/* Simulate a be_Reload: must never be reached, because Reloads are lowered
 * to fld operations before the simulator runs; the assert documents that
 * invariant, and the sim_fld() call is a best-effort fallback for
 * non-assert builds. */
1393 static void sim_Reload(x87_state *state, ir_node *n, const arch_env_t *env) {
1394 assert(0 && "Reload not lowered");
1395 sim_fld(state, n, env);
1399 * Simulate a be_Return.
1401 * @param state the x87 state
1402 * @param n the node that should be simulated (and patched)
1403 * @param env the architecture environment
1405 static void sim_Return(x87_state *state, ir_node *n, const arch_env_t *env) {
1406 int n_res = be_Return_get_n_rets(n);
1407 int i, n_float_res = 0;
/* count the floating point results: these (and only these) occupy
 * x87 stack slots at the return point */
1409 /* only floating point return values must reside on the stack */
1410 for (i = 0; i < n_res; ++i) {
1411 ir_node *res = get_irn_n(n, be_pos_Return_val + i);
1413 if (mode_is_float(get_irn_mode(res)))
/* the simulated stack depth must match the number of fp results */
1416 assert(x87_get_depth(state) == n_float_res);
/* no code is emitted here: the callee leaves the results in the fp
 * registers, we only clear the simulated state */
1418 /* pop them virtually */
1419 for (i = n_float_res - 1; i >= 0; --i)
1424 * Kill any dead registers at block start by popping them from the stack.
1426 * @param sim the simulator handle
1427 * @param block the current block
1428 * @param start_state the x87 state at the begin of the block
/**
 * Kill any dead registers at block start by popping them from the stack.
 *
 * @param sim          the simulator handle
 * @param block        the current block
 * @param start_state  the x87 state at the begin of the block
 *
 * @return the (possibly cloned) state after all dead values were popped
 */
1430 static x87_state *x87_kill_deads(x87_simulator *sim, ir_node *block, x87_state *start_state) {
1431 x87_state *state = start_state;
1432 ir_node *first_insn = sched_first(block);
1433 ir_node *keep = NULL;
1434 unsigned live = vfp_liveness_nodes_live_at(sim->env, block);
1436 int i, depth, num_pop;
/* build a bitmask of stack slots whose value is not live-in to this block */
1439 depth = x87_get_depth(state);
1440 for (i = depth - 1; i >= 0; --i) {
1441 int reg = x87_get_st_reg(state, i);
1443 if (! is_vfp_live(reg, live))
1444 kill_mask |= (1 << i);
/* NOTE(review): elided chunk — the early-return for kill_mask == 0 and
 * the kill_mask declaration are not visible here. */
1448 /* create a new state, will be changed */
1449 state = x87_clone_state(sim, state);
1451 DB((dbg, LEVEL_1, "Killing deads:\n"));
1452 DEBUG_ONLY(vfp_dump_live(live));
1453 DEBUG_ONLY(x87_dump_stack(state));
1455 /* now kill registers */
/* x87 can only pop st(0), so dead values deeper in the stack must first
 * be brought to the top with an fxch */
1457 /* we can only kill from TOS, so bring them up */
1458 if (! (kill_mask & 1)) {
1459 /* search from behind, because we can do a double-pop */
1460 for (i = depth - 1; i >= 0; --i) {
1461 if (kill_mask & (1 << i)) {
1462 kill_mask &= ~(1 << i);
/* mark slot i as dead (-1) and exchange it with st(0) */
1469 x87_set_st(state, -1, keep, i);
1470 keep = x87_create_fxch(state, first_insn, i, -1);
1473 keep = x87_get_st_node(state, 0);
/* pop one or two dead values at once (fucompp-style double pop) */
1475 if ((kill_mask & 3) == 3) {
1476 /* we can do a double-pop */
1480 /* only a single pop */
1485 kill_mask >>= num_pop;
1486 keep = x87_create_fpop(sim->env, state, first_insn, num_pop, keep);
/* keep the last created fp node alive so it is not removed as dead code */
1488 add_End_keepalive(get_irg_end(get_irn_irg(block)), keep);
1494 * Run a simulation and fix all virtual instructions for a block.
1496 * @param sim the simulator handle
1497 * @param block the current block
1499 * @return non-zero if simulation is complete,
1500 * zero if the simulation must be rerun
/**
 * Run a simulation and fix all virtual instructions for a block.
 *
 * @param sim    the simulator handle
 * @param block  the current block
 *
 * @return non-zero if simulation is complete,
 *         zero if the simulation must be rerun (no start state yet)
 */
1502 static int x87_simulate_block(x87_simulator *sim, ir_node *block) {
1504 blk_state *bl_state = x87_get_bl_state(sim, block);
1505 x87_state *state = bl_state->begin;
1506 const ir_edge_t *edge;
1507 ir_node *start_block;
1509 /* if we have no assigned start state, we must wait ... */
/* a block is only processed once: its end state must still be unset */
1513 assert(bl_state->end == NULL);
1515 DB((dbg, LEVEL_1, "Simulate %+F\n", block));
1517 /* at block begin, kill all dead registers */
1518 state = x87_kill_deads(sim, block, state);
/* walk the schedule; simulators may insert/patch nodes, so the successor
 * is fetched before the current node is simulated */
1520 /* beware, n might be changed */
1521 for (n = sched_first(block); !sched_is_end(n); n = next) {
1522 ir_op *op = get_irn_op(n);
1524 next = sched_next(n);
/* only opcodes registered by x87_init_simulator have a sim_func attached */
1525 if (op->ops.generic) {
1526 sim_func func = (sim_func)op->ops.generic;
1528 /* have work to do */
/* copy-on-write: never mutate the shared begin state of the block */
1529 if (state == bl_state->begin) {
1530 /* create a new state, will be changed */
1531 state = x87_clone_state(sim, state);
1535 (*func)(state, n, sim->env);
1539 start_block = get_irg_start_block(get_irn_irg(block));
1541 /* check if the state must be shuffled */
1542 foreach_block_succ(block, edge) {
1543 ir_node *succ = get_edge_src_irn(edge);
1544 blk_state *succ_state = x87_get_bl_state(sim, succ);
1546 if (succ_state->begin && succ != start_block) {
1547 /* There is already a begin state for this block, bad.
1548 Do the necessary permutations.
1549 Note that critical edges are removed, so this is always possible. */
1550 x87_shuffle(sim, block, state, succ, succ_state->begin);
1552 /* Note further, that there can be only one such situation,
1553 so we can break here. */
1557 bl_state->end = state;
1559 /* now propagate the state to all successor blocks */
1560 foreach_block_succ(block, edge) {
1561 ir_node *succ = get_edge_src_irn(edge);
1562 blk_state *succ_state = x87_get_bl_state(sim, succ);
/* a successor without a begin state inherits our end state */
1564 if (! succ_state->begin)
1565 succ_state->begin = state;
1572 * Create a new x87 simulator.
1574 * @param sim a simulator handle, will be initialized
1575 * @param irg the current graph
1576 * @param env the architecture environment
/**
 * Create a new x87 simulator: initializes the obstack and block-state map,
 * then registers a sim_func in the generic ops slot of every opcode that
 * must be simulated.
 *
 * @param sim  a simulator handle, will be initialized
 * @param irg  the current graph
 * @param env  the architecture environment
 */
1578 static void x87_init_simulator(x87_simulator *sim, ir_graph *irg, const arch_env_t *env) {
1579 obstack_init(&sim->obst);
1580 sim->blk_states = pmap_create();
1583 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.x87");
1585 DB((dbg, LEVEL_1, "--------------------------------\n"
1586 "x87 Simulator started for %+F\n", irg));
/* reset all opcodes first, so only the ASSOC'ed ones carry a sim_func */
1588 /* set the generic function pointer of instruction we must simulate */
1589 clear_irp_opcodes_generic_func();
/* helper macros: wire op_<X>, op_ia32_v<X>, op_be_<X> to sim_<X> */
1591 #define ASSOC(op) (op_ ## op)->ops.generic = (op_func)(sim_##op)
1592 #define ASSOC_IA32(op) (op_ia32_v ## op)->ops.generic = (op_func)(sim_##op)
1593 #define ASSOC_BE(op) (op_be_ ## op)->ops.generic = (op_func)(sim_##op)
1611 ASSOC_IA32(fCondJmp);
1624 * Destroy a x87 simulator.
1626 * @param sim the simulator handle
/**
 * Destroy an x87 simulator: frees the block-state map and all states
 * allocated on the simulator's obstack.
 *
 * @param sim  the simulator handle
 */
1628 static void x87_destroy_simulator(x87_simulator *sim) {
1629 pmap_destroy(sim->blk_states);
1630 obstack_free(&sim->obst, NULL);
1631 DB((dbg, LEVEL_1, "x87 Simulator stopped\n\n"));
1635 * Run a simulation and fix all virtual instructions for a graph.
1637 * @param env the architecture environment
1638 * @param irg the current graph
1639 * @param blk_list the block schedule list
1641 * Needs a block-schedule.
/**
 * Run a simulation and fix all virtual instructions for a graph.
 * Needs a block schedule; blocks are processed from a worklist and
 * re-queued until each one has received a begin state.
 *
 * @param env       the architecture environment
 * @param irg       the current graph
 * @param blk_list  the block schedule list
 */
1643 void x87_simulate_graph(const arch_env_t *env, ir_graph *irg, ir_node **blk_list) {
1644 ir_node *block, *start_block;
1646 blk_state *bl_state;
1650 /* we need liveness info for the current graph */
1653 /* create the simulator */
1654 x87_init_simulator(&sim, irg, env);
1656 start_block = get_irg_start_block(irg);
1657 bl_state = x87_get_bl_state(&sim, start_block);
/* the fp stack is empty on function entry */
1659 /* start with the empty state */
1660 bl_state->begin = empty;
1662 worklist = new_pdeq();
1664 /* create the worklist for the schedule. */
1665 for (i = 0; i < ARR_LEN(blk_list); ++i)
1666 pdeq_putr(worklist, blk_list[i]);
1670 block = pdeq_getl(worklist);
/* a block without a begin state cannot be simulated yet; re-queue it */
1671 if (! x87_simulate_block(&sim, block)) {
1672 pdeq_putr(worklist, block);
1675 } while (! pdeq_empty(worklist));
1678 x87_destroy_simulator(&sim);