2 * This file implements the x87 support and virtual to stack
3 * register translation for the ia32 backend.
5 * @author: Michael Beck
12 #endif /* HAVE_CONFIG_H */
19 #include "iredges_t.h"
28 #include "../belive_t.h"
29 #include "../besched.h"
30 #include "../benode_t.h"
31 #include "ia32_new_nodes.h"
32 #include "gen_ia32_new_nodes.h"
33 #include "gen_ia32_regalloc_if.h"
38 /* first and second binop index */
45 /* the store val index */
46 #define STORE_VAL_IDX 2
48 #define MASK_TOS(x) ((x) & (N_x87_REGS - 1))
50 /** the debug handle */
51 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
54 * An exchange template.
55 * Note that our virtual functions have the same inputs
56 * and attributes as the real ones, so we can simple exchange
58 * Further, x87 supports inverse instructions, so we can handle them.
60 typedef struct _exchange_tmpl {
61 ir_op *normal_op; /**< the normal one */
62 ir_op *reverse_op; /**< the reverse one if exists */
63 ir_op *normal_pop_op; /**< the normal one with tos pop */
64 ir_op *reverse_pop_op; /**< the reverse one with tos pop */
68 * An entry on the simulated x87 stack.
70 typedef struct _st_entry {
71 int reg_idx; /**< the virtual register index of this stack value */
72 ir_node *node; /**< the node that produced this value */
78 typedef struct _x87_state {
79 st_entry st[N_x87_REGS]; /**< the register stack */
80 int depth; /**< the current stack depth */
81 int tos; /**< position of the tos */
84 /** An empty state, used for blocks without fp instructions. */
85 static const x87_state _empty = { {0, NULL}, 0, 0 };
86 static x87_state *empty = (x87_state *)&_empty;
88 /** The type of an instruction simulator */
89 typedef void (*sim_func)(x87_state *state, ir_node *n, const arch_env_t *env);
92 * A block state: Every block has a x87 state at the beginning and at the end.
94 typedef struct _blk_state {
95 x87_state *begin; /**< state at the begin or NULL if not assigned */
96 x87_state *end; /**< state at the end or NULL if not assigned */
99 #define PTR_TO_BLKSTATE(p) ((blk_state *)(p))
104 typedef struct _x87_simulator {
105 struct obstack obst; /**< an obstack for fast allocating */
106 pmap *blk_states; /**< map blocks to states */
107 const arch_env_t *env; /**< architecture environment */
111 * Returns the stack depth.
113 * @param state the x87 state
115 * @return the x87 stack depth
117 static int x87_get_depth(const x87_state *state) {
122 * Check if the state is empty.
124 * @param state the x87 state
126 * returns non-zero if the x87 stack is empty
128 static int x87_state_is_empty(const x87_state *state) {
129 return state->depth == 0;
133 * Return the virtual register index at st(pos).
135 * @param state the x87 state
136 * @param pos a stack position
138 * @return the vfp register index that produced the value at st(pos)
140 static int x87_get_st_reg(const x87_state *state, int pos) {
141 assert(pos < state->depth);
142 return state->st[MASK_TOS(state->tos + pos)].reg_idx;
146 * Return the node at st(pos).
148 * @param state the x87 state
149 * @param pos a stack position
151 * @return the IR node that produced the value at st(pos)
153 static ir_node *x87_get_st_node(const x87_state *state, int pos) {
154 assert(pos < state->depth);
155 return state->st[MASK_TOS(state->tos + pos)].node;
160 * Dump the stack for debugging.
162 * @param state the x87 state
164 static void x87_dump_stack(const x87_state *state) {
167 for (i = state->depth - 1; i >= 0; --i) {
168 DB((dbg, LEVEL_2, "vf%d ", x87_get_st_reg(state, i)));
170 DB((dbg, LEVEL_2, "<-- TOS\n"));
172 #endif /* DEBUG_libfirm */
175 * Set a virtual register to st(pos).
177 * @param state the x87 state
178 * @param reg_idx the vfp register index that should be set
179 * @param node the IR node that produces the value of the vfp register
180 * @param pos the stack position where the new value should be entered
182 static void x87_set_st(x87_state *state, int reg_idx, ir_node *node, int pos) {
183 assert(0 < state->depth);
184 state->st[MASK_TOS(state->tos + pos)].reg_idx = reg_idx;
185 state->st[MASK_TOS(state->tos + pos)].node = node;
187 DB((dbg, LEVEL_2, "After SET_REG:\n ")); DEBUG_ONLY(x87_dump_stack(state));
191 * Set the tos virtual register.
193 * @param state the x87 state
194 * @param reg_idx the vfp register index that should be set
195 * @param node the IR node that produces the value of the vfp register
197 static void x87_set_tos(x87_state *state, int reg_idx, ir_node *node) {
198 x87_set_st(state, reg_idx, node, 0);
202 * Flush the x87 stack.
204 * @param state the x87 state
206 static void x87_flush(x87_state *state) {
212 * Swap st(0) with st(pos).
214 * @param state the x87 state
215 * @param pos the stack position to change the tos with
217 static void x87_fxch(x87_state *state, int pos) {
219 assert(pos < state->depth);
221 entry = state->st[MASK_TOS(state->tos + pos)];
222 state->st[MASK_TOS(state->tos + pos)] = state->st[MASK_TOS(state->tos)];
223 state->st[MASK_TOS(state->tos)] = entry;
225 DB((dbg, LEVEL_2, "After FXCH:\n ")); DEBUG_ONLY(x87_dump_stack(state));
229 * Convert a virtual register to the stack index.
231 * @param state the x87 state
232 * @param reg_idx the register vfp index
234 * @return the stack position where the register is stacked
235 * or -1 if the virtual register was not found
237 static int x87_on_stack(const x87_state *state, int reg_idx) {
238 int i, tos = state->tos;
240 for (i = 0; i < state->depth; ++i)
241 if (state->st[MASK_TOS(tos + i)].reg_idx == reg_idx)
247 * Push a virtual Register onto the stack.
249 * @param state the x87 state
250 * @param reg_idx the register vfp index
251 * @param node the node that produces the value of the vfp register
253 static void x87_push(x87_state *state, int reg_idx, ir_node *node) {
254 assert(x87_on_stack(state, reg_idx) == -1 && "double push");
255 assert(state->depth < N_x87_REGS && "stack overrun");
258 state->tos = MASK_TOS(state->tos - 1);
259 state->st[state->tos].reg_idx = reg_idx;
260 state->st[state->tos].node = node;
262 DB((dbg, LEVEL_2, "After PUSH:\n ")); DEBUG_ONLY(x87_dump_stack(state));
266 * Pop a virtual Register from the stack.
268 static void x87_pop(x87_state *state) {
269 assert(state->depth > 0 && "stack underrun");
272 state->tos = MASK_TOS(state->tos + 1);
274 DB((dbg, LEVEL_2, "After POP:\n ")); DEBUG_ONLY(x87_dump_stack(state));
278 * Returns the block state of a block.
280 * @param sim the x87 simulator handle
281 * @param block the current block
283 * @return the block state
285 static blk_state *x87_get_bl_state(x87_simulator *sim, ir_node *block) {
286 pmap_entry *entry = pmap_find(sim->blk_states, block);
289 blk_state *bl_state = obstack_alloc(&sim->obst, sizeof(*bl_state));
290 bl_state->begin = NULL;
291 bl_state->end = NULL;
293 pmap_insert(sim->blk_states, block, bl_state);
297 return entry ? PTR_TO_BLKSTATE(entry->value) : NULL;
301 * Creates a new x87 state.
303 * @param sim the x87 simulator handle
304 * @return a new x87 state
306 static x87_state *x87_alloc_state(x87_simulator *sim) {
307 x87_state *res = obstack_alloc(&sim->obst, sizeof(*res));
312 * Create a new empty x87 state.
314 * @param sim the x87 simulator handle
315 * @return a new empty x87 state
317 static x87_state *x87_alloc_empty_state(x87_simulator *sim) {
318 x87_state *res = x87_alloc_state(sim);
327 * @param sim the x87 simulator handle
328 * @param src the x87 state that will be cloned
330 * @return a cloned copy of the src state
332 static x87_state *x87_clone_state(x87_simulator *sim, const x87_state *src) {
333 x87_state *res = x87_alloc_state(sim);
335 memcpy(res, src, sizeof(*res));
340 * Patch a virtual instruction into a x87 one and return
343 * @param n the IR node to patch
344 * @param op the x87 opcode to patch in
346 static ir_node *x87_patch_insn(ir_node *n, ir_op *op) {
347 ir_mode *mode = get_irn_mode(n);
352 if (mode == mode_T) {
353 /* patch all Proj's */
354 const ir_edge_t *edge;
356 foreach_out_edge(n, edge) {
357 ir_node *proj = get_edge_src_irn(edge);
359 mode = get_irn_mode(proj);
360 if (mode_is_float(mode)) {
362 set_irn_mode(proj, mode_E);
367 else if (mode_is_float(mode))
368 set_irn_mode(n, mode_E);
372 /* -------------- x87 perm --------------- */
375 * Creates a fxch for shuffle.
377 * @param state the x87 state
378 * @param pos parameter for fxch
379 * @param dst_block the block of the user
381 * Creates a new fxch node and reroute the user of the old node
384 * @return the fxch node
386 static ir_node *x87_fxch_shuffle(x87_state *state, int pos, ir_node *block, ir_node *dst_block)
388 const ir_edge_t *edge;
389 ir_node *n = x87_get_st_node(state, pos);
390 ir_node *user = NULL;
395 if (block == get_nodes_block(n)) {
396 /* this is a node from out block: change it's user */
397 foreach_out_edge(n, edge) {
398 ir_node *succ = get_edge_src_irn(edge);
400 if (is_Phi(succ) && get_nodes_block(succ) == dst_block) {
402 node_idx = get_edge_src_pos(edge);
409 fxch = new_rd_ia32_fxch(NULL, get_irn_irg(block), block, n, get_irn_mode(n));
410 attr = get_ia32_attr(fxch);
411 attr->x87[0] = &ia32_st_regs[pos];
412 attr->x87[2] = &ia32_st_regs[0];
415 DB((dbg, LEVEL_2, "%+F replaced input %d of %+F\n", fxch, node_idx, user));
416 set_irn_n(user, node_idx, fxch);
420 * This is a node from a dominator block. Changing it's user might be wrong,
421 * so just keep it alive.
422 * The "right" solution would require a new Phi, but we don't care here.
427 x87_fxch(state, pos);
432 * Calculate the necessary permutations to reach dst_state.
434 * These permutations are done with fxch instructions and placed
435 * at the end of the block.
437 * Note that critical edges are removed here, so we need only
438 * a shuffle if the current block has only one successor.
440 * @param sim the simulator handle
441 * @param block the current block
442 * @param state the current x87 stack state, might be modified
443 * @param dst_block the destination block
444 * @param dst_state destination state
448 static x87_state *x87_shuffle(x87_simulator *sim, ir_node *block, x87_state *state, ir_node *dst_block, const x87_state *dst_state) {
449 int i, n_cycles, k, ri;
450 unsigned cycles[4], all_mask;
451 char cycle_idx[4][8];
453 ir_node *before, *after;
455 assert(state->depth == dst_state->depth);
457 /* Some mathematics here:
458 If we have a cycle of lenght n that includes the tos,
459 we need n-1 exchange operations.
460 We can always add the tos and restore it, so we need
461 n+1 exchange operations for a cycle not containing the tos.
462 So, the maximum of needed operations is for a cycle of 7
463 not including the tos == 8.
464 This is so same number of ops we would need for store,
465 so exchange is cheaper (we save the loads).
466 On the other hand, we might need an additional exchange
467 in the next block to bring one operand on top, so the
468 number of ops in the first case is identical.
469 Further, no more than 4 cycles can exists.
471 all_mask = (1 << (state->depth)) - 1;
473 for (n_cycles = 0; all_mask; ++n_cycles) {
474 int src_idx, dst_idx;
476 /* find the first free slot */
477 for (i = 0; i < state->depth; ++i) {
478 if (all_mask & (1 << i)) {
479 all_mask &= ~(1 << i);
481 /* check if there are differences here */
482 if (x87_get_st_reg(state, i) != x87_get_st_reg(dst_state, i))
488 /* no more cycles found */
493 cycles[n_cycles] = (1 << i);
494 cycle_idx[n_cycles][k++] = i;
495 for (src_idx = i; ; src_idx = dst_idx) {
496 dst_idx = x87_on_stack(dst_state, x87_get_st_reg(state, src_idx));
498 if ((all_mask & (1 << dst_idx)) == 0)
501 cycle_idx[n_cycles][k++] = dst_idx;
502 cycles[n_cycles] |= (1 << dst_idx);
503 all_mask &= ~(1 << dst_idx);
505 cycle_idx[n_cycles][k] = -1;
509 /* no permutation needed */
513 /* Hmm: permutation needed */
514 DB((dbg, LEVEL_2, "\n%+F needs permutation: from\n", block));
515 DEBUG_ONLY(x87_dump_stack(state));
516 DB((dbg, LEVEL_2, " to\n"));
517 DEBUG_ONLY(x87_dump_stack(dst_state));
521 DB((dbg, LEVEL_2, "Need %d cycles\n", n_cycles));
522 for (ri = 0; ri < n_cycles; ++ri) {
523 DB((dbg, LEVEL_2, " Ring %d:\n ", ri));
524 for (k = 0; cycle_idx[ri][k] != -1; ++k)
525 DB((dbg, LEVEL_2, " st%d ->", cycle_idx[ri][k]));
526 DB((dbg, LEVEL_2, "\n"));
533 * Find the place node must be insert.
534 * We have only one successor block, so the last instruction should
537 before = sched_last(block);
538 assert(is_cfop(before));
540 /* now do the permutations */
541 for (ri = 0; ri < n_cycles; ++ri) {
542 if ((cycles[ri] & 1) == 0) {
543 /* this cycle does not include the tos */
544 fxch = x87_fxch_shuffle(state, cycle_idx[ri][0], block, dst_block);
546 sched_add_after(after, fxch);
548 sched_add_before(before, fxch);
551 for (k = 1; cycle_idx[ri][k] != -1; ++k) {
552 fxch = x87_fxch_shuffle(state, cycle_idx[ri][k], block, dst_block);
554 sched_add_after(after, fxch);
556 sched_add_before(before, fxch);
559 if ((cycles[ri] & 1) == 0) {
560 /* this cycle does not include the tos */
561 fxch = x87_fxch_shuffle(state, cycle_idx[ri][0], block, dst_block);
562 sched_add_after(after, fxch);
569 * Create a fxch node before another node.
571 * @param state the x87 state
572 * @param n the node before the fxch
573 * @param pos exchange st(pos) with st(0)
574 * @param op_idx if >= 0, replace input op_idx of n with the fxch result
578 static ir_node *x87_create_fxch(x87_state *state, ir_node *n, int pos, int op_idx) {
579 ir_node *fxch, *pred;
582 x87_fxch(state, pos);
585 pred = get_irn_n(n, op_idx);
587 pred = x87_get_st_node(state, pos);
589 fxch = new_rd_ia32_fxch(NULL, get_irn_irg(n), get_nodes_block(n), pred, get_irn_mode(pred));
590 attr = get_ia32_attr(fxch);
591 attr->x87[0] = &ia32_st_regs[pos];
592 attr->x87[2] = &ia32_st_regs[0];
595 set_irn_n(n, op_idx, fxch);
597 sched_add_before(n, fxch);
598 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fxch), attr->x87[0]->name, attr->x87[2]->name));
603 * Create a fpush before node n.
605 * @param state the x87 state
606 * @param n the node before the fpush
607 * @param pos push st(pos) on stack
608 * @param op_idx if >= 0, replace input op_idx of n with the fpush result
610 static void x87_create_fpush(const arch_env_t *env, x87_state *state, ir_node *n, int pos, int op_idx) {
611 ir_node *fpush, *pred;
613 const arch_register_t *out = arch_get_irn_register(env, n);
615 x87_push(state, arch_register_get_index(out), n);
617 pred = get_irn_n(n, op_idx);
618 fpush = new_rd_ia32_fpush(NULL, get_irn_irg(n), get_nodes_block(n), pred, get_irn_mode(pred));
619 attr = get_ia32_attr(fpush);
620 attr->x87[0] = &ia32_st_regs[pos];
621 attr->x87[2] = &ia32_st_regs[0];
623 set_irn_n(n, op_idx, fpush);
625 sched_add_before(n, fpush);
626 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fpush), attr->x87[0]->name, attr->x87[2]->name));
630 * Create a fpop before node n.
632 * @param state the x87 state
633 * @param n the node before the fpop
634 * @param num pop 1 or 2 values
635 * @param pred node to use as predecessor of the fpop
637 * @return the fpop node
639 static ir_node *x87_create_fpop(const arch_env_t *env, x87_state *state, ir_node *n, int num, ir_node *pred) {
645 fpop = new_rd_ia32_fpop(NULL, get_irn_irg(n), get_nodes_block(n), pred, mode_E);
646 attr = get_ia32_attr(fpop);
647 attr->x87[0] = &ia32_st_regs[0];
648 attr->x87[1] = &ia32_st_regs[0];
649 attr->x87[2] = &ia32_st_regs[0];
651 sched_add_before(n, fpop);
652 DB((dbg, LEVEL_1, "<<< %s %s\n", get_irn_opname(fpop), attr->x87[0]->name));
660 /* --------------------------------- liveness ------------------------------------------ */
663 * The liveness transfer function.
664 * Updates a live set over a single step from a given node to its predecessor.
665 * Everything defined at the node is removed from the set, the uses of the node get inserted.
666 * @param arch_env The architecture environment.
667 * @param irn The node at which liveness should be computed.
668 * @param live The bitset of registers live before @p irn. This set gets modified by updating it to
669 * the registers live after irn.
672 static unsigned vfp_liveness_transfer(const arch_env_t *arch_env, ir_node *irn, unsigned live)
675 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
677 if (arch_irn_consider_in_reg_alloc(arch_env, cls, irn)) {
678 const arch_register_t *reg = arch_get_irn_register(arch_env, irn);
679 live &= ~(1 << reg->index);
682 for (i = 0, n = get_irn_arity(irn); i < n; ++i) {
683 ir_node *op = get_irn_n(irn, i);
685 if (mode_is_float(get_irn_mode(op)) && arch_irn_consider_in_reg_alloc(arch_env, cls, op)) {
686 const arch_register_t *reg = arch_get_irn_register(arch_env, op);
687 live |= 1 << reg->index;
695 * Put all live virtual registers at the end of a block into a bitset.
696 * @param arch_env The architecture environment.
697 * @param bl The block.
698 * @return The live bitset.
700 static unsigned vfp_liveness_end_of_block(const arch_env_t *arch_env, const ir_node *bl)
704 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
706 live_foreach(bl, li) {
707 ir_node *irn = (ir_node *) li->irn;
708 if (live_is_end(li) && arch_irn_consider_in_reg_alloc(arch_env, cls, irn)) {
709 const arch_register_t *reg = arch_get_irn_register(arch_env, irn);
710 live |= 1 << reg->index;
718 * Compute a bitset of registers which are live at another node.
719 * @param arch_env The architecture environment.
720 * @param pos The node.
721 * @return The live bitset.
723 static unsigned vfp_liveness_nodes_live_at(const arch_env_t *arch_env, const ir_node *pos)
725 const ir_node *bl = is_Block(pos) ? pos : get_nodes_block(pos);
726 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
730 live = vfp_liveness_end_of_block(arch_env, bl);
732 sched_foreach_reverse(bl, irn) {
734 * If we encounter the node we want to insert the Perm after,
735 * exit immediately, so that this node is still live
740 live = vfp_liveness_transfer(arch_env, irn, live);
747 * Returns true if a register is live in a set.
749 * @param reg_idx the vfp register index
750 * @param live a live bitset
752 static unsigned is_vfp_live(int reg_idx, unsigned live) {
753 return live & (1 << reg_idx);
758 * Dump liveness info.
760 * @param live the live bitset
762 static void vfp_dump_live(unsigned live) {
765 DB((dbg, LEVEL_2, "Live registers here: \n"));
766 for (i = 0; i < 8; ++i) {
767 if (live & (1 << i)) {
768 DB((dbg, LEVEL_2, " vf%d", i));
771 DB((dbg, LEVEL_2, "\n"));
773 #endif /* DEBUG_libfirm */
775 /* --------------------------------- simulators ---------------------------------------- */
777 #define XCHG(a, b) do { int t = (a); (a) = (b); (b) = t; } while (0)
780 * Simulate a virtual binop.
782 * @param state the x87 state
783 * @param n the node that should be simulated (and patched)
784 * @param env the architecture environment
785 * @param tmpl the template containing the 4 possible x87 opcodes
787 static void sim_binop(x87_state *state, ir_node *n, const arch_env_t *env, const exchange_tmpl *tmpl) {
788 int op2_idx, op1_idx = -1;
789 int out_idx, do_pop =0;
792 const arch_register_t *op1 = arch_get_irn_register(env, get_irn_n(n, BINOP_IDX_1));
793 const arch_register_t *op2 = arch_get_irn_register(env, get_irn_n(n, BINOP_IDX_2));
794 const arch_register_t *out = arch_get_irn_register(env, n);
795 unsigned live = vfp_liveness_nodes_live_at(env, n);
797 DB((dbg, LEVEL_1, ">>> %s %s, %s -> %s\n", get_irn_opname(n),
798 arch_register_get_name(op1), arch_register_get_name(op2),
799 arch_register_get_name(out)));
800 DEBUG_ONLY(vfp_dump_live(live));
802 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
803 op2_idx = x87_on_stack(state, arch_register_get_index(op2));
805 if (op2->index != REG_VFP_NOREG) {
806 /* second operand is a vfp register */
808 if (is_vfp_live(op2->index, live)) {
809 /* Second operand is live. */
811 if (is_vfp_live(op1->index, live)) {
812 /* Both operands are live: push the first one.
813 This works even for op1 == op2. */
814 x87_create_fpush(env, state, n, op2_idx, BINOP_IDX_2);
815 out_idx = op2_idx = 0;
817 dst = tmpl->normal_op;
821 /* Second live, first operand is dead here, bring it to tos. */
823 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
827 op1_idx = out_idx = 0;
828 dst = tmpl->normal_op;
833 /* Second operand is dead. */
834 if (is_vfp_live(op1->index, live)) {
835 /* First operand is live: bring second to tos. */
837 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
841 op2_idx = out_idx = 0;
842 dst = tmpl->normal_op;
846 /* Both operands are dead here, pop them from the stack. */
849 XCHG(op2_idx, op1_idx);
850 if (op1_idx == op2_idx) {
851 /* Both are identically, no pop needed. */
852 dst = tmpl->reverse_op;
856 dst = tmpl->reverse_pop_op;
860 else if (op1_idx == 0) {
862 if (op1_idx == op2_idx) {
863 /* Both are identically, no pop needed. */
864 dst = tmpl->normal_op;
868 dst = tmpl->normal_pop_op;
873 /* Bring the first on top. */
874 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
875 if (op1_idx == op2_idx) {
876 /* Both are identically, no pop needed. */
877 out_idx = op1_idx = op2_idx = 0;
878 dst = tmpl->normal_op;
884 dst = tmpl->normal_pop_op;
892 /* second operand is an address mode */
893 if (is_vfp_live(op1->index, live)) {
894 /* first operand is live: push it here */
895 x87_create_fpush(env, state, n, op1_idx, BINOP_IDX_1);
898 /* first operand is dead: bring it to tos */
900 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
902 op1_idx = out_idx = 0;
903 dst = tmpl->normal_op;
907 x87_set_st(state, arch_register_get_index(out), x87_patch_insn(n, dst), out_idx);
911 /* patch the operation */
912 attr = get_ia32_attr(n);
913 attr->x87[0] = op1 = &ia32_st_regs[op1_idx];
915 attr->x87[1] = op2 = &ia32_st_regs[op2_idx];
916 attr->x87[2] = out = &ia32_st_regs[out_idx];
919 DB((dbg, LEVEL_1, "<<< %s %s, %s -> %s\n", get_irn_opname(n),
920 arch_register_get_name(op1), arch_register_get_name(op2),
921 arch_register_get_name(out)));
923 DB((dbg, LEVEL_1, "<<< %s %s, [AM] -> %s\n", get_irn_opname(n),
924 arch_register_get_name(op1),
925 arch_register_get_name(out)));
929 * Simulate a virtual Unop.
931 * @param state the x87 state
932 * @param n the node that should be simulated (and patched)
933 * @param env the architecture environment
934 * @param op the x87 opcode that will replace n's opcode
936 static void sim_unop(x87_state *state, ir_node *n, const arch_env_t *env, ir_op *op) {
937 int op1_idx, out_idx;
938 const arch_register_t *op1 = arch_get_irn_register(env, get_irn_n(n, UNOP_IDX));
939 const arch_register_t *out = arch_get_irn_register(env, n);
941 unsigned live = vfp_liveness_nodes_live_at(env, n);
943 DB((dbg, LEVEL_1, ">>> %s -> %s\n", get_irn_opname(n), out->name));
944 DEBUG_ONLY(vfp_dump_live(live));
946 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
948 if (is_vfp_live(op1->index, live)) {
949 /* push the operand here */
950 x87_create_fpush(env, state, n, op1_idx, UNOP_IDX);
953 /* operand is dead, bring it to tos */
955 x87_create_fxch(state, n, op1_idx, UNOP_IDX);
958 x87_set_tos(state, arch_register_get_index(out), x87_patch_insn(n, op));
959 op1_idx = out_idx = 0;
960 attr = get_ia32_attr(n);
961 attr->x87[0] = op1 = &ia32_st_regs[0];
962 attr->x87[2] = out = &ia32_st_regs[0];
963 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), out->name));
967 * Simulate a virtual Load instruction.
969 * @param state the x87 state
970 * @param n the node that should be simulated (and patched)
971 * @param env the architecture environment
972 * @param op the x87 opcode that will replace n's opcode
974 static void sim_load(x87_state *state, ir_node *n, const arch_env_t *env, ir_op *op) {
975 const arch_register_t *out = arch_get_irn_register(env, n);
978 DB((dbg, LEVEL_1, ">>> %s -> %s\n", get_irn_opname(n), arch_register_get_name(out)));
979 x87_push(state, arch_register_get_index(out), x87_patch_insn(n, op));
980 attr = get_ia32_attr(n);
981 attr->x87[2] = out = &ia32_st_regs[0];
982 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), arch_register_get_name(out)));
986 * Simulate a virtual Store.
988 * @param state the x87 state
989 * @param n the node that should be simulated (and patched)
990 * @param env the architecture environment
991 * @param op the x87 store opcode
992 * @param op_p the x87 store and pop opcode
994 static void sim_store(x87_state *state, ir_node *n, const arch_env_t *env, ir_op *op, ir_op *op_p) {
996 const arch_register_t *op2 = arch_get_irn_register(env, get_irn_n(n, STORE_VAL_IDX));
998 unsigned live = vfp_liveness_nodes_live_at(env, n);
1000 op2_idx = x87_on_stack(state, arch_register_get_index(op2));
1001 assert(op2_idx >= 0);
1003 DB((dbg, LEVEL_1, ">>> %s %s ->\n", get_irn_opname(n), arch_register_get_name(op2)));
1005 /* we can only store the tos to memory */
1007 x87_create_fxch(state, n, op2_idx, STORE_VAL_IDX);
1009 if (is_vfp_live(op2->index, live))
1010 x87_patch_insn(n, op);
1013 x87_patch_insn(n, op_p);
1016 attr = get_ia32_attr(n);
1017 attr->x87[1] = op2 = &ia32_st_regs[0];
1018 DB((dbg, LEVEL_1, "<<< %s %s ->\n", get_irn_opname(n), arch_register_get_name(op2)));
1022 * Simulate a virtual Phi.
1023 * Just for cosmetic reasons change the mode of Phi nodes to mode_E.
1025 * @param state the x87 state
1026 * @param n the node that should be simulated (and patched)
1027 * @param env the architecture environment
1029 static void sim_Phi(x87_state *state, ir_node *n, const arch_env_t *env) {
1030 ir_mode *mode = get_irn_mode(n);
1032 if (mode_is_float(mode))
1033 set_irn_mode(n, mode_E);
1037 #define _GEN_BINOP(op, rev) \
1038 static void sim_##op(x87_state *state, ir_node *n, const arch_env_t *env) { \
1039 exchange_tmpl tmpl = { op_ia32_##op, op_ia32_##rev, op_ia32_##op##p, op_ia32_##rev##p }; \
1040 sim_binop(state, n, env, &tmpl); \
1043 #define GEN_BINOP(op) _GEN_BINOP(op, op)
1044 #define GEN_BINOPR(op) _GEN_BINOP(op, op##r)
1046 #define GEN_LOAD2(op, nop) \
1047 static void sim_##op(x87_state *state, ir_node *n, const arch_env_t *env) { \
1048 sim_load(state, n, env, op_ia32_##nop); \
1051 #define GEN_LOAD(op) GEN_LOAD2(op, op)
1053 #define GEN_UNOP(op) \
1054 static void sim_##op(x87_state *state, ir_node *n, const arch_env_t *env) { \
1055 sim_unop(state, n, env, op_ia32_##op); \
1058 #define GEN_STORE(op) \
1059 static void sim_##op(x87_state *state, ir_node *n, const arch_env_t *env) { \
1060 sim_store(state, n, env, op_ia32_##op, op_ia32_##op##p); \
1079 GEN_LOAD2(fConst, fldConst)
1085 * Simulate a fCondJmp.
1087 * @param state the x87 state
1088 * @param n the node that should be simulated (and patched)
1089 * @param env the architecture environment
1091 static void sim_fCondJmp(x87_state *state, ir_node *n, const arch_env_t *env) {
1092 int op2_idx, op1_idx = -1, pop_cnt = 0;
1095 const arch_register_t *op1 = arch_get_irn_register(env, get_irn_n(n, BINOP_IDX_1));
1096 const arch_register_t *op2 = arch_get_irn_register(env, get_irn_n(n, BINOP_IDX_2));
1097 unsigned live = vfp_liveness_nodes_live_at(env, n);
1099 DB((dbg, LEVEL_1, ">>> %s %s, %s\n", get_irn_opname(n),
1100 arch_register_get_name(op1), arch_register_get_name(op2)));
1101 DEBUG_ONLY(vfp_dump_live(live));
1103 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1104 op2_idx = x87_on_stack(state, arch_register_get_index(op2));
1106 /* BEWARE: check for comp a,a cases, they might happen */
1107 if (op2->index != REG_VFP_NOREG) {
1108 /* second operand is a vfp register */
1110 if (is_vfp_live(op2->index, live)) {
1111 /* second operand is live */
1113 if (is_vfp_live(op1->index, live)) {
1114 /* both operands are live: move one of them to tos */
1116 XCHG(op2_idx, op1_idx);
1117 dst = op_ia32_fcomrJmp;
1119 else if (op1_idx == 0) {
1120 dst = op_ia32_fcomJmp;
1123 /* bring the first on top */
1124 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1125 if (op1_idx == op2_idx)
1128 dst = op_ia32_fcomJmp;
1132 /* second live, first operand is dead here, bring it to tos.
1133 This means further, op1_idx != op2_idx. */
1135 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1140 dst = op_ia32_fcompJmp;
1145 /* second operand is dead */
1146 if (is_vfp_live(op1->index, live)) {
1147 /* first operand is live: bring second to tos.
1148 This means further, op1_idx != op2_idx. */
1150 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1155 dst = op_ia32_fcomrpJmp;
1159 /* both operands are dead here, check first for identity. */
1160 if (op1_idx == op2_idx) {
1161 /* identically, one one needed */
1163 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1164 op1_idx = op2_idx = 0;
1166 dst = op_ia32_fcompJmp;
1169 /* different, move them to st and st(1) and pop both.
1170 The tricky part is to get one into st(1).*/
1171 else if (op2_idx == 1) {
1172 /* good, second operand is already in the right place, move the first */
1174 /* bring the first on top */
1175 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1178 dst = op_ia32_fcomppJmp;
1181 else if (op1_idx == 1) {
1182 /* good, first operand is already in the right place, move the second */
1184 /* bring the first on top */
1185 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1188 dst = op_ia32_fcomrppJmp;
1192 /* if one is already the TOS, we need two fxch */
1194 /* first one is TOS, move to st(1) */
1195 x87_create_fxch(state, n, 1, BINOP_IDX_1);
1197 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1199 dst = op_ia32_fcomrppJmp;
1202 else if (op2_idx == 0) {
1203 /* second one is TOS, move to st(1) */
1204 x87_create_fxch(state, n, 1, BINOP_IDX_2);
1206 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1208 dst = op_ia32_fcomrppJmp;
1212 /* none of them is either TOS or st(1), 3 fxch needed */
1213 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1214 x87_create_fxch(state, n, 1, BINOP_IDX_2);
1215 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1218 dst = op_ia32_fcomppJmp;
1226 /* second operand is an address mode */
1227 if (is_vfp_live(op1->index, live)) {
1228 /* first operand is live: bring it to TOS */
1230 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1233 dst = op_ia32_fcomJmp;
1236 /* first operand is dead: bring it to tos */
1238 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1242 dst = op_ia32_fcompJmp;
1246 x87_patch_insn(n, dst);
1252 /* patch the operation */
1253 attr = get_ia32_attr(n);
1254 attr->x87[0] = op1 = &ia32_st_regs[op1_idx];
1256 attr->x87[1] = op2 = &ia32_st_regs[op2_idx];
1259 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(n),
1260 arch_register_get_name(op1), arch_register_get_name(op2)));
1262 DB((dbg, LEVEL_1, "<<< %s %s, [AM]\n", get_irn_opname(n),
1263 arch_register_get_name(op1)));
1267 * Simulate a be_Copy.
1269 * @param state the x87 state
1270 * @param n the node that should be simulated (and patched)
1271 * @param env the architecture environment
1273 static void sim_Copy(x87_state *state, ir_node *n, const arch_env_t *env) {
1274 ir_mode *mode = get_irn_mode(n);
1276 if (mode_is_float(mode)) {
1277 const arch_register_t *op1 = arch_get_irn_register(env, get_irn_n(n, 0));
1278 const arch_register_t *out = arch_get_irn_register(env, n);
1279 ir_node *node, *next;
1281 int op1_idx, out_idx;
1282 unsigned live = vfp_liveness_nodes_live_at(env, n);
1284 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1286 DB((dbg, LEVEL_1, ">>> %s %s -> %s\n", get_irn_opname(n),
1287 arch_register_get_name(op1), arch_register_get_name(out)));
1288 DEBUG_ONLY(vfp_dump_live(live));
1290 if (is_vfp_live(op1->index, live)) {
1291 /* operand is still live,a real copy */
1292 node = new_rd_ia32_fpush(get_irn_dbg_info(n), get_irn_irg(n), get_nodes_block(n), get_irn_n(n, 0), mode);
1293 arch_set_irn_register(env, node, out);
1295 x87_push(state, arch_register_get_index(out), node);
1297 attr = get_ia32_attr(node);
1298 attr->x87[0] = op1 = &ia32_st_regs[op1_idx];
1299 attr->x87[2] = out = &ia32_st_regs[0];
1301 next = sched_next(n);
1304 sched_add_before(next, node);
1305 DB((dbg, LEVEL_1, ">>> %s %s -> %s\n", get_irn_opname(node), op1->name, out->name));
1308 out_idx = x87_on_stack(state, arch_register_get_index(out));
1310 if (out_idx >= 0 && out_idx != op1_idx) {
1311 /* op1 must be killed and placed where out is */
1313 /* best case, simple remove and rename */
1314 x87_patch_insn(n, op_ia32_Pop);
1315 attr = get_ia32_attr(n);
1316 attr->x87[0] = op1 = &ia32_st_regs[0];
1319 x87_set_st(state, arch_register_get_index(out), n, op1_idx - 1);
1322 /* move op1 to tos, store and pop it */
1324 x87_create_fxch(state, n, op1_idx, 0);
1327 x87_patch_insn(n, op_ia32_Pop);
1328 attr = get_ia32_attr(n);
1329 attr->x87[0] = op1 = &ia32_st_regs[out_idx];
1332 x87_set_st(state, arch_register_get_index(out), n, out_idx - 1);
1334 DB((dbg, LEVEL_1, ">>> %s %s\n", get_irn_opname(n), op1->name));
1337 /* just a virtual copy */
1338 x87_set_st(state, arch_register_get_index(out), get_unop_op(n), op1_idx);
1340 DB((dbg, LEVEL_1, ">>> KILLED %s\n", get_irn_opname(n)));
1341 exchange(n, get_unop_op(n));
1348 * Simulate a be_Call.
1350 * @param state the x87 state
1351 * @param n the node that should be simulated
1352 * @param env the architecture environment
1354 static void sim_Call(x87_state *state, ir_node *n, const arch_env_t *env) {
1355 ir_type *call_tp = be_Call_get_type(n);
1357 /* at the begin of a call the x87 state should be empty */
1358 assert(state->depth == 0 && "stack not empty before call");
1361 * If the called function returns a float, it is returned in st(0).
1362 * This even happens if the return value is NOT used.
1363 * Moreover, only one return result is supported.
1365 if (get_method_n_ress(call_tp) > 0) {
1366 ir_type *res_type = get_method_res_type(call_tp, 0);
1367 ir_mode *mode = get_type_mode(res_type);
1369 if (mode && mode_is_float(mode)) {
1371 * TODO: what to push here? The result might be unused and currently
1372 * we have no possibility to detect this :-(
1374 x87_push(state, 0, n);
1380 * Simulate a be_Spill.
1382 * @param state the x87 state
1383 * @param n the node that should be simulated (and patched)
1384 * @param env the architecture environment
1386 * Should not happen, spills are lowered before x87 simulator see them.
1388 static void sim_Spill(x87_state *state, ir_node *n, const arch_env_t *env) {
1389 assert(0 && "Spill not lowered");
1390 sim_fst(state, n, env);
1394 * Simulate a be_Reload.
1396 * @param state the x87 state
1397 * @param n the node that should be simulated (and patched)
1398 * @param env the architecture environment
1400 * Should not happen, reloads are lowered before x87 simulator see them.
1402 static void sim_Reload(x87_state *state, ir_node *n, const arch_env_t *env) {
1403 assert(0 && "Reload not lowered");
1404 sim_fld(state, n, env);
1408 * Simulate a be_Return.
1410 * @param state the x87 state
1411 * @param n the node that should be simulated (and patched)
1412 * @param env the architecture environment
1414 static void sim_Return(x87_state *state, ir_node *n, const arch_env_t *env) {
1415 int n_res = be_Return_get_n_rets(n);
1416 int i, n_float_res = 0;
1418 /* only floating point return values must resist on stack */
1419 for (i = 0; i < n_res; ++i) {
1420 ir_node *res = get_irn_n(n, be_pos_Return_val + i);
1422 if (mode_is_float(get_irn_mode(res)))
1425 assert(x87_get_depth(state) == n_float_res);
1427 /* pop them virtually */
1428 for (i = n_float_res - 1; i >= 0; --i)
1433 * Kill any dead registers at block start by popping them from the stack.
1435 * @param sim the simulator handle
1436 * @param block the current block
1437 * @param start_state the x87 state at the begin of the block
1439 static x87_state *x87_kill_deads(x87_simulator *sim, ir_node *block, x87_state *start_state) {
1440 x87_state *state = start_state;
1441 ir_node *first_insn = sched_first(block);
1442 ir_node *keep = NULL;
1443 unsigned live = vfp_liveness_nodes_live_at(sim->env, block);
1445 int i, depth, num_pop;
1448 depth = x87_get_depth(state);
1449 for (i = depth - 1; i >= 0; --i) {
1450 int reg = x87_get_st_reg(state, i);
1452 if (! is_vfp_live(reg, live))
1453 kill_mask |= (1 << i);
1457 /* create a new state, will be changed */
1458 state = x87_clone_state(sim, state);
1460 DB((dbg, LEVEL_1, "Killing deads:\n"));
1461 DEBUG_ONLY(vfp_dump_live(live));
1462 DEBUG_ONLY(x87_dump_stack(state));
1464 /* now kill registers */
1466 /* we can only kill from TOS, so bring them up */
1467 if (! (kill_mask & 1)) {
1468 /* search from behind, because we can to a double-pop */
1469 for (i = depth - 1; i >= 0; --i) {
1470 if (kill_mask & (1 << i)) {
1471 kill_mask &= ~(1 << i);
1478 x87_set_st(state, -1, keep, i);
1479 keep = x87_create_fxch(state, first_insn, i, -1);
1482 keep = x87_get_st_node(state, 0);
1484 if ((kill_mask & 3) == 3) {
1485 /* we can do a double-pop */
1489 /* only a single pop */
1494 kill_mask >>= num_pop;
1495 keep = x87_create_fpop(sim->env, state, first_insn, num_pop, keep);
1497 add_End_keepalive(get_irg_end(get_irn_irg(block)), keep);
1503 * Run a simulation and fix all virtual instructions for a block.
1505 * @param sim the simulator handle
1506 * @param block the current block
1508 * @return non-zero if simulation is complete,
1509 * zero if the simulation must be rerun
1511 static int x87_simulate_block(x87_simulator *sim, ir_node *block) {
1513 blk_state *bl_state = x87_get_bl_state(sim, block);
1514 x87_state *state = bl_state->begin;
1515 const ir_edge_t *edge;
1516 ir_node *start_block;
1518 /* if we have no assigned start state, we must wait ... */
1522 assert(bl_state->end == NULL);
1524 DB((dbg, LEVEL_1, "Simulate %+F\n", block));
1526 /* at block begin, kill all dead registers */
1527 state = x87_kill_deads(sim, block, state);
1529 /* beware, n might changed */
1530 for (n = sched_first(block); !sched_is_end(n); n = next) {
1531 ir_op *op = get_irn_op(n);
1533 next = sched_next(n);
1534 if (op->ops.generic) {
1535 sim_func func = (sim_func)op->ops.generic;
1537 /* have work to do */
1538 if (state == bl_state->begin) {
1539 /* create a new state, will be changed */
1540 state = x87_clone_state(sim, state);
1544 (*func)(state, n, sim->env);
1548 start_block = get_irg_start_block(get_irn_irg(block));
1550 /* check if the state must be shuffled */
1551 foreach_block_succ(block, edge) {
1552 ir_node *succ = get_edge_src_irn(edge);
1553 blk_state *succ_state = x87_get_bl_state(sim, succ);
1555 if (succ_state->begin && succ != start_block) {
1556 /* There is already a begin state for this block, bad.
1557 Do the necessary permutations.
1558 Note that critical edges are removed, so this is always possible. */
1559 x87_shuffle(sim, block, state, succ, succ_state->begin);
1561 /* Note further, that there can be only one such situation,
1562 so we can break here. */
1566 bl_state->end = state;
1568 /* now propagate the state to all successor blocks */
1569 foreach_block_succ(block, edge) {
1570 ir_node *succ = get_edge_src_irn(edge);
1571 blk_state *succ_state = x87_get_bl_state(sim, succ);
1573 if (! succ_state->begin)
1574 succ_state->begin = state;
1581 * Create a new x87 simulator.
1583 * @param sim a simulator handle, will be initialized
1584 * @param irg the current graph
1585 * @param env the architecture environment
1587 static void x87_init_simulator(x87_simulator *sim, ir_graph *irg, const arch_env_t *env) {
1588 obstack_init(&sim->obst);
1589 sim->blk_states = pmap_create();
1592 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.x87");
1594 DB((dbg, LEVEL_1, "--------------------------------\n"
1595 "x87 Simulator started for %+F\n", irg));
1597 /* set the generic function pointer of instruction we must simulate */
1598 clear_irp_opcodes_generic_func();
1600 #define ASSOC(op) (op_ ## op)->ops.generic = (op_func)(sim_##op)
1601 #define ASSOC_IA32(op) (op_ia32_v ## op)->ops.generic = (op_func)(sim_##op)
1602 #define ASSOC_BE(op) (op_be_ ## op)->ops.generic = (op_func)(sim_##op)
1620 ASSOC_IA32(fCondJmp);
1633 * Destroy a x87 simulator.
1635 * @param sim the simulator handle
1637 static void x87_destroy_simulator(x87_simulator *sim) {
1638 pmap_destroy(sim->blk_states);
1639 obstack_free(&sim->obst, NULL);
1640 DB((dbg, LEVEL_1, "x87 Simulator stopped\n\n"));
1644 * Run a simulation and fix all virtual instructions for a graph.
1646 * @param env the architecture environment
1647 * @param irg the current graph
1648 * @param blk_list the block schedule list
1650 * Needs a block-schedule.
1652 void x87_simulate_graph(const arch_env_t *env, ir_graph *irg, ir_node **blk_list) {
1653 ir_node *block, *start_block;
1655 blk_state *bl_state;
1659 /* we need liveness info for the current graph */
1662 /* create the simulator */
1663 x87_init_simulator(&sim, irg, env);
1665 start_block = get_irg_start_block(irg);
1666 bl_state = x87_get_bl_state(&sim, start_block);
1668 /* start with the empty state */
1669 bl_state->begin = empty;
1671 worklist = new_pdeq();
1673 /* create the worklist for the schedule. */
1674 for (i = 0; i < ARR_LEN(blk_list); ++i)
1675 pdeq_putr(worklist, blk_list[i]);
1679 block = pdeq_getl(worklist);
1680 if (! x87_simulate_block(&sim, block)) {
1681 pdeq_putr(worklist, block);
1684 } while (! pdeq_empty(worklist));
1687 x87_destroy_simulator(&sim);