2 * This file implements the x87 support and virtual to stack
3 * register translation for the ia32 backend.
5 * @author: Michael Beck
18 #include "iredges_t.h"
27 #include "../belive_t.h"
28 #include "../besched_t.h"
29 #include "../benode_t.h"
30 #include "ia32_new_nodes.h"
31 #include "gen_ia32_new_nodes.h"
32 #include "gen_ia32_regalloc_if.h"
37 /* first and second binop index */
44 /* the store val index */
45 #define STORE_VAL_IDX 2
47 #define MASK_TOS(x) ((x) & (N_x87_REGS - 1))
49 /** the debug handle */
50 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
52 /* Forward declaration. */
53 typedef struct _x87_simulator x87_simulator;
56 * An exchange template.
57 * Note that our virtual functions have the same inputs
58 * and attributes as the real ones, so we can simply exchange
60 * Further, x87 supports inverse instructions, so we can handle them.
62 typedef struct _exchange_tmpl {
63 ir_op *normal_op; /**< the normal one */
64 ir_op *reverse_op; /**< the reverse one if exists */
65 ir_op *normal_pop_op; /**< the normal one with tos pop */
66 ir_op *reverse_pop_op; /**< the reverse one with tos pop */
70 * An entry on the simulated x87 stack.
72 typedef struct _st_entry {
73 int reg_idx; /**< the virtual register index of this stack value */
74 ir_node *node; /**< the node that produced this value */
80 typedef struct _x87_state {
81 st_entry st[N_x87_REGS]; /**< the register stack */
82 int depth; /**< the current stack depth */
83 int tos; /**< position of the tos */
84 x87_simulator *sim; /**< The simulator. */
87 /** An empty state, used for blocks without fp instructions. */
88 static x87_state _empty = { { {0, NULL}, }, 0, 0 };
89 static x87_state *empty = (x87_state *)&_empty;
91 /** The type of an instruction simulator function. */
92 typedef int (*sim_func)(x87_state *state, ir_node *n);
95 * A block state: Every block has a x87 state at the beginning and at the end.
97 typedef struct _blk_state {
98 x87_state *begin; /**< state at the begin or NULL if not assigned */
99 x87_state *end; /**< state at the end or NULL if not assigned */
102 #define PTR_TO_BLKSTATE(p) ((blk_state *)(p))
104 /** liveness bitset for vfp registers. */
105 typedef unsigned char vfp_liveness;
110 struct _x87_simulator {
111 struct obstack obst; /**< An obstack for fast allocating. */
112 pmap *blk_states; /**< Map blocks to states. */
113 const arch_env_t *env; /**< The architecture environment. */
114 be_lv_t *lv; /**< intrablock liveness. */
115 vfp_liveness *live; /**< Liveness information. */
116 unsigned n_idx; /**< The cached get_irg_last_idx() result. */
117 waitq *worklist; /**< list of blocks to process. */
121 * Returns the current stack depth.
123 * @param state the x87 state
125 * @return the x87 stack depth
127 static int x87_get_depth(const x87_state *state) {
133 * Check if the state is empty.
135 * @param state the x87 state
137 * @return non-zero if the x87 stack is empty
139 static int x87_state_is_empty(const x87_state *state) {
140 return state->depth == 0;
145 * Return the virtual register index at st(pos).
147 * @param state the x87 state
148 * @param pos a stack position
150 * @return the vfp register index that produced the value at st(pos)
152 static int x87_get_st_reg(const x87_state *state, int pos) {
153 assert(pos < state->depth);
154 return state->st[MASK_TOS(state->tos + pos)].reg_idx;
158 * Return the node at st(pos).
160 * @param state the x87 state
161 * @param pos a stack position
163 * @return the IR node that produced the value at st(pos)
165 static ir_node *x87_get_st_node(const x87_state *state, int pos) {
166 assert(pos < state->depth);
167 return state->st[MASK_TOS(state->tos + pos)].node;
168 } /* x87_get_st_node */
172 * Dump the stack for debugging.
174 * @param state the x87 state
176 static void x87_dump_stack(const x87_state *state) {
179 for (i = state->depth - 1; i >= 0; --i) {
180 DB((dbg, LEVEL_2, "vf%d(%+F) ", x87_get_st_reg(state, i),
181 x87_get_st_node(state, i)));
183 DB((dbg, LEVEL_2, "<-- TOS\n"));
184 } /* x87_dump_stack */
185 #endif /* DEBUG_libfirm */
188 * Set a virtual register to st(pos).
190 * @param state the x87 state
191 * @param reg_idx the vfp register index that should be set
192 * @param node the IR node that produces the value of the vfp register
193 * @param pos the stack position where the new value should be entered
195 static void x87_set_st(x87_state *state, int reg_idx, ir_node *node, int pos) {
196 assert(0 < state->depth);
197 state->st[MASK_TOS(state->tos + pos)].reg_idx = reg_idx;
198 state->st[MASK_TOS(state->tos + pos)].node = node;
200 DB((dbg, LEVEL_2, "After SET_REG: "));
201 DEBUG_ONLY(x87_dump_stack(state));
205 * Set the tos virtual register.
207 * @param state the x87 state
208 * @param reg_idx the vfp register index that should be set
209 * @param node the IR node that produces the value of the vfp register
211 static void x87_set_tos(x87_state *state, int reg_idx, ir_node *node) {
212 x87_set_st(state, reg_idx, node, 0);
217 * Flush the x87 stack.
219 * @param state the x87 state
221 static void x87_flush(x87_state *state) {
228 * Swap st(0) with st(pos).
230 * @param state the x87 state
231 * @param pos the stack position to change the tos with
233 static void x87_fxch(x87_state *state, int pos) {
235 assert(pos < state->depth);
237 entry = state->st[MASK_TOS(state->tos + pos)];
238 state->st[MASK_TOS(state->tos + pos)] = state->st[MASK_TOS(state->tos)];
239 state->st[MASK_TOS(state->tos)] = entry;
241 DB((dbg, LEVEL_2, "After FXCH: ")); DEBUG_ONLY(x87_dump_stack(state));
245 * Convert a virtual register to the stack index.
247 * @param state the x87 state
248 * @param reg_idx the register vfp index
250 * @return the stack position where the register is stacked
251 * or -1 if the virtual register was not found
253 static int x87_on_stack(const x87_state *state, int reg_idx) {
254 int i, tos = state->tos;
256 for (i = 0; i < state->depth; ++i)
257 if (state->st[MASK_TOS(tos + i)].reg_idx == reg_idx)
263 * Push a virtual Register onto the stack, double pushes are allowed.
265 * @param state the x87 state
266 * @param reg_idx the register vfp index
267 * @param node the node that produces the value of the vfp register
269 static void x87_push_dbl(x87_state *state, int reg_idx, ir_node *node) {
270 assert(state->depth < N_x87_REGS && "stack overrun");
273 state->tos = MASK_TOS(state->tos - 1);
274 state->st[state->tos].reg_idx = reg_idx;
275 state->st[state->tos].node = node;
277 DB((dbg, LEVEL_2, "After PUSH: ")); DEBUG_ONLY(x87_dump_stack(state));
281 * Push a virtual Register onto the stack, double pushes are NOT allowed.
283 * @param state the x87 state
284 * @param reg_idx the register vfp index
285 * @param node the node that produces the value of the vfp register
288 static void x87_push(x87_state *state, int reg_idx, ir_node *node) {
289 assert(x87_on_stack(state, reg_idx) == -1 && "double push");
291 x87_push_dbl(state, reg_idx, node);
295 * Pop a virtual Register from the stack.
297 static void x87_pop(x87_state *state) {
298 assert(state->depth > 0 && "stack underrun");
301 state->tos = MASK_TOS(state->tos + 1);
303 DB((dbg, LEVEL_2, "After POP: ")); DEBUG_ONLY(x87_dump_stack(state));
307 * Returns the block state of a block.
309 * @param sim the x87 simulator handle
310 * @param block the current block
312 * @return the block state
314 static blk_state *x87_get_bl_state(x87_simulator *sim, ir_node *block) {
315 pmap_entry *entry = pmap_find(sim->blk_states, block);
318 blk_state *bl_state = obstack_alloc(&sim->obst, sizeof(*bl_state));
319 bl_state->begin = NULL;
320 bl_state->end = NULL;
322 pmap_insert(sim->blk_states, block, bl_state);
326 return PTR_TO_BLKSTATE(entry->value);
327 } /* x87_get_bl_state */
330 * Creates a new x87 state.
332 * @param sim the x87 simulator handle
334 * @return a new x87 state
336 static x87_state *x87_alloc_state(x87_simulator *sim) {
337 x87_state *res = obstack_alloc(&sim->obst, sizeof(*res));
341 } /* x87_alloc_state */
345 * Create a new empty x87 state.
347 * @param sim the x87 simulator handle
349 * @return a new empty x87 state
351 static x87_state *x87_alloc_empty_state(x87_simulator *sim) {
352 x87_state *res = x87_alloc_state(sim);
356 } /* x87_alloc_empty_state */
362 * @param sim the x87 simulator handle
363 * @param src the x87 state that will be cloned
365 * @return a cloned copy of the src state
367 static x87_state *x87_clone_state(x87_simulator *sim, const x87_state *src) {
368 x87_state *res = x87_alloc_state(sim);
370 memcpy(res, src, sizeof(*res));
372 } /* x87_clone_state */
375 * Patch a virtual instruction into a x87 one and return
378 * @param n the IR node to patch
379 * @param op the x87 opcode to patch in
381 static ir_node *x87_patch_insn(ir_node *n, ir_op *op) {
382 ir_mode *mode = get_irn_mode(n);
387 if (mode == mode_T) {
388 /* patch all Proj's */
389 const ir_edge_t *edge;
391 foreach_out_edge(n, edge) {
392 ir_node *proj = get_edge_src_irn(edge);
394 mode = get_irn_mode(proj);
395 if (mode_is_float(mode)) {
397 set_irn_mode(proj, mode_E);
402 else if (mode_is_float(mode))
403 set_irn_mode(n, mode_E);
405 } /* x87_patch_insn */
408 * Returns the first Proj of a mode_T node having a given mode.
410 * @param n the mode_T node
411 * @param m the desired mode of the Proj
412 * @return The first Proj of mode @p m found or NULL.
414 static ir_node *get_irn_Proj_for_mode(ir_node *n, ir_mode *m) {
415 const ir_edge_t *edge;
417 assert(get_irn_mode(n) == mode_T && "Need mode_T node");
419 foreach_out_edge(n, edge) {
420 ir_node *proj = get_edge_src_irn(edge);
421 if (get_irn_mode(proj) == m)
426 } /* get_irn_Proj_for_mode */
429 * Wrap the arch_* function here so we can check for errors.
431 static INLINE const arch_register_t *x87_get_irn_register(x87_simulator *sim, const ir_node *irn) {
432 const arch_register_t *res;
434 res = arch_get_irn_register(sim->env, irn);
435 assert(res->reg_class->regs == ia32_vfp_regs);
439 /* -------------- x87 perm --------------- */
442 * Creates a fxch for shuffle.
444 * @param state the x87 state
445 * @param pos parameter for fxch
446 * @param block the block were fxch is inserted
448 * Creates a new fxch node and reroute the user of the old node
451 * @return the fxch node
453 static ir_node *x87_fxch_shuffle(x87_state *state, int pos, ir_node *block)
458 fxch = new_rd_ia32_fxch(NULL, get_irn_irg(block), block, mode_E);
459 attr = get_ia32_attr(fxch);
460 attr->x87[0] = &ia32_st_regs[pos];
461 attr->x87[2] = &ia32_st_regs[0];
465 x87_fxch(state, pos);
467 } /* x87_fxch_shuffle */
470 * Calculate the necessary permutations to reach dst_state.
472 * These permutations are done with fxch instructions and placed
473 * at the end of the block.
475 * Note that critical edges are removed here, so we need only
476 * a shuffle if the current block has only one successor.
478 * @param sim the simulator handle
479 * @param block the current block
480 * @param state the current x87 stack state, might be modified
481 * @param dst_block the destination block
482 * @param dst_state destination state
486 static x87_state *x87_shuffle(x87_simulator *sim, ir_node *block, x87_state *state, ir_node *dst_block, const x87_state *dst_state) {
487 int i, n_cycles, k, ri;
488 unsigned cycles[4], all_mask;
489 char cycle_idx[4][8];
490 ir_node *fxch, *before, *after;
492 assert(state->depth == dst_state->depth);
494 /* Some mathematics here:
495 If we have a cycle of length n that includes the tos,
496 we need n-1 exchange operations.
497 We can always add the tos and restore it, so we need
498 n+1 exchange operations for a cycle not containing the tos.
499 So, the maximum of needed operations is for a cycle of 7
500 not including the tos == 8.
501 This is the same number of ops we would need for using stores,
502 so exchange is cheaper (we save the loads).
503 On the other hand, we might need an additional exchange
504 in the next block to bring one operand on top, so the
505 number of ops in the first case is identical.
506 Further, no more than 4 cycles can exists (4 x 2).
508 all_mask = (1 << (state->depth)) - 1;
510 for (n_cycles = 0; all_mask; ++n_cycles) {
511 int src_idx, dst_idx;
513 /* find the first free slot */
514 for (i = 0; i < state->depth; ++i) {
515 if (all_mask & (1 << i)) {
516 all_mask &= ~(1 << i);
518 /* check if there are differences here */
519 if (x87_get_st_reg(state, i) != x87_get_st_reg(dst_state, i))
525 /* no more cycles found */
530 cycles[n_cycles] = (1 << i);
531 cycle_idx[n_cycles][k++] = i;
532 for (src_idx = i; ; src_idx = dst_idx) {
533 dst_idx = x87_on_stack(dst_state, x87_get_st_reg(state, src_idx));
535 if ((all_mask & (1 << dst_idx)) == 0)
538 cycle_idx[n_cycles][k++] = dst_idx;
539 cycles[n_cycles] |= (1 << dst_idx);
540 all_mask &= ~(1 << dst_idx);
542 cycle_idx[n_cycles][k] = -1;
546 /* no permutation needed */
550 /* Hmm: permutation needed */
551 DB((dbg, LEVEL_2, "\n%+F needs permutation: from\n", block));
552 DEBUG_ONLY(x87_dump_stack(state));
553 DB((dbg, LEVEL_2, " to\n"));
554 DEBUG_ONLY(x87_dump_stack(dst_state));
558 DB((dbg, LEVEL_2, "Need %d cycles\n", n_cycles));
559 for (ri = 0; ri < n_cycles; ++ri) {
560 DB((dbg, LEVEL_2, " Ring %d:\n ", ri));
561 for (k = 0; cycle_idx[ri][k] != -1; ++k)
562 DB((dbg, LEVEL_2, " st%d ->", cycle_idx[ri][k]));
563 DB((dbg, LEVEL_2, "\n"));
570 * Find the place where the node must be inserted.
571 * We have only one successor block, so the last instruction should
574 before = sched_last(block);
575 assert(is_cfop(before));
577 /* now do the permutations */
578 for (ri = 0; ri < n_cycles; ++ri) {
579 if ((cycles[ri] & 1) == 0) {
580 /* this cycle does not include the tos */
581 fxch = x87_fxch_shuffle(state, cycle_idx[ri][0], block);
583 sched_add_after(after, fxch);
585 sched_add_before(before, fxch);
588 for (k = 1; cycle_idx[ri][k] != -1; ++k) {
589 fxch = x87_fxch_shuffle(state, cycle_idx[ri][k], block);
591 sched_add_after(after, fxch);
593 sched_add_before(before, fxch);
596 if ((cycles[ri] & 1) == 0) {
597 /* this cycle does not include the tos */
598 fxch = x87_fxch_shuffle(state, cycle_idx[ri][0], block);
599 sched_add_after(after, fxch);
606 * Create a fxch node before another node.
608 * @param state the x87 state
609 * @param n the node after the fxch
610 * @param pos exchange st(pos) with st(0)
611 * @param op_idx if >= 0, replace input op_idx of n with the fxch result
615 static ir_node *x87_create_fxch(x87_state *state, ir_node *n, int pos, int op_idx) {
618 ir_graph *irg = get_irn_irg(n);
619 ir_node *block = get_nodes_block(n);
621 x87_fxch(state, pos);
623 fxch = new_rd_ia32_fxch(NULL, irg, block, mode_E);
624 attr = get_ia32_attr(fxch);
625 attr->x87[0] = &ia32_st_regs[pos];
626 attr->x87[2] = &ia32_st_regs[0];
630 sched_add_before(n, fxch);
631 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fxch), attr->x87[0]->name, attr->x87[2]->name));
633 } /* x87_create_fxch */
636 * Create a fpush before node n.
638 * @param state the x87 state
639 * @param n the node after the fpush
640 * @param pos push st(pos) on stack
641 * @param op_idx replace input op_idx of n with the fpush result
643 static void x87_create_fpush(x87_state *state, ir_node *n, int pos, int op_idx) {
644 ir_node *fpush, *pred = get_irn_n(n, op_idx);
646 const arch_register_t *out = x87_get_irn_register(state->sim, pred);
648 x87_push_dbl(state, arch_register_get_index(out), pred);
650 fpush = new_rd_ia32_fpush(NULL, get_irn_irg(n), get_nodes_block(n), mode_E);
651 attr = get_ia32_attr(fpush);
652 attr->x87[0] = &ia32_st_regs[pos];
653 attr->x87[2] = &ia32_st_regs[0];
656 sched_add_before(n, fpush);
658 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fpush), attr->x87[0]->name, attr->x87[2]->name));
659 } /* x87_create_fpush */
662 * Create a fpop before node n.
664 * @param state the x87 state
665 * @param n the node after the fpop
666 * @param num pop 1 or 2 values
667 * @param pred node to use as predecessor of the fpop
669 * @return the fpop node
671 static ir_node *x87_create_fpop(x87_state *state, ir_node *n, int num, ir_node *pred) {
672 ir_node *fpop = pred;
679 fpop = new_rd_ia32_fpop(NULL, get_irn_irg(n), get_nodes_block(n), mode_E);
680 attr = get_ia32_attr(fpop);
681 attr->x87[0] = &ia32_st_regs[0];
682 attr->x87[1] = &ia32_st_regs[0];
683 attr->x87[2] = &ia32_st_regs[0];
685 sched_add_before(n, fpop);
686 DB((dbg, LEVEL_1, "<<< %s %s\n", get_irn_opname(fpop), attr->x87[0]->name));
692 } /* x87_create_fpop */
695 * Creates an fldz before node n
697 * @param state the x87 state
698 * @param n the node after the fldz
700 * @return the fldz node
702 static ir_node *x87_create_fldz(x87_state *state, ir_node *n, int regidx) {
703 ir_graph *irg = get_irn_irg(n);
704 ir_node *block = get_nodes_block(n);
707 fldz = new_rd_ia32_fldz(NULL, irg, block, mode_E);
709 sched_add_before(n, fldz);
710 DB((dbg, LEVEL_1, "<<< %s\n", get_irn_opname(fldz)));
713 x87_push(state, regidx, fldz);
718 /* --------------------------------- liveness ------------------------------------------ */
721 * The liveness transfer function.
722 * Updates a live set over a single step from a given node to its predecessor.
723 * Everything defined at the node is removed from the set, the uses of the node get inserted.
725 * @param sim The simulator handle.
726 * @param irn The node at which liveness should be computed.
727 * @param live The bitset of registers live before @p irn. This set gets modified by updating it to
728 * the registers live after irn.
730 * @return The live bitset.
732 static vfp_liveness vfp_liveness_transfer(x87_simulator *sim, ir_node *irn, vfp_liveness live)
735 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
736 const arch_env_t *arch_env = sim->env;
738 if (arch_irn_consider_in_reg_alloc(arch_env, cls, irn)) {
739 const arch_register_t *reg = x87_get_irn_register(sim, irn);
740 live &= ~(1 << arch_register_get_index(reg));
743 for (i = 0, n = get_irn_arity(irn); i < n; ++i) {
744 ir_node *op = get_irn_n(irn, i);
746 if (mode_is_float(get_irn_mode(op)) && arch_irn_consider_in_reg_alloc(arch_env, cls, op)) {
747 const arch_register_t *reg = x87_get_irn_register(sim, op);
748 live |= 1 << arch_register_get_index(reg);
752 } /* vfp_liveness_transfer */
755 * Put all live virtual registers at the end of a block into a bitset.
757 * @param sim the simulator handle
758 * @param lv the liveness information
759 * @param bl the block
761 * @return The live bitset at the end of this block
763 static vfp_liveness vfp_liveness_end_of_block(x87_simulator *sim, const ir_node *block)
766 vfp_liveness live = 0;
767 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
768 const arch_env_t *arch_env = sim->env;
769 const be_lv_t *lv = sim->lv;
771 be_lv_foreach(lv, block, be_lv_state_end, i) {
772 const arch_register_t *reg;
773 const ir_node *node = be_lv_get_irn(lv, block, i);
774 if (!arch_irn_consider_in_reg_alloc(arch_env, cls, node))
777 reg = x87_get_irn_register(sim, node);
778 live |= 1 << arch_register_get_index(reg);
782 } /* vfp_liveness_end_of_block */
784 /** get the register mask from an arch_register */
785 #define REGMASK(reg) (1 << (arch_register_get_index(reg)))
788 * Return a bitset of argument registers which are live at the end of a node.
790 * @param sim the simulator handle
791 * @param pos the node
792 * @param kill kill mask for the output registers
794 * @return The live bitset.
796 static unsigned vfp_live_args_after(x87_simulator *sim, const ir_node *pos, unsigned kill)
798 unsigned idx = get_irn_idx(pos);
800 assert(idx < sim->n_idx);
801 return sim->live[idx] & ~kill;
802 } /* vfp_live_args_after */
805 * Calculate the liveness for a whole block and cache it.
807 * @param sim the simulator handle
808 * @param lv the liveness handle
809 * @param block the block
811 static void update_liveness(x87_simulator *sim, ir_node *block) {
812 vfp_liveness live = vfp_liveness_end_of_block(sim, block);
816 /* now iterate through the block backward and cache the results */
817 sched_foreach_reverse(block, irn) {
818 /* stop at the first Phi: this produces the live-in */
822 idx = get_irn_idx(irn);
823 sim->live[idx] = live;
825 live = vfp_liveness_transfer(sim, irn, live);
827 idx = get_irn_idx(block);
828 sim->live[idx] = live;
829 } /* update_liveness */
832 * Returns true if a register is live in a set.
834 * @param reg_idx the vfp register index
835 * @param live a live bitset
837 #define is_vfp_live(reg_idx, live) ((live) & (1 << (reg_idx)))
841 * Dump liveness info.
843 * @param live the live bitset
845 static void vfp_dump_live(vfp_liveness live) {
848 DB((dbg, LEVEL_2, "Live after: "));
849 for (i = 0; i < 8; ++i) {
850 if (live & (1 << i)) {
851 DB((dbg, LEVEL_2, "vf%d ", i));
854 DB((dbg, LEVEL_2, "\n"));
855 } /* vfp_dump_live */
856 #endif /* DEBUG_libfirm */
858 /* --------------------------------- simulators ---------------------------------------- */
860 #define XCHG(a, b) do { int t = (a); (a) = (b); (b) = t; } while (0)
863 * Simulate a virtual binop.
865 * @param state the x87 state
866 * @param n the node that should be simulated (and patched)
867 * @param tmpl the template containing the 4 possible x87 opcodes
869 static int sim_binop(x87_state *state, ir_node *n, const exchange_tmpl *tmpl) {
870 int op2_idx, op1_idx;
871 int out_idx, do_pop = 0;
873 ir_node *patched_insn;
875 x87_simulator *sim = state->sim;
876 const arch_register_t *op1 = x87_get_irn_register(sim, get_irn_n(n, BINOP_IDX_1));
877 const arch_register_t *op2 = x87_get_irn_register(sim, get_irn_n(n, BINOP_IDX_2));
878 const arch_register_t *out = x87_get_irn_register(sim, n);
879 int reg_index_1 = arch_register_get_index(op1);
880 int reg_index_2 = arch_register_get_index(op2);
881 vfp_liveness live = vfp_live_args_after(sim, n, REGMASK(out));
883 DB((dbg, LEVEL_1, ">>> %+F %s, %s -> %s\n", n,
884 arch_register_get_name(op1), arch_register_get_name(op2),
885 arch_register_get_name(out)));
886 DEBUG_ONLY(vfp_dump_live(live));
887 DB((dbg, LEVEL_1, "Stack before: "));
888 DEBUG_ONLY(x87_dump_stack(state));
890 op1_idx = x87_on_stack(state, reg_index_1);
891 assert(op1_idx >= 0);
893 if (reg_index_2 != REG_VFP_NOREG) {
894 /* second operand is a vfp register */
895 op2_idx = x87_on_stack(state, reg_index_2);
896 assert(op2_idx >= 0);
898 if (is_vfp_live(arch_register_get_index(op2), live)) {
899 /* Second operand is live. */
901 if (is_vfp_live(arch_register_get_index(op1), live)) {
902 /* Both operands are live: push the first one.
903 This works even for op1 == op2. */
904 x87_create_fpush(state, n, op1_idx, BINOP_IDX_2);
905 /* now do fxxx (tos=tos X op) */
909 dst = tmpl->normal_op;
911 /* Second live, first operand is dead here, bring it to tos. */
913 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
918 /* now do fxxx (tos=tos X op) */
920 dst = tmpl->normal_op;
923 /* Second operand is dead. */
924 if (is_vfp_live(arch_register_get_index(op1), live)) {
925 /* First operand is live: bring second to tos. */
927 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
932 /* now do fxxxr (tos = op X tos) */
934 dst = tmpl->reverse_op;
936 /* Both operands are dead here, pop them from the stack. */
939 /* Both are identical and on tos, no pop needed. */
940 /* here fxxx (tos = tos X tos) */
941 dst = tmpl->normal_op;
944 /* now do fxxxp (op = op X tos, pop) */
945 dst = tmpl->normal_pop_op;
949 } else if (op1_idx == 0) {
950 assert(op1_idx != op2_idx);
951 /* now do fxxxrp (op = tos X op, pop) */
952 dst = tmpl->reverse_pop_op;
956 /* Bring the second on top. */
957 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
958 if (op1_idx == op2_idx) {
959 /* Both are identical and on tos now, no pop needed. */
962 /* use fxxx (tos = tos X tos) */
963 dst = tmpl->normal_op;
966 /* op2 is on tos now */
968 /* use fxxxp (op = op X tos, pop) */
969 dst = tmpl->normal_pop_op;
977 /* second operand is an address mode */
978 if (is_vfp_live(arch_register_get_index(op1), live)) {
979 /* first operand is live: push it here */
980 x87_create_fpush(state, n, op1_idx, BINOP_IDX_1);
982 /* use fxxx (tos = tos X mem) */
983 dst = tmpl->normal_op;
986 /* first operand is dead: bring it to tos */
988 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
992 /* use fxxxp (tos = tos X mem) */
993 dst = tmpl->normal_op;
998 patched_insn = x87_patch_insn(n, dst);
999 x87_set_st(state, arch_register_get_index(out), patched_insn, out_idx);
1004 /* patch the operation */
1005 attr = get_ia32_attr(n);
1006 attr->x87[0] = op1 = &ia32_st_regs[op1_idx];
1007 if (reg_index_2 != REG_VFP_NOREG) {
1008 attr->x87[1] = op2 = &ia32_st_regs[op2_idx];
1010 attr->x87[2] = out = &ia32_st_regs[out_idx];
1012 if (reg_index_2 != REG_VFP_NOREG) {
1013 DB((dbg, LEVEL_1, "<<< %s %s, %s -> %s\n", get_irn_opname(n),
1014 arch_register_get_name(op1), arch_register_get_name(op2),
1015 arch_register_get_name(out)));
1017 DB((dbg, LEVEL_1, "<<< %s %s, [AM] -> %s\n", get_irn_opname(n),
1018 arch_register_get_name(op1),
1019 arch_register_get_name(out)));
1026 * Simulate a virtual Unop.
1028 * @param state the x87 state
1029 * @param n the node that should be simulated (and patched)
1030 * @param op the x87 opcode that will replace n's opcode
1032 static int sim_unop(x87_state *state, ir_node *n, ir_op *op) {
1033 int op1_idx, out_idx;
1034 x87_simulator *sim = state->sim;
1035 const arch_register_t *op1 = x87_get_irn_register(sim, get_irn_n(n, UNOP_IDX));
1036 const arch_register_t *out = x87_get_irn_register(sim, n);
1038 unsigned live = vfp_live_args_after(sim, n, REGMASK(out));
1040 DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, out->name));
1041 DEBUG_ONLY(vfp_dump_live(live));
1043 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1045 if (is_vfp_live(arch_register_get_index(op1), live)) {
1046 /* push the operand here */
1047 x87_create_fpush(state, n, op1_idx, UNOP_IDX);
1051 /* operand is dead, bring it to tos */
1053 x87_create_fxch(state, n, op1_idx, UNOP_IDX);
1058 x87_set_tos(state, arch_register_get_index(out), x87_patch_insn(n, op));
1060 attr = get_ia32_attr(n);
1061 attr->x87[0] = op1 = &ia32_st_regs[0];
1062 attr->x87[2] = out = &ia32_st_regs[0];
1063 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), out->name));
1069 * Simulate a virtual Load instruction.
1071 * @param state the x87 state
1072 * @param n the node that should be simulated (and patched)
1073 * @param op the x87 opcode that will replace n's opcode
1075 static int sim_load(x87_state *state, ir_node *n, ir_op *op) {
1076 const arch_register_t *out = x87_get_irn_register(state->sim, n);
1079 DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, arch_register_get_name(out)));
1080 x87_push(state, arch_register_get_index(out), x87_patch_insn(n, op));
1081 assert(out == x87_get_irn_register(state->sim, n));
1082 attr = get_ia32_attr(n);
1083 attr->x87[2] = out = &ia32_st_regs[0];
1084 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), arch_register_get_name(out)));
1090 * Rewire all users of @p old_val to @p new_val iff they are scheduled after @p store.
1092 * @param store The store
1093 * @param old_val The former value
1094 * @param new_val The new value
1096 static void collect_and_rewire_users(ir_node *store, ir_node *old_val, ir_node *new_val) {
1097 const ir_edge_t *edge, *ne;
1099 foreach_out_edge_safe(old_val, edge, ne) {
1100 ir_node *user = get_edge_src_irn(edge);
1102 if (! user || user == store)
1105 /* if the user is scheduled after the store: rewire */
1106 if (sched_is_scheduled(user) && sched_comes_after(store, user)) {
1108 /* find the input of the user pointing to the old value */
1109 for (i = get_irn_arity(user) - 1; i >= 0; i--) {
1110 if (get_irn_n(user, i) == old_val)
1111 set_irn_n(user, i, new_val);
1115 } /* collect_and_rewire_users */
1118 * Simulate a virtual Store.
1120 * @param state the x87 state
1121 * @param n the node that should be simulated (and patched)
1122 * @param op the x87 store opcode
1123 * @param op_p the x87 store and pop opcode
1125 static int sim_store(x87_state *state, ir_node *n, ir_op *op, ir_op *op_p) {
1126 x87_simulator *sim = state->sim;
1127 ir_node *val = get_irn_n(n, STORE_VAL_IDX);
1128 const arch_register_t *op2 = x87_get_irn_register(sim, val);
1129 unsigned live = vfp_live_args_after(sim, n, 0);
1132 int op2_reg_idx, op2_idx, depth;
1133 int live_after_node;
1136 op2_reg_idx = arch_register_get_index(op2);
1137 if (op2_reg_idx == REG_VFP_UKNWN) {
1138 // just take any value from stack
1139 if(state->depth > 0) {
1141 DEBUG_ONLY(op2 = NULL);
1142 live_after_node = 1;
1144 // produce a new value which we will consume immediately
1145 x87_create_fldz(state, n, op2_reg_idx);
1146 live_after_node = 0;
1147 op2_idx = x87_on_stack(state, op2_reg_idx);
1148 assert(op2_idx >= 0);
1151 op2_idx = x87_on_stack(state, op2_reg_idx);
1152 live_after_node = is_vfp_live(arch_register_get_index(op2), live);
1153 assert(op2_idx >= 0);
1154 DB((dbg, LEVEL_1, ">>> %+F %s ->\n", n, arch_register_get_name(op2)));
1157 mode = get_ia32_ls_mode(n);
1158 depth = x87_get_depth(state);
1160 if (live_after_node) {
1162 Problem: fst doesn't support mode_E (spills), only fstp does
1164 - stack not full: push value and fstp
1165 - stack full: fstp value and load again
1167 if (mode == mode_E) {
1168 if (depth < N_x87_REGS) {
1169 /* ok, we have a free register: push + fstp */
1170 x87_create_fpush(state, n, op2_idx, STORE_VAL_IDX);
1172 x87_patch_insn(n, op_p);
1175 ir_node *vfld, *mem, *block, *rproj, *mproj;
1178 /* stack full here: need fstp + load */
1180 x87_patch_insn(n, op_p);
1182 block = get_nodes_block(n);
1183 irg = get_irn_irg(n);
1184 vfld = new_rd_ia32_vfld(NULL, irg, block, get_irn_n(n, 0), get_irn_n(n, 1), new_rd_NoMem(irg));
1186 /* copy all attributes */
1187 set_ia32_frame_ent(vfld, get_ia32_frame_ent(n));
1188 if (is_ia32_use_frame(n))
1189 set_ia32_use_frame(vfld);
1190 set_ia32_am_flavour(vfld, get_ia32_am_flavour(n));
1191 set_ia32_op_type(vfld, ia32_am_Source);
1192 add_ia32_am_offs_int(vfld, get_ia32_am_offs_int(n));
1193 set_ia32_am_sc(vfld, get_ia32_am_sc(n));
1194 set_ia32_ls_mode(vfld, get_ia32_ls_mode(n));
1196 rproj = new_r_Proj(irg, block, vfld, get_ia32_ls_mode(vfld), pn_ia32_vfld_res);
1197 mproj = new_r_Proj(irg, block, vfld, mode_M, pn_ia32_vfld_M);
1198 mem = get_irn_Proj_for_mode(n, mode_M);
1200 assert(mem && "Store memory not found");
1202 arch_set_irn_register(sim->env, rproj, op2);
1204 /* reroute all former users of the store memory to the load memory */
1205 edges_reroute(mem, mproj, irg);
1206 /* set the memory input of the load to the store memory */
1207 set_irn_n(vfld, 2, mem);
1209 sched_add_after(n, vfld);
1210 sched_add_after(vfld, rproj);
1212 /* rewire all users, scheduled after the store, to the loaded value */
1213 collect_and_rewire_users(n, val, rproj);
1219 /* we can only store the tos to memory */
1221 x87_create_fxch(state, n, op2_idx, STORE_VAL_IDX);
1223 /* mode != mode_E -> use normal fst */
1224 x87_patch_insn(n, op);
1228 /* we can only store the tos to memory */
1230 x87_create_fxch(state, n, op2_idx, STORE_VAL_IDX);
1233 x87_patch_insn(n, op_p);
1236 attr = get_ia32_attr(n);
1237 attr->x87[1] = op2 = &ia32_st_regs[0];
1238 DB((dbg, LEVEL_1, "<<< %s %s ->\n", get_irn_opname(n), arch_register_get_name(op2)));
1244 * Simulate a virtual Phi.
1245 * Just for cosmetic reasons change the mode of Phi nodes to mode_E.
1247 * @param state the x87 state
1248 * @param n the node that should be simulated (and patched)
1249 * @param env the architecture environment
1251 static int sim_Phi(x87_state *state, ir_node *n, const arch_env_t *env) {
1252 ir_mode *mode = get_irn_mode(n);
1254 if (mode_is_float(mode))
1255 set_irn_mode(n, mode_E);
/*
 * Macro templates generating the per-opcode simulator wrappers
 * (sim_<op>).  Each wrapper forwards to the generic sim_binop /
 * sim_load / sim_unop / sim_store routine with the matching real
 * (non-virtual) ia32 opcode(s).
 */
1260 #define _GEN_BINOP(op, rev) \
1261 static int sim_##op(x87_state *state, ir_node *n) { \
1262 exchange_tmpl tmpl = { op_ia32_##op, op_ia32_##rev, op_ia32_##op##p, op_ia32_##rev##p }; \
1263 return sim_binop(state, n, &tmpl); \
1266 #define GEN_BINOP(op) _GEN_BINOP(op, op)
1267 #define GEN_BINOPR(op) _GEN_BINOP(op, op##r)
/* load wrapper: nop names the real opcode when it differs from op */
1269 #define GEN_LOAD2(op, nop) \
1270 static int sim_##op(x87_state *state, ir_node *n) { \
1271 return sim_load(state, n, op_ia32_##nop); \
1274 #define GEN_LOAD(op) GEN_LOAD2(op, op)
/* unary operation wrapper */
1276 #define GEN_UNOP(op) \
1277 static int sim_##op(x87_state *state, ir_node *n) { \
1278 return sim_unop(state, n, op_ia32_##op); \
1281 #define GEN_STORE(op) \
1282 static int sim_##op(x87_state *state, ir_node *n) { \
1283 return sim_store(state, n, op_ia32_##op, op_ia32_##op##p); \
1303 GEN_LOAD2(fConst, fldConst)
1309 * Simulate a fCondJmp.
1311 * @param state the x87 state
1312 * @param n the node that should be simulated (and patched)
/**
 * Simulate a virtual fCondJmp (floating point compare + jump).
 *
 * Chooses among the fcom / fcomr / fcomp / fcomrp / fcompp / fcomrpp
 * variants depending on where the two operands currently sit on the
 * x87 stack and whether they are still live after the compare: dead
 * operands are removed by selecting a popping compare, possibly after
 * fxch instructions that bring them to st(0)/st(1).
 */
1314 static int sim_fCondJmp(x87_state *state, ir_node *n) {
1320 x87_simulator *sim = state->sim;
1321 const arch_register_t *op1 = x87_get_irn_register(sim, get_irn_n(n, BINOP_IDX_1));
1322 const arch_register_t *op2 = x87_get_irn_register(sim, get_irn_n(n, BINOP_IDX_2));
1323 int reg_index_1 = arch_register_get_index(op1);
1324 int reg_index_2 = arch_register_get_index(op2);
/* liveness after n decides whether popping compare variants are used */
1325 unsigned live = vfp_live_args_after(sim, n, 0);
1327 DB((dbg, LEVEL_1, ">>> %+F %s, %s\n", n,
1328 arch_register_get_name(op1), arch_register_get_name(op2)));
1329 DEBUG_ONLY(vfp_dump_live(live));
1330 DB((dbg, LEVEL_1, "Stack before: "));
1331 DEBUG_ONLY(x87_dump_stack(state));
/* the first operand must already be somewhere on the simulated stack */
1333 op1_idx = x87_on_stack(state, reg_index_1);
1334 assert(op1_idx >= 0);
1336 /* BEWARE: check for comp a,a cases, they might happen */
1337 if (reg_index_2 != REG_VFP_NOREG) {
1338 /* second operand is a vfp register */
1339 op2_idx = x87_on_stack(state, reg_index_2);
1340 assert(op2_idx >= 0);
1342 if (is_vfp_live(arch_register_get_index(op2), live)) {
1343 /* second operand is live */
1345 if (is_vfp_live(arch_register_get_index(op1), live)) {
/* both live: nothing may be popped, only pick direction (or fxch) */
1346 /* both operands are live */
1349 /* res = tos X op */
1350 dst = op_ia32_fcomJmp;
1351 } else if (op2_idx == 0) {
1352 /* res = op X tos */
1353 dst = op_ia32_fcomrJmp;
1355 /* bring the first one to tos */
1356 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1360 /* res = tos X op */
1361 dst = op_ia32_fcomJmp;
1364 /* second live, first operand is dead here, bring it to tos.
1365 This means further, op1_idx != op2_idx. */
1366 assert(op1_idx != op2_idx);
1368 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1373 /* res = tos X op, pop */
1374 dst = op_ia32_fcompJmp;
1378 /* second operand is dead */
1379 if (is_vfp_live(arch_register_get_index(op1), live)) {
1380 /* first operand is live: bring second to tos.
1381 This means further, op1_idx != op2_idx. */
1382 assert(op1_idx != op2_idx);
1384 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1389 /* res = op X tos, pop */
1390 dst = op_ia32_fcomrpJmp;
1393 /* both operands are dead here, check first for identity. */
1394 if (op1_idx == op2_idx) {
1395 /* identically, one pop needed */
1397 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1401 /* res = tos X op, pop */
1402 dst = op_ia32_fcompJmp;
1405 /* different, move them to st and st(1) and pop both.
1406 The tricky part is to get one into st(1).*/
1407 else if (op2_idx == 1) {
1408 /* good, second operand is already in the right place, move the first */
1410 /* bring the first on top */
1411 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1412 assert(op2_idx != 0);
1415 /* res = tos X op, pop, pop */
1416 dst = op_ia32_fcomppJmp;
1418 } else if (op1_idx == 1) {
1419 /* good, first operand is already in the right place, move the second */
1421 /* bring the second on top */
1422 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1423 assert(op1_idx != 0);
1426 dst = op_ia32_fcomrppJmp;
1429 /* if one is already the TOS, we need two fxch */
1431 /* first one is TOS, move to st(1) */
1432 x87_create_fxch(state, n, 1, BINOP_IDX_1);
1433 assert(op2_idx != 1);
1435 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1437 /* res = op X tos, pop, pop */
1438 dst = op_ia32_fcomrppJmp;
1440 } else if (op2_idx == 0) {
1441 /* second one is TOS, move to st(1) */
1442 x87_create_fxch(state, n, 1, BINOP_IDX_2);
1443 assert(op1_idx != 1);
1445 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1447 /* res = tos X op, pop, pop */
1448 dst = op_ia32_fcomppJmp;
1451 /* none of them is either TOS or st(1), 3 fxch needed */
1452 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1453 assert(op1_idx != 0);
1454 x87_create_fxch(state, n, 1, BINOP_IDX_2);
1456 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1458 /* res = tos X op, pop, pop */
1459 dst = op_ia32_fcomppJmp;
/* second input is not a register: memory (address mode) operand */
1466 /* second operand is an address mode */
1467 if (is_vfp_live(arch_register_get_index(op1), live)) {
1468 /* first operand is live: bring it to TOS */
1470 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1473 dst = op_ia32_fcomJmp;
1475 /* first operand is dead: bring it to tos */
1477 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1480 dst = op_ia32_fcompJmp;
/* replace the virtual opcode by the selected real compare */
1485 x87_patch_insn(n, dst);
1486 assert(pop_cnt < 3);
1492 /* patch the operation */
1493 attr = get_ia32_attr(n);
1494 op1 = &ia32_st_regs[op1_idx];
1497 op2 = &ia32_st_regs[op2_idx];
1500 attr->x87[2] = NULL;
1503 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(n),
1504 arch_register_get_name(op1), arch_register_get_name(op2)));
1506 DB((dbg, LEVEL_1, "<<< %s %s, [AM]\n", get_irn_opname(n),
1507 arch_register_get_name(op1)));
1510 } /* sim_fCondJmp */
/**
 * Create the x87 replacement for a be_Copy: constants produced by the
 * fld* instructions are cheaper to re-materialize than to duplicate, so
 * they are recreated; everything else becomes an fpushCopy.  The new
 * node is pushed onto the simulated stack and gets the output register
 * of the original Copy.
 */
1512 static ir_node *create_Copy(x87_state *state, ir_node *n) {
1513 x87_simulator *sim = state->sim;
1514 ir_graph *irg = get_irn_irg(n);
1515 dbg_info *n_dbg = get_irn_dbg_info(n);
1516 ir_mode *mode = get_irn_mode(n);
1517 ir_node *block = get_nodes_block(n);
1518 ir_node *pred = get_irn_n(n, 0);
/* constructor for the constant-reload case, NULL if pred is no constant */
1519 ir_node *(*cnstr)(dbg_info *, ir_graph *, ir_node *, ir_mode *) = NULL;
1521 const arch_register_t *out;
1522 const arch_register_t *op1;
1525 /* Do not copy constants, recreate them. */
1526 switch (get_ia32_irn_opcode(pred)) {
1528 cnstr = new_rd_ia32_fldz;
1531 cnstr = new_rd_ia32_fld1;
1533 case iro_ia32_fldpi:
1534 cnstr = new_rd_ia32_fldpi;
1536 case iro_ia32_fldl2e:
1537 cnstr = new_rd_ia32_fldl2e;
1539 case iro_ia32_fldl2t:
1540 cnstr = new_rd_ia32_fldl2t;
1542 case iro_ia32_fldlg2:
1543 cnstr = new_rd_ia32_fldlg2;
1545 case iro_ia32_fldln2:
1546 cnstr = new_rd_ia32_fldln2;
1550 out = x87_get_irn_register(sim, n);
1551 op1 = x87_get_irn_register(sim, pred);
1554 /* copy a constant */
1555 res = (*cnstr)(n_dbg, irg, block, mode);
/* the recreated constant always lands on the top of stack, st(0) */
1557 attr = get_ia32_attr(res);
1558 attr->x87[2] = out = &ia32_st_regs[0];
1560 int op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1562 res = new_rd_ia32_fpushCopy(n_dbg, irg, block, pred, mode);
1564 attr = get_ia32_attr(res);
1565 attr->x87[0] = op1 = &ia32_st_regs[op1_idx];
1566 attr->x87[2] = out = &ia32_st_regs[0];
1569 arch_set_irn_register(sim->env, res, out);
/* mirror the hardware push in the simulated stack state */
1570 x87_push(state, arch_register_get_index(out), res);
1572 DB((dbg, LEVEL_1, ">>> %+F -> %s\n", res, arch_register_get_name(out)));
1577 * Simulate a be_Copy.
1579 * @param state the x87 state
1580 * @param n the node that should be simulated (and patched)
/**
 * Simulate a be_Copy of a floating point value.
 *
 * If the source is still live afterwards a real duplication is created
 * via create_Copy(); otherwise the Copy degenerates to a rename of the
 * stack slot (or an fxch + Pop when the output slot is occupied).
 */
1582 static int sim_Copy(x87_state *state, ir_node *n) {
1585 const arch_register_t *out;
1586 const arch_register_t *op1;
1587 ir_node *node, *next;
1589 int op1_idx, out_idx;
1592 ir_mode *mode = get_irn_mode(n);
/* non-float copies do not touch the x87 stack, nothing to do */
1594 if (!mode_is_float(mode))
1598 pred = get_irn_n(n, 0);
1599 out = x87_get_irn_register(sim, n);
1600 op1 = x87_get_irn_register(sim, pred);
1601 live = vfp_live_args_after(sim, n, REGMASK(out));
1603 DB((dbg, LEVEL_1, ">>> %+F %s -> %s\n", n,
1604 arch_register_get_name(op1), arch_register_get_name(out)));
1605 DEBUG_ONLY(vfp_dump_live(live));
1607 /* handle the infamous unknown value */
1608 if (arch_register_get_index(op1) == REG_VFP_UKNWN) {
1609 /* Matze: copies of unknowns should not happen (anymore) */
1613 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1615 if (is_vfp_live(arch_register_get_index(op1), live)) {
1616 /* Operand is still live, a real copy. We need here an fpush that can
1617 hold a register, so use the fpushCopy or recreate constants */
1618 node = create_Copy(state, n);
/* schedule the replacement right where the Copy was */
1620 next = sched_next(n);
1623 sched_add_before(next, node);
1624 DB((dbg, LEVEL_1, ">>> %+F %s -> %s\n", node, op1->name, out->name));
1626 out_idx = x87_on_stack(state, arch_register_get_index(out));
1628 if (out_idx >= 0 && out_idx != op1_idx) {
1629 /* Matze: out already on stack? how can this happen? */
1632 /* op1 must be killed and placed where out is */
1634 /* best case, simple remove and rename */
1635 x87_patch_insn(n, op_ia32_Pop);
1636 attr = get_ia32_attr(n);
1637 attr->x87[0] = op1 = &ia32_st_regs[0];
1640 x87_set_st(state, arch_register_get_index(out), n, op1_idx - 1);
1642 /* move op1 to tos, store and pop it */
1644 x87_create_fxch(state, n, op1_idx, 0);
1647 x87_patch_insn(n, op_ia32_Pop);
1648 attr = get_ia32_attr(n);
1649 attr->x87[0] = op1 = &ia32_st_regs[out_idx];
1652 x87_set_st(state, arch_register_get_index(out), n, out_idx - 1);
1654 DB((dbg, LEVEL_1, ">>> %+F %s\n", n, op1->name));
1656 /* just a virtual copy */
1657 x87_set_st(state, arch_register_get_index(out), get_unop_op(n), op1_idx);
1659 DB((dbg, LEVEL_1, ">>> KILLED %s\n", get_irn_opname(n)));
/* the Copy vanishes completely, users now see its operand */
1660 exchange(n, get_unop_op(n));
1668 * Simulate a be_Call.
1670 * @param state the x87 state
1671 * @param n the node that should be simulated
1672 * @param env the architecture environment
/**
 * Simulate a be_Call: the x87 stack must be empty before a call; if the
 * callee returns a float, the ABI places it in st(0), so a value is
 * pushed onto the simulated stack for it.
 */
1674 static int sim_Call(x87_state *state, ir_node *n, const arch_env_t *env) {
1675 ir_type *call_tp = be_Call_get_type(n);
1677 /* at the begin of a call the x87 state should be empty */
1678 assert(state->depth == 0 && "stack not empty before call");
1681 * If the called function returns a float, it is returned in st(0).
1682 * This even happens if the return value is NOT used.
1683 * Moreover, only one return result is supported.
1685 if (get_method_n_ress(call_tp) > 0) {
1686 ir_type *res_type = get_method_res_type(call_tp, 0);
1687 ir_mode *mode = get_type_mode(res_type);
1689 if (mode && mode_is_float(mode)) {
1691 * TODO: what to push here? The result might be unused and currently
1692 * we have no possibility to detect this :-(
1694 x87_push(state, 0, n);
1702 * Simulate a be_Spill.
1704 * @param state the x87 state
1705 * @param n the node that should be simulated (and patched)
1706 * @param env the architecture environment
1708 * Should not happen: be_Spill nodes are lowered before the x87 simulator sees them.
/* Dead code path: be_Spill nodes are lowered earlier; assert and fall
   back to the store simulation so a release build still behaves. */
1710 static int sim_Spill(x87_state *state, ir_node *n) {
1711 assert(0 && "Spill not lowered");
1712 return sim_fst(state, n);
1716 * Simulate a be_Reload.
1718 * @param state the x87 state
1719 * @param n the node that should be simulated (and patched)
1720 * @param env the architecture environment
1722 * Should not happen: be_Reload nodes are lowered before the x87 simulator sees them.
/* Dead code path: be_Reload nodes are lowered earlier; assert and fall
   back to the load simulation so a release build still behaves. */
1724 static int sim_Reload(x87_state *state, ir_node *n) {
1725 assert(0 && "Reload not lowered");
1726 return sim_fld(state, n);
1730 * Simulate a be_Return.
1732 * @param state the x87 state
1733 * @param n the node that should be simulated (and patched)
1734 * @param env the architecture environment
/**
 * Simulate a be_Return: the simulated stack must contain exactly the
 * floating point return values; they are popped virtually since the
 * caller consumes them from the hardware stack.
 */
1736 static int sim_Return(x87_state *state, ir_node *n) {
1737 int n_res = be_Return_get_n_rets(n);
1738 int i, n_float_res = 0;
1740 /* only floating point return values must reside on the stack */
1741 for (i = 0; i < n_res; ++i) {
1742 ir_node *res = get_irn_n(n, be_pos_Return_val + i);
1744 if (mode_is_float(get_irn_mode(res)))
/* sanity: stack depth must match the number of float results */
1747 assert(x87_get_depth(state) == n_float_res);
1749 /* pop them virtually */
1750 for (i = n_float_res - 1; i >= 0; --i)
/** Pairs an input register with its output register for Perm handling. */
1756 typedef struct _perm_data_t {
1757 const arch_register_t *in;
1758 const arch_register_t *out;
1762 * Simulate a be_Perm.
1764 * @param state the x87 state
1765 * @param irn the node that should be simulated (and patched)
/**
 * Simulate a be_Perm: on x87 a Perm is purely virtual, since all inputs
 * already live on the FPU stack.  Only the simulated stack state is
 * permuted; no instruction is emitted.
 */
1767 static int sim_Perm(x87_state *state, ir_node *irn) {
1769 x87_simulator *sim = state->sim;
1770 ir_node *pred = get_irn_n(irn, 0);
1772 const ir_edge_t *edge;
1774 /* handle only floating point Perms */
1775 if (! mode_is_float(get_irn_mode(pred)))
1778 DB((dbg, LEVEL_1, ">>> %+F\n", irn));
1780 /* Perm is a pure virtual instruction on x87.
1781 All inputs must be on the FPU stack and are pairwise
1782 different from each other.
1783 So, all we need to do is to permutate the stack state. */
1784 n = get_irn_arity(irn);
/* stack-allocated array: one stack position per Perm input */
1785 NEW_ARR_A(int, stack_pos, n);
1787 /* collect old stack positions */
1788 for (i = 0; i < n; ++i) {
1789 const arch_register_t *inreg = x87_get_irn_register(sim, get_irn_n(irn, i));
1790 int idx = x87_on_stack(state, arch_register_get_index(inreg));
1792 assert(idx >= 0 && "Perm argument not on x87 stack");
1796 /* now do the permutation */
1797 foreach_out_edge(irn, edge) {
1798 ir_node *proj = get_edge_src_irn(edge);
1799 const arch_register_t *out = x87_get_irn_register(sim, proj);
1800 long num = get_Proj_proj(proj);
1802 assert(0 <= num && num < n && "More Proj's than Perm inputs");
/* Proj number selects which input's old position the output takes */
1803 x87_set_st(state, arch_register_get_index(out), proj, stack_pos[(unsigned)num]);
1805 DB((dbg, LEVEL_1, "<<< %+F\n", irn));
1811 * Kill any dead registers at block start by popping them from the stack.
1813 * @param sim the simulator handle
1814 * @param block the current block
1815 * @param start_state the x87 state at the begin of the block
/**
 * Kill any dead registers at block start by popping them from the stack.
 * Builds a bitmask of dead stack slots, then emits fxch/fpop sequences
 * before the first instruction; double-pops are used where two adjacent
 * TOS slots are dead.  Returns the (possibly cloned) adjusted state.
 */
1817 static x87_state *x87_kill_deads(x87_simulator *sim, ir_node *block, x87_state *start_state) {
1818 x87_state *state = start_state;
1819 ir_node *first_insn = sched_first(block);
1820 ir_node *keep = NULL;
1821 unsigned live = vfp_live_args_after(sim, block, 0);
1823 int i, depth, num_pop;
/* bit i of kill_mask set <=> stack slot i holds a dead value */
1826 depth = x87_get_depth(state);
1827 for (i = depth - 1; i >= 0; --i) {
1828 int reg = x87_get_st_reg(state, i);
1830 if (! is_vfp_live(reg, live))
1831 kill_mask |= (1 << i);
1835 /* create a new state, will be changed */
1836 state = x87_clone_state(sim, state);
1838 DB((dbg, LEVEL_1, "Killing deads:\n"));
1839 DEBUG_ONLY(vfp_dump_live(live));
1840 DEBUG_ONLY(x87_dump_stack(state));
1842 /* now kill registers */
1844 /* we can only kill from TOS, so bring them up */
1845 if (! (kill_mask & 1)) {
1846 /* search from behind, because we can do a double-pop */
1847 for (i = depth - 1; i >= 0; --i) {
1848 if (kill_mask & (1 << i)) {
1849 kill_mask &= ~(1 << i);
/* mark the slot dead and exchange it up to the TOS */
1856 x87_set_st(state, -1, keep, i);
1857 keep = x87_create_fxch(state, first_insn, i, -1);
1860 keep = x87_get_st_node(state, 0);
1862 if ((kill_mask & 3) == 3) {
1863 /* we can do a double-pop */
1867 /* only a single pop */
1872 kill_mask >>= num_pop;
1873 keep = x87_create_fpop(state, first_insn, num_pop, keep);
1878 } /* x87_kill_deads */
1881 * Run a simulation and fix all virtual instructions for a block.
1883 * @param sim the simulator handle
1884 * @param block the current block
1886 * Records the end state in the block's blk_state and queues unprocessed
1887 * successors on the simulator worklist (the function itself returns void).
/**
 * Run the simulation for one block: kill dead registers at the block
 * entry, dispatch every scheduled node to its sim_* handler (installed
 * in op->ops.generic), record the end state and propagate it to the
 * successor blocks (shuffling the stack where a successor already has
 * a begin state).
 */
1889 static void x87_simulate_block(x87_simulator *sim, ir_node *block) {
1891 blk_state *bl_state = x87_get_bl_state(sim, block);
1892 x87_state *state = bl_state->begin;
1893 const ir_edge_t *edge;
1894 ir_node *start_block;
1896 assert(state != NULL);
1897 // already processed?
1898 if(bl_state->end != NULL)
1901 update_liveness(sim, block);
1903 DB((dbg, LEVEL_1, "Simulate %+F\n", block));
1904 DB((dbg, LEVEL_2, "State at Block begin:\n "));
1905 DEBUG_ONLY(x87_dump_stack(state));
1907 /* at block begin, kill all dead registers */
1908 state = x87_kill_deads(sim, block, state);
1910 /* beware: n might be changed */
1911 for (n = sched_first(block); !sched_is_end(n); n = next) {
1914 ir_op *op = get_irn_op(n);
/* fetch next first; the handler may insert nodes after n */
1916 next = sched_next(n);
1917 if (op->ops.generic == NULL)
1920 func = (sim_func)op->ops.generic;
1922 /* have work to do */
1923 if (state == bl_state->begin) {
1924 /* create a new state, will be changed */
1925 state = x87_clone_state(sim, state);
1929 node_inserted = (*func)(state, n);
1932 sim_func might have added additional nodes after n,
1934 beware: n must not be changed by sim_func
1935 (i.e. removed from schedule) in this case
1938 next = sched_next(n);
1941 start_block = get_irg_start_block(get_irn_irg(block));
1943 /* check if the state must be shuffled */
1944 foreach_block_succ(block, edge) {
1945 ir_node *succ = get_edge_src_irn(edge);
1946 blk_state *succ_state;
1948 if(succ == start_block)
1951 succ_state = x87_get_bl_state(sim, succ);
1953 if (succ_state->begin == NULL) {
/* first predecessor processed: our end state becomes its begin */
1954 succ_state->begin = state;
1955 waitq_put(sim->worklist, succ);
1957 /* There is already a begin state for the successor, bad.
1958 Do the necessary permutations.
1959 Note that critical edges are removed, so this is always possible. */
1960 x87_shuffle(sim, block, state, succ, succ_state->begin);
1963 bl_state->end = state;
1965 DB((dbg, LEVEL_2, "State at Block end:\n ")); DEBUG_ONLY(x87_dump_stack(state));
1966 } /* x87_simulate_block */
1969 * Create a new x87 simulator.
1971 * @param sim a simulator handle, will be initialized
1972 * @param irg the current graph
1973 * @param env the architecture environment
/**
 * Initialize a simulator: set up the obstack, the block-state map and
 * the liveness cache, then install the sim_* handlers as the generic
 * function pointers of all opcodes that must be simulated.
 */
1975 static void x87_init_simulator(x87_simulator *sim, ir_graph *irg, const arch_env_t *env)
1977 obstack_init(&sim->obst);
1978 sim->blk_states = pmap_create();
/* one liveness entry per node index in the graph */
1980 sim->n_idx = get_irg_last_idx(irg);
1981 sim->live = obstack_alloc(&sim->obst, sizeof(*sim->live) * sim->n_idx);
1983 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.x87");
1985 DB((dbg, LEVEL_1, "--------------------------------\n"
1986 "x87 Simulator started for %+F\n", irg));
1988 /* set the generic function pointer of instruction we must simulate */
1989 clear_irp_opcodes_generic_func();
1991 #define ASSOC(op) (op_ ## op)->ops.generic = (op_func)(sim_##op)
1992 #define ASSOC_IA32(op) (op_ia32_v ## op)->ops.generic = (op_func)(sim_##op)
1993 #define ASSOC_BE(op) (op_be_ ## op)->ops.generic = (op_func)(sim_##op)
2011 ASSOC_IA32(fCondJmp);
2022 } /* x87_init_simulator */
2025 * Destroy a x87 simulator.
2027 * @param sim the simulator handle
/** Free all resources (block-state map, obstack) of a simulator. */
2029 static void x87_destroy_simulator(x87_simulator *sim) {
2030 pmap_destroy(sim->blk_states);
2031 obstack_free(&sim->obst, NULL);
2032 DB((dbg, LEVEL_1, "x87 Simulator stopped\n\n"));
2033 } /* x87_destroy_simulator */
2036 * Run a simulation and fix all virtual instructions for a graph.
2038 * @param env the architecture environment
2039 * @param birg the backend graph (be_irg_t) to process
2041 * Needs a block-schedule.
/**
 * Run a simulation and fix all virtual instructions for a graph.
 * Seeds the worklist with the start block (empty x87 state) and
 * processes blocks until the worklist is drained.
 */
2043 void x87_simulate_graph(const arch_env_t *env, be_irg_t *birg) {
2044 ir_node *block, *start_block;
2045 blk_state *bl_state;
2047 ir_graph *irg = birg->irg;
2049 /* create the simulator */
2050 x87_init_simulator(&sim, irg, env);
2052 start_block = get_irg_start_block(irg);
2053 bl_state = x87_get_bl_state(&sim, start_block);
2055 /* start with the empty state */
2056 bl_state->begin = empty;
2059 sim.worklist = new_waitq();
2060 waitq_put(sim.worklist, start_block);
/* liveness must be fresh; the simulator inserts new nodes later */
2062 be_invalidate_liveness(birg);
2063 be_assure_liveness(birg);
2067 /* Create the worklist for the schedule and calculate the liveness
2068 for all nodes. We must precalculate this info, because the
2069 simulator adds new nodes (and possible before Phi nodes) which
2070 let fail the old lazy calculation.
2071 On the other hand we reduce the computation amount due to
2072 precaching from O(n^2) to O(n) at the expense of O(n) cache memory.
/* NOTE(review): blk_list/lv/worklist below are not declared in the
   visible code — presumably leftover or conditionally compiled; verify */
2074 for (i = 0, n = ARR_LEN(blk_list); i < n; ++i) {
2075 update_liveness(&sim, lv, blk_list[i]);
2076 waitq_put(worklist, blk_list[i]);
2082 block = waitq_get(sim.worklist);
2083 x87_simulate_block(&sim, block);
2084 } while (! pdeq_empty(sim.worklist));
2087 del_waitq(sim.worklist);
2088 x87_destroy_simulator(&sim);
2089 } /* x87_simulate_graph */