2 * This file implements the x87 support and virtual to stack
3 * register translation for the ia32 backend.
5 * @author: Michael Beck
18 #include "iredges_t.h"
27 #include "../belive_t.h"
28 #include "../besched_t.h"
29 #include "../benode_t.h"
30 #include "ia32_new_nodes.h"
31 #include "gen_ia32_new_nodes.h"
32 #include "gen_ia32_regalloc_if.h"
37 /* first and second binop index */
44 /* the store val index */
45 #define STORE_VAL_IDX 2
/** Map a logical stack offset into the circular st[] array.
 *  Relies on N_x87_REGS being a power of two — TODO confirm at its definition. */
#define MASK_TOS(x) ((x) & (N_x87_REGS - 1))
49 /** the debug handle */
50 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
52 /* Forward declaration. */
53 typedef struct _x87_simulator x87_simulator;
56 * An exchange template.
57 * Note that our virtual functions have the same inputs
 * and attributes as the real ones, so we can simply exchange
60 * Further, x87 supports inverse instructions, so we can handle them.
62 typedef struct _exchange_tmpl {
63 ir_op *normal_op; /**< the normal one */
64 ir_op *reverse_op; /**< the reverse one if exists */
65 ir_op *normal_pop_op; /**< the normal one with tos pop */
66 ir_op *reverse_pop_op; /**< the reverse one with tos pop */
70 * An entry on the simulated x87 stack.
72 typedef struct _st_entry {
73 int reg_idx; /**< the virtual register index of this stack value */
74 ir_node *node; /**< the node that produced this value */
80 typedef struct _x87_state {
81 st_entry st[N_x87_REGS]; /**< the register stack */
82 int depth; /**< the current stack depth */
83 int tos; /**< position of the tos */
84 x87_simulator *sim; /**< The simulator. */
87 /** An empty state, used for blocks without fp instructions. */
88 static x87_state _empty = { { {0, NULL}, }, 0, 0 };
89 static x87_state *empty = (x87_state *)&_empty;
91 /** The type of an instruction simulator function. */
92 typedef int (*sim_func)(x87_state *state, ir_node *n);
95 * A block state: Every block has a x87 state at the beginning and at the end.
97 typedef struct _blk_state {
98 x87_state *begin; /**< state at the begin or NULL if not assigned */
99 x87_state *end; /**< state at the end or NULL if not assigned */
/** Cast a pmap entry value back to a blk_state pointer. */
#define PTR_TO_BLKSTATE(p) ((blk_state *)(p))
104 /** liveness bitset for vfp registers. */
105 typedef unsigned char vfp_liveness;
110 struct _x87_simulator {
111 struct obstack obst; /**< An obstack for fast allocating. */
112 pmap *blk_states; /**< Map blocks to states. */
113 const arch_env_t *env; /**< The architecture environment. */
114 be_lv_t *lv; /**< intrablock liveness. */
115 vfp_liveness *live; /**< Liveness information. */
116 unsigned n_idx; /**< The cached get_irg_last_idx() result. */
117 waitq *worklist; /**< list of blocks to process. */
121 * Returns the current stack depth.
123 * @param state the x87 state
125 * @return the x87 stack depth
127 static int x87_get_depth(const x87_state *state) {
133 * Check if the state is empty.
135 * @param state the x87 state
137 * returns non-zero if the x87 stack is empty
139 static int x87_state_is_empty(const x87_state *state) {
140 return state->depth == 0;
145 * Return the virtual register index at st(pos).
147 * @param state the x87 state
148 * @param pos a stack position
150 * @return the vfp register index that produced the value at st(pos)
152 static int x87_get_st_reg(const x87_state *state, int pos) {
153 assert(pos < state->depth);
154 return state->st[MASK_TOS(state->tos + pos)].reg_idx;
158 * Return the node at st(pos).
160 * @param state the x87 state
161 * @param pos a stack position
163 * @return the IR node that produced the value at st(pos)
165 static ir_node *x87_get_st_node(const x87_state *state, int pos) {
166 assert(pos < state->depth);
167 return state->st[MASK_TOS(state->tos + pos)].node;
168 } /* x87_get_st_node */
172 * Dump the stack for debugging.
174 * @param state the x87 state
176 static void x87_dump_stack(const x87_state *state) {
179 for (i = state->depth - 1; i >= 0; --i) {
180 DB((dbg, LEVEL_2, "vf%d(%+F) ", x87_get_st_reg(state, i),
181 x87_get_st_node(state, i)));
183 DB((dbg, LEVEL_2, "<-- TOS\n"));
184 } /* x87_dump_stack */
185 #endif /* DEBUG_libfirm */
188 * Set a virtual register to st(pos).
190 * @param state the x87 state
191 * @param reg_idx the vfp register index that should be set
192 * @param node the IR node that produces the value of the vfp register
193 * @param pos the stack position where the new value should be entered
195 static void x87_set_st(x87_state *state, int reg_idx, ir_node *node, int pos) {
196 assert(0 < state->depth);
197 state->st[MASK_TOS(state->tos + pos)].reg_idx = reg_idx;
198 state->st[MASK_TOS(state->tos + pos)].node = node;
200 DB((dbg, LEVEL_2, "After SET_REG: "));
201 DEBUG_ONLY(x87_dump_stack(state));
205 * Set the tos virtual register.
207 * @param state the x87 state
208 * @param reg_idx the vfp register index that should be set
209 * @param node the IR node that produces the value of the vfp register
211 static void x87_set_tos(x87_state *state, int reg_idx, ir_node *node) {
212 x87_set_st(state, reg_idx, node, 0);
217 * Flush the x87 stack.
219 * @param state the x87 state
221 static void x87_flush(x87_state *state) {
228 * Swap st(0) with st(pos).
230 * @param state the x87 state
231 * @param pos the stack position to change the tos with
233 static void x87_fxch(x87_state *state, int pos) {
235 assert(pos < state->depth);
237 entry = state->st[MASK_TOS(state->tos + pos)];
238 state->st[MASK_TOS(state->tos + pos)] = state->st[MASK_TOS(state->tos)];
239 state->st[MASK_TOS(state->tos)] = entry;
241 DB((dbg, LEVEL_2, "After FXCH: ")); DEBUG_ONLY(x87_dump_stack(state));
245 * Convert a virtual register to the stack index.
247 * @param state the x87 state
248 * @param reg_idx the register vfp index
250 * @return the stack position where the register is stacked
251 * or -1 if the virtual register was not found
253 static int x87_on_stack(const x87_state *state, int reg_idx) {
254 int i, tos = state->tos;
256 for (i = 0; i < state->depth; ++i)
257 if (state->st[MASK_TOS(tos + i)].reg_idx == reg_idx)
 * Push a virtual Register onto the stack, double pushes allowed.
265 * @param state the x87 state
266 * @param reg_idx the register vfp index
267 * @param node the node that produces the value of the vfp register
269 static void x87_push_dbl(x87_state *state, int reg_idx, ir_node *node) {
270 assert(state->depth < N_x87_REGS && "stack overrun");
273 state->tos = MASK_TOS(state->tos - 1);
274 state->st[state->tos].reg_idx = reg_idx;
275 state->st[state->tos].node = node;
277 DB((dbg, LEVEL_2, "After PUSH: ")); DEBUG_ONLY(x87_dump_stack(state));
 * Push a virtual Register onto the stack, double pushes are NOT allowed.
283 * @param state the x87 state
284 * @param reg_idx the register vfp index
285 * @param node the node that produces the value of the vfp register
 * (Double pushes are rejected by an assert here; use x87_push_dbl() when a double push is wanted.)
288 static void x87_push(x87_state *state, int reg_idx, ir_node *node) {
289 assert(x87_on_stack(state, reg_idx) == -1 && "double push");
291 x87_push_dbl(state, reg_idx, node);
295 * Pop a virtual Register from the stack.
297 static void x87_pop(x87_state *state) {
298 assert(state->depth > 0 && "stack underrun");
301 state->tos = MASK_TOS(state->tos + 1);
303 DB((dbg, LEVEL_2, "After POP: ")); DEBUG_ONLY(x87_dump_stack(state));
307 * Returns the block state of a block.
309 * @param sim the x87 simulator handle
310 * @param block the current block
312 * @return the block state
314 static blk_state *x87_get_bl_state(x87_simulator *sim, ir_node *block) {
315 pmap_entry *entry = pmap_find(sim->blk_states, block);
318 blk_state *bl_state = obstack_alloc(&sim->obst, sizeof(*bl_state));
319 bl_state->begin = NULL;
320 bl_state->end = NULL;
322 pmap_insert(sim->blk_states, block, bl_state);
326 return PTR_TO_BLKSTATE(entry->value);
327 } /* x87_get_bl_state */
330 * Creates a new x87 state.
332 * @param sim the x87 simulator handle
334 * @return a new x87 state
336 static x87_state *x87_alloc_state(x87_simulator *sim) {
337 x87_state *res = obstack_alloc(&sim->obst, sizeof(*res));
341 } /* x87_alloc_state */
345 * Create a new empty x87 state.
347 * @param sim the x87 simulator handle
349 * @return a new empty x87 state
351 static x87_state *x87_alloc_empty_state(x87_simulator *sim) {
352 x87_state *res = x87_alloc_state(sim);
356 } /* x87_alloc_empty_state */
362 * @param sim the x87 simulator handle
363 * @param src the x87 state that will be cloned
365 * @return a cloned copy of the src state
367 static x87_state *x87_clone_state(x87_simulator *sim, const x87_state *src) {
368 x87_state *res = x87_alloc_state(sim);
370 memcpy(res, src, sizeof(*res));
372 } /* x87_clone_state */
375 * Patch a virtual instruction into a x87 one and return
378 * @param n the IR node to patch
379 * @param op the x87 opcode to patch in
381 static ir_node *x87_patch_insn(ir_node *n, ir_op *op) {
382 ir_mode *mode = get_irn_mode(n);
387 if (mode == mode_T) {
388 /* patch all Proj's */
389 const ir_edge_t *edge;
391 foreach_out_edge(n, edge) {
392 ir_node *proj = get_edge_src_irn(edge);
394 mode = get_irn_mode(proj);
395 if (mode_is_float(mode)) {
397 set_irn_mode(proj, mode_E);
402 else if (mode_is_float(mode))
403 set_irn_mode(n, mode_E);
405 } /* x87_patch_insn */
408 * Returns the first Proj of a mode_T node having a given mode.
410 * @param n the mode_T node
411 * @param m the desired mode of the Proj
412 * @return The first Proj of mode @p m found or NULL.
414 static ir_node *get_irn_Proj_for_mode(ir_node *n, ir_mode *m) {
415 const ir_edge_t *edge;
417 assert(get_irn_mode(n) == mode_T && "Need mode_T node");
419 foreach_out_edge(n, edge) {
420 ir_node *proj = get_edge_src_irn(edge);
421 if (get_irn_mode(proj) == m)
426 } /* get_irn_Proj_for_mode */
429 * Wrap the arch_* function here so we can check for errors.
431 static INLINE const arch_register_t *x87_get_irn_register(x87_simulator *sim, const ir_node *irn) {
432 const arch_register_t *res;
434 res = arch_get_irn_register(sim->env, irn);
435 assert(res->reg_class->regs == ia32_vfp_regs);
439 /* -------------- x87 perm --------------- */
442 * Creates a fxch for shuffle.
444 * @param state the x87 state
445 * @param pos parameter for fxch
446 * @param block the block were fxch is inserted
448 * Creates a new fxch node and reroute the user of the old node
451 * @return the fxch node
453 static ir_node *x87_fxch_shuffle(x87_state *state, int pos, ir_node *block)
458 fxch = new_rd_ia32_fxch(NULL, get_irn_irg(block), block, mode_E);
459 attr = get_ia32_attr(fxch);
460 attr->x87[0] = &ia32_st_regs[pos];
461 attr->x87[2] = &ia32_st_regs[0];
465 x87_fxch(state, pos);
467 } /* x87_fxch_shuffle */
470 * Calculate the necessary permutations to reach dst_state.
472 * These permutations are done with fxch instructions and placed
473 * at the end of the block.
475 * Note that critical edges are removed here, so we need only
476 * a shuffle if the current block has only one successor.
478 * @param sim the simulator handle
479 * @param block the current block
480 * @param state the current x87 stack state, might be modified
481 * @param dst_block the destination block
482 * @param dst_state destination state
486 static x87_state *x87_shuffle(x87_simulator *sim, ir_node *block, x87_state *state, ir_node *dst_block, const x87_state *dst_state) {
487 int i, n_cycles, k, ri;
488 unsigned cycles[4], all_mask;
489 char cycle_idx[4][8];
490 ir_node *fxch, *before, *after;
492 assert(state->depth == dst_state->depth);
494 /* Some mathematics here:
495 If we have a cycle of length n that includes the tos,
496 we need n-1 exchange operations.
497 We can always add the tos and restore it, so we need
498 n+1 exchange operations for a cycle not containing the tos.
499 So, the maximum of needed operations is for a cycle of 7
500 not including the tos == 8.
501 This is the same number of ops we would need for using stores,
502 so exchange is cheaper (we save the loads).
503 On the other hand, we might need an additional exchange
504 in the next block to bring one operand on top, so the
505 number of ops in the first case is identical.
506 Further, no more than 4 cycles can exists (4 x 2).
508 all_mask = (1 << (state->depth)) - 1;
510 for (n_cycles = 0; all_mask; ++n_cycles) {
511 int src_idx, dst_idx;
513 /* find the first free slot */
514 for (i = 0; i < state->depth; ++i) {
515 if (all_mask & (1 << i)) {
516 all_mask &= ~(1 << i);
518 /* check if there are differences here */
519 if (x87_get_st_reg(state, i) != x87_get_st_reg(dst_state, i))
525 /* no more cycles found */
530 cycles[n_cycles] = (1 << i);
531 cycle_idx[n_cycles][k++] = i;
532 for (src_idx = i; ; src_idx = dst_idx) {
533 dst_idx = x87_on_stack(dst_state, x87_get_st_reg(state, src_idx));
535 if ((all_mask & (1 << dst_idx)) == 0)
538 cycle_idx[n_cycles][k++] = dst_idx;
539 cycles[n_cycles] |= (1 << dst_idx);
540 all_mask &= ~(1 << dst_idx);
542 cycle_idx[n_cycles][k] = -1;
546 /* no permutation needed */
550 /* Hmm: permutation needed */
551 DB((dbg, LEVEL_2, "\n%+F needs permutation: from\n", block));
552 DEBUG_ONLY(x87_dump_stack(state));
553 DB((dbg, LEVEL_2, " to\n"));
554 DEBUG_ONLY(x87_dump_stack(dst_state));
558 DB((dbg, LEVEL_2, "Need %d cycles\n", n_cycles));
559 for (ri = 0; ri < n_cycles; ++ri) {
560 DB((dbg, LEVEL_2, " Ring %d:\n ", ri));
561 for (k = 0; cycle_idx[ri][k] != -1; ++k)
562 DB((dbg, LEVEL_2, " st%d ->", cycle_idx[ri][k]));
563 DB((dbg, LEVEL_2, "\n"));
 * Find the place where the node must be inserted.
571 * We have only one successor block, so the last instruction should
574 before = sched_last(block);
575 assert(is_cfop(before));
577 /* now do the permutations */
578 for (ri = 0; ri < n_cycles; ++ri) {
579 if ((cycles[ri] & 1) == 0) {
580 /* this cycle does not include the tos */
581 fxch = x87_fxch_shuffle(state, cycle_idx[ri][0], block);
583 sched_add_after(after, fxch);
585 sched_add_before(before, fxch);
588 for (k = 1; cycle_idx[ri][k] != -1; ++k) {
589 fxch = x87_fxch_shuffle(state, cycle_idx[ri][k], block);
591 sched_add_after(after, fxch);
593 sched_add_before(before, fxch);
596 if ((cycles[ri] & 1) == 0) {
597 /* this cycle does not include the tos */
598 fxch = x87_fxch_shuffle(state, cycle_idx[ri][0], block);
599 sched_add_after(after, fxch);
606 * Create a fxch node before another node.
608 * @param state the x87 state
609 * @param n the node after the fxch
610 * @param pos exchange st(pos) with st(0)
611 * @param op_idx if >= 0, replace input op_idx of n with the fxch result
615 static ir_node *x87_create_fxch(x87_state *state, ir_node *n, int pos, int op_idx) {
618 ir_graph *irg = get_irn_irg(n);
619 ir_node *block = get_nodes_block(n);
621 x87_fxch(state, pos);
623 fxch = new_rd_ia32_fxch(NULL, irg, block, mode_E);
624 attr = get_ia32_attr(fxch);
625 attr->x87[0] = &ia32_st_regs[pos];
626 attr->x87[2] = &ia32_st_regs[0];
630 sched_add_before(n, fxch);
631 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fxch), attr->x87[0]->name, attr->x87[2]->name));
633 } /* x87_create_fxch */
636 * Create a fpush before node n.
638 * @param state the x87 state
639 * @param n the node after the fpush
640 * @param pos push st(pos) on stack
641 * @param op_idx replace input op_idx of n with the fpush result
643 static void x87_create_fpush(x87_state *state, ir_node *n, int pos, int op_idx) {
644 ir_node *fpush, *pred = get_irn_n(n, op_idx);
646 const arch_register_t *out = x87_get_irn_register(state->sim, pred);
648 x87_push_dbl(state, arch_register_get_index(out), pred);
650 fpush = new_rd_ia32_fpush(NULL, get_irn_irg(n), get_nodes_block(n), mode_E);
651 attr = get_ia32_attr(fpush);
652 attr->x87[0] = &ia32_st_regs[pos];
653 attr->x87[2] = &ia32_st_regs[0];
656 sched_add_before(n, fpush);
658 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fpush), attr->x87[0]->name, attr->x87[2]->name));
659 } /* x87_create_fpush */
662 * Create a fpop before node n.
664 * @param state the x87 state
665 * @param n the node after the fpop
666 * @param num pop 1 or 2 values
667 * @param pred node to use as predecessor of the fpop
669 * @return the fpop node
671 static ir_node *x87_create_fpop(x87_state *state, ir_node *n, int num, ir_node *pred) {
672 ir_node *fpop = pred;
679 fpop = new_rd_ia32_fpop(NULL, get_irn_irg(n), get_nodes_block(n), mode_E);
680 attr = get_ia32_attr(fpop);
681 attr->x87[0] = &ia32_st_regs[0];
682 attr->x87[1] = &ia32_st_regs[0];
683 attr->x87[2] = &ia32_st_regs[0];
685 sched_add_before(n, fpop);
686 DB((dbg, LEVEL_1, "<<< %s %s\n", get_irn_opname(fpop), attr->x87[0]->name));
692 } /* x87_create_fpop */
695 * Creates an fldz before node n
697 * @param state the x87 state
698 * @param n the node after the fldz
700 * @return the fldz node
702 static ir_node *x87_create_fldz(x87_state *state, ir_node *n, int regidx) {
703 ir_graph *irg = get_irn_irg(n);
704 ir_node *block = get_nodes_block(n);
707 fldz = new_rd_ia32_fldz(NULL, irg, block, mode_E);
709 sched_add_before(n, fldz);
710 DB((dbg, LEVEL_1, "<<< %s\n", get_irn_opname(fldz)));
713 x87_push(state, regidx, fldz);
718 /* --------------------------------- liveness ------------------------------------------ */
721 * The liveness transfer function.
722 * Updates a live set over a single step from a given node to its predecessor.
723 * Everything defined at the node is removed from the set, the uses of the node get inserted.
725 * @param sim The simulator handle.
726 * @param irn The node at which liveness should be computed.
727 * @param live The bitset of registers live before @p irn. This set gets modified by updating it to
728 * the registers live after irn.
730 * @return The live bitset.
732 static vfp_liveness vfp_liveness_transfer(x87_simulator *sim, ir_node *irn, vfp_liveness live)
735 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
736 const arch_env_t *arch_env = sim->env;
738 if (arch_irn_consider_in_reg_alloc(arch_env, cls, irn)) {
739 const arch_register_t *reg = x87_get_irn_register(sim, irn);
740 live &= ~(1 << arch_register_get_index(reg));
743 for (i = 0, n = get_irn_arity(irn); i < n; ++i) {
744 ir_node *op = get_irn_n(irn, i);
746 if (mode_is_float(get_irn_mode(op)) && arch_irn_consider_in_reg_alloc(arch_env, cls, op)) {
747 const arch_register_t *reg = x87_get_irn_register(sim, op);
748 live |= 1 << arch_register_get_index(reg);
752 } /* vfp_liveness_transfer */
755 * Put all live virtual registers at the end of a block into a bitset.
757 * @param sim the simulator handle
758 * @param lv the liveness information
759 * @param bl the block
761 * @return The live bitset at the end of this block
763 static vfp_liveness vfp_liveness_end_of_block(x87_simulator *sim, const ir_node *block)
766 vfp_liveness live = 0;
767 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
768 const arch_env_t *arch_env = sim->env;
769 const be_lv_t *lv = sim->lv;
771 be_lv_foreach(lv, block, be_lv_state_end, i) {
772 const arch_register_t *reg;
773 const ir_node *node = be_lv_get_irn(lv, block, i);
774 if (!arch_irn_consider_in_reg_alloc(arch_env, cls, node))
777 reg = x87_get_irn_register(sim, node);
778 live |= 1 << arch_register_get_index(reg);
782 } /* vfp_liveness_end_of_block */
784 /** get the register mask from an arch_register */
785 #define REGMASK(reg) (1 << (arch_register_get_index(reg)))
788 * Return a bitset of argument registers which are live at the end of a node.
790 * @param sim the simulator handle
791 * @param pos the node
792 * @param kill kill mask for the output registers
794 * @return The live bitset.
796 static unsigned vfp_live_args_after(x87_simulator *sim, const ir_node *pos, unsigned kill)
798 unsigned idx = get_irn_idx(pos);
800 assert(idx < sim->n_idx);
801 return sim->live[idx] & ~kill;
802 } /* vfp_live_args_after */
805 * Calculate the liveness for a whole block and cache it.
807 * @param sim the simulator handle
808 * @param lv the liveness handle
809 * @param block the block
811 static void update_liveness(x87_simulator *sim, ir_node *block) {
812 vfp_liveness live = vfp_liveness_end_of_block(sim, block);
816 /* now iterate through the block backward and cache the results */
817 sched_foreach_reverse(block, irn) {
818 /* stop at the first Phi: this produces the live-in */
822 idx = get_irn_idx(irn);
823 sim->live[idx] = live;
825 live = vfp_liveness_transfer(sim, irn, live);
827 idx = get_irn_idx(block);
828 sim->live[idx] = live;
829 } /* update_liveness */
832 * Returns true if a register is live in a set.
834 * @param reg_idx the vfp register index
835 * @param live a live bitset
837 #define is_vfp_live(reg_idx, live) ((live) & (1 << (reg_idx)))
841 * Dump liveness info.
843 * @param live the live bitset
845 static void vfp_dump_live(vfp_liveness live) {
848 DB((dbg, LEVEL_2, "Live after: "));
849 for (i = 0; i < 8; ++i) {
850 if (live & (1 << i)) {
851 DB((dbg, LEVEL_2, "vf%d ", i));
854 DB((dbg, LEVEL_2, "\n"));
855 } /* vfp_dump_live */
856 #endif /* DEBUG_libfirm */
858 /* --------------------------------- simulators ---------------------------------------- */
/** Exchange two int lvalues via a temporary.
 *  NOTE: each argument is evaluated twice — do not pass expressions with side effects. */
#define XCHG(a, b) do { int t = (a); (a) = (b); (b) = t; } while (0)
863 * Simulate a virtual binop.
865 * @param state the x87 state
866 * @param n the node that should be simulated (and patched)
867 * @param tmpl the template containing the 4 possible x87 opcodes
869 static int sim_binop(x87_state *state, ir_node *n, const exchange_tmpl *tmpl) {
870 int op2_idx, op1_idx;
871 int out_idx, do_pop = 0;
873 ir_node *patched_insn;
875 x87_simulator *sim = state->sim;
876 const arch_register_t *op1 = x87_get_irn_register(sim, get_irn_n(n, BINOP_IDX_1));
877 const arch_register_t *op2 = x87_get_irn_register(sim, get_irn_n(n, BINOP_IDX_2));
878 const arch_register_t *out = x87_get_irn_register(sim, n);
879 int reg_index_1 = arch_register_get_index(op1);
880 int reg_index_2 = arch_register_get_index(op2);
881 vfp_liveness live = vfp_live_args_after(sim, n, REGMASK(out));
883 DB((dbg, LEVEL_1, ">>> %+F %s, %s -> %s\n", n,
884 arch_register_get_name(op1), arch_register_get_name(op2),
885 arch_register_get_name(out)));
886 DEBUG_ONLY(vfp_dump_live(live));
887 DB((dbg, LEVEL_1, "Stack before: "));
888 DEBUG_ONLY(x87_dump_stack(state));
890 op1_idx = x87_on_stack(state, reg_index_1);
891 assert(op1_idx >= 0);
893 if (reg_index_2 != REG_VFP_NOREG) {
894 /* second operand is a vfp register */
895 op2_idx = x87_on_stack(state, reg_index_2);
896 assert(op2_idx >= 0);
898 if (is_vfp_live(arch_register_get_index(op2), live)) {
899 /* Second operand is live. */
901 if (is_vfp_live(arch_register_get_index(op1), live)) {
902 /* Both operands are live: push the first one.
903 This works even for op1 == op2. */
904 x87_create_fpush(state, n, op1_idx, BINOP_IDX_2);
905 /* now do fxxx (tos=tos X op) */
909 dst = tmpl->normal_op;
911 /* Second live, first operand is dead here, bring it to tos. */
913 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
918 /* now do fxxx (tos=tos X op) */
920 dst = tmpl->normal_op;
923 /* Second operand is dead. */
924 if (is_vfp_live(arch_register_get_index(op1), live)) {
925 /* First operand is live: bring second to tos. */
927 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
932 /* now do fxxxr (tos = op X tos) */
934 dst = tmpl->reverse_op;
936 /* Both operands are dead here, pop them from the stack. */
/* Both are identical and on tos, no pop needed. */
940 /* here fxxx (tos = tos X tos) */
941 dst = tmpl->normal_op;
944 /* now do fxxxp (op = op X tos, pop) */
945 dst = tmpl->normal_pop_op;
949 } else if (op1_idx == 0) {
950 assert(op1_idx != op2_idx);
951 /* now do fxxxrp (op = tos X op, pop) */
952 dst = tmpl->reverse_pop_op;
956 /* Bring the second on top. */
957 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
958 if (op1_idx == op2_idx) {
/* Both are identical and on tos now, no pop needed. */
962 /* use fxxx (tos = tos X tos) */
963 dst = tmpl->normal_op;
966 /* op2 is on tos now */
968 /* use fxxxp (op = op X tos, pop) */
969 dst = tmpl->normal_pop_op;
977 /* second operand is an address mode */
978 if (is_vfp_live(arch_register_get_index(op1), live)) {
979 /* first operand is live: push it here */
980 x87_create_fpush(state, n, op1_idx, BINOP_IDX_1);
982 /* use fxxx (tos = tos X mem) */
983 dst = tmpl->normal_op;
986 /* first operand is dead: bring it to tos */
988 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
992 /* use fxxxp (tos = tos X mem) */
993 dst = tmpl->normal_op;
998 patched_insn = x87_patch_insn(n, dst);
999 x87_set_st(state, arch_register_get_index(out), patched_insn, out_idx);
1004 /* patch the operation */
1005 attr = get_ia32_attr(n);
1006 attr->x87[0] = op1 = &ia32_st_regs[op1_idx];
1007 if (reg_index_2 != REG_VFP_NOREG) {
1008 attr->x87[1] = op2 = &ia32_st_regs[op2_idx];
1010 attr->x87[2] = out = &ia32_st_regs[out_idx];
1012 if (reg_index_2 != REG_VFP_NOREG) {
1013 DB((dbg, LEVEL_1, "<<< %s %s, %s -> %s\n", get_irn_opname(n),
1014 arch_register_get_name(op1), arch_register_get_name(op2),
1015 arch_register_get_name(out)));
1017 DB((dbg, LEVEL_1, "<<< %s %s, [AM] -> %s\n", get_irn_opname(n),
1018 arch_register_get_name(op1),
1019 arch_register_get_name(out)));
1026 * Simulate a virtual Unop.
1028 * @param state the x87 state
1029 * @param n the node that should be simulated (and patched)
1030 * @param op the x87 opcode that will replace n's opcode
1032 static int sim_unop(x87_state *state, ir_node *n, ir_op *op) {
1033 int op1_idx, out_idx;
1034 x87_simulator *sim = state->sim;
1035 const arch_register_t *op1 = x87_get_irn_register(sim, get_irn_n(n, UNOP_IDX));
1036 const arch_register_t *out = x87_get_irn_register(sim, n);
1038 unsigned live = vfp_live_args_after(sim, n, REGMASK(out));
1040 DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, out->name));
1041 DEBUG_ONLY(vfp_dump_live(live));
1043 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1045 if (is_vfp_live(arch_register_get_index(op1), live)) {
1046 /* push the operand here */
1047 x87_create_fpush(state, n, op1_idx, UNOP_IDX);
1051 /* operand is dead, bring it to tos */
1053 x87_create_fxch(state, n, op1_idx, UNOP_IDX);
1058 x87_set_tos(state, arch_register_get_index(out), x87_patch_insn(n, op));
1060 attr = get_ia32_attr(n);
1061 attr->x87[0] = op1 = &ia32_st_regs[0];
1062 attr->x87[2] = out = &ia32_st_regs[0];
1063 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), out->name));
1069 * Simulate a virtual Load instruction.
1071 * @param state the x87 state
1072 * @param n the node that should be simulated (and patched)
1073 * @param op the x87 opcode that will replace n's opcode
1075 static int sim_load(x87_state *state, ir_node *n, ir_op *op) {
1076 const arch_register_t *out = x87_get_irn_register(state->sim, n);
1079 DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, arch_register_get_name(out)));
1080 x87_push(state, arch_register_get_index(out), x87_patch_insn(n, op));
1081 assert(out == x87_get_irn_register(state->sim, n));
1082 attr = get_ia32_attr(n);
1083 attr->x87[2] = out = &ia32_st_regs[0];
1084 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), arch_register_get_name(out)));
 * Rewire all users of @p old_val to @p new_val iff they are scheduled after @p store.
1092 * @param store The store
1093 * @param old_val The former value
1094 * @param new_val The new value
1096 static void collect_and_rewire_users(ir_node *store, ir_node *old_val, ir_node *new_val) {
1097 const ir_edge_t *edge, *ne;
1099 foreach_out_edge_safe(old_val, edge, ne) {
1100 ir_node *user = get_edge_src_irn(edge);
1102 if (! user || user == store)
1105 /* if the user is scheduled after the store: rewire */
1106 if (sched_is_scheduled(user) && sched_comes_after(store, user)) {
1108 /* find the input of the user pointing to the old value */
1109 for (i = get_irn_arity(user) - 1; i >= 0; i--) {
1110 if (get_irn_n(user, i) == old_val)
1111 set_irn_n(user, i, new_val);
1115 } /* collect_and_rewire_users */
1118 * Simulate a virtual Store.
1120 * @param state the x87 state
1121 * @param n the node that should be simulated (and patched)
1122 * @param op the x87 store opcode
1123 * @param op_p the x87 store and pop opcode
1125 static int sim_store(x87_state *state, ir_node *n, ir_op *op, ir_op *op_p) {
1126 x87_simulator *sim = state->sim;
1127 ir_node *val = get_irn_n(n, STORE_VAL_IDX);
1128 const arch_register_t *op2 = x87_get_irn_register(sim, val);
1129 unsigned live = vfp_live_args_after(sim, n, 0);
1132 int op2_reg_idx, op2_idx, depth;
1133 int live_after_node;
1136 op2_reg_idx = arch_register_get_index(op2);
1137 if (op2_reg_idx == REG_VFP_UKNWN) {
1138 // just take any value from stack
1139 if(state->depth > 0) {
1141 DEBUG_ONLY(op2 = NULL);
1142 live_after_node = 1;
// produce a new value which we will consume immediately
1145 x87_create_fldz(state, n, op2_reg_idx);
1146 live_after_node = 0;
1147 op2_idx = x87_on_stack(state, op2_reg_idx);
1148 assert(op2_idx >= 0);
1151 op2_idx = x87_on_stack(state, op2_reg_idx);
1152 live_after_node = is_vfp_live(arch_register_get_index(op2), live);
1153 assert(op2_idx >= 0);
1154 DB((dbg, LEVEL_1, ">>> %+F %s ->\n", n, arch_register_get_name(op2)));
1157 mode = get_ia32_ls_mode(n);
1158 depth = x87_get_depth(state);
1160 if (live_after_node) {
1162 Problem: fst doesn't support mode_E (spills), only fstp does
1164 - stack not full: push value and fstp
1165 - stack full: fstp value and load again
1167 if (mode == mode_E) {
1168 if (depth < N_x87_REGS) {
1169 /* ok, we have a free register: push + fstp */
1170 x87_create_fpush(state, n, op2_idx, STORE_VAL_IDX);
1172 x87_patch_insn(n, op_p);
1175 ir_node *vfld, *mem, *block, *rproj, *mproj;
1178 /* stack full here: need fstp + load */
1180 x87_patch_insn(n, op_p);
1182 block = get_nodes_block(n);
1183 irg = get_irn_irg(n);
1184 vfld = new_rd_ia32_vfld(NULL, irg, block, get_irn_n(n, 0), get_irn_n(n, 1), new_rd_NoMem(irg));
1186 /* copy all attributes */
1187 set_ia32_frame_ent(vfld, get_ia32_frame_ent(n));
1188 if (is_ia32_use_frame(n))
1189 set_ia32_use_frame(vfld);
1190 set_ia32_am_flavour(vfld, get_ia32_am_flavour(n));
1191 set_ia32_op_type(vfld, ia32_am_Source);
1192 add_ia32_am_offs_int(vfld, get_ia32_am_offs_int(n));
1193 set_ia32_am_sc(vfld, get_ia32_am_sc(n));
1194 set_ia32_ls_mode(vfld, get_ia32_ls_mode(n));
1196 rproj = new_r_Proj(irg, block, vfld, get_ia32_ls_mode(vfld), pn_ia32_vfld_res);
1197 mproj = new_r_Proj(irg, block, vfld, mode_M, pn_ia32_vfld_M);
1198 mem = get_irn_Proj_for_mode(n, mode_M);
1200 assert(mem && "Store memory not found");
1202 arch_set_irn_register(sim->env, rproj, op2);
1204 /* reroute all former users of the store memory to the load memory */
1205 edges_reroute(mem, mproj, irg);
1206 /* set the memory input of the load to the store memory */
1207 set_irn_n(vfld, 2, mem);
1209 sched_add_after(n, vfld);
1210 sched_add_after(vfld, rproj);
1212 /* rewire all users, scheduled after the store, to the loaded value */
1213 collect_and_rewire_users(n, val, rproj);
1219 /* we can only store the tos to memory */
1221 x87_create_fxch(state, n, op2_idx, STORE_VAL_IDX);
1223 /* mode != mode_E -> use normal fst */
1224 x87_patch_insn(n, op);
1228 /* we can only store the tos to memory */
1230 x87_create_fxch(state, n, op2_idx, STORE_VAL_IDX);
1233 x87_patch_insn(n, op_p);
1236 attr = get_ia32_attr(n);
1237 attr->x87[1] = op2 = &ia32_st_regs[0];
1238 DB((dbg, LEVEL_1, "<<< %s %s ->\n", get_irn_opname(n), arch_register_get_name(op2)));
1244 * Simulate a virtual Phi.
1245 * Just for cosmetic reasons change the mode of Phi nodes to mode_E.
1247 * @param state the x87 state
1248 * @param n the node that should be simulated (and patched)
1249 * @param env the architecture environment
1251 static int sim_Phi(x87_state *state, ir_node *n, const arch_env_t *env) {
1252 ir_mode *mode = get_irn_mode(n);
1254 if (mode_is_float(mode))
1255 set_irn_mode(n, mode_E);
/* Generator macros: each expands into a sim_<op>() handler that forwards
   to the generic sim_binop()/sim_load()/sim_unop()/sim_store() helpers
   with the matching ia32 opcode(s).  _GEN_BINOP also passes the reverse
   and pop variants so the binop simulator can choose the best encoding. */
1260 #define _GEN_BINOP(op, rev) \
1261 static int sim_##op(x87_state *state, ir_node *n) { \
1262 exchange_tmpl tmpl = { op_ia32_##op, op_ia32_##rev, op_ia32_##op##p, op_ia32_##rev##p }; \
1263 return sim_binop(state, n, &tmpl); \
1266 #define GEN_BINOP(op) _GEN_BINOP(op, op)
1267 #define GEN_BINOPR(op) _GEN_BINOP(op, op##r)
/* loads: sim_<op>() delegates to sim_load() with opcode op_ia32_<nop> */
1269 #define GEN_LOAD2(op, nop) \
1270 static int sim_##op(x87_state *state, ir_node *n) { \
1271 return sim_load(state, n, op_ia32_##nop); \
1274 #define GEN_LOAD(op) GEN_LOAD2(op, op)
/* unops take a single opcode; stores take the normal and the pop variant */
1276 #define GEN_UNOP(op) \
1277 static int sim_##op(x87_state *state, ir_node *n) { \
1278 return sim_unop(state, n, op_ia32_##op); \
1281 #define GEN_STORE(op) \
1282 static int sim_##op(x87_state *state, ir_node *n) { \
1283 return sim_store(state, n, op_ia32_##op, op_ia32_##op##p); \
1303 GEN_LOAD2(fConst, fldConst)
1309 * Simulate a fCondJmp.
1311 * @param state the x87 state
1312 * @param n the node that should be simulated (and patched)
1314 static int sim_fCondJmp(x87_state *state, ir_node *n) {
/* Select the concrete fcom flavour (plain / reversed / one-pop / two-pop)
   depending on where the two operands sit on the simulated stack and
   whether they are still live after the compare.  Dead operands are
   popped by using the f*p/f*pp variants. */
1320 x87_simulator *sim = state->sim;
1321 const arch_register_t *op1 = x87_get_irn_register(sim, get_irn_n(n, BINOP_IDX_1));
1322 const arch_register_t *op2 = x87_get_irn_register(sim, get_irn_n(n, BINOP_IDX_2));
1323 int reg_index_1 = arch_register_get_index(op1);
1324 int reg_index_2 = arch_register_get_index(op2);
1325 unsigned live = vfp_live_args_after(sim, n, 0);
1327 DB((dbg, LEVEL_1, ">>> %+F %s, %s\n", n,
1328 arch_register_get_name(op1), arch_register_get_name(op2)));
1329 DEBUG_ONLY(vfp_dump_live(live));
1330 DB((dbg, LEVEL_1, "Stack before: "));
1331 DEBUG_ONLY(x87_dump_stack(state));
1333 op1_idx = x87_on_stack(state, reg_index_1);
1334 assert(op1_idx >= 0);
1336 /* BEWARE: check for comp a,a cases, they might happen */
1337 if (reg_index_2 != REG_VFP_NOREG) {
1338 /* second operand is a vfp register */
1339 op2_idx = x87_on_stack(state, reg_index_2);
1340 assert(op2_idx >= 0);
1342 if (is_vfp_live(arch_register_get_index(op2), live)) {
1343 /* second operand is live */
1345 if (is_vfp_live(arch_register_get_index(op1), live)) {
1346 /* both operands are live */
1349 dst = op_ia32_fcomJmp;
1350 } else if (op2_idx == 0) {
1351 dst = op_ia32_fcomrJmp;
1353 /* bring the first one to tos */
1354 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1358 dst = op_ia32_fcomJmp;
1361 /* second live, first operand is dead here, bring it to tos.
1362 This means further, op1_idx != op2_idx. */
1363 assert(op1_idx != op2_idx);
1365 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1370 dst = op_ia32_fcompJmp;
1374 /* second operand is dead */
1375 if (is_vfp_live(arch_register_get_index(op1), live)) {
1376 /* first operand is live: bring second to tos.
1377 This means further, op1_idx != op2_idx. */
1378 assert(op1_idx != op2_idx);
1380 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1385 dst = op_ia32_fcomrpJmp;
1388 /* both operands are dead here, check first for identity. */
1389 if (op1_idx == op2_idx) {
1390 /* identical, one pop needed */
1392 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1396 dst = op_ia32_fcompJmp;
1399 /* different, move them to st and st(1) and pop both.
1400 The tricky part is to get one into st(1).*/
1401 else if (op2_idx == 1) {
1402 /* good, second operand is already in the right place, move the first */
1404 /* bring the first on top */
1405 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1408 dst = op_ia32_fcomppJmp;
1410 } else if (op1_idx == 1) {
1411 /* good, first operand is already in the right place, move the second */
1413 /* bring the first on top */
1414 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1417 dst = op_ia32_fcomrppJmp;
1420 /* if one is already the TOS, we need two fxch */
1422 /* first one is TOS, move to st(1) */
1423 x87_create_fxch(state, n, 1, BINOP_IDX_1);
1425 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1427 dst = op_ia32_fcomrppJmp;
1429 } else if (op2_idx == 0) {
1430 /* second one is TOS, move to st(1) */
1431 x87_create_fxch(state, n, 1, BINOP_IDX_2);
1433 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1435 dst = op_ia32_fcomrppJmp;
1438 /* none of them is either TOS or st(1), 3 fxch needed */
1439 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1440 x87_create_fxch(state, n, 1, BINOP_IDX_2);
1442 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1444 dst = op_ia32_fcomppJmp;
1451 /* second operand is an address mode */
1452 if (is_vfp_live(arch_register_get_index(op1), live)) {
1453 /* first operand is live: bring it to TOS */
1455 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1458 dst = op_ia32_fcomJmp;
1460 /* first operand is dead: bring it to tos */
1462 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1465 dst = op_ia32_fcompJmp;
/* replace the virtual fCondJmp by the concrete fcom variant selected above */
1470 x87_patch_insn(n, dst);
1471 assert(pop_cnt < 3);
1477 /* patch the operation */
1478 attr = get_ia32_attr(n);
1479 op1 = &ia32_st_regs[op1_idx];
1482 op2 = &ia32_st_regs[op2_idx];
/* compares produce no x87 result register */
1485 attr->x87[2] = NULL;
1488 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(n),
1489 arch_register_get_name(op1), arch_register_get_name(op2)));
1491 DB((dbg, LEVEL_1, "<<< %s %s, [AM]\n", get_irn_opname(n),
1492 arch_register_get_name(op1)));
1495 } /* sim_fCondJmp */
/**
 * Create the x87 replacement for a Copy: known fld* constants are
 * rematerialized by emitting the constant load again, everything else
 * becomes an fpushCopy of the operand's stack slot.  The new value is
 * pushed onto the simulated stack; the new node is returned.
 *
 * @param state  the x87 state
 * @param n      the Copy node to replace
 */
1497 static ir_node *create_Copy(x87_state *state, ir_node *n) {
1498 x87_simulator *sim = state->sim;
1499 ir_graph *irg = get_irn_irg(n);
1500 dbg_info *n_dbg = get_irn_dbg_info(n);
1501 ir_mode *mode = get_irn_mode(n);
1502 ir_node *block = get_nodes_block(n);
1503 ir_node *pred = get_irn_n(n, 0);
1504 ir_node *(*cnstr)(dbg_info *, ir_graph *, ir_node *, ir_mode *) = NULL;
1506 const arch_register_t *out;
1507 const arch_register_t *op1;
1510 /* Do not copy constants, recreate them. */
1511 switch (get_ia32_irn_opcode(pred)) {
1513 cnstr = new_rd_ia32_fldz;
1516 cnstr = new_rd_ia32_fld1;
1518 case iro_ia32_fldpi:
1519 cnstr = new_rd_ia32_fldpi;
1521 case iro_ia32_fldl2e:
1522 cnstr = new_rd_ia32_fldl2e;
1524 case iro_ia32_fldl2t:
1525 cnstr = new_rd_ia32_fldl2t;
1527 case iro_ia32_fldlg2:
1528 cnstr = new_rd_ia32_fldlg2;
1530 case iro_ia32_fldln2:
1531 cnstr = new_rd_ia32_fldln2;
1535 out = x87_get_irn_register(sim, n);
1536 op1 = x87_get_irn_register(sim, pred);
1539 /* copy a constant */
1540 res = (*cnstr)(n_dbg, irg, block, mode);
1542 attr = get_ia32_attr(res);
1543 attr->x87[2] = out = &ia32_st_regs[0];
1545 int op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1547 res = new_rd_ia32_fpushCopy(n_dbg, irg, block, pred, mode);
1549 attr = get_ia32_attr(res);
1550 attr->x87[0] = op1 = &ia32_st_regs[op1_idx];
1551 attr->x87[2] = out = &ia32_st_regs[0];
/* the pushed value always ends up in st(0) */
1554 arch_set_irn_register(sim->env, res, out);
1555 x87_push(state, arch_register_get_index(out), res);
1557 DB((dbg, LEVEL_1, ">>> %+F -> %s\n", res, arch_register_get_name(out)));
1562 * Simulate a be_Copy.
1564 * @param state the x87 state
1565 * @param n the node that should be simulated (and patched)
1567 static int sim_Copy(x87_state *state, ir_node *n) {
1570 const arch_register_t *out;
1571 const arch_register_t *op1;
1572 ir_node *node, *next;
1574 int op1_idx, out_idx;
1577 ir_mode *mode = get_irn_mode(n);
/* only floating point copies live on the x87 stack */
1579 if (!mode_is_float(mode))
1583 pred = get_irn_n(n, 0);
1584 out = x87_get_irn_register(sim, n);
1585 op1 = x87_get_irn_register(sim, pred);
1586 live = vfp_live_args_after(sim, n, REGMASK(out));
1588 DB((dbg, LEVEL_1, ">>> %+F %s -> %s\n", n,
1589 arch_register_get_name(op1), arch_register_get_name(out)));
1590 DEBUG_ONLY(vfp_dump_live(live));
1592 /* handle the infamous unknown value */
1593 if (arch_register_get_index(op1) == REG_VFP_UKNWN) {
1594 /* Matze: copies of unknowns should not happen (anymore) */
1598 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1600 if (is_vfp_live(arch_register_get_index(op1), live)) {
1601 /* Operand is still live, a real copy. We need here an fpush that can
1602 hold a register, so use the fpushCopy or recreate constants */
1603 node = create_Copy(state, n);
1605 next = sched_next(n);
1608 sched_add_before(next, node);
1609 DB((dbg, LEVEL_1, ">>> %+F %s -> %s\n", node, op1->name, out->name));
/* operand is dead: the copy merely renames a stack slot or kills op1 */
1611 out_idx = x87_on_stack(state, arch_register_get_index(out));
1613 if (out_idx >= 0 && out_idx != op1_idx) {
1614 /* Matze: out already on stack? how can this happen? */
1617 /* op1 must be killed and placed where out is */
1619 /* best case, simple remove and rename */
1620 x87_patch_insn(n, op_ia32_Pop);
1621 attr = get_ia32_attr(n);
1622 attr->x87[0] = op1 = &ia32_st_regs[0];
1625 x87_set_st(state, arch_register_get_index(out), n, op1_idx - 1);
1627 /* move op1 to tos, store and pop it */
1629 x87_create_fxch(state, n, op1_idx, 0);
1632 x87_patch_insn(n, op_ia32_Pop);
1633 attr = get_ia32_attr(n);
1634 attr->x87[0] = op1 = &ia32_st_regs[out_idx];
1637 x87_set_st(state, arch_register_get_index(out), n, out_idx - 1);
1639 DB((dbg, LEVEL_1, ">>> %+F %s\n", n, op1->name));
1641 /* just a virtual copy */
1642 x87_set_st(state, arch_register_get_index(out), get_unop_op(n), op1_idx);
1644 DB((dbg, LEVEL_1, ">>> KILLED %s\n", get_irn_opname(n)));
1645 exchange(n, get_unop_op(n));
1653 * Simulate a be_Call.
1655 * @param state the x87 state
1656 * @param n the node that should be simulated
1657 * @param env the architecture environment
 * @return presumably non-zero when nodes were inserted after n, as with
 *         all sim_func handlers (see x87_simulate_block) -- TODO confirm
1659 static int sim_Call(x87_state *state, ir_node *n, const arch_env_t *env) {
1660 ir_type *call_tp = be_Call_get_type(n);
1662 /* at the begin of a call the x87 state should be empty */
1663 assert(state->depth == 0 && "stack not empty before call");
1666 * If the called function returns a float, it is returned in st(0).
1667 * This even happens if the return value is NOT used.
1668 * Moreover, only one return result is supported.
1670 if (get_method_n_ress(call_tp) > 0) {
1671 ir_type *res_type = get_method_res_type(call_tp, 0);
1672 ir_mode *mode = get_type_mode(res_type);
1674 if (mode && mode_is_float(mode)) {
1676 * TODO: what to push here? The result might be unused and currently
1677 * we have no possibility to detect this :-(
1679 x87_push(state, 0, n);
1687 * Simulate a be_Spill.
1689 * @param state the x87 state
1690 * @param n the node that should be simulated (and patched)
1691 * @param env the architecture environment
1693 * Should not happen, spills are lowered before x87 simulator see them.
1695 static int sim_Spill(x87_state *state, ir_node *n) {
1696 assert(0 && "Spill not lowered");
/* fall back to a plain fst in release builds where asserts are disabled */
1697 return sim_fst(state, n);
1701 * Simulate a be_Reload.
1703 * @param state the x87 state
1704 * @param n the node that should be simulated (and patched)
1705 * @param env the architecture environment
1707 * Should not happen, reloads are lowered before x87 simulator see them.
1709 static int sim_Reload(x87_state *state, ir_node *n) {
1710 assert(0 && "Reload not lowered");
/* fall back to a plain fld in release builds where asserts are disabled */
1711 return sim_fld(state, n);
1715 * Simulate a be_Return.
1717 * @param state the x87 state
1718 * @param n the node that should be simulated (and patched)
1719 * @param env the architecture environment
1721 static int sim_Return(x87_state *state, ir_node *n) {
1722 int n_res = be_Return_get_n_rets(n);
1723 int i, n_float_res = 0;
1725 /* only floating point return values must reside on the stack */
1726 for (i = 0; i < n_res; ++i) {
1727 ir_node *res = get_irn_n(n, be_pos_Return_val + i);
1729 if (mode_is_float(get_irn_mode(res)))
/* at a Return exactly the float results may remain on the x87 stack */
1732 assert(x87_get_depth(state) == n_float_res);
1734 /* pop them virtually */
1735 for (i = n_float_res - 1; i >= 0; --i)
/* in/out register pair for a Perm -- NOTE(review): not referenced in the
   visible code of sim_Perm; verify usage against the full file */
1741 typedef struct _perm_data_t {
1742 const arch_register_t *in;
1743 const arch_register_t *out;
1747 * Simulate a be_Perm.
1749 * @param state the x87 state
1750 * @param irn the node that should be simulated (and patched)
1752 static int sim_Perm(x87_state *state, ir_node *irn) {
1754 x87_simulator *sim = state->sim;
1755 ir_node *pred = get_irn_n(irn, 0);
1757 const ir_edge_t *edge;
1759 /* handle only floating point Perms */
1760 if (! mode_is_float(get_irn_mode(pred)))
1763 DB((dbg, LEVEL_1, ">>> %+F\n", irn));
1765 /* Perm is a pure virtual instruction on x87.
1766 All inputs must be on the FPU stack and are pairwise
1767 different from each other.
1768 So, all we need to do is to permutate the stack state. */
1769 n = get_irn_arity(irn);
1770 NEW_ARR_A(int, stack_pos, n);
1772 /* collect old stack positions */
1773 for (i = 0; i < n; ++i) {
1774 const arch_register_t *inreg = x87_get_irn_register(sim, get_irn_n(irn, i));
1775 int idx = x87_on_stack(state, arch_register_get_index(inreg));
1777 assert(idx >= 0 && "Perm argument not on x87 stack");
1781 /* now do the permutation */
1782 foreach_out_edge(irn, edge) {
1783 ir_node *proj = get_edge_src_irn(edge);
1784 const arch_register_t *out = x87_get_irn_register(sim, proj);
1785 long num = get_Proj_proj(proj);
1787 assert(0 <= num && num < n && "More Proj's than Perm inputs");
/* Proj num takes over the stack slot that input num occupied */
1788 x87_set_st(state, arch_register_get_index(out), proj, stack_pos[(unsigned)num]);
1790 DB((dbg, LEVEL_1, "<<< %+F\n", irn));
1796 * Kill any dead registers at block start by popping them from the stack.
1798 * @param sim the simulator handle
1799 * @param block the current block
1800 * @param start_state the x87 state at the begin of the block
 * @return the (possibly cloned) state after the dead values were popped
1802 static x87_state *x87_kill_deads(x87_simulator *sim, ir_node *block, x87_state *start_state) {
1803 x87_state *state = start_state;
1804 ir_node *first_insn = sched_first(block);
1805 ir_node *keep = NULL;
1806 unsigned live = vfp_live_args_after(sim, block, 0);
1808 int i, depth, num_pop;
/* compute kill_mask: bit i set means the value in st(i) is dead */
1811 depth = x87_get_depth(state);
1812 for (i = depth - 1; i >= 0; --i) {
1813 int reg = x87_get_st_reg(state, i);
1815 if (! is_vfp_live(reg, live))
1816 kill_mask |= (1 << i);
1820 /* create a new state, will be changed */
1821 state = x87_clone_state(sim, state);
1823 DB((dbg, LEVEL_1, "Killing deads:\n"));
1824 DEBUG_ONLY(vfp_dump_live(live));
1825 DEBUG_ONLY(x87_dump_stack(state));
1827 /* now kill registers */
1829 /* we can only kill from TOS, so bring them up */
1830 if (! (kill_mask & 1)) {
1831 /* search from behind, because we can do a double-pop */
1832 for (i = depth - 1; i >= 0; --i) {
1833 if (kill_mask & (1 << i)) {
1834 kill_mask &= ~(1 << i);
1841 x87_set_st(state, -1, keep, i);
1842 keep = x87_create_fxch(state, first_insn, i, -1);
1845 keep = x87_get_st_node(state, 0);
1847 if ((kill_mask & 3) == 3) {
1848 /* we can do a double-pop */
1852 /* only a single pop */
1857 kill_mask >>= num_pop;
1858 keep = x87_create_fpop(state, first_insn, num_pop, keep);
1863 } /* x87_kill_deads */
1866 * Run a simulation and fix all virtual instructions for a block.
1868 * @param sim the simulator handle
1869 * @param block the current block
1871 * The state at the block end is stored in the block's bl_state;
1872 * successors are queued on the worklist once they get a begin state.
1874 static void x87_simulate_block(x87_simulator *sim, ir_node *block) {
1876 blk_state *bl_state = x87_get_bl_state(sim, block);
1877 x87_state *state = bl_state->begin;
1878 const ir_edge_t *edge;
1879 ir_node *start_block;
1881 assert(state != NULL);
1882 // already processed?
1883 if(bl_state->end != NULL)
1886 update_liveness(sim, block);
1888 DB((dbg, LEVEL_1, "Simulate %+F\n", block));
1889 DB((dbg, LEVEL_2, "State at Block begin:\n "));
1890 DEBUG_ONLY(x87_dump_stack(state));
1892 /* at block begin, kill all dead registers */
1893 state = x87_kill_deads(sim, block, state);
1895 /* beware, n might changed */
1896 for (n = sched_first(block); !sched_is_end(n); n = next) {
1899 ir_op *op = get_irn_op(n);
1901 next = sched_next(n);
/* only opcodes that registered a sim_func need simulation */
1902 if (op->ops.generic == NULL)
1905 func = (sim_func)op->ops.generic;
1907 /* have work to do */
1908 if (state == bl_state->begin) {
1909 /* create a new state, will be changed */
1910 state = x87_clone_state(sim, state);
1914 node_inserted = (*func)(state, n);
1917 sim_func might have added additional nodes after n,
1919 beware: n must not be changed by sim_func
1920 (i.e. removed from schedule) in this case
1923 next = sched_next(n);
1926 start_block = get_irg_start_block(get_irn_irg(block));
1928 /* check if the state must be shuffled */
1929 foreach_block_succ(block, edge) {
1930 ir_node *succ = get_edge_src_irn(edge);
1931 blk_state *succ_state;
1933 if(succ == start_block)
1936 succ_state = x87_get_bl_state(sim, succ);
1938 if (succ_state->begin == NULL) {
/* first predecessor to finish defines the successor's begin state */
1939 succ_state->begin = state;
1940 waitq_put(sim->worklist, succ);
1942 /* There is already a begin state for the successor, bad.
1943 Do the necessary permutations.
1944 Note that critical edges are removed, so this is always possible. */
1945 x87_shuffle(sim, block, state, succ, succ_state->begin);
1948 bl_state->end = state;
1950 DB((dbg, LEVEL_2, "State at Block end:\n ")); DEBUG_ONLY(x87_dump_stack(state));
1951 } /* x87_simulate_block */
1954 * Create a new x87 simulator.
1956 * @param sim a simulator handle, will be initialized
1957 * @param irg the current graph
1958 * @param env the architecture environment
1960 static void x87_init_simulator(x87_simulator *sim, ir_graph *irg, const arch_env_t *env)
1962 obstack_init(&sim->obst);
1963 sim->blk_states = pmap_create();
/* one liveness entry per node index of the graph */
1965 sim->n_idx = get_irg_last_idx(irg);
1966 sim->live = obstack_alloc(&sim->obst, sizeof(*sim->live) * sim->n_idx);
1968 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.x87");
1970 DB((dbg, LEVEL_1, "--------------------------------\n"
1971 "x87 Simulator started for %+F\n", irg));
1973 /* set the generic function pointer of instruction we must simulate */
1974 clear_irp_opcodes_generic_func();
/* ASSOC* bind the sim_<op> handler to an opcode's generic function
   pointer; ASSOC_IA32 maps the virtual (v-prefixed) ia32 opcodes and
   ASSOC_BE the backend (be_) opcodes */
1976 #define ASSOC(op) (op_ ## op)->ops.generic = (op_func)(sim_##op)
1977 #define ASSOC_IA32(op) (op_ia32_v ## op)->ops.generic = (op_func)(sim_##op)
1978 #define ASSOC_BE(op) (op_be_ ## op)->ops.generic = (op_func)(sim_##op)
1996 ASSOC_IA32(fCondJmp);
2007 } /* x87_init_simulator */
2010 * Destroy a x87 simulator.
2012 * @param sim the simulator handle
2014 static void x87_destroy_simulator(x87_simulator *sim) {
2015 pmap_destroy(sim->blk_states);
2016 obstack_free(&sim->obst, NULL);
2017 DB((dbg, LEVEL_1, "x87 Simulator stopped\n\n"));
2018 } /* x87_destroy_simulator */
2021 * Run a simulation and fix all virtual instructions for a graph.
2023 * @param env the architecture environment
2024 * @param birg the back-end graph (contains the current ir_graph)
2026 * Needs a block-schedule.
2028 void x87_simulate_graph(const arch_env_t *env, be_irg_t *birg) {
2029 ir_node *block, *start_block;
2030 blk_state *bl_state;
2032 ir_graph *irg = birg->irg;
2034 /* create the simulator */
2035 x87_init_simulator(&sim, irg, env);
2037 start_block = get_irg_start_block(irg);
2038 bl_state = x87_get_bl_state(&sim, start_block);
2040 /* start with the empty state */
2041 bl_state->begin = empty;
2044 sim.worklist = new_waitq();
2045 waitq_put(sim.worklist, start_block);
/* recompute liveness: the simulator inserts new nodes, which would
   invalidate stale liveness information */
2047 be_invalidate_liveness(birg);
2048 be_assure_liveness(birg);
2052 /* Create the worklist for the schedule and calculate the liveness
2053 for all nodes. We must precalculate this info, because the
2054 simulator adds new nodes (and possible before Phi nodes) which
2055 let fail the old lazy calculation.
2056 On the other hand we reduce the computation amount due to
2057 precaching from O(n^2) to O(n) at the expense of O(n) cache memory.
/* NOTE(review): blk_list, worklist and lv are not declared in the visible
   part of this function -- verify against the full file */
2059 for (i = 0, n = ARR_LEN(blk_list); i < n; ++i) {
2060 update_liveness(&sim, lv, blk_list[i]);
2061 waitq_put(worklist, blk_list[i]);
/* simulate blocks until the worklist runs empty */
2067 block = waitq_get(sim.worklist);
2068 x87_simulate_block(&sim, block);
2069 } while (! pdeq_empty(sim.worklist));
2072 del_waitq(sim.worklist);
2073 x87_destroy_simulator(&sim);
2074 } /* x87_simulate_graph */