2 * This file implements the x87 support and virtual to stack
3 * register translation for the ia32 backend.
5 * @author: Michael Beck
12 #endif /* HAVE_CONFIG_H */
19 #include "iredges_t.h"
28 #include "../belive_t.h"
29 #include "../besched_t.h"
30 #include "../benode_t.h"
31 #include "ia32_new_nodes.h"
32 #include "gen_ia32_new_nodes.h"
33 #include "gen_ia32_regalloc_if.h"
38 /* first and second binop index */
45 /* the store val index */
46 #define STORE_VAL_IDX 2
48 #define MASK_TOS(x) ((x) & (N_x87_REGS - 1))
50 /** the debug handle */
51 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
53 /* Forward declaration. */
54 typedef struct _x87_simulator x87_simulator;
57 * An exchange template.
58 * Note that our virtual functions have the same inputs
 * and attributes as the real ones, so we can simply exchange
61 * Further, x87 supports inverse instructions, so we can handle them.
63 typedef struct _exchange_tmpl {
64 ir_op *normal_op; /**< the normal one */
65 ir_op *reverse_op; /**< the reverse one if exists */
66 ir_op *normal_pop_op; /**< the normal one with tos pop */
67 ir_op *reverse_pop_op; /**< the reverse one with tos pop */
71 * An entry on the simulated x87 stack.
73 typedef struct _st_entry {
74 int reg_idx; /**< the virtual register index of this stack value */
75 ir_node *node; /**< the node that produced this value */
81 typedef struct _x87_state {
82 st_entry st[N_x87_REGS]; /**< the register stack */
83 int depth; /**< the current stack depth */
84 int tos; /**< position of the tos */
85 x87_simulator *sim; /**< The simulator. */
88 /** An empty state, used for blocks without fp instructions. */
89 static x87_state _empty = { { {0, NULL}, }, 0, 0 };
90 static x87_state *empty = (x87_state *)&_empty;
92 /** The type of an instruction simulator function. */
93 typedef int (*sim_func)(x87_state *state, ir_node *n);
96 * A block state: Every block has a x87 state at the beginning and at the end.
98 typedef struct _blk_state {
99 x87_state *begin; /**< state at the begin or NULL if not assigned */
100 x87_state *end; /**< state at the end or NULL if not assigned */
103 #define PTR_TO_BLKSTATE(p) ((blk_state *)(p))
105 /** liveness bitset for vfp registers. */
106 typedef unsigned char vfp_liveness;
111 struct _x87_simulator {
112 struct obstack obst; /**< An obstack for fast allocating. */
113 pmap *blk_states; /**< Map blocks to states. */
114 const arch_env_t *env; /**< The architecture environment. */
115 be_lv_t *lv; /**< intrablock liveness. */
116 vfp_liveness *live; /**< Liveness information. */
117 unsigned n_idx; /**< The cached get_irg_last_idx() result. */
118 waitq *worklist; /**< list of blocks to process. */
122 * Returns the current stack depth.
124 * @param state the x87 state
126 * @return the x87 stack depth
128 static int x87_get_depth(const x87_state *state) {
133 * Check if the state is empty.
135 * @param state the x87 state
137 * returns non-zero if the x87 stack is empty
139 static int x87_state_is_empty(const x87_state *state) {
140 return state->depth == 0;
144 * Return the virtual register index at st(pos).
146 * @param state the x87 state
147 * @param pos a stack position
149 * @return the vfp register index that produced the value at st(pos)
151 static int x87_get_st_reg(const x87_state *state, int pos) {
152 assert(pos < state->depth);
153 return state->st[MASK_TOS(state->tos + pos)].reg_idx;
157 * Return the node at st(pos).
159 * @param state the x87 state
160 * @param pos a stack position
162 * @return the IR node that produced the value at st(pos)
164 static ir_node *x87_get_st_node(const x87_state *state, int pos) {
165 assert(pos < state->depth);
166 return state->st[MASK_TOS(state->tos + pos)].node;
167 } /* x87_get_st_node */
171 * Dump the stack for debugging.
173 * @param state the x87 state
175 static void x87_dump_stack(const x87_state *state) {
178 for (i = state->depth - 1; i >= 0; --i) {
179 DB((dbg, LEVEL_2, "vf%d ", x87_get_st_reg(state, i)));
181 DB((dbg, LEVEL_2, "<-- TOS\n"));
182 } /* x87_dump_stack */
183 #endif /* DEBUG_libfirm */
186 * Set a virtual register to st(pos).
188 * @param state the x87 state
189 * @param reg_idx the vfp register index that should be set
190 * @param node the IR node that produces the value of the vfp register
191 * @param pos the stack position where the new value should be entered
193 static void x87_set_st(x87_state *state, int reg_idx, ir_node *node, int pos) {
194 assert(0 < state->depth);
195 state->st[MASK_TOS(state->tos + pos)].reg_idx = reg_idx;
196 state->st[MASK_TOS(state->tos + pos)].node = node;
198 DB((dbg, LEVEL_2, "After SET_REG: "));
199 DEBUG_ONLY(x87_dump_stack(state));
203 * Set the tos virtual register.
205 * @param state the x87 state
206 * @param reg_idx the vfp register index that should be set
207 * @param node the IR node that produces the value of the vfp register
209 static void x87_set_tos(x87_state *state, int reg_idx, ir_node *node) {
210 x87_set_st(state, reg_idx, node, 0);
215 * Flush the x87 stack.
217 * @param state the x87 state
219 static void x87_flush(x87_state *state) {
226 * Swap st(0) with st(pos).
228 * @param state the x87 state
229 * @param pos the stack position to change the tos with
231 static void x87_fxch(x87_state *state, int pos) {
233 assert(pos < state->depth);
235 entry = state->st[MASK_TOS(state->tos + pos)];
236 state->st[MASK_TOS(state->tos + pos)] = state->st[MASK_TOS(state->tos)];
237 state->st[MASK_TOS(state->tos)] = entry;
239 DB((dbg, LEVEL_2, "After FXCH: ")); DEBUG_ONLY(x87_dump_stack(state));
243 * Convert a virtual register to the stack index.
245 * @param state the x87 state
246 * @param reg_idx the register vfp index
248 * @return the stack position where the register is stacked
249 * or -1 if the virtual register was not found
251 static int x87_on_stack(const x87_state *state, int reg_idx) {
252 int i, tos = state->tos;
254 for (i = 0; i < state->depth; ++i)
255 if (state->st[MASK_TOS(tos + i)].reg_idx == reg_idx)
 * Push a virtual register onto the stack; double pushes are allowed.
263 * @param state the x87 state
264 * @param reg_idx the register vfp index
265 * @param node the node that produces the value of the vfp register
267 static void x87_push_dbl(x87_state *state, int reg_idx, ir_node *node) {
268 assert(state->depth < N_x87_REGS && "stack overrun");
271 state->tos = MASK_TOS(state->tos - 1);
272 state->st[state->tos].reg_idx = reg_idx;
273 state->st[state->tos].node = node;
275 DB((dbg, LEVEL_2, "After PUSH: ")); DEBUG_ONLY(x87_dump_stack(state));
 * Push a virtual register onto the stack; double pushes are NOT allowed.
281 * @param state the x87 state
282 * @param reg_idx the register vfp index
283 * @param node the node that produces the value of the vfp register
 *
286 static void x87_push(x87_state *state, int reg_idx, ir_node *node) {
287 assert(x87_on_stack(state, reg_idx) == -1 && "double push");
289 x87_push_dbl(state, reg_idx, node);
293 * Pop a virtual Register from the stack.
295 static void x87_pop(x87_state *state) {
296 assert(state->depth > 0 && "stack underrun");
299 state->tos = MASK_TOS(state->tos + 1);
301 DB((dbg, LEVEL_2, "After POP: ")); DEBUG_ONLY(x87_dump_stack(state));
305 * Returns the block state of a block.
307 * @param sim the x87 simulator handle
308 * @param block the current block
310 * @return the block state
312 static blk_state *x87_get_bl_state(x87_simulator *sim, ir_node *block) {
313 pmap_entry *entry = pmap_find(sim->blk_states, block);
316 blk_state *bl_state = obstack_alloc(&sim->obst, sizeof(*bl_state));
317 bl_state->begin = NULL;
318 bl_state->end = NULL;
320 pmap_insert(sim->blk_states, block, bl_state);
324 return PTR_TO_BLKSTATE(entry->value);
325 } /* x87_get_bl_state */
328 * Creates a new x87 state.
330 * @param sim the x87 simulator handle
332 * @return a new x87 state
334 static x87_state *x87_alloc_state(x87_simulator *sim) {
335 x87_state *res = obstack_alloc(&sim->obst, sizeof(*res));
339 } /* x87_alloc_state */
343 * Create a new empty x87 state.
345 * @param sim the x87 simulator handle
347 * @return a new empty x87 state
349 static x87_state *x87_alloc_empty_state(x87_simulator *sim) {
350 x87_state *res = x87_alloc_state(sim);
354 } /* x87_alloc_empty_state */
360 * @param sim the x87 simulator handle
361 * @param src the x87 state that will be cloned
363 * @return a cloned copy of the src state
365 static x87_state *x87_clone_state(x87_simulator *sim, const x87_state *src) {
366 x87_state *res = x87_alloc_state(sim);
368 memcpy(res, src, sizeof(*res));
370 } /* x87_clone_state */
373 * Patch a virtual instruction into a x87 one and return
376 * @param n the IR node to patch
377 * @param op the x87 opcode to patch in
379 static ir_node *x87_patch_insn(ir_node *n, ir_op *op) {
380 ir_mode *mode = get_irn_mode(n);
385 if (mode == mode_T) {
386 /* patch all Proj's */
387 const ir_edge_t *edge;
389 foreach_out_edge(n, edge) {
390 ir_node *proj = get_edge_src_irn(edge);
392 mode = get_irn_mode(proj);
393 if (mode_is_float(mode)) {
395 set_irn_mode(proj, mode_E);
400 else if (mode_is_float(mode))
401 set_irn_mode(n, mode_E);
403 } /* x87_patch_insn */
406 * Returns the first Proj of a mode_T node having a given mode.
408 * @param n the mode_T node
409 * @param m the desired mode of the Proj
410 * @return The first Proj of mode @p m found or NULL.
412 static ir_node *get_irn_Proj_for_mode(ir_node *n, ir_mode *m) {
413 const ir_edge_t *edge;
415 assert(get_irn_mode(n) == mode_T && "Need mode_T node");
417 foreach_out_edge(n, edge) {
418 ir_node *proj = get_edge_src_irn(edge);
419 if (get_irn_mode(proj) == m)
424 } /* get_irn_Proj_for_mode */
427 * Wrap the arch_* function here so we can check for errors.
429 static INLINE const arch_register_t *x87_get_irn_register(x87_simulator *sim, const ir_node *irn) {
430 const arch_register_t *res;
432 res = arch_get_irn_register(sim->env, irn);
433 assert(res->reg_class->regs == ia32_vfp_regs);
437 /* -------------- x87 perm --------------- */
440 * Creates a fxch for shuffle.
442 * @param state the x87 state
443 * @param pos parameter for fxch
444 * @param block the block were fxch is inserted
446 * Creates a new fxch node and reroute the user of the old node
449 * @return the fxch node
451 static ir_node *x87_fxch_shuffle(x87_state *state, int pos, ir_node *block)
456 fxch = new_rd_ia32_fxch(NULL, get_irn_irg(block), block, mode_E);
457 attr = get_ia32_attr(fxch);
458 attr->x87[0] = &ia32_st_regs[pos];
459 attr->x87[2] = &ia32_st_regs[0];
463 x87_fxch(state, pos);
465 } /* x87_fxch_shuffle */
468 * Calculate the necessary permutations to reach dst_state.
470 * These permutations are done with fxch instructions and placed
471 * at the end of the block.
473 * Note that critical edges are removed here, so we need only
474 * a shuffle if the current block has only one successor.
476 * @param sim the simulator handle
477 * @param block the current block
478 * @param state the current x87 stack state, might be modified
479 * @param dst_block the destination block
480 * @param dst_state destination state
484 static x87_state *x87_shuffle(x87_simulator *sim, ir_node *block, x87_state *state, ir_node *dst_block, const x87_state *dst_state) {
485 int i, n_cycles, k, ri;
486 unsigned cycles[4], all_mask;
487 char cycle_idx[4][8];
488 ir_node *fxch, *before, *after;
490 assert(state->depth == dst_state->depth);
492 /* Some mathematics here:
493 If we have a cycle of length n that includes the tos,
494 we need n-1 exchange operations.
495 We can always add the tos and restore it, so we need
496 n+1 exchange operations for a cycle not containing the tos.
497 So, the maximum of needed operations is for a cycle of 7
498 not including the tos == 8.
499 This is the same number of ops we would need for using stores,
500 so exchange is cheaper (we save the loads).
501 On the other hand, we might need an additional exchange
502 in the next block to bring one operand on top, so the
503 number of ops in the first case is identical.
504 Further, no more than 4 cycles can exists (4 x 2).
506 all_mask = (1 << (state->depth)) - 1;
508 for (n_cycles = 0; all_mask; ++n_cycles) {
509 int src_idx, dst_idx;
511 /* find the first free slot */
512 for (i = 0; i < state->depth; ++i) {
513 if (all_mask & (1 << i)) {
514 all_mask &= ~(1 << i);
516 /* check if there are differences here */
517 if (x87_get_st_reg(state, i) != x87_get_st_reg(dst_state, i))
523 /* no more cycles found */
528 cycles[n_cycles] = (1 << i);
529 cycle_idx[n_cycles][k++] = i;
530 for (src_idx = i; ; src_idx = dst_idx) {
531 dst_idx = x87_on_stack(dst_state, x87_get_st_reg(state, src_idx));
533 if ((all_mask & (1 << dst_idx)) == 0)
536 cycle_idx[n_cycles][k++] = dst_idx;
537 cycles[n_cycles] |= (1 << dst_idx);
538 all_mask &= ~(1 << dst_idx);
540 cycle_idx[n_cycles][k] = -1;
544 /* no permutation needed */
548 /* Hmm: permutation needed */
549 DB((dbg, LEVEL_2, "\n%+F needs permutation: from\n", block));
550 DEBUG_ONLY(x87_dump_stack(state));
551 DB((dbg, LEVEL_2, " to\n"));
552 DEBUG_ONLY(x87_dump_stack(dst_state));
556 DB((dbg, LEVEL_2, "Need %d cycles\n", n_cycles));
557 for (ri = 0; ri < n_cycles; ++ri) {
558 DB((dbg, LEVEL_2, " Ring %d:\n ", ri));
559 for (k = 0; cycle_idx[ri][k] != -1; ++k)
560 DB((dbg, LEVEL_2, " st%d ->", cycle_idx[ri][k]));
561 DB((dbg, LEVEL_2, "\n"));
 * Find the place where the node must be inserted.
569 * We have only one successor block, so the last instruction should
572 before = sched_last(block);
573 assert(is_cfop(before));
575 /* now do the permutations */
576 for (ri = 0; ri < n_cycles; ++ri) {
577 if ((cycles[ri] & 1) == 0) {
578 /* this cycle does not include the tos */
579 fxch = x87_fxch_shuffle(state, cycle_idx[ri][0], block);
581 sched_add_after(after, fxch);
583 sched_add_before(before, fxch);
586 for (k = 1; cycle_idx[ri][k] != -1; ++k) {
587 fxch = x87_fxch_shuffle(state, cycle_idx[ri][k], block);
589 sched_add_after(after, fxch);
591 sched_add_before(before, fxch);
594 if ((cycles[ri] & 1) == 0) {
595 /* this cycle does not include the tos */
596 fxch = x87_fxch_shuffle(state, cycle_idx[ri][0], block);
597 sched_add_after(after, fxch);
604 * Create a fxch node before another node.
606 * @param state the x87 state
607 * @param n the node after the fxch
608 * @param pos exchange st(pos) with st(0)
609 * @param op_idx if >= 0, replace input op_idx of n with the fxch result
613 static ir_node *x87_create_fxch(x87_state *state, ir_node *n, int pos, int op_idx) {
616 ir_graph *irg = get_irn_irg(n);
617 ir_node *block = get_nodes_block(n);
619 x87_fxch(state, pos);
621 fxch = new_rd_ia32_fxch(NULL, irg, block, mode_E);
622 attr = get_ia32_attr(fxch);
623 attr->x87[0] = &ia32_st_regs[pos];
624 attr->x87[2] = &ia32_st_regs[0];
628 sched_add_before(n, fxch);
629 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fxch), attr->x87[0]->name, attr->x87[2]->name));
631 } /* x87_create_fxch */
634 * Create a fpush before node n.
636 * @param state the x87 state
637 * @param n the node after the fpush
638 * @param pos push st(pos) on stack
639 * @param op_idx replace input op_idx of n with the fpush result
641 static void x87_create_fpush(x87_state *state, ir_node *n, int pos, int op_idx) {
642 ir_node *fpush, *pred = get_irn_n(n, op_idx);
644 const arch_register_t *out = x87_get_irn_register(state->sim, pred);
646 x87_push_dbl(state, arch_register_get_index(out), pred);
648 fpush = new_rd_ia32_fpush(NULL, get_irn_irg(n), get_nodes_block(n), mode_E);
649 attr = get_ia32_attr(fpush);
650 attr->x87[0] = &ia32_st_regs[pos];
651 attr->x87[2] = &ia32_st_regs[0];
654 sched_add_before(n, fpush);
656 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fpush), attr->x87[0]->name, attr->x87[2]->name));
657 } /* x87_create_fpush */
660 * Create a fpop before node n.
662 * @param state the x87 state
663 * @param n the node after the fpop
664 * @param num pop 1 or 2 values
665 * @param pred node to use as predecessor of the fpop
667 * @return the fpop node
669 static ir_node *x87_create_fpop(x87_state *state, ir_node *n, int num, ir_node *pred) {
670 ir_node *fpop = pred;
677 fpop = new_rd_ia32_fpop(NULL, get_irn_irg(n), get_nodes_block(n), mode_E);
678 attr = get_ia32_attr(fpop);
679 attr->x87[0] = &ia32_st_regs[0];
680 attr->x87[1] = &ia32_st_regs[0];
681 attr->x87[2] = &ia32_st_regs[0];
683 sched_add_before(n, fpop);
684 DB((dbg, LEVEL_1, "<<< %s %s\n", get_irn_opname(fpop), attr->x87[0]->name));
690 } /* x87_create_fpop */
693 * Creates an fldz before node n
695 * @param state the x87 state
696 * @param n the node after the fldz
698 * @return the fldz node
700 static ir_node *x87_create_fldz(x87_state *state, ir_node *n, int regidx) {
701 ir_graph *irg = get_irn_irg(n);
702 ir_node *block = get_nodes_block(n);
705 fldz = new_rd_ia32_fldz(NULL, irg, block, mode_E);
707 sched_add_before(n, fldz);
708 DB((dbg, LEVEL_1, "<<< %s\n", get_irn_opname(fldz)));
711 x87_push(state, regidx, fldz);
716 /* --------------------------------- liveness ------------------------------------------ */
719 * The liveness transfer function.
720 * Updates a live set over a single step from a given node to its predecessor.
721 * Everything defined at the node is removed from the set, the uses of the node get inserted.
723 * @param sim The simulator handle.
724 * @param irn The node at which liveness should be computed.
725 * @param live The bitset of registers live before @p irn. This set gets modified by updating it to
726 * the registers live after irn.
728 * @return The live bitset.
730 static vfp_liveness vfp_liveness_transfer(x87_simulator *sim, ir_node *irn, vfp_liveness live)
733 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
734 const arch_env_t *arch_env = sim->env;
736 if (arch_irn_consider_in_reg_alloc(arch_env, cls, irn)) {
737 const arch_register_t *reg = x87_get_irn_register(sim, irn);
738 live &= ~(1 << arch_register_get_index(reg));
741 for (i = 0, n = get_irn_arity(irn); i < n; ++i) {
742 ir_node *op = get_irn_n(irn, i);
744 if (mode_is_float(get_irn_mode(op)) && arch_irn_consider_in_reg_alloc(arch_env, cls, op)) {
745 const arch_register_t *reg = x87_get_irn_register(sim, op);
746 live |= 1 << arch_register_get_index(reg);
750 } /* vfp_liveness_transfer */
753 * Put all live virtual registers at the end of a block into a bitset.
755 * @param sim the simulator handle
756 * @param lv the liveness information
757 * @param bl the block
759 * @return The live bitset at the end of this block
761 static vfp_liveness vfp_liveness_end_of_block(x87_simulator *sim, const ir_node *block)
764 vfp_liveness live = 0;
765 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
766 const arch_env_t *arch_env = sim->env;
767 const be_lv_t *lv = sim->lv;
769 be_lv_foreach(lv, block, be_lv_state_end, i) {
770 const arch_register_t *reg;
771 const ir_node *node = be_lv_get_irn(lv, block, i);
772 if (!arch_irn_consider_in_reg_alloc(arch_env, cls, node))
775 reg = x87_get_irn_register(sim, node);
776 live |= 1 << arch_register_get_index(reg);
780 } /* vfp_liveness_end_of_block */
782 /** get the register mask from an arch_register */
783 #define REGMASK(reg) (1 << (arch_register_get_index(reg)))
786 * Return a bitset of argument registers which are live at the end of a node.
788 * @param sim the simulator handle
789 * @param pos the node
790 * @param kill kill mask for the output registers
792 * @return The live bitset.
794 static unsigned vfp_live_args_after(x87_simulator *sim, const ir_node *pos, unsigned kill)
796 unsigned idx = get_irn_idx(pos);
798 assert(idx < sim->n_idx);
799 return sim->live[idx] & ~kill;
800 } /* vfp_live_args_after */
803 * Calculate the liveness for a whole block and cache it.
805 * @param sim the simulator handle
806 * @param lv the liveness handle
807 * @param block the block
809 static void update_liveness(x87_simulator *sim, ir_node *block) {
810 vfp_liveness live = vfp_liveness_end_of_block(sim, block);
814 /* now iterate through the block backward and cache the results */
815 sched_foreach_reverse(block, irn) {
816 /* stop at the first Phi: this produces the live-in */
820 idx = get_irn_idx(irn);
821 sim->live[idx] = live;
823 live = vfp_liveness_transfer(sim, irn, live);
825 idx = get_irn_idx(block);
826 sim->live[idx] = live;
827 } /* update_liveness */
830 * Returns true if a register is live in a set.
832 * @param reg_idx the vfp register index
833 * @param live a live bitset
835 #define is_vfp_live(reg_idx, live) ((live) & (1 << (reg_idx)))
839 * Dump liveness info.
841 * @param live the live bitset
843 static void vfp_dump_live(vfp_liveness live) {
846 DB((dbg, LEVEL_2, "Live after: "));
847 for (i = 0; i < 8; ++i) {
848 if (live & (1 << i)) {
849 DB((dbg, LEVEL_2, "vf%d ", i));
852 DB((dbg, LEVEL_2, "\n"));
853 } /* vfp_dump_live */
854 #endif /* DEBUG_libfirm */
856 /* --------------------------------- simulators ---------------------------------------- */
/** Swap two int lvalues via a temporary; the do/while(0) wrapper makes the macro statement-safe. */
#define XCHG(a, b) do { int t = (a); (a) = (b); (b) = t; } while (0)
861 * Simulate a virtual binop.
863 * @param state the x87 state
864 * @param n the node that should be simulated (and patched)
865 * @param tmpl the template containing the 4 possible x87 opcodes
867 static int sim_binop(x87_state *state, ir_node *n, const exchange_tmpl *tmpl) {
868 int op2_idx, op1_idx;
869 int out_idx, do_pop = 0;
872 x87_simulator *sim = state->sim;
873 const arch_register_t *op1 = x87_get_irn_register(sim, get_irn_n(n, BINOP_IDX_1));
874 const arch_register_t *op2 = x87_get_irn_register(sim, get_irn_n(n, BINOP_IDX_2));
875 const arch_register_t *out = x87_get_irn_register(sim, n);
876 int reg_index_1 = arch_register_get_index(op1);
877 int reg_index_2 = arch_register_get_index(op2);
878 vfp_liveness live = vfp_live_args_after(sim, n, REGMASK(out));
880 DB((dbg, LEVEL_1, ">>> %+F %s, %s -> %s\n", n,
881 arch_register_get_name(op1), arch_register_get_name(op2),
882 arch_register_get_name(out)));
883 DEBUG_ONLY(vfp_dump_live(live));
884 DB((dbg, LEVEL_1, "Stack before: "));
885 DEBUG_ONLY(x87_dump_stack(state));
887 op1_idx = x87_on_stack(state, reg_index_1);
888 assert(op1_idx >= 0);
890 if (reg_index_2 != REG_VFP_NOREG) {
891 /* second operand is a vfp register */
892 op2_idx = x87_on_stack(state, reg_index_2);
893 assert(op2_idx >= 0);
895 if (is_vfp_live(arch_register_get_index(op2), live)) {
896 /* Second operand is live. */
898 if (is_vfp_live(arch_register_get_index(op1), live)) {
899 /* Both operands are live: push the first one.
900 This works even for op1 == op2. */
901 x87_create_fpush(state, n, op1_idx, BINOP_IDX_2);
902 /* now do fxxx (tos=tos X op) */
906 dst = tmpl->normal_op;
908 /* Second live, first operand is dead here, bring it to tos. */
910 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
915 /* now do fxxx (tos=tos X op) */
917 dst = tmpl->normal_op;
921 /* Second operand is dead. */
922 if (is_vfp_live(arch_register_get_index(op1), live)) {
923 /* First operand is live: bring second to tos. */
925 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
930 /* now do fxxxr (tos = op X tos) */
932 dst = tmpl->reverse_op;
935 /* Both operands are dead here, pop them from the stack. */
/* Both are identical and on tos, no pop needed. */
939 /* here fxxx (tos = tos X tos) */
940 dst = tmpl->normal_op;
944 /* now do fxxxp (op = op X tos, pop) */
945 dst = tmpl->normal_pop_op;
950 else if (op1_idx == 0) {
951 assert(op1_idx != op2_idx);
952 /* now do fxxxrp (op = tos X op, pop) */
953 dst = tmpl->reverse_pop_op;
958 /* Bring the second on top. */
959 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
960 if (op1_idx == op2_idx) {
/* Both are identical and on tos now, no pop needed. */
964 /* use fxxx (tos = tos X tos) */
965 dst = tmpl->normal_op;
969 /* op2 is on tos now */
971 /* use fxxxp (op = op X tos, pop) */
972 dst = tmpl->normal_pop_op;
981 /* second operand is an address mode */
982 if (is_vfp_live(arch_register_get_index(op1), live)) {
983 /* first operand is live: push it here */
984 x87_create_fpush(state, n, op1_idx, BINOP_IDX_1);
986 /* use fxxx (tos = tos X mem) */
987 dst = tmpl->normal_op;
991 /* first operand is dead: bring it to tos */
993 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
/* use fxxx (tos = tos X mem) */
998 dst = tmpl->normal_op;
1003 x87_set_st(state, arch_register_get_index(out), x87_patch_insn(n, dst), out_idx);
1008 /* patch the operation */
1009 attr = get_ia32_attr(n);
1010 attr->x87[0] = op1 = &ia32_st_regs[op1_idx];
1011 if (reg_index_2 != REG_VFP_NOREG) {
1012 attr->x87[1] = op2 = &ia32_st_regs[op2_idx];
1014 attr->x87[2] = out = &ia32_st_regs[out_idx];
1016 if (reg_index_2 != REG_VFP_NOREG) {
1017 DB((dbg, LEVEL_1, "<<< %s %s, %s -> %s\n", get_irn_opname(n),
1018 arch_register_get_name(op1), arch_register_get_name(op2),
1019 arch_register_get_name(out)));
1021 DB((dbg, LEVEL_1, "<<< %s %s, [AM] -> %s\n", get_irn_opname(n),
1022 arch_register_get_name(op1),
1023 arch_register_get_name(out)));
1030 * Simulate a virtual Unop.
1032 * @param state the x87 state
1033 * @param n the node that should be simulated (and patched)
1034 * @param op the x87 opcode that will replace n's opcode
1036 static int sim_unop(x87_state *state, ir_node *n, ir_op *op) {
1037 int op1_idx, out_idx;
1038 x87_simulator *sim = state->sim;
1039 const arch_register_t *op1 = x87_get_irn_register(sim, get_irn_n(n, UNOP_IDX));
1040 const arch_register_t *out = x87_get_irn_register(sim, n);
1042 unsigned live = vfp_live_args_after(sim, n, REGMASK(out));
1044 DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, out->name));
1045 DEBUG_ONLY(vfp_dump_live(live));
1047 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1049 if (is_vfp_live(arch_register_get_index(op1), live)) {
1050 /* push the operand here */
1051 x87_create_fpush(state, n, op1_idx, UNOP_IDX);
1055 /* operand is dead, bring it to tos */
1057 x87_create_fxch(state, n, op1_idx, UNOP_IDX);
1062 x87_set_tos(state, arch_register_get_index(out), x87_patch_insn(n, op));
1064 attr = get_ia32_attr(n);
1065 attr->x87[0] = op1 = &ia32_st_regs[0];
1066 attr->x87[2] = out = &ia32_st_regs[0];
1067 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), out->name));
1073 * Simulate a virtual Load instruction.
1075 * @param state the x87 state
1076 * @param n the node that should be simulated (and patched)
1077 * @param op the x87 opcode that will replace n's opcode
1079 static int sim_load(x87_state *state, ir_node *n, ir_op *op) {
1080 const arch_register_t *out = x87_get_irn_register(state->sim, n);
1083 DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, arch_register_get_name(out)));
1084 x87_push(state, arch_register_get_index(out), x87_patch_insn(n, op));
1085 assert(out == x87_get_irn_register(state->sim, n));
1086 attr = get_ia32_attr(n);
1087 attr->x87[2] = out = &ia32_st_regs[0];
1088 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), arch_register_get_name(out)));
 * Rewire all users of @p old_val to @p new_val iff they are scheduled after @p store.
1096 * @param store The store
1097 * @param old_val The former value
1098 * @param new_val The new value
1100 static void collect_and_rewire_users(ir_node *store, ir_node *old_val, ir_node *new_val) {
1101 const ir_edge_t *edge, *ne;
1103 foreach_out_edge_safe(old_val, edge, ne) {
1104 ir_node *user = get_edge_src_irn(edge);
1106 if (! user || user == store)
1109 /* if the user is scheduled after the store: rewire */
1110 if (sched_is_scheduled(user) && sched_comes_after(store, user)) {
1112 /* find the input of the user pointing to the old value */
1113 for (i = get_irn_arity(user) - 1; i >= 0; i--) {
1114 if (get_irn_n(user, i) == old_val)
1115 set_irn_n(user, i, new_val);
1119 } /* collect_and_rewire_users */
1122 * Simulate a virtual Store.
1124 * @param state the x87 state
1125 * @param n the node that should be simulated (and patched)
1126 * @param op the x87 store opcode
1127 * @param op_p the x87 store and pop opcode
1129 static int sim_store(x87_state *state, ir_node *n, ir_op *op, ir_op *op_p) {
1130 x87_simulator *sim = state->sim;
1131 ir_node *val = get_irn_n(n, STORE_VAL_IDX);
1132 const arch_register_t *op2 = x87_get_irn_register(sim, val);
1133 unsigned live = vfp_live_args_after(sim, n, 0);
1136 int op2_reg_idx, op2_idx, depth;
1137 int live_after_node;
1140 op2_reg_idx = arch_register_get_index(op2);
1141 if (op2_reg_idx == REG_VFP_UKNWN) {
1142 // just take any value from stack
1143 if(state->depth > 0) {
1145 DEBUG_ONLY(op2 = NULL);
1146 live_after_node = 1;
// produce a new value which we will consume immediately
1149 x87_create_fldz(state, n, op2_reg_idx);
1150 live_after_node = 0;
1151 op2_idx = x87_on_stack(state, op2_reg_idx);
1152 assert(op2_idx >= 0);
1155 op2_idx = x87_on_stack(state, op2_reg_idx);
1156 live_after_node = is_vfp_live(arch_register_get_index(op2), live);
1157 assert(op2_idx >= 0);
1158 DB((dbg, LEVEL_1, ">>> %+F %s ->\n", n, arch_register_get_name(op2)));
1161 mode = get_ia32_ls_mode(n);
1162 depth = x87_get_depth(state);
1164 if (live_after_node) {
1166 Problem: fst doesn't support mode_E (spills), only fstp does
1168 - stack not full: push value and fstp
1169 - stack full: fstp value and load again
1171 if (mode == mode_E) {
1172 if (depth < N_x87_REGS) {
1173 /* ok, we have a free register: push + fstp */
1174 x87_create_fpush(state, n, op2_idx, STORE_VAL_IDX);
1176 x87_patch_insn(n, op_p);
1179 ir_node *vfld, *mem, *block, *rproj, *mproj;
1182 /* stack full here: need fstp + load */
1184 x87_patch_insn(n, op_p);
1186 block = get_nodes_block(n);
1187 irg = get_irn_irg(n);
1188 vfld = new_rd_ia32_vfld(NULL, irg, block, get_irn_n(n, 0), get_irn_n(n, 1), new_rd_NoMem(irg));
1190 /* copy all attributes */
1191 set_ia32_frame_ent(vfld, get_ia32_frame_ent(n));
1192 if (is_ia32_use_frame(n))
1193 set_ia32_use_frame(vfld);
1194 set_ia32_am_flavour(vfld, get_ia32_am_flavour(n));
1195 set_ia32_op_type(vfld, ia32_am_Source);
1196 add_ia32_am_offs_int(vfld, get_ia32_am_offs_int(n));
1197 set_ia32_am_sc(vfld, get_ia32_am_sc(n));
1198 set_ia32_ls_mode(vfld, get_ia32_ls_mode(n));
1200 rproj = new_r_Proj(irg, block, vfld, get_ia32_ls_mode(vfld), pn_ia32_vfld_res);
1201 mproj = new_r_Proj(irg, block, vfld, mode_M, pn_ia32_vfld_M);
1202 mem = get_irn_Proj_for_mode(n, mode_M);
1204 assert(mem && "Store memory not found");
1206 arch_set_irn_register(sim->env, rproj, op2);
1208 /* reroute all former users of the store memory to the load memory */
1209 edges_reroute(mem, mproj, irg);
1210 /* set the memory input of the load to the store memory */
1211 set_irn_n(vfld, 2, mem);
1213 sched_add_after(n, vfld);
1214 sched_add_after(vfld, rproj);
1216 /* rewire all users, scheduled after the store, to the loaded value */
1217 collect_and_rewire_users(n, val, rproj);
1223 /* we can only store the tos to memory */
1225 x87_create_fxch(state, n, op2_idx, STORE_VAL_IDX);
1227 /* mode != mode_E -> use normal fst */
1228 x87_patch_insn(n, op);
1232 /* we can only store the tos to memory */
1234 x87_create_fxch(state, n, op2_idx, STORE_VAL_IDX);
1237 x87_patch_insn(n, op_p);
1240 attr = get_ia32_attr(n);
1241 attr->x87[1] = op2 = &ia32_st_regs[0];
1242 DB((dbg, LEVEL_1, "<<< %s %s ->\n", get_irn_opname(n), arch_register_get_name(op2)));
1248 * Simulate a virtual Phi.
1249 * Just for cosmetic reasons change the mode of Phi nodes to mode_E.
1251 * @param state the x87 state
1252 * @param n the node that should be simulated (and patched)
1253 * @param env the architecture environment
1255 static int sim_Phi(x87_state *state, ir_node *n, const arch_env_t *env) {
1256 ir_mode *mode = get_irn_mode(n);
1258 if (mode_is_float(mode))
1259 set_irn_mode(n, mode_E);
/* Generator macros: each expands to a tiny sim_<op>() trampoline that
 * forwards to the generic simulator (sim_binop/sim_load/sim_unop/sim_store)
 * with the matching concrete ia32 opcode(s).
 * _GEN_BINOP passes an exchange_tmpl carrying the normal, reversed, and
 * popping opcode variants (see exchange_tmpl in the file header).
 * NOTE(review): partial listing — some continuation lines / closing braces
 * of these macros are missing from this chunk. */
1265 #define _GEN_BINOP(op, rev) \
1266 static int sim_##op(x87_state *state, ir_node *n) { \
1267 exchange_tmpl tmpl = { op_ia32_##op, op_ia32_##rev, op_ia32_##op##p, op_ia32_##rev##p }; \
1268 return sim_binop(state, n, &tmpl); \
1271 #define GEN_BINOP(op) _GEN_BINOP(op, op)
1272 #define GEN_BINOPR(op) _GEN_BINOP(op, op##r)
/* Loads: GEN_LOAD2 lets the virtual op name differ from the real opcode. */
1274 #define GEN_LOAD2(op, nop) \
1275 static int sim_##op(x87_state *state, ir_node *n) { \
1276 return sim_load(state, n, op_ia32_##nop); \
1279 #define GEN_LOAD(op) GEN_LOAD2(op, op)
/* Unary ops and stores follow the same trampoline pattern. */
1281 #define GEN_UNOP(op) \
1282 static int sim_##op(x87_state *state, ir_node *n) { \
1283 return sim_unop(state, n, op_ia32_##op); \
1286 #define GEN_STORE(op) \
1287 static int sim_##op(x87_state *state, ir_node *n) { \
1288 return sim_store(state, n, op_ia32_##op, op_ia32_##op##p); \
1308 GEN_LOAD2(fConst, fldConst)
1314 * Simulate a fCondJmp.
1316 * @param state the x87 state
1317 * @param n the node that should be simulated (and patched)
/**
 * Simulate a virtual fCondJmp (floating point compare + jump).
 *
 * Locates both operands on the simulated x87 stack, inserts fxch nodes
 * as needed to bring them into st(0)/st(1), then patches the node into
 * one of the concrete fcom*Jmp opcodes.  The variant chosen (plain,
 * reversed 'r', popping 'p'/'pp') depends on which operands are still
 * live after the compare — dead operands are popped by the compare itself.
 *
 * @param state  the x87 state
 * @param n      the node that should be simulated (and patched)
 *
 * NOTE(review): partial listing — several lines (else branches, pop_cnt
 * updates, closing braces) are missing from this chunk; the structure
 * below is incomplete as shown.
 */
1319 static int sim_fCondJmp(x87_state *state, ir_node *n) {
1325 x87_simulator *sim = state->sim;
1326 const arch_register_t *op1 = x87_get_irn_register(sim, get_irn_n(n, BINOP_IDX_1));
1327 const arch_register_t *op2 = x87_get_irn_register(sim, get_irn_n(n, BINOP_IDX_2));
1328 int reg_index_1 = arch_register_get_index(op1);
1329 int reg_index_2 = arch_register_get_index(op2);
1330 unsigned live = vfp_live_args_after(sim, n, 0);
1332 DB((dbg, LEVEL_1, ">>> %+F %s, %s\n", n,
1333 arch_register_get_name(op1), arch_register_get_name(op2)));
1334 DEBUG_ONLY(vfp_dump_live(live));
1335 DB((dbg, LEVEL_1, "Stack before: "));
1336 DEBUG_ONLY(x87_dump_stack(state));
/* first operand must already reside somewhere on the x87 stack */
1338 op1_idx = x87_on_stack(state, reg_index_1);
1339 assert(op1_idx >= 0);
1341 /* BEWARE: check for comp a,a cases, they might happen */
1342 if (reg_index_2 != REG_VFP_NOREG) {
1343 /* second operand is a vfp register */
1344 op2_idx = x87_on_stack(state, reg_index_2);
1345 assert(op2_idx >= 0);
1347 if (is_vfp_live(arch_register_get_index(op2), live)) {
1348 /* second operand is live */
1350 if (is_vfp_live(arch_register_get_index(op1), live)) {
1351 /* both operands are live */
1354 dst = op_ia32_fcomJmp;
1355 } else if (op2_idx == 0) {
1356 dst = op_ia32_fcomrJmp;
1358 /* bring the first one to tos */
1359 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1363 dst = op_ia32_fcomJmp;
1367 /* second live, first operand is dead here, bring it to tos.
1368 This means further, op1_idx != op2_idx. */
1369 assert(op1_idx != op2_idx);
1371 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1376 dst = op_ia32_fcompJmp;
1381 /* second operand is dead */
1382 if (is_vfp_live(arch_register_get_index(op1), live)) {
1383 /* first operand is live: bring second to tos.
1384 This means further, op1_idx != op2_idx. */
1385 assert(op1_idx != op2_idx);
1387 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1392 dst = op_ia32_fcomrpJmp;
1396 /* both operands are dead here, check first for identity. */
1397 if (op1_idx == op2_idx) {
1398 /* identically, one pop needed */
1400 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1404 dst = op_ia32_fcompJmp;
1407 /* different, move them to st and st(1) and pop both.
1408 The tricky part is to get one into st(1).*/
1409 else if (op2_idx == 1) {
1410 /* good, second operand is already in the right place, move the first */
1412 /* bring the first on top */
1413 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1416 dst = op_ia32_fcomppJmp;
1419 else if (op1_idx == 1) {
1420 /* good, first operand is already in the right place, move the second */
1422 /* bring the first on top */
1423 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1426 dst = op_ia32_fcomrppJmp;
1430 /* if one is already the TOS, we need two fxch */
1432 /* first one is TOS, move to st(1) */
1433 x87_create_fxch(state, n, 1, BINOP_IDX_1);
1435 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1437 dst = op_ia32_fcomrppJmp;
1440 else if (op2_idx == 0) {
1441 /* second one is TOS, move to st(1) */
1442 x87_create_fxch(state, n, 1, BINOP_IDX_2);
1444 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1446 dst = op_ia32_fcomrppJmp;
1450 /* none of them is either TOS or st(1), 3 fxch needed */
1451 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1452 x87_create_fxch(state, n, 1, BINOP_IDX_2);
1454 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1456 dst = op_ia32_fcomppJmp;
1464 /* second operand is an address mode */
1465 if (is_vfp_live(arch_register_get_index(op1), live)) {
1466 /* first operand is live: bring it to TOS */
1468 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1471 dst = op_ia32_fcomJmp;
1474 /* first operand is dead: bring it to tos */
1476 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1479 dst = op_ia32_fcompJmp;
/* replace the virtual opcode by the concrete fcom* variant chosen above */
1484 x87_patch_insn(n, dst);
1485 assert(pop_cnt < 3);
1491 /* patch the operation */
1492 attr = get_ia32_attr(n);
1493 op1 = &ia32_st_regs[op1_idx];
1496 op2 = &ia32_st_regs[op2_idx];
1499 attr->x87[2] = NULL;
1502 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(n),
1503 arch_register_get_name(op1), arch_register_get_name(op2)));
1505 DB((dbg, LEVEL_1, "<<< %s %s, [AM]\n", get_irn_opname(n),
1506 arch_register_get_name(op1)));
1509 } /* sim_fCondJmp */
1512 * Simulate a be_Copy.
1514 * @param state the x87 state
1515 * @param n the node that should be simulated (and patched)
/**
 * Simulate a be_Copy.
 *
 * Non-float copies pass through; float copies are handled specially:
 *  - copies of fld-constants (fldz, fld1, fldpi, ...) are not copied but
 *    recreated by emitting a fresh constant-load node;
 *  - copies of the "unknown" register (REG_VFP_UKNWN) get a synthetic
 *    value (fldz on an empty stack, fpushCopy otherwise);
 *  - a copy whose source is still live becomes a real fpushCopy;
 *  - a dead source is either popped/renamed in place or fxch'd to TOS
 *    and popped;
 *  - otherwise the copy is purely virtual and the node is exchanged away.
 *
 * @param state  the x87 state
 * @param n      the be_Copy node to simulate (and patch)
 *
 * NOTE(review): partial listing — case labels, break/else lines, and
 * closing braces are missing from this chunk.
 */
1517 static int sim_Copy(x87_state *state, ir_node *n) {
1518 ir_mode *mode = get_irn_mode(n);
1520 if (mode_is_float(mode)) {
1521 x87_simulator *sim = state->sim;
1522 ir_node *pred = get_irn_n(n, 0);
1523 const arch_register_t *out = x87_get_irn_register(sim, n);
1524 const arch_register_t *op1 = x87_get_irn_register(sim, pred);
1525 ir_node *node, *next;
1527 int op1_idx, out_idx;
1528 unsigned live = vfp_live_args_after(sim, n, REGMASK(out));
1529 ir_node *(*cnstr)(dbg_info *, ir_graph *, ir_node *, ir_mode *);
1531 DB((dbg, LEVEL_1, ">>> %+F %s -> %s\n", n,
1532 arch_register_get_name(op1), arch_register_get_name(out)));
1533 DEBUG_ONLY(vfp_dump_live(live));
1535 /* Do not copy constants, recreate them. */
1536 switch (get_ia32_irn_opcode(pred)) {
1538 cnstr = new_rd_ia32_fldz;
1541 cnstr = new_rd_ia32_fld1;
1543 case iro_ia32_fldpi:
1544 cnstr = new_rd_ia32_fldpi;
1546 case iro_ia32_fldl2e:
1547 cnstr = new_rd_ia32_fldl2e;
1549 case iro_ia32_fldl2t:
1550 cnstr = new_rd_ia32_fldl2t;
1552 case iro_ia32_fldlg2:
1553 cnstr = new_rd_ia32_fldlg2;
1555 case iro_ia32_fldln2:
1556 cnstr = new_rd_ia32_fldln2;
1562 /* copy a constant */
1563 node = (*cnstr)(get_irn_dbg_info(n), get_irn_irg(n), get_nodes_block(n), mode);
1564 arch_set_irn_register(sim->env, node, out);
/* the freshly loaded constant now occupies st(0) */
1566 x87_push(state, arch_register_get_index(out), node);
1568 attr = get_ia32_attr(node);
1569 attr->x87[2] = out = &ia32_st_regs[0];
1571 next = sched_next(n);
1574 sched_add_before(next, node);
1575 DB((dbg, LEVEL_1, ">>> %+F -> %s\n", node, arch_register_get_name(out)));
1579 /* handle the infamous unknown value */
1580 if (arch_register_get_index(op1) == REG_VFP_UKNWN) {
1581 /* This happens before Phi nodes */
1582 if (x87_state_is_empty(state)) {
1583 /* create some value */
1584 x87_patch_insn(n, op_ia32_fldz);
1585 attr = get_ia32_attr(n);
1586 attr->x87[2] = out = &ia32_st_regs[0];
1587 DB((dbg, LEVEL_1, "<<< %+F -> %s\n", n,
1588 arch_register_get_name(out)));
1590 /* Just copy one. We need here an fpush that can hold a
1591 a register, so use the fpushCopy. */
1592 node = new_rd_ia32_fpushCopy(get_irn_dbg_info(n), get_irn_irg(n), get_nodes_block(n), get_irn_n(n, 0), mode);
1593 arch_set_irn_register(sim->env, node, out);
1595 x87_push(state, arch_register_get_index(out), node);
1597 attr = get_ia32_attr(node);
1598 attr->x87[0] = op1 =
1599 attr->x87[2] = out = &ia32_st_regs[0];
1601 next = sched_next(n);
1604 sched_add_before(next, node);
1605 DB((dbg, LEVEL_1, "<<< %+F %s -> %s\n", node,
1606 arch_register_get_name(op1),
1607 arch_register_get_name(out)));
1612 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1614 if (is_vfp_live(arch_register_get_index(op1), live)) {
1615 /* Operand is still live,a real copy. We need here an fpush that can hold a
1616 a register, so use the fpushCopy. */
1617 node = new_rd_ia32_fpushCopy(get_irn_dbg_info(n), get_irn_irg(n), get_nodes_block(n), get_irn_n(n, 0), mode);
1618 arch_set_irn_register(sim->env, node, out);
1620 x87_push(state, arch_register_get_index(out), node);
1622 attr = get_ia32_attr(node);
1623 attr->x87[0] = op1 = &ia32_st_regs[op1_idx];
1624 attr->x87[2] = out = &ia32_st_regs[0];
1626 next = sched_next(n);
1629 sched_add_before(next, node);
1630 DB((dbg, LEVEL_1, ">>> %+F %s -> %s\n", node, op1->name, out->name));
/* source is dead: the copy kills op1 and must leave the value where out is */
1633 out_idx = x87_on_stack(state, arch_register_get_index(out));
1635 if (out_idx >= 0 && out_idx != op1_idx) {
1636 /* op1 must be killed and placed where out is */
1638 /* best case, simple remove and rename */
1639 x87_patch_insn(n, op_ia32_Pop);
1640 attr = get_ia32_attr(n);
1641 attr->x87[0] = op1 = &ia32_st_regs[0];
1644 x87_set_st(state, arch_register_get_index(out), n, op1_idx - 1);
1647 /* move op1 to tos, store and pop it */
1649 x87_create_fxch(state, n, op1_idx, 0);
1652 x87_patch_insn(n, op_ia32_Pop);
1653 attr = get_ia32_attr(n);
1654 attr->x87[0] = op1 = &ia32_st_regs[out_idx];
1657 x87_set_st(state, arch_register_get_index(out), n, out_idx - 1);
1659 DB((dbg, LEVEL_1, ">>> %+F %s\n", n, op1->name));
1662 /* just a virtual copy */
1663 x87_set_st(state, arch_register_get_index(out), get_unop_op(n), op1_idx);
1665 DB((dbg, LEVEL_1, ">>> KILLED %s\n", get_irn_opname(n)));
1666 exchange(n, get_unop_op(n));
1674 * Simulate a be_Call.
1676 * @param state the x87 state
1677 * @param n the node that should be simulated
1678 * @param env the architecture environment
/**
 * Simulate a be_Call.
 *
 * The x87 stack must be empty at the call site (asserted).  If the callee
 * returns a float, the ABI places it in st(0) even when unused, so the
 * result is pushed onto the simulated stack.  Only one float return
 * result is supported here.
 *
 * @param state  the x87 state
 * @param n      the call node
 * @param env    the architecture environment
 */
1680 static int sim_Call(x87_state *state, ir_node *n, const arch_env_t *env) {
1681 ir_type *call_tp = be_Call_get_type(n);
1683 /* at the begin of a call the x87 state should be empty */
1684 assert(state->depth == 0 && "stack not empty before call");
1687 * If the called function returns a float, it is returned in st(0).
1688 * This even happens if the return value is NOT used.
1689 * Moreover, only one return result is supported.
1691 if (get_method_n_ress(call_tp) > 0) {
1692 ir_type *res_type = get_method_res_type(call_tp, 0);
1693 ir_mode *mode = get_type_mode(res_type);
1695 if (mode && mode_is_float(mode)) {
1697 * TODO: what to push here? The result might be unused and currently
1698 * we have no possibility to detect this :-(
1700 x87_push(state, 0, n);
1708 * Simulate a be_Spill.
1710 * @param state the x87 state
1711 * @param n the node that should be simulated (and patched)
1712 * @param env the architecture environment
1714 * Should not happen, spills are lowered before x87 simulator see them.
/**
 * Simulate a be_Spill.  Must never be reached: spills are lowered to
 * stores before the x87 simulator runs; the assert documents that.
 * Falls through to the fst simulation as a defensive measure.
 */
1716 static int sim_Spill(x87_state *state, ir_node *n) {
1717 assert(0 && "Spill not lowered");
1718 return sim_fst(state, n);
1722 * Simulate a be_Reload.
1724 * @param state the x87 state
1725 * @param n the node that should be simulated (and patched)
1726 * @param env the architecture environment
1728 * Should not happen, reloads are lowered before x87 simulator see them.
/**
 * Simulate a be_Reload.  Must never be reached: reloads are lowered to
 * loads before the x87 simulator runs; the assert documents that.
 * Falls through to the fld simulation as a defensive measure.
 */
1730 static int sim_Reload(x87_state *state, ir_node *n) {
1731 assert(0 && "Reload not lowered");
1732 return sim_fld(state, n);
1736 * Simulate a be_Return.
1738 * @param state the x87 state
1739 * @param n the node that should be simulated (and patched)
1740 * @param env the architecture environment
/**
 * Simulate a be_Return.
 *
 * Counts the floating point return values; those must be exactly the
 * values remaining on the x87 stack (asserted), and they are popped
 * virtually from the simulated state.
 *
 * NOTE(review): partial listing — the counting increment and the pop
 * call in the loops fall on lines not visible here.
 */
1742 static int sim_Return(x87_state *state, ir_node *n) {
1743 int n_res = be_Return_get_n_rets(n);
1744 int i, n_float_res = 0;
1746 /* only floating point return values must resist on stack */
1747 for (i = 0; i < n_res; ++i) {
1748 ir_node *res = get_irn_n(n, be_pos_Return_val + i);
1750 if (mode_is_float(get_irn_mode(res)))
1753 assert(x87_get_depth(state) == n_float_res);
1755 /* pop them virtually */
1756 for (i = n_float_res - 1; i >= 0; --i)
/** Helper pairing an input register with its output register for Perm
 *  handling (closing of the typedef is on a line not visible here). */
1762 typedef struct _perm_data_t {
1763 const arch_register_t *in;  /**< input register of the permutation */
1764 const arch_register_t *out; /**< output register of the permutation */
1768 * Simulate a be_Perm.
1770 * @param state the x87 state
1771 * @param irn the node that should be simulated (and patched)
/**
 * Simulate a be_Perm.
 *
 * On x87 a Perm is purely virtual: all inputs already sit (pairwise
 * distinct) on the FPU stack, so only the simulated stack state is
 * permuted — each Proj output is bound to the stack slot its
 * corresponding input occupied.  Non-float Perms are ignored.
 *
 * @param state  the x87 state
 * @param irn    the Perm node
 *
 * NOTE(review): partial listing — declarations of n/i/stack_pos and the
 * stack_pos[i] assignment fall on lines not visible here.
 */
1773 static int sim_Perm(x87_state *state, ir_node *irn) {
1775 x87_simulator *sim = state->sim;
1776 ir_node *pred = get_irn_n(irn, 0);
1778 const ir_edge_t *edge;
1780 /* handle only floating point Perms */
1781 if (! mode_is_float(get_irn_mode(pred)))
1784 DB((dbg, LEVEL_1, ">>> %+F\n", irn));
1786 /* Perm is a pure virtual instruction on x87.
1787 All inputs must be on the FPU stack and are pairwise
1788 different from each other.
1789 So, all we need to do is to permutate the stack state. */
1790 n = get_irn_arity(irn);
1791 NEW_ARR_A(int, stack_pos, n);
1793 /* collect old stack positions */
1794 for (i = 0; i < n; ++i) {
1795 const arch_register_t *inreg = x87_get_irn_register(sim, get_irn_n(irn, i));
1796 int idx = x87_on_stack(state, arch_register_get_index(inreg));
1798 assert(idx >= 0 && "Perm argument not on x87 stack");
1802 /* now do the permutation */
1803 foreach_out_edge(irn, edge) {
1804 ir_node *proj = get_edge_src_irn(edge);
1805 const arch_register_t *out = x87_get_irn_register(sim, proj);
1806 long num = get_Proj_proj(proj);
1808 assert(0 <= num && num < n && "More Proj's than Perm inputs");
1809 x87_set_st(state, arch_register_get_index(out), proj, stack_pos[(unsigned)num]);
1811 DB((dbg, LEVEL_1, "<<< %+F\n", irn));
1817 * Kill any dead registers at block start by popping them from the stack.
1819 * @param sim the simulator handle
1820 * @param block the current block
1821 * @param start_state the x87 state at the begin of the block
/**
 * Kill any dead registers at block start by popping them from the stack.
 *
 * Builds a bit mask of stack slots whose vfp register is not live into
 * the block, clones the state, then brings dead values to TOS via fxch
 * and emits fpop nodes before the block's first instruction — using a
 * double-pop when the two top slots are both dead.
 *
 * @param sim          the simulator handle
 * @param block        the current block
 * @param start_state  the x87 state at the begin of the block
 * @return the (possibly cloned and modified) state
 *
 * NOTE(review): partial listing — kill_mask declaration/initialization,
 * the surrounding while loop, and several braces are on lines not
 * visible here.
 */
1823 static x87_state *x87_kill_deads(x87_simulator *sim, ir_node *block, x87_state *start_state) {
1824 x87_state *state = start_state;
1825 ir_node *first_insn = sched_first(block);
1826 ir_node *keep = NULL;
1827 unsigned live = vfp_live_args_after(sim, block, 0);
1829 int i, depth, num_pop;
/* mark every stack slot whose register is dead on block entry */
1832 depth = x87_get_depth(state);
1833 for (i = depth - 1; i >= 0; --i) {
1834 int reg = x87_get_st_reg(state, i);
1836 if (! is_vfp_live(reg, live))
1837 kill_mask |= (1 << i);
1841 /* create a new state, will be changed */
1842 state = x87_clone_state(sim, state);
1844 DB((dbg, LEVEL_1, "Killing deads:\n"));
1845 DEBUG_ONLY(vfp_dump_live(live));
1846 DEBUG_ONLY(x87_dump_stack(state));
1848 /* now kill registers */
1850 /* we can only kill from TOS, so bring them up */
1851 if (! (kill_mask & 1)) {
1852 /* search from behind, because we can to a double-pop */
1853 for (i = depth - 1; i >= 0; --i) {
1854 if (kill_mask & (1 << i)) {
1855 kill_mask &= ~(1 << i);
1862 x87_set_st(state, -1, keep, i);
1863 keep = x87_create_fxch(state, first_insn, i, -1);
1866 keep = x87_get_st_node(state, 0);
1868 if ((kill_mask & 3) == 3) {
1869 /* we can do a double-pop */
1873 /* only a single pop */
1878 kill_mask >>= num_pop;
1879 keep = x87_create_fpop(state, first_insn, num_pop, keep);
1884 } /* x87_kill_deads */
1887 * Run a simulation and fix all virtual instructions for a block.
1889 * @param sim the simulator handle
1890 * @param block the current block
1892 * @return non-zero if simulation is complete,
1893 * zero if the simulation must be rerun
/**
 * Run a simulation and fix all virtual instructions for a block.
 *
 * Skips blocks already having an end state, updates liveness, kills dead
 * registers at block entry, then walks the schedule invoking the per-op
 * sim_func stored in op->ops.generic.  Afterwards the resulting state is
 * propagated to successors: unvisited successors inherit it (and are put
 * on the worklist), already-visited ones get an x87_shuffle to match
 * their begin state (possible because critical edges were removed).
 *
 * @param sim    the simulator handle
 * @param block  the current block
 *
 * NOTE(review): partial listing — declarations of n/next/func/
 * node_inserted, the early return, and several braces are on lines not
 * visible here.
 */
1895 static void x87_simulate_block(x87_simulator *sim, ir_node *block) {
1897 blk_state *bl_state = x87_get_bl_state(sim, block);
1898 x87_state *state = bl_state->begin;
1899 const ir_edge_t *edge;
1900 ir_node *start_block;
1902 assert(state != NULL);
1903 // already processed?
1904 if(bl_state->end != NULL)
1907 update_liveness(sim, block);
1909 DB((dbg, LEVEL_1, "Simulate %+F\n", block));
1910 DB((dbg, LEVEL_2, "State at Block begin:\n "));
1911 DEBUG_ONLY(x87_dump_stack(state));
1913 /* at block begin, kill all dead registers */
1914 state = x87_kill_deads(sim, block, state);
1916 /* beware, n might changed */
1917 for (n = sched_first(block); !sched_is_end(n); n = next) {
1920 ir_op *op = get_irn_op(n);
1922 next = sched_next(n);
1923 if (op->ops.generic == NULL)
1926 func = (sim_func)op->ops.generic;
1928 /* have work to do */
1929 if (state == bl_state->begin) {
1930 /* create a new state, will be changed */
1931 state = x87_clone_state(sim, state);
/* dispatch to the per-opcode simulation handler */
1935 node_inserted = (*func)(state, n);
1938 sim_func might have added additional nodes after n,
1940 beware: n must not be changed by sim_func
1941 (i.e. removed from schedule) in this case
1944 next = sched_next(n);
1947 start_block = get_irg_start_block(get_irn_irg(block));
1949 /* check if the state must be shuffled */
1950 foreach_block_succ(block, edge) {
1951 ir_node *succ = get_edge_src_irn(edge);
1952 blk_state *succ_state;
1954 if(succ == start_block)
1957 succ_state = x87_get_bl_state(sim, succ);
1959 if (succ_state->begin == NULL) {
1960 succ_state->begin = state;
1961 waitq_put(sim->worklist, succ);
1963 /* There is already a begin state for the successor, bad.
1964 Do the necessary permutations.
1965 Note that critical edges are removed, so this is always possible. */
1966 x87_shuffle(sim, block, state, succ, succ_state->begin);
1969 bl_state->end = state;
1971 DB((dbg, LEVEL_2, "State at Block end:\n ")); DEBUG_ONLY(x87_dump_stack(state));
1972 } /* x87_simulate_block */
1975 * Create a new x87 simulator.
1977 * @param sim a simulator handle, will be initialized
1978 * @param irg the current graph
1979 * @param env the architecture environment
/**
 * Create a new x87 simulator.
 *
 * Initializes the obstack, the per-block state map, and the liveness
 * cache (indexed by node index), registers the debug module, and wires
 * each simulated opcode's generic function pointer to its sim_* handler
 * via the ASSOC* macros.
 *
 * @param sim  a simulator handle, will be initialized
 * @param irg  the current graph
 * @param env  the architecture environment
 *
 * NOTE(review): partial listing — most ASSOC*() registration lines
 * (only ASSOC_IA32(fCondJmp) is visible) and the macro #undefs are on
 * lines not visible here.
 */
1981 static void x87_init_simulator(x87_simulator *sim, ir_graph *irg, const arch_env_t *env)
1983 obstack_init(&sim->obst);
1984 sim->blk_states = pmap_create();
1986 sim->n_idx = get_irg_last_idx(irg);
1987 sim->live = obstack_alloc(&sim->obst, sizeof(*sim->live) * sim->n_idx);
1989 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.x87");
1991 DB((dbg, LEVEL_1, "--------------------------------\n"
1992 "x87 Simulator started for %+F\n", irg));
1994 /* set the generic function pointer of instruction we must simulate */
1995 clear_irp_opcodes_generic_func();
1997 #define ASSOC(op) (op_ ## op)->ops.generic = (op_func)(sim_##op)
1998 #define ASSOC_IA32(op) (op_ia32_v ## op)->ops.generic = (op_func)(sim_##op)
1999 #define ASSOC_BE(op) (op_be_ ## op)->ops.generic = (op_func)(sim_##op)
2017 ASSOC_IA32(fCondJmp);
2028 } /* x87_init_simulator */
2031 * Destroy a x87 simulator.
2033 * @param sim the simulator handle
/**
 * Destroy a x87 simulator: frees the block-state map and everything
 * allocated on the simulator's obstack (including the liveness cache).
 *
 * @param sim  the simulator handle
 */
2035 static void x87_destroy_simulator(x87_simulator *sim) {
2036 pmap_destroy(sim->blk_states);
2037 obstack_free(&sim->obst, NULL);
2038 DB((dbg, LEVEL_1, "x87 Simulator stopped\n\n"));
2039 } /* x87_destroy_simulator */
2042 * Run a simulation and fix all virtual instructions for a graph.
2044 * @param env the architecture environment
2045 * @param irg the current graph
2047 * Needs a block-schedule.
/**
 * Run a simulation and fix all virtual instructions for a graph.
 * Needs a block-schedule.
 *
 * Sets up the simulator, seeds the start block with the empty state,
 * (re)computes backend liveness, precalculates per-block liveness (the
 * simulator inserts nodes — possibly before Phis — which would break
 * lazy liveness; precaching also reduces O(n^2) to O(n) at O(n) memory),
 * then drains the worklist block by block and tears the simulator down.
 *
 * @param env   the architecture environment
 * @param birg  the backend graph (irg plus backend data)
 *
 * NOTE(review): partial listing — the sim/i/n/blk_list declarations, the
 * do { opener, and the end of the precalculation comment are on lines
 * not visible here; the precalculation loop references lv/worklist/
 * blk_list whose setup is likewise not visible.
 */
2049 void x87_simulate_graph(const arch_env_t *env, be_irg_t *birg) {
2050 ir_node *block, *start_block;
2051 blk_state *bl_state;
2053 ir_graph *irg = birg->irg;
2055 /* create the simulator */
2056 x87_init_simulator(&sim, irg, env);
2058 start_block = get_irg_start_block(irg);
2059 bl_state = x87_get_bl_state(&sim, start_block);
2061 /* start with the empty state */
2062 bl_state->begin = empty;
2065 sim.worklist = new_waitq();
2066 waitq_put(sim.worklist, start_block);
2068 be_invalidate_liveness(birg);
2069 be_assure_liveness(birg);
2073 /* Create the worklist for the schedule and calculate the liveness
2074 for all nodes. We must precalculate this info, because the
2075 simulator adds new nodes (and possible before Phi nodes) which
2076 let fail the old lazy calculation.
2077 On the other hand we reduce the computation amount due to
2078 precaching from O(n^2) to O(n) at the expense of O(n) cache memory.
2080 for (i = 0, n = ARR_LEN(blk_list); i < n; ++i) {
2081 update_liveness(&sim, lv, blk_list[i]);
2082 waitq_put(worklist, blk_list[i]);
2088 block = waitq_get(sim.worklist);
2089 x87_simulate_block(&sim, block);
2090 } while (! pdeq_empty(sim.worklist));
2093 del_waitq(sim.worklist);
2094 x87_destroy_simulator(&sim);
2095 } /* x87_simulate_graph */