2 * This file implements the x87 support and virtual to stack
3 * register translation for the ia32 backend.
5 * @author: Michael Beck
12 #endif /* HAVE_CONFIG_H */
19 #include "iredges_t.h"
28 #include "../belive_t.h"
29 #include "../besched_t.h"
30 #include "../benode_t.h"
31 #include "ia32_new_nodes.h"
32 #include "gen_ia32_new_nodes.h"
33 #include "gen_ia32_regalloc_if.h"
38 /* first and second binop index */
45 /* the store val index */
46 #define STORE_VAL_IDX 2
/* Wrap a stack position into the circular st[] register file.
 * NOTE(review): only correct if N_x87_REGS is a power of two — verify. */
48 #define MASK_TOS(x) ((x) & (N_x87_REGS - 1))
50 /** the debug handle */
51 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
53 /* Forward declaration. */
54 typedef struct _x87_simulator x87_simulator;
57 * An exchange template.
58 * Note that our virtual functions have the same inputs
59 * and attributes as the real ones, so we can simple exchange
61 * Further, x87 supports inverse instructions, so we can handle them.
63 typedef struct _exchange_tmpl {
64 ir_op *normal_op; /**< the normal one */
65 ir_op *reverse_op; /**< the reverse one if exists */
66 ir_op *normal_pop_op; /**< the normal one with tos pop */
67 ir_op *reverse_pop_op; /**< the reverse one with tos pop */
71 * An entry on the simulated x87 stack.
73 typedef struct _st_entry {
74 int reg_idx; /**< the virtual register index of this stack value */
75 ir_node *node; /**< the node that produced this value */
81 typedef struct _x87_state {
82 st_entry st[N_x87_REGS]; /**< the register stack */
83 int depth; /**< the current stack depth */
84 int tos; /**< position of the tos */
85 x87_simulator *sim; /**< The simulator. */
88 /** An empty state, used for blocks without fp instructions. */
89 static x87_state _empty = { { {0, NULL}, }, 0, 0 };
90 static x87_state *empty = (x87_state *)&_empty;
92 /** The type of an instruction simulator function. */
93 typedef int (*sim_func)(x87_state *state, ir_node *n);
96 * A block state: Every block has a x87 state at the beginning and at the end.
98 typedef struct _blk_state {
99 x87_state *begin; /**< state at the begin or NULL if not assigned */
100 x87_state *end; /**< state at the end or NULL if not assigned */
103 #define PTR_TO_BLKSTATE(p) ((blk_state *)(p))
105 /** liveness bitset for vfp registers. */
106 typedef unsigned char vfp_liveness;
111 struct _x87_simulator {
112 struct obstack obst; /**< An obstack for fast allocating. */
113 pmap *blk_states; /**< Map blocks to states. */
114 const arch_env_t *env; /**< The architecture environment. */
115 be_lv_t *lv; /**< intrablock liveness. */
116 vfp_liveness *live; /**< Liveness information. */
117 unsigned n_idx; /**< The cached get_irg_last_idx() result. */
118 waitq *worklist; /**< list of blocks to process. */
122 * Returns the current stack depth.
124 * @param state the x87 state
126 * @return the x87 stack depth
128 static int x87_get_depth(const x87_state *state) {
133 * Check if the state is empty.
135 * @param state the x87 state
137 * returns non-zero if the x87 stack is empty
139 static int x87_state_is_empty(const x87_state *state) {
140 return state->depth == 0;
144 * Return the virtual register index at st(pos).
146 * @param state the x87 state
147 * @param pos a stack position
149 * @return the vfp register index that produced the value at st(pos)
151 static int x87_get_st_reg(const x87_state *state, int pos) {
152 assert(pos < state->depth);
153 return state->st[MASK_TOS(state->tos + pos)].reg_idx;
157 * Return the node at st(pos).
159 * @param state the x87 state
160 * @param pos a stack position
162 * @return the IR node that produced the value at st(pos)
164 static ir_node *x87_get_st_node(const x87_state *state, int pos) {
165 	assert(pos < state->depth);
	/* The stack is a circular buffer: logical position pos lives in
	 * physical slot MASK_TOS(tos + pos). */
166 	return state->st[MASK_TOS(state->tos + pos)].node;
167 } /* x87_get_st_node */
171 * Dump the stack for debugging.
173 * @param state the x87 state
175 static void x87_dump_stack(const x87_state *state) {
178 for (i = state->depth - 1; i >= 0; --i) {
179 DB((dbg, LEVEL_2, "vf%d ", x87_get_st_reg(state, i)));
181 DB((dbg, LEVEL_2, "<-- TOS\n"));
182 } /* x87_dump_stack */
183 #endif /* DEBUG_libfirm */
186 * Set a virtual register to st(pos).
188 * @param state the x87 state
189 * @param reg_idx the vfp register index that should be set
190 * @param node the IR node that produces the value of the vfp register
191 * @param pos the stack position where the new value should be entered
193 static void x87_set_st(x87_state *state, int reg_idx, ir_node *node, int pos) {
194 assert(0 < state->depth);
195 state->st[MASK_TOS(state->tos + pos)].reg_idx = reg_idx;
196 state->st[MASK_TOS(state->tos + pos)].node = node;
198 DB((dbg, LEVEL_2, "After SET_REG: "));
199 DEBUG_ONLY(x87_dump_stack(state));
203 * Set the tos virtual register.
205 * @param state the x87 state
206 * @param reg_idx the vfp register index that should be set
207 * @param node the IR node that produces the value of the vfp register
209 static void x87_set_tos(x87_state *state, int reg_idx, ir_node *node) {
210 x87_set_st(state, reg_idx, node, 0);
215 * Flush the x87 stack.
217 * @param state the x87 state
219 static void x87_flush(x87_state *state) {
226 * Swap st(0) with st(pos).
228 * @param state the x87 state
229 * @param pos the stack position to change the tos with
231 static void x87_fxch(x87_state *state, int pos) {
233 assert(pos < state->depth);
235 entry = state->st[MASK_TOS(state->tos + pos)];
236 state->st[MASK_TOS(state->tos + pos)] = state->st[MASK_TOS(state->tos)];
237 state->st[MASK_TOS(state->tos)] = entry;
239 DB((dbg, LEVEL_2, "After FXCH: ")); DEBUG_ONLY(x87_dump_stack(state));
243 * Convert a virtual register to the stack index.
245 * @param state the x87 state
246 * @param reg_idx the register vfp index
248 * @return the stack position where the register is stacked
249 * or -1 if the virtual register was not found
251 static int x87_on_stack(const x87_state *state, int reg_idx) {
252 int i, tos = state->tos;
254 for (i = 0; i < state->depth; ++i)
255 if (state->st[MASK_TOS(tos + i)].reg_idx == reg_idx)
261 * Push a virtual Register onto the stack, double pushed allowed.
263 * @param state the x87 state
264 * @param reg_idx the register vfp index
265 * @param node the node that produces the value of the vfp register
267 static void x87_push_dbl(x87_state *state, int reg_idx, ir_node *node) {
268 assert(state->depth < N_x87_REGS && "stack overrun");
271 state->tos = MASK_TOS(state->tos - 1);
272 state->st[state->tos].reg_idx = reg_idx;
273 state->st[state->tos].node = node;
275 DB((dbg, LEVEL_2, "After PUSH: ")); DEBUG_ONLY(x87_dump_stack(state));
279 * Push a virtual Register onto the stack, double pushes are NOT allowed..
281 * @param state the x87 state
282 * @param reg_idx the register vfp index
283 * @param node the node that produces the value of the vfp register
284 * @param dbl_push if != 0 double pushes are allowed
286 static void x87_push(x87_state *state, int reg_idx, ir_node *node) {
287 assert(x87_on_stack(state, reg_idx) == -1 && "double push");
289 x87_push_dbl(state, reg_idx, node);
293 * Pop a virtual Register from the stack.
295 static void x87_pop(x87_state *state) {
296 assert(state->depth > 0 && "stack underrun");
299 state->tos = MASK_TOS(state->tos + 1);
301 DB((dbg, LEVEL_2, "After POP: ")); DEBUG_ONLY(x87_dump_stack(state));
305 * Returns the block state of a block.
307 * @param sim the x87 simulator handle
308 * @param block the current block
310 * @return the block state
312 static blk_state *x87_get_bl_state(x87_simulator *sim, ir_node *block) {
313 pmap_entry *entry = pmap_find(sim->blk_states, block);
316 blk_state *bl_state = obstack_alloc(&sim->obst, sizeof(*bl_state));
317 bl_state->begin = NULL;
318 bl_state->end = NULL;
320 pmap_insert(sim->blk_states, block, bl_state);
324 return PTR_TO_BLKSTATE(entry->value);
325 } /* x87_get_bl_state */
328 * Creates a new x87 state.
330 * @param sim the x87 simulator handle
332 * @return a new x87 state
334 static x87_state *x87_alloc_state(x87_simulator *sim) {
335 x87_state *res = obstack_alloc(&sim->obst, sizeof(*res));
339 } /* x87_alloc_state */
343 * Create a new empty x87 state.
345 * @param sim the x87 simulator handle
347 * @return a new empty x87 state
349 static x87_state *x87_alloc_empty_state(x87_simulator *sim) {
350 x87_state *res = x87_alloc_state(sim);
354 } /* x87_alloc_empty_state */
360 * @param sim the x87 simulator handle
361 * @param src the x87 state that will be cloned
363 * @return a cloned copy of the src state
365 static x87_state *x87_clone_state(x87_simulator *sim, const x87_state *src) {
366 x87_state *res = x87_alloc_state(sim);
368 memcpy(res, src, sizeof(*res));
370 } /* x87_clone_state */
373 * Patch a virtual instruction into a x87 one and return
376 * @param n the IR node to patch
377 * @param op the x87 opcode to patch in
379 static ir_node *x87_patch_insn(ir_node *n, ir_op *op) {
380 ir_mode *mode = get_irn_mode(n);
385 if (mode == mode_T) {
386 /* patch all Proj's */
387 const ir_edge_t *edge;
389 foreach_out_edge(n, edge) {
390 ir_node *proj = get_edge_src_irn(edge);
392 mode = get_irn_mode(proj);
393 if (mode_is_float(mode)) {
395 set_irn_mode(proj, mode_E);
400 else if (mode_is_float(mode))
401 set_irn_mode(n, mode_E);
403 } /* x87_patch_insn */
406 * Returns the first Proj of a mode_T node having a given mode.
408 * @param n the mode_T node
409 * @param m the desired mode of the Proj
410 * @return The first Proj of mode @p m found or NULL.
412 static ir_node *get_irn_Proj_for_mode(ir_node *n, ir_mode *m) {
413 const ir_edge_t *edge;
415 assert(get_irn_mode(n) == mode_T && "Need mode_T node");
417 foreach_out_edge(n, edge) {
418 ir_node *proj = get_edge_src_irn(edge);
419 if (get_irn_mode(proj) == m)
424 } /* get_irn_Proj_for_mode */
427 * Wrap the arch_* function here so we can check for errors.
429 static INLINE const arch_register_t *x87_get_irn_register(x87_simulator *sim, const ir_node *irn) {
430 const arch_register_t *res;
432 res = arch_get_irn_register(sim->env, irn);
433 assert(res->reg_class->regs == ia32_vfp_regs);
437 /* -------------- x87 perm --------------- */
440 * Creates a fxch for shuffle.
442 * @param state the x87 state
443 * @param pos parameter for fxch
444 * @param block the block were fxch is inserted
446 * Creates a new fxch node and reroute the user of the old node
449 * @return the fxch node
451 static ir_node *x87_fxch_shuffle(x87_state *state, int pos, ir_node *block)
456 fxch = new_rd_ia32_fxch(NULL, get_irn_irg(block), block, mode_E);
457 attr = get_ia32_attr(fxch);
458 attr->x87[0] = &ia32_st_regs[pos];
459 attr->x87[2] = &ia32_st_regs[0];
463 x87_fxch(state, pos);
465 } /* x87_fxch_shuffle */
468 * Calculate the necessary permutations to reach dst_state.
470 * These permutations are done with fxch instructions and placed
471 * at the end of the block.
473 * Note that critical edges are removed here, so we need only
474 * a shuffle if the current block has only one successor.
476 * @param sim the simulator handle
477 * @param block the current block
478 * @param state the current x87 stack state, might be modified
479 * @param dst_block the destination block
480 * @param dst_state destination state
484 static x87_state *x87_shuffle(x87_simulator *sim, ir_node *block, x87_state *state, ir_node *dst_block, const x87_state *dst_state) {
485 int i, n_cycles, k, ri;
486 unsigned cycles[4], all_mask;
487 char cycle_idx[4][8];
488 ir_node *fxch, *before, *after;
490 assert(state->depth == dst_state->depth);
492 /* Some mathematics here:
493 If we have a cycle of length n that includes the tos,
494 we need n-1 exchange operations.
495 We can always add the tos and restore it, so we need
496 n+1 exchange operations for a cycle not containing the tos.
497 So, the maximum of needed operations is for a cycle of 7
498 not including the tos == 8.
499 This is the same number of ops we would need for using stores,
500 so exchange is cheaper (we save the loads).
501 On the other hand, we might need an additional exchange
502 in the next block to bring one operand on top, so the
503 number of ops in the first case is identical.
504 Further, no more than 4 cycles can exists (4 x 2).
506 all_mask = (1 << (state->depth)) - 1;
508 for (n_cycles = 0; all_mask; ++n_cycles) {
509 int src_idx, dst_idx;
511 /* find the first free slot */
512 for (i = 0; i < state->depth; ++i) {
513 if (all_mask & (1 << i)) {
514 all_mask &= ~(1 << i);
516 /* check if there are differences here */
517 if (x87_get_st_reg(state, i) != x87_get_st_reg(dst_state, i))
523 /* no more cycles found */
528 cycles[n_cycles] = (1 << i);
529 cycle_idx[n_cycles][k++] = i;
530 for (src_idx = i; ; src_idx = dst_idx) {
531 dst_idx = x87_on_stack(dst_state, x87_get_st_reg(state, src_idx));
533 if ((all_mask & (1 << dst_idx)) == 0)
536 cycle_idx[n_cycles][k++] = dst_idx;
537 cycles[n_cycles] |= (1 << dst_idx);
538 all_mask &= ~(1 << dst_idx);
540 cycle_idx[n_cycles][k] = -1;
544 /* no permutation needed */
548 /* Hmm: permutation needed */
549 DB((dbg, LEVEL_2, "\n%+F needs permutation: from\n", block));
550 DEBUG_ONLY(x87_dump_stack(state));
551 DB((dbg, LEVEL_2, " to\n"));
552 DEBUG_ONLY(x87_dump_stack(dst_state));
556 DB((dbg, LEVEL_2, "Need %d cycles\n", n_cycles));
557 for (ri = 0; ri < n_cycles; ++ri) {
558 DB((dbg, LEVEL_2, " Ring %d:\n ", ri));
559 for (k = 0; cycle_idx[ri][k] != -1; ++k)
560 DB((dbg, LEVEL_2, " st%d ->", cycle_idx[ri][k]));
561 DB((dbg, LEVEL_2, "\n"));
568 * Find the place node must be insert.
569 * We have only one successor block, so the last instruction should
572 before = sched_last(block);
573 assert(is_cfop(before));
575 /* now do the permutations */
576 for (ri = 0; ri < n_cycles; ++ri) {
577 if ((cycles[ri] & 1) == 0) {
578 /* this cycle does not include the tos */
579 fxch = x87_fxch_shuffle(state, cycle_idx[ri][0], block);
581 sched_add_after(after, fxch);
583 sched_add_before(before, fxch);
586 for (k = 1; cycle_idx[ri][k] != -1; ++k) {
587 fxch = x87_fxch_shuffle(state, cycle_idx[ri][k], block);
589 sched_add_after(after, fxch);
591 sched_add_before(before, fxch);
594 if ((cycles[ri] & 1) == 0) {
595 /* this cycle does not include the tos */
596 fxch = x87_fxch_shuffle(state, cycle_idx[ri][0], block);
597 sched_add_after(after, fxch);
604 * Create a fxch node before another node.
606 * @param state the x87 state
607 * @param n the node after the fxch
608 * @param pos exchange st(pos) with st(0)
609 * @param op_idx if >= 0, replace input op_idx of n with the fxch result
613 static ir_node *x87_create_fxch(x87_state *state, ir_node *n, int pos, int op_idx) {
616 ir_graph *irg = get_irn_irg(n);
617 ir_node *block = get_nodes_block(n);
619 x87_fxch(state, pos);
621 fxch = new_rd_ia32_fxch(NULL, irg, block, mode_E);
622 attr = get_ia32_attr(fxch);
623 attr->x87[0] = &ia32_st_regs[pos];
624 attr->x87[2] = &ia32_st_regs[0];
628 sched_add_before(n, fxch);
629 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fxch), attr->x87[0]->name, attr->x87[2]->name));
631 } /* x87_create_fxch */
634 * Create a fpush before node n.
636 * @param state the x87 state
637 * @param n the node after the fpush
638 * @param pos push st(pos) on stack
639 * @param op_idx replace input op_idx of n with the fpush result
641 static void x87_create_fpush(x87_state *state, ir_node *n, int pos, int op_idx) {
642 ir_node *fpush, *pred = get_irn_n(n, op_idx);
644 const arch_register_t *out = x87_get_irn_register(state->sim, pred);
646 x87_push_dbl(state, arch_register_get_index(out), pred);
648 fpush = new_rd_ia32_fpush(NULL, get_irn_irg(n), get_nodes_block(n), mode_E);
649 attr = get_ia32_attr(fpush);
650 attr->x87[0] = &ia32_st_regs[pos];
651 attr->x87[2] = &ia32_st_regs[0];
654 sched_add_before(n, fpush);
656 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fpush), attr->x87[0]->name, attr->x87[2]->name));
657 } /* x87_create_fpush */
660 * Create a fpop before node n.
662 * @param state the x87 state
663 * @param n the node after the fpop
664 * @param num pop 1 or 2 values
665 * @param pred node to use as predecessor of the fpop
667 * @return the fpop node
669 static ir_node *x87_create_fpop(x87_state *state, ir_node *n, int num, ir_node *pred) {
670 ir_node *fpop = pred;
677 fpop = new_rd_ia32_fpop(NULL, get_irn_irg(n), get_nodes_block(n), mode_E);
678 attr = get_ia32_attr(fpop);
679 attr->x87[0] = &ia32_st_regs[0];
680 attr->x87[1] = &ia32_st_regs[0];
681 attr->x87[2] = &ia32_st_regs[0];
683 sched_add_before(n, fpop);
684 DB((dbg, LEVEL_1, "<<< %s %s\n", get_irn_opname(fpop), attr->x87[0]->name));
690 } /* x87_create_fpop */
692 /* --------------------------------- liveness ------------------------------------------ */
695 * The liveness transfer function.
696 * Updates a live set over a single step from a given node to its predecessor.
697 * Everything defined at the node is removed from the set, the uses of the node get inserted.
699 * @param sim The simulator handle.
700 * @param irn The node at which liveness should be computed.
701 * @param live The bitset of registers live before @p irn. This set gets modified by updating it to
702 * the registers live after irn.
704 * @return The live bitset.
706 static vfp_liveness vfp_liveness_transfer(x87_simulator *sim, ir_node *irn, vfp_liveness live)
709 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
710 const arch_env_t *arch_env = sim->env;
712 if (arch_irn_consider_in_reg_alloc(arch_env, cls, irn)) {
713 const arch_register_t *reg = x87_get_irn_register(sim, irn);
714 live &= ~(1 << arch_register_get_index(reg));
717 for (i = 0, n = get_irn_arity(irn); i < n; ++i) {
718 ir_node *op = get_irn_n(irn, i);
720 if (mode_is_float(get_irn_mode(op)) && arch_irn_consider_in_reg_alloc(arch_env, cls, op)) {
721 const arch_register_t *reg = x87_get_irn_register(sim, op);
722 live |= 1 << arch_register_get_index(reg);
726 } /* vfp_liveness_transfer */
729 * Put all live virtual registers at the end of a block into a bitset.
731 * @param sim the simulator handle
732 * @param lv the liveness information
733 * @param bl the block
735 * @return The live bitset at the end of this block
737 static vfp_liveness vfp_liveness_end_of_block(x87_simulator *sim, const ir_node *block)
740 vfp_liveness live = 0;
741 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
742 const arch_env_t *arch_env = sim->env;
743 const be_lv_t *lv = sim->lv;
745 be_lv_foreach(lv, block, be_lv_state_end, i) {
746 const arch_register_t *reg;
747 const ir_node *node = be_lv_get_irn(lv, block, i);
748 if (!arch_irn_consider_in_reg_alloc(arch_env, cls, node))
751 reg = x87_get_irn_register(sim, node);
752 live |= 1 << arch_register_get_index(reg);
756 } /* vfp_liveness_end_of_block */
758 /** get the register mask from an arch_register */
759 #define REGMASK(reg) (1 << (arch_register_get_index(reg)))
762 * Return a bitset of argument registers which are live at the end of a node.
764 * @param sim the simulator handle
765 * @param pos the node
766 * @param kill kill mask for the output registers
768 * @return The live bitset.
770 static unsigned vfp_live_args_after(x87_simulator *sim, const ir_node *pos, unsigned kill)
772 unsigned idx = get_irn_idx(pos);
774 assert(idx < sim->n_idx);
775 return sim->live[idx] & ~kill;
776 } /* vfp_live_args_after */
779 * Calculate the liveness for a whole block and cache it.
781 * @param sim the simulator handle
782 * @param lv the liveness handle
783 * @param block the block
785 static void update_liveness(x87_simulator *sim, ir_node *block) {
786 vfp_liveness live = vfp_liveness_end_of_block(sim, block);
790 /* now iterate through the block backward and cache the results */
791 sched_foreach_reverse(block, irn) {
792 /* stop at the first Phi: this produces the live-in */
796 idx = get_irn_idx(irn);
797 sim->live[idx] = live;
799 live = vfp_liveness_transfer(sim, irn, live);
801 idx = get_irn_idx(block);
802 sim->live[idx] = live;
803 } /* update_liveness */
806 * Returns true if a register is live in a set.
808 * @param reg_idx the vfp register index
809 * @param live a live bitset
811 #define is_vfp_live(reg_idx, live) ((live) & (1 << (reg_idx)))
815 * Dump liveness info.
817 * @param live the live bitset
819 static void vfp_dump_live(vfp_liveness live) {
822 DB((dbg, LEVEL_2, "Live after: "));
823 for (i = 0; i < 8; ++i) {
824 if (live & (1 << i)) {
825 DB((dbg, LEVEL_2, "vf%d ", i));
828 DB((dbg, LEVEL_2, "\n"));
829 } /* vfp_dump_live */
830 #endif /* DEBUG_libfirm */
832 /* --------------------------------- simulators ---------------------------------------- */
/* Swap two int lvalues. The do/while(0) wrapper makes the macro safe as a
 * single statement (e.g. in an unbraced if). Beware: both arguments are
 * evaluated twice, so do not pass expressions with side effects. */
834 #define XCHG(a, b) do { int t = (a); (a) = (b); (b) = t; } while (0)
837 * Simulate a virtual binop.
839 * @param state the x87 state
840 * @param n the node that should be simulated (and patched)
841 * @param tmpl the template containing the 4 possible x87 opcodes
843 static int sim_binop(x87_state *state, ir_node *n, const exchange_tmpl *tmpl) {
844 int op2_idx, op1_idx;
845 int out_idx, do_pop = 0;
848 x87_simulator *sim = state->sim;
849 const arch_register_t *op1 = x87_get_irn_register(sim, get_irn_n(n, BINOP_IDX_1));
850 const arch_register_t *op2 = x87_get_irn_register(sim, get_irn_n(n, BINOP_IDX_2));
851 const arch_register_t *out = x87_get_irn_register(sim, n);
852 int reg_index_1 = arch_register_get_index(op1);
853 int reg_index_2 = arch_register_get_index(op2);
854 vfp_liveness live = vfp_live_args_after(sim, n, REGMASK(out));
856 DB((dbg, LEVEL_1, ">>> %+F %s, %s -> %s\n", n,
857 arch_register_get_name(op1), arch_register_get_name(op2),
858 arch_register_get_name(out)));
859 DEBUG_ONLY(vfp_dump_live(live));
860 DB((dbg, LEVEL_1, "Stack before: "));
861 DEBUG_ONLY(x87_dump_stack(state));
863 op1_idx = x87_on_stack(state, reg_index_1);
864 assert(op1_idx >= 0);
866 if (reg_index_2 != REG_VFP_NOREG) {
867 /* second operand is a vfp register */
868 op2_idx = x87_on_stack(state, reg_index_2);
869 assert(op2_idx >= 0);
871 if (is_vfp_live(arch_register_get_index(op2), live)) {
872 /* Second operand is live. */
874 if (is_vfp_live(arch_register_get_index(op1), live)) {
875 /* Both operands are live: push the first one.
876 This works even for op1 == op2. */
877 x87_create_fpush(state, n, op1_idx, BINOP_IDX_2);
878 /* now do fxxx (tos=tos X op) */
882 dst = tmpl->normal_op;
884 /* Second live, first operand is dead here, bring it to tos. */
886 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
891 /* now do fxxx (tos=tos X op) */
893 dst = tmpl->normal_op;
897 /* Second operand is dead. */
898 if (is_vfp_live(arch_register_get_index(op1), live)) {
899 /* First operand is live: bring second to tos. */
901 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
906 /* now do fxxxr (tos = op X tos) */
908 dst = tmpl->reverse_op;
911 /* Both operands are dead here, pop them from the stack. */
914 /* Both are identically and on tos, no pop needed. */
915 /* here fxxx (tos = tos X tos) */
916 dst = tmpl->normal_op;
920 /* now do fxxxp (op = op X tos, pop) */
921 dst = tmpl->normal_pop_op;
926 else if (op1_idx == 0) {
927 assert(op1_idx != op2_idx);
928 /* now do fxxxrp (op = tos X op, pop) */
929 dst = tmpl->reverse_pop_op;
934 /* Bring the second on top. */
935 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
936 if (op1_idx == op2_idx) {
937 /* Both are identically and on tos now, no pop needed. */
940 /* use fxxx (tos = tos X tos) */
941 dst = tmpl->normal_op;
945 /* op2 is on tos now */
947 /* use fxxxp (op = op X tos, pop) */
948 dst = tmpl->normal_pop_op;
957 /* second operand is an address mode */
958 if (is_vfp_live(arch_register_get_index(op1), live)) {
959 /* first operand is live: push it here */
960 x87_create_fpush(state, n, op1_idx, BINOP_IDX_1);
962 /* use fxxx (tos = tos X mem) */
963 dst = tmpl->normal_op;
967 /* first operand is dead: bring it to tos */
969 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
973 /* use fxxxp (tos = tos X mem) */
974 dst = tmpl->normal_op;
979 x87_set_st(state, arch_register_get_index(out), x87_patch_insn(n, dst), out_idx);
984 /* patch the operation */
985 attr = get_ia32_attr(n);
986 attr->x87[0] = op1 = &ia32_st_regs[op1_idx];
987 if (reg_index_2 != REG_VFP_NOREG) {
988 attr->x87[1] = op2 = &ia32_st_regs[op2_idx];
990 attr->x87[2] = out = &ia32_st_regs[out_idx];
992 if (reg_index_2 != REG_VFP_NOREG) {
993 DB((dbg, LEVEL_1, "<<< %s %s, %s -> %s\n", get_irn_opname(n),
994 arch_register_get_name(op1), arch_register_get_name(op2),
995 arch_register_get_name(out)));
997 DB((dbg, LEVEL_1, "<<< %s %s, [AM] -> %s\n", get_irn_opname(n),
998 arch_register_get_name(op1),
999 arch_register_get_name(out)));
1006 * Simulate a virtual Unop.
1008 * @param state the x87 state
1009 * @param n the node that should be simulated (and patched)
1010 * @param op the x87 opcode that will replace n's opcode
1012 static int sim_unop(x87_state *state, ir_node *n, ir_op *op) {
1013 int op1_idx, out_idx;
1014 x87_simulator *sim = state->sim;
1015 const arch_register_t *op1 = x87_get_irn_register(sim, get_irn_n(n, UNOP_IDX));
1016 const arch_register_t *out = x87_get_irn_register(sim, n);
1018 unsigned live = vfp_live_args_after(sim, n, REGMASK(out));
1020 DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, out->name));
1021 DEBUG_ONLY(vfp_dump_live(live));
1023 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1025 if (is_vfp_live(arch_register_get_index(op1), live)) {
1026 /* push the operand here */
1027 x87_create_fpush(state, n, op1_idx, UNOP_IDX);
1031 /* operand is dead, bring it to tos */
1033 x87_create_fxch(state, n, op1_idx, UNOP_IDX);
1038 x87_set_tos(state, arch_register_get_index(out), x87_patch_insn(n, op));
1040 attr = get_ia32_attr(n);
1041 attr->x87[0] = op1 = &ia32_st_regs[0];
1042 attr->x87[2] = out = &ia32_st_regs[0];
1043 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), out->name));
1049 * Simulate a virtual Load instruction.
1051 * @param state the x87 state
1052 * @param n the node that should be simulated (and patched)
1053 * @param op the x87 opcode that will replace n's opcode
1055 static int sim_load(x87_state *state, ir_node *n, ir_op *op) {
1056 const arch_register_t *out = x87_get_irn_register(state->sim, n);
1059 DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, arch_register_get_name(out)));
1060 x87_push(state, arch_register_get_index(out), x87_patch_insn(n, op));
1061 assert(out == x87_get_irn_register(state->sim, n));
1062 attr = get_ia32_attr(n);
1063 attr->x87[2] = out = &ia32_st_regs[0];
1064 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), arch_register_get_name(out)));
1070 * Rewire all users of @p old_val to @new_val iff they are scheduled after @p store.
1072 * @param store The store
1073 * @param old_val The former value
1074 * @param new_val The new value
1076 static void collect_and_rewire_users(ir_node *store, ir_node *old_val, ir_node *new_val) {
1077 const ir_edge_t *edge, *ne;
1079 foreach_out_edge_safe(old_val, edge, ne) {
1080 ir_node *user = get_edge_src_irn(edge);
1082 if (! user || user == store)
1085 /* if the user is scheduled after the store: rewire */
1086 if (sched_is_scheduled(user) && sched_comes_after(store, user)) {
1088 /* find the input of the user pointing to the old value */
1089 for (i = get_irn_arity(user) - 1; i >= 0; i--) {
1090 if (get_irn_n(user, i) == old_val)
1091 set_irn_n(user, i, new_val);
1095 } /* collect_and_rewire_users */
1098 * Simulate a virtual Store.
1100 * @param state the x87 state
1101 * @param n the node that should be simulated (and patched)
1102 * @param op the x87 store opcode
1103 * @param op_p the x87 store and pop opcode
1105 static int sim_store(x87_state *state, ir_node *n, ir_op *op, ir_op *op_p) {
1106 x87_simulator *sim = state->sim;
1107 ir_node *val = get_irn_n(n, STORE_VAL_IDX);
1108 const arch_register_t *op2 = x87_get_irn_register(sim, val);
1109 unsigned live = vfp_live_args_after(sim, n, 0);
1115 op2_idx = x87_on_stack(state, arch_register_get_index(op2));
1116 assert(op2_idx >= 0);
1118 DB((dbg, LEVEL_1, ">>> %+F %s ->\n", n, arch_register_get_name(op2)));
1120 mode = get_ia32_ls_mode(n);
1121 depth = x87_get_depth(state);
1123 if (is_vfp_live(arch_register_get_index(op2), live)) {
1125 Problem: fst doesn't support mode_E (spills), only fstp does
1127 - stack not full: push value and fstp
1128 - stack full: fstp value and load again
1130 if (mode == mode_E) {
1131 if (depth < N_x87_REGS) {
1132 /* ok, we have a free register: push + fstp */
1133 x87_create_fpush(state, n, op2_idx, STORE_VAL_IDX);
1135 x87_patch_insn(n, op_p);
1138 ir_node *vfld, *mem, *block, *rproj, *mproj;
1141 /* stack full here: need fstp + load */
1143 x87_patch_insn(n, op_p);
1145 block = get_nodes_block(n);
1146 irg = get_irn_irg(n);
1147 vfld = new_rd_ia32_vfld(NULL, irg, block, get_irn_n(n, 0), get_irn_n(n, 1), new_rd_NoMem(irg));
1149 /* copy all attributes */
1150 set_ia32_frame_ent(vfld, get_ia32_frame_ent(n));
1151 if (is_ia32_use_frame(n))
1152 set_ia32_use_frame(vfld);
1153 set_ia32_am_flavour(vfld, get_ia32_am_flavour(n));
1154 set_ia32_op_type(vfld, ia32_am_Source);
1155 add_ia32_am_offs(vfld, get_ia32_am_offs(n));
1156 set_ia32_am_sc(vfld, get_ia32_am_sc(n));
1157 set_ia32_ls_mode(vfld, get_ia32_ls_mode(n));
1159 rproj = new_r_Proj(irg, block, vfld, get_ia32_ls_mode(vfld), pn_ia32_vfld_res);
1160 mproj = new_r_Proj(irg, block, vfld, mode_M, pn_ia32_vfld_M);
1161 mem = get_irn_Proj_for_mode(n, mode_M);
1163 assert(mem && "Store memory not found");
1165 arch_set_irn_register(sim->env, rproj, op2);
1167 /* reroute all former users of the store memory to the load memory */
1168 edges_reroute(mem, mproj, irg);
1169 /* set the memory input of the load to the store memory */
1170 set_irn_n(vfld, 2, mem);
1172 sched_add_after(n, vfld);
1173 sched_add_after(vfld, rproj);
1175 /* rewire all users, scheduled after the store, to the loaded value */
1176 collect_and_rewire_users(n, val, rproj);
1182 /* we can only store the tos to memory */
1184 x87_create_fxch(state, n, op2_idx, STORE_VAL_IDX);
1186 /* mode != mode_E -> use normal fst */
1187 x87_patch_insn(n, op);
1191 /* we can only store the tos to memory */
1193 x87_create_fxch(state, n, op2_idx, STORE_VAL_IDX);
1196 x87_patch_insn(n, op_p);
1199 attr = get_ia32_attr(n);
1200 attr->x87[1] = op2 = &ia32_st_regs[0];
1201 DB((dbg, LEVEL_1, "<<< %s %s ->\n", get_irn_opname(n), arch_register_get_name(op2)));
1207 * Simulate a virtual Phi.
1208 * Just for cosmetic reasons change the mode of Phi nodes to mode_E.
1210 * @param state the x87 state
1211 * @param n the node that should be simulated (and patched)
1212 * @param env the architecture environment
1214 static int sim_Phi(x87_state *state, ir_node *n, const arch_env_t *env) {
1215 ir_mode *mode = get_irn_mode(n);
1217 if (mode_is_float(mode))
1218 set_irn_mode(n, mode_E);
1224 #define _GEN_BINOP(op, rev) \
1225 static int sim_##op(x87_state *state, ir_node *n) { \
1226 exchange_tmpl tmpl = { op_ia32_##op, op_ia32_##rev, op_ia32_##op##p, op_ia32_##rev##p }; \
1227 return sim_binop(state, n, &tmpl); \
1230 #define GEN_BINOP(op) _GEN_BINOP(op, op)
1231 #define GEN_BINOPR(op) _GEN_BINOP(op, op##r)
1233 #define GEN_LOAD2(op, nop) \
1234 static int sim_##op(x87_state *state, ir_node *n) { \
1235 return sim_load(state, n, op_ia32_##nop); \
1238 #define GEN_LOAD(op) GEN_LOAD2(op, op)
1240 #define GEN_UNOP(op) \
1241 static int sim_##op(x87_state *state, ir_node *n) { \
1242 return sim_unop(state, n, op_ia32_##op); \
1245 #define GEN_STORE(op) \
1246 static int sim_##op(x87_state *state, ir_node *n) { \
1247 return sim_store(state, n, op_ia32_##op, op_ia32_##op##p); \
1267 GEN_LOAD2(fConst, fldConst)
/**
 * Simulate a virtual fCondJmp (floating point compare + jump).
 *
 * Brings the two operands into the stack positions the real fcom
 * instruction family expects, then patches the node to the matching
 * real opcode: plain fcom, the reversed variant, and/or the variants
 * that pop one or both operands when they are dead after the compare.
 *
 * @param state  the x87 state
 * @param n      the node that should be simulated (and patched)
 */
static int sim_fCondJmp(x87_state *state, ir_node *n) {
	x87_simulator *sim = state->sim;
	const arch_register_t *op1 = x87_get_irn_register(sim, get_irn_n(n, BINOP_IDX_1));
	const arch_register_t *op2 = x87_get_irn_register(sim, get_irn_n(n, BINOP_IDX_2));
	int reg_index_1 = arch_register_get_index(op1);
	int reg_index_2 = arch_register_get_index(op2);
	unsigned live = vfp_live_args_after(sim, n, 0);

	DB((dbg, LEVEL_1, ">>> %+F %s, %s\n", n,
		arch_register_get_name(op1), arch_register_get_name(op2)));
	DEBUG_ONLY(vfp_dump_live(live));
	DB((dbg, LEVEL_1, "Stack before: "));
	DEBUG_ONLY(x87_dump_stack(state));

	/* the first operand must always reside on the x87 stack */
	op1_idx = x87_on_stack(state, reg_index_1);
	assert(op1_idx >= 0);

	/* BEWARE: check for comp a,a cases, they might happen */
	if (reg_index_2 != REG_VFP_NOREG) {
		/* second operand is a vfp register */
		op2_idx = x87_on_stack(state, reg_index_2);
		assert(op2_idx >= 0);

		if (is_vfp_live(arch_register_get_index(op2), live)) {
			/* second operand is live */

			if (is_vfp_live(arch_register_get_index(op1), live)) {
				/* both operands are live: no pops, pick a non-destructive compare */
				dst = op_ia32_fcomJmp;
			} else if (op2_idx == 0) {
				/* second operand already on TOS: use the reversed compare */
				dst = op_ia32_fcomrJmp;
				/* bring the first one to tos */
				x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
				dst = op_ia32_fcomJmp;
			/* second live, first operand is dead here, bring it to tos.
			   This means further, op1_idx != op2_idx. */
			assert(op1_idx != op2_idx);
			x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
			/* compare and pop the dead first operand */
			dst = op_ia32_fcompJmp;
		/* second operand is dead */
		if (is_vfp_live(arch_register_get_index(op1), live)) {
			/* first operand is live: bring second to tos.
			   This means further, op1_idx != op2_idx. */
			assert(op1_idx != op2_idx);
			x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
			/* reversed compare, popping the dead second operand */
			dst = op_ia32_fcomrpJmp;
		/* both operands are dead here, check first for identity. */
		if (op1_idx == op2_idx) {
			/* identical, one pop needed */
			x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
			dst = op_ia32_fcompJmp;
		/* different, move them to st and st(1) and pop both.
		   The tricky part is to get one into st(1). */
		else if (op2_idx == 1) {
			/* good, second operand is already in the right place, move the first */

			/* bring the first on top */
			x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
			/* compare and pop both */
			dst = op_ia32_fcomppJmp;
		}
		else if (op1_idx == 1) {
			/* good, first operand is already in the right place, move the second */

			/* bring the second on top */
			x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
			dst = op_ia32_fcomrppJmp;
		}
		/* if one is already the TOS, we need two fxch */
			/* first one is TOS, move to st(1) */
			x87_create_fxch(state, n, 1, BINOP_IDX_1);
			x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
			dst = op_ia32_fcomrppJmp;
		else if (op2_idx == 0) {
			/* second one is TOS, move to st(1) */
			x87_create_fxch(state, n, 1, BINOP_IDX_2);
			x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
			dst = op_ia32_fcomrppJmp;
			/* none of them is either TOS or st(1), 3 fxch needed */
			x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
			x87_create_fxch(state, n, 1, BINOP_IDX_2);
			x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
			dst = op_ia32_fcomppJmp;
	/* second operand is an address mode */
	if (is_vfp_live(arch_register_get_index(op1), live)) {
		/* first operand is live: bring it to TOS */
		x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
		dst = op_ia32_fcomJmp;
		/* first operand is dead: bring it to tos and pop it after the compare */
		x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
		dst = op_ia32_fcompJmp;

	/* replace the virtual opcode by the selected real one */
	x87_patch_insn(n, dst);
	assert(pop_cnt < 3);

	/* patch the operation: record the concrete st(i) registers in the attr */
	attr = get_ia32_attr(n);
	op1 = &ia32_st_regs[op1_idx];
	op2 = &ia32_st_regs[op2_idx];
	attr->x87[2] = NULL;
	DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(n),
		arch_register_get_name(op1), arch_register_get_name(op2)));
	DB((dbg, LEVEL_1, "<<< %s %s, [AM]\n", get_irn_opname(n),
		arch_register_get_name(op1)));
} /* sim_fCondJmp */
/**
 * Simulate a be_Copy.
 *
 * Non-float copies are left alone (become virtual copies). Float copies
 * are handled in one of four ways:
 *   - the source is a constant load (fldz/fld1/...): recreate the
 *     constant instead of copying,
 *   - the source is the "unknown" register: materialize some value,
 *   - the source is still live afterwards: emit a real fpushCopy,
 *   - otherwise: a virtual copy, only the stack bookkeeping changes.
 *
 * @param state  the x87 state
 * @param n      the node that should be simulated (and patched)
 */
static int sim_Copy(x87_state *state, ir_node *n) {
	ir_mode *mode = get_irn_mode(n);

	if (mode_is_float(mode)) {
		x87_simulator *sim = state->sim;
		ir_node *pred = get_irn_n(n, 0);
		const arch_register_t *out = x87_get_irn_register(sim, n);
		const arch_register_t *op1 = x87_get_irn_register(sim, pred);
		ir_node *node, *next;
		int op1_idx, out_idx;
		unsigned live = vfp_live_args_after(sim, n, REGMASK(out));
		/* constructor for the constant-reload node, if any */
		ir_node *(*cnstr)(dbg_info *, ir_graph *, ir_node *, ir_mode *);

		DB((dbg, LEVEL_1, ">>> %+F %s -> %s\n", n,
			arch_register_get_name(op1), arch_register_get_name(out)));
		DEBUG_ONLY(vfp_dump_live(live));

		/* Do not copy constants, recreate them. */
		switch (get_ia32_irn_opcode(pred)) {
			cnstr = new_rd_ia32_fldz;
			cnstr = new_rd_ia32_fld1;
		case iro_ia32_fldpi:
			cnstr = new_rd_ia32_fldpi;
		case iro_ia32_fldl2e:
			cnstr = new_rd_ia32_fldl2e;
		case iro_ia32_fldl2t:
			cnstr = new_rd_ia32_fldl2t;
		case iro_ia32_fldlg2:
			cnstr = new_rd_ia32_fldlg2;
		case iro_ia32_fldln2:
			cnstr = new_rd_ia32_fldln2;

		/* copy a constant: emit a fresh load instead of a stack copy */
		node = (*cnstr)(get_irn_dbg_info(n), get_irn_irg(n), get_nodes_block(n), mode);
		arch_set_irn_register(sim->env, node, out);
		x87_push(state, arch_register_get_index(out), node);
		attr = get_ia32_attr(node);
		attr->x87[2] = out = &ia32_st_regs[0];

		/* schedule the replacement right after the Copy */
		next = sched_next(n);
		sched_add_before(next, node);
		DB((dbg, LEVEL_1, ">>> %+F -> %s\n", node, arch_register_get_name(out)));

		/* handle the infamous unknown value */
		if (arch_register_get_index(op1) == REG_VFP_UKNWN) {
			/* This happens before Phi nodes */
			if (x87_state_is_empty(state)) {
				/* create some value: an fldz will do */
				x87_patch_insn(n, op_ia32_fldz);
				attr = get_ia32_attr(n);
				attr->x87[2] = out = &ia32_st_regs[0];
				DB((dbg, LEVEL_1, "<<< %+F -> %s\n", n,
					arch_register_get_name(out)));
			/* Just copy one. We need here an fpush that can hold a
			   register, so use the fpushCopy. */
			node = new_rd_ia32_fpushCopy(get_irn_dbg_info(n), get_irn_irg(n), get_nodes_block(n), get_irn_n(n, 0), mode);
			arch_set_irn_register(sim->env, node, out);
			x87_push(state, arch_register_get_index(out), node);
			attr = get_ia32_attr(node);
			attr->x87[0] = op1 =
			attr->x87[2] = out = &ia32_st_regs[0];
			next = sched_next(n);
			sched_add_before(next, node);
			DB((dbg, LEVEL_1, "<<< %+F %s -> %s\n", node,
				arch_register_get_name(op1),
				arch_register_get_name(out)));

		op1_idx = x87_on_stack(state, arch_register_get_index(op1));

		if (is_vfp_live(arch_register_get_index(op1), live)) {
			/* Operand is still live, a real copy. We need here an fpush that
			   can hold a register, so use the fpushCopy. */
			node = new_rd_ia32_fpushCopy(get_irn_dbg_info(n), get_irn_irg(n), get_nodes_block(n), get_irn_n(n, 0), mode);
			arch_set_irn_register(sim->env, node, out);
			x87_push(state, arch_register_get_index(out), node);
			attr = get_ia32_attr(node);
			attr->x87[0] = op1 = &ia32_st_regs[op1_idx];
			attr->x87[2] = out = &ia32_st_regs[0];
			next = sched_next(n);
			sched_add_before(next, node);
			DB((dbg, LEVEL_1, ">>> %+F %s -> %s\n", node, op1->name, out->name));

		/* source is dead: does the output register already hold a stack slot? */
		out_idx = x87_on_stack(state, arch_register_get_index(out));
		if (out_idx >= 0 && out_idx != op1_idx) {
			/* op1 must be killed and placed where out is */
			/* best case, simple remove and rename */
			x87_patch_insn(n, op_ia32_Pop);
			attr = get_ia32_attr(n);
			attr->x87[0] = op1 = &ia32_st_regs[0];
			x87_set_st(state, arch_register_get_index(out), n, op1_idx - 1);
			/* move op1 to tos, store and pop it */
			x87_create_fxch(state, n, op1_idx, 0);
			x87_patch_insn(n, op_ia32_Pop);
			attr = get_ia32_attr(n);
			attr->x87[0] = op1 = &ia32_st_regs[out_idx];
			x87_set_st(state, arch_register_get_index(out), n, out_idx - 1);
			DB((dbg, LEVEL_1, ">>> %+F %s\n", n, op1->name));

		/* just a virtual copy: rename the stack slot and drop the node */
		x87_set_st(state, arch_register_get_index(out), get_unop_op(n), op1_idx);
		DB((dbg, LEVEL_1, ">>> KILLED %s\n", get_irn_opname(n)));
		exchange(n, get_unop_op(n));
/**
 * Simulate a be_Call.
 *
 * The x87 stack must be empty at a call (the ABI requires it); if the
 * callee returns a float, the result appears in st(0), so one value is
 * pushed onto the simulated stack.
 *
 * @param state  the x87 state
 * @param n      the node that should be simulated
 * @param env    the architecture environment
 */
static int sim_Call(x87_state *state, ir_node *n, const arch_env_t *env) {
	ir_type *call_tp = be_Call_get_type(n);

	/* at the begin of a call the x87 state should be empty */
	assert(state->depth == 0 && "stack not empty before call");

	/*
	 * If the called function returns a float, it is returned in st(0).
	 * This even happens if the return value is NOT used.
	 * Moreover, only one return result is supported.
	 */
	if (get_method_n_ress(call_tp) > 0) {
		ir_type *res_type = get_method_res_type(call_tp, 0);
		ir_mode *mode = get_type_mode(res_type);

		if (mode && mode_is_float(mode)) {
			/*
			 * TODO: what to push here? The result might be unused and currently
			 * we have no possibility to detect this :-(
			 */
			x87_push(state, 0, n);
/**
 * Simulate a be_Spill.
 *
 * @param state  the x87 state
 * @param n      the node that should be simulated (and patched)
 *
 * Should not happen: spills are lowered to fst before the x87
 * simulator sees them, hence the assert. The fallback treats the
 * node like a store.
 */
static int sim_Spill(x87_state *state, ir_node *n) {
	assert(0 && "Spill not lowered");
	return sim_fst(state, n);
/**
 * Simulate a be_Reload.
 *
 * @param state  the x87 state
 * @param n      the node that should be simulated (and patched)
 *
 * Should not happen: reloads are lowered to fld before the x87
 * simulator sees them, hence the assert. The fallback treats the
 * node like a load.
 */
static int sim_Reload(x87_state *state, ir_node *n) {
	assert(0 && "Reload not lowered");
	return sim_fld(state, n);
/**
 * Simulate a be_Return.
 *
 * Counts the floating point return values; exactly that many values
 * must remain on the x87 stack here, and they are popped virtually
 * (the hardware pop is implied by the return convention).
 *
 * @param state  the x87 state
 * @param n      the node that should be simulated (and patched)
 */
static int sim_Return(x87_state *state, ir_node *n) {
	int n_res = be_Return_get_n_rets(n);
	int i, n_float_res = 0;

	/* only floating point return values must reside on the stack */
	for (i = 0; i < n_res; ++i) {
		ir_node *res = get_irn_n(n, be_pos_Return_val + i);

		if (mode_is_float(get_irn_mode(res)))
	}
	assert(x87_get_depth(state) == n_float_res);

	/* pop them virtually */
	for (i = n_float_res - 1; i >= 0; --i)
/** Pair of (input, output) registers used when describing a permutation. */
typedef struct _perm_data_t {
	const arch_register_t *in;   /**< the input register */
	const arch_register_t *out;  /**< the output register */

/**
 * Simulate a be_Perm.
 *
 * @param state  the x87 state
 * @param irn    the node that should be simulated (and patched)
 */
static int sim_Perm(x87_state *state, ir_node *irn) {
	x87_simulator *sim = state->sim;
	ir_node *pred = get_irn_n(irn, 0);
	const ir_edge_t *edge;

	/* handle only floating point Perms */
	if (! mode_is_float(get_irn_mode(pred)))

	DB((dbg, LEVEL_1, ">>> %+F\n", irn));

	/* Perm is a pure virtual instruction on x87.
	   All inputs must be on the FPU stack and are pairwise
	   different from each other.
	   So, all we need to do is to permute the stack state. */
	n = get_irn_arity(irn);
	NEW_ARR_A(int, stack_pos, n);

	/* collect old stack positions */
	for (i = 0; i < n; ++i) {
		const arch_register_t *inreg = x87_get_irn_register(sim, get_irn_n(irn, i));
		int idx = x87_on_stack(state, arch_register_get_index(inreg));

		assert(idx >= 0 && "Perm argument not on x87 stack");

	/* now do the permutation: every Proj keeps its input's stack slot */
	foreach_out_edge(irn, edge) {
		ir_node *proj = get_edge_src_irn(edge);
		const arch_register_t *out = x87_get_irn_register(sim, proj);
		long num = get_Proj_proj(proj);

		assert(0 <= num && num < n && "More Proj's than Perm inputs");
		x87_set_st(state, arch_register_get_index(out), proj, stack_pos[(unsigned)num]);
	DB((dbg, LEVEL_1, "<<< %+F\n", irn));
/**
 * Kill any dead registers at block start by popping them from the stack.
 *
 * Computes a bitmask of stack slots whose vfp register is not live into
 * the block, then removes them with fxch/fpop sequences inserted before
 * the first instruction. Double-pops are used where two adjacent dead
 * slots allow it.
 *
 * @param sim          the simulator handle
 * @param block        the current block
 * @param start_state  the x87 state at the begin of the block
 *
 * @return the (possibly cloned and modified) state
 */
static x87_state *x87_kill_deads(x87_simulator *sim, ir_node *block, x87_state *start_state) {
	x87_state *state = start_state;
	ir_node *first_insn = sched_first(block);
	ir_node *keep = NULL;
	unsigned live = vfp_live_args_after(sim, block, 0);
	int i, depth, num_pop;

	/* collect one bit per dead stack slot */
	depth = x87_get_depth(state);
	for (i = depth - 1; i >= 0; --i) {
		int reg = x87_get_st_reg(state, i);

		if (! is_vfp_live(reg, live))
			kill_mask |= (1 << i);
	}

	/* create a new state, will be changed */
	state = x87_clone_state(sim, state);

	DB((dbg, LEVEL_1, "Killing deads:\n"));
	DEBUG_ONLY(vfp_dump_live(live));
	DEBUG_ONLY(x87_dump_stack(state));

	/* now kill registers */
	/* we can only kill from TOS, so bring them up */
	if (! (kill_mask & 1)) {
		/* search from behind, because we can do a double-pop */
		for (i = depth - 1; i >= 0; --i) {
			if (kill_mask & (1 << i)) {
				kill_mask &= ~(1 << i);

		/* mark the slot dead and swap it up to the TOS */
		x87_set_st(state, -1, keep, i);
		keep = x87_create_fxch(state, first_insn, i, -1);

	keep = x87_get_st_node(state, 0);

	if ((kill_mask & 3) == 3) {
		/* we can do a double-pop */
		/* only a single pop */

	kill_mask >>= num_pop;
	keep = x87_create_fpop(state, first_insn, num_pop, keep);
} /* x87_kill_deads */
/**
 * Run a simulation and fix all virtual instructions for a block.
 *
 * Kills dead registers at block entry, runs the per-opcode simulator
 * function for every scheduled node that has one registered, then
 * propagates the resulting stack state to all successor blocks
 * (shuffling it where a successor already has a begin state).
 *
 * @param sim    the simulator handle
 * @param block  the current block
 */
static void x87_simulate_block(x87_simulator *sim, ir_node *block) {
	blk_state *bl_state = x87_get_bl_state(sim, block);
	x87_state *state = bl_state->begin;
	const ir_edge_t *edge;
	ir_node *start_block;

	assert(state != NULL);
	/* already processed? */
	if (bl_state->end != NULL)

	update_liveness(sim, block);

	DB((dbg, LEVEL_1, "Simulate %+F\n", block));
	DB((dbg, LEVEL_2, "State at Block begin:\n "));
	DEBUG_ONLY(x87_dump_stack(state));

	/* at block begin, kill all dead registers */
	state = x87_kill_deads(sim, block, state);

	/* beware, n might change */
	for (n = sched_first(block); !sched_is_end(n); n = next) {
		ir_op *op = get_irn_op(n);

		next = sched_next(n);
		/* no simulator registered: nothing to do for this opcode */
		if (op->ops.generic == NULL)

		func = (sim_func)op->ops.generic;

		/* have work to do */
		if (state == bl_state->begin) {
			/* create a new state, will be changed */
			state = x87_clone_state(sim, state);

		node_inserted = (*func)(state, n);

		/* sim_func might have added additional nodes after n,
		   beware: n must not be changed by sim_func
		   (i.e. removed from schedule) in this case */
		next = sched_next(n);

	start_block = get_irg_start_block(get_irn_irg(block));

	/* check if the state must be shuffled */
	foreach_block_succ(block, edge) {
		ir_node *succ = get_edge_src_irn(edge);
		blk_state *succ_state;

		/* the start block has a fixed (empty) state, skip it */
		if (succ == start_block)

		succ_state = x87_get_bl_state(sim, succ);

		if (succ_state->begin == NULL) {
			/* first predecessor: our end state becomes the successor's begin */
			succ_state->begin = state;
			waitq_put(sim->worklist, succ);
			/* There is already a begin state for the successor, bad.
			   Do the necessary permutations.
			   Note that critical edges are removed, so this is always possible. */
			x87_shuffle(sim, block, state, succ, succ_state->begin);

	bl_state->end = state;

	DB((dbg, LEVEL_2, "State at Block end:\n ")); DEBUG_ONLY(x87_dump_stack(state));
} /* x87_simulate_block */
/**
 * Create a new x87 simulator.
 *
 * Sets up the obstack, the block-state map and the per-node liveness
 * cache, then registers the sim_* handlers as the generic function
 * pointer of every opcode that must be simulated.
 *
 * @param sim  a simulator handle, will be initialized
 * @param irg  the current graph
 * @param env  the architecture environment
 */
static void x87_init_simulator(x87_simulator *sim, ir_graph *irg, const arch_env_t *env)
{
	obstack_init(&sim->obst);
	sim->blk_states = pmap_create();
	/* one liveness entry per node index */
	sim->n_idx = get_irg_last_idx(irg);
	sim->live = obstack_alloc(&sim->obst, sizeof(*sim->live) * sim->n_idx);

	FIRM_DBG_REGISTER(dbg, "firm.be.ia32.x87");

	DB((dbg, LEVEL_1, "--------------------------------\n"
		"x87 Simulator started for %+F\n", irg));

	/* set the generic function pointer of instructions we must simulate */
	clear_irp_opcodes_generic_func();

/* helpers: bind sim_<op> to the firm / virtual ia32 / backend opcode */
#define ASSOC(op) (op_ ## op)->ops.generic = (op_func)(sim_##op)
#define ASSOC_IA32(op) (op_ia32_v ## op)->ops.generic = (op_func)(sim_##op)
#define ASSOC_BE(op) (op_be_ ## op)->ops.generic = (op_func)(sim_##op)
	ASSOC_IA32(fCondJmp);
} /* x87_init_simulator */
1990 * Destroy a x87 simulator.
1992 * @param sim the simulator handle
1994 static void x87_destroy_simulator(x87_simulator *sim) {
1995 pmap_destroy(sim->blk_states);
1996 obstack_free(&sim->obst, NULL);
1997 DB((dbg, LEVEL_1, "x87 Simulator stopped\n\n"));
1998 } /* x87_destroy_simulator */
/**
 * Run a simulation and fix all virtual instructions for a graph.
 *
 * Seeds the start block with the empty stack state, precomputes
 * liveness for all blocks, then processes the worklist of blocks
 * until every reachable block has been simulated.
 *
 * @param env   the architecture environment
 * @param birg  the current backend graph (holds the ir_graph)
 *
 * Needs a block-schedule.
 */
void x87_simulate_graph(const arch_env_t *env, be_irg_t *birg) {
	ir_node *block, *start_block;
	blk_state *bl_state;
	ir_graph *irg = birg->irg;

	/* create the simulator */
	x87_init_simulator(&sim, irg, env);

	start_block = get_irg_start_block(irg);
	bl_state = x87_get_bl_state(&sim, start_block);

	/* start with the empty state */
	bl_state->begin = empty;

	sim.worklist = new_waitq();
	waitq_put(sim.worklist, start_block);

	be_invalidate_liveness(birg);
	be_assure_liveness(birg);

	/* Create the worklist for the schedule and calculate the liveness
	   for all nodes. We must precalculate this info, because the
	   simulator adds new nodes (and possibly before Phi nodes) which
	   let fail the old lazy calculation.
	   On the other hand we reduce the computation amount due to
	   precaching from O(n^2) to O(n) at the expense of O(n) cache memory. */
	for (i = 0, n = ARR_LEN(blk_list); i < n; ++i) {
		update_liveness(&sim, lv, blk_list[i]);
		waitq_put(worklist, blk_list[i]);

	/* process blocks until the worklist is drained */
	block = waitq_get(sim.worklist);
	x87_simulate_block(&sim, block);
	} while (! pdeq_empty(sim.worklist));

	del_waitq(sim.worklist);
	x87_destroy_simulator(&sim);
} /* x87_simulate_graph */