2 * This file implements the x87 support and virtual to stack
3 * register translation for the ia32 backend.
5 * @author: Michael Beck
12 #endif /* HAVE_CONFIG_H */
19 #include "iredges_t.h"
28 #include "../belive_t.h"
29 #include "../besched_t.h"
30 #include "../benode_t.h"
31 #include "ia32_new_nodes.h"
32 #include "gen_ia32_new_nodes.h"
33 #include "gen_ia32_regalloc_if.h"
38 /* first and second binop index */
45 /* the store val index */
46 #define STORE_VAL_IDX 2
48 #define MASK_TOS(x) ((x) & (N_x87_REGS - 1))
50 /** the debug handle */
51 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
53 /* Forward declaration. */
54 typedef struct _x87_simulator x87_simulator;
57 * An exchange template.
58 * Note that our virtual functions have the same inputs
59 * and attributes as the real ones, so we can simple exchange
61 * Further, x87 supports inverse instructions, so we can handle them.
63 typedef struct _exchange_tmpl {
64 ir_op *normal_op; /**< the normal one */
65 ir_op *reverse_op; /**< the reverse one if exists */
66 ir_op *normal_pop_op; /**< the normal one with tos pop */
67 ir_op *reverse_pop_op; /**< the reverse one with tos pop */
71 * An entry on the simulated x87 stack.
73 typedef struct _st_entry {
74 int reg_idx; /**< the virtual register index of this stack value */
75 ir_node *node; /**< the node that produced this value */
81 typedef struct _x87_state {
82 st_entry st[N_x87_REGS]; /**< the register stack */
83 int depth; /**< the current stack depth */
84 int tos; /**< position of the tos */
85 x87_simulator *sim; /**< The simulator. */
88 /** An empty state, used for blocks without fp instructions. */
89 static x87_state _empty = { { {0, NULL}, }, 0, 0 };
90 static x87_state *empty = (x87_state *)&_empty;
92 /** The type of an instruction simulator function. */
93 typedef int (*sim_func)(x87_state *state, ir_node *n);
96 * A block state: Every block has a x87 state at the beginning and at the end.
98 typedef struct _blk_state {
99 x87_state *begin; /**< state at the begin or NULL if not assigned */
100 x87_state *end; /**< state at the end or NULL if not assigned */
103 #define PTR_TO_BLKSTATE(p) ((blk_state *)(p))
105 /** liveness bitset for vfp registers. */
106 typedef unsigned char vfp_liveness;
111 struct _x87_simulator {
112 struct obstack obst; /**< An obstack for fast allocating. */
113 pmap *blk_states; /**< Map blocks to states. */
114 const arch_env_t *env; /**< The architecture environment. */
115 be_lv_t *lv; /**< intrablock liveness. */
116 vfp_liveness *live; /**< Liveness information. */
117 unsigned n_idx; /**< The cached get_irg_last_idx() result. */
118 waitq *worklist; /**< list of blocks to process. */
122 * Returns the current stack depth.
124 * @param state the x87 state
126 * @return the x87 stack depth
128 static int x87_get_depth(const x87_state *state) {
133 * Check if the state is empty.
135 * @param state the x87 state
137 * returns non-zero if the x87 stack is empty
139 static int x87_state_is_empty(const x87_state *state) {
140 return state->depth == 0;
144 * Return the virtual register index at st(pos).
146 * @param state the x87 state
147 * @param pos a stack position
149 * @return the vfp register index that produced the value at st(pos)
151 static int x87_get_st_reg(const x87_state *state, int pos) {
152 assert(pos < state->depth);
153 return state->st[MASK_TOS(state->tos + pos)].reg_idx;
157 * Return the node at st(pos).
159 * @param state the x87 state
160 * @param pos a stack position
162 * @return the IR node that produced the value at st(pos)
164 static ir_node *x87_get_st_node(const x87_state *state, int pos) {
165 assert(pos < state->depth);
166 return state->st[MASK_TOS(state->tos + pos)].node;
167 } /* x87_get_st_node */
171 * Dump the stack for debugging.
173 * @param state the x87 state
175 static void x87_dump_stack(const x87_state *state) {
178 for (i = state->depth - 1; i >= 0; --i) {
179 DB((dbg, LEVEL_2, "vf%d ", x87_get_st_reg(state, i)));
181 DB((dbg, LEVEL_2, "<-- TOS\n"));
182 } /* x87_dump_stack */
183 #endif /* DEBUG_libfirm */
186 * Set a virtual register to st(pos).
188 * @param state the x87 state
189 * @param reg_idx the vfp register index that should be set
190 * @param node the IR node that produces the value of the vfp register
191 * @param pos the stack position where the new value should be entered
193 static void x87_set_st(x87_state *state, int reg_idx, ir_node *node, int pos) {
	/* only checks that the stack is non-empty; pos itself is trusted by the caller
	   -- NOTE(review): confirm callers always pass pos < depth */
194 assert(0 < state->depth);
	/* overwrite the slot at st(pos): record which vfp register lives there
	   and which IR node produced its value (MASK_TOS wraps the circular index) */
195 state->st[MASK_TOS(state->tos + pos)].reg_idx = reg_idx;
196 state->st[MASK_TOS(state->tos + pos)].node = node;
	/* debug-only trace of the new stack layout */
198 DB((dbg, LEVEL_2, "After SET_REG: "));
199 DEBUG_ONLY(x87_dump_stack(state));
203 * Set the tos virtual register.
205 * @param state the x87 state
206 * @param reg_idx the vfp register index that should be set
207 * @param node the IR node that produces the value of the vfp register
209 static void x87_set_tos(x87_state *state, int reg_idx, ir_node *node) {
210 x87_set_st(state, reg_idx, node, 0);
215 * Flush the x87 stack.
217 * @param state the x87 state
219 static void x87_flush(x87_state *state) {
226 * Swap st(0) with st(pos).
228 * @param state the x87 state
229 * @param pos the stack position to change the tos with
231 static void x87_fxch(x87_state *state, int pos) {
	/* st(pos) must exist on the simulated stack */
233 assert(pos < state->depth);
	/* three-assignment swap of the entries for st(pos) and st(0);
	   NOTE(review): the declaration of the temporary 'entry' (st_entry)
	   lies outside this excerpt */
235 entry = state->st[MASK_TOS(state->tos + pos)];
236 state->st[MASK_TOS(state->tos + pos)] = state->st[MASK_TOS(state->tos)];
237 state->st[MASK_TOS(state->tos)] = entry;
	/* debug-only trace after the exchange */
239 DB((dbg, LEVEL_2, "After FXCH: ")); DEBUG_ONLY(x87_dump_stack(state));
243 * Convert a virtual register to the stack index.
245 * @param state the x87 state
246 * @param reg_idx the register vfp index
248 * @return the stack position where the register is stacked
249 * or -1 if the virtual register was not found
251 static int x87_on_stack(const x87_state *state, int reg_idx) {
252 int i, tos = state->tos;
	/* linear scan from st(0) downwards over the occupied slots,
	   comparing the stored vfp register index (per the header comment,
	   a hit yields the stack position, a miss yields -1; the return
	   statements are outside this excerpt) */
254 for (i = 0; i < state->depth; ++i)
255 if (state->st[MASK_TOS(tos + i)].reg_idx == reg_idx)
261 * Push a virtual Register onto the stack, double pushed allowed.
263 * @param state the x87 state
264 * @param reg_idx the register vfp index
265 * @param node the node that produces the value of the vfp register
267 static void x87_push_dbl(x87_state *state, int reg_idx, ir_node *node) {
268 assert(state->depth < N_x87_REGS && "stack overrun");
	/* tos moves "down" (decrement, wrapped modulo N_x87_REGS) to open a new
	   slot; the depth increment is outside this excerpt */
271 state->tos = MASK_TOS(state->tos - 1);
	/* the new top slot records the vfp register and its producing node;
	   double pushes of the same register are allowed here (cf. x87_push) */
272 state->st[state->tos].reg_idx = reg_idx;
273 state->st[state->tos].node = node;
	/* debug-only trace after the push */
275 DB((dbg, LEVEL_2, "After PUSH: ")); DEBUG_ONLY(x87_dump_stack(state));
279 * Push a virtual Register onto the stack, double pushes are NOT allowed..
281 * @param state the x87 state
282 * @param reg_idx the register vfp index
283 * @param node the node that produces the value of the vfp register
284 * @param dbl_push if != 0 double pushes are allowed
286 static void x87_push(x87_state *state, int reg_idx, ir_node *node) {
287 assert(x87_on_stack(state, reg_idx) == -1 && "double push");
289 x87_push_dbl(state, reg_idx, node);
293 * Pop a virtual Register from the stack.
295 static void x87_pop(x87_state *state) {
296 assert(state->depth > 0 && "stack underrun");
	/* tos moves "up" (increment, wrapped); the popped slot is simply
	   abandoned -- the depth decrement is outside this excerpt */
299 state->tos = MASK_TOS(state->tos + 1);
	/* debug-only trace after the pop */
301 DB((dbg, LEVEL_2, "After POP: ")); DEBUG_ONLY(x87_dump_stack(state));
305 * Returns the block state of a block.
307 * @param sim the x87 simulator handle
308 * @param block the current block
310 * @return the block state
312 static blk_state *x87_get_bl_state(x87_simulator *sim, ir_node *block) {
	/* look the block up in the block->state map first */
313 pmap_entry *entry = pmap_find(sim->blk_states, block);
	/* on a miss, lazily allocate a fresh blk_state on the simulator obstack
	   with both begin and end state unassigned, and register it in the map
	   (the 'if (! entry)' guard around this branch is outside this excerpt) */
316 blk_state *bl_state = obstack_alloc(&sim->obst, sizeof(*bl_state));
317 bl_state->begin = NULL;
318 bl_state->end = NULL;
320 pmap_insert(sim->blk_states, block, bl_state);
	/* on a hit, return the cached state stored in the map entry */
324 return PTR_TO_BLKSTATE(entry->value);
325 } /* x87_get_bl_state */
328 * Creates a new x87 state.
330 * @param sim the x87 simulator handle
332 * @return a new x87 state
334 static x87_state *x87_alloc_state(x87_simulator *sim) {
335 x87_state *res = obstack_alloc(&sim->obst, sizeof(*res));
339 } /* x87_alloc_state */
343 * Create a new empty x87 state.
345 * @param sim the x87 simulator handle
347 * @return a new empty x87 state
349 static x87_state *x87_alloc_empty_state(x87_simulator *sim) {
350 x87_state *res = x87_alloc_state(sim);
354 } /* x87_alloc_empty_state */
360 * @param sim the x87 simulator handle
361 * @param src the x87 state that will be cloned
363 * @return a cloned copy of the src state
365 static x87_state *x87_clone_state(x87_simulator *sim, const x87_state *src) {
366 x87_state *res = x87_alloc_state(sim);
368 memcpy(res, src, sizeof(*res));
370 } /* x87_clone_state */
373 * Patch a virtual instruction into a x87 one and return
376 * @param n the IR node to patch
377 * @param op the x87 opcode to patch in
379 static ir_node *x87_patch_insn(ir_node *n, ir_op *op) {
380 ir_mode *mode = get_irn_mode(n);
	/* a mode_T node delivers its float result through Proj nodes, so the
	   mode change must be applied to the Projs, not to n itself */
385 if (mode == mode_T) {
386 /* patch all Proj's */
387 const ir_edge_t *edge;
389 foreach_out_edge(n, edge) {
390 ir_node *proj = get_edge_src_irn(edge);
392 mode = get_irn_mode(proj);
	/* only float Projs are switched to the x87 extended mode_E */
393 if (mode_is_float(mode)) {
395 set_irn_mode(proj, mode_E);
	/* non-tuple float results get mode_E directly
	   (the set_irn_op(n, op) patching call is outside this excerpt) */
400 else if (mode_is_float(mode))
401 set_irn_mode(n, mode_E);
403 } /* x87_patch_insn */
406 * Returns the first Proj of a mode_T node having a given mode.
408 * @param n the mode_T node
409 * @param m the desired mode of the Proj
410 * @return The first Proj of mode @p m found or NULL.
412 static ir_node *get_irn_Proj_for_mode(ir_node *n, ir_mode *m) {
413 const ir_edge_t *edge;
415 assert(get_irn_mode(n) == mode_T && "Need mode_T node");
417 foreach_out_edge(n, edge) {
418 ir_node *proj = get_edge_src_irn(edge);
419 if (get_irn_mode(proj) == m)
424 } /* get_irn_Proj_for_mode */
427 * Wrap the arch_* function here so we can check for errors.
429 static INLINE const arch_register_t *x87_get_irn_register(x87_simulator *sim, const ir_node *irn) {
430 const arch_register_t *res;
432 res = arch_get_irn_register(sim->env, irn);
433 assert(res->reg_class->regs == ia32_vfp_regs);
437 /* -------------- x87 perm --------------- */
440 * Creates a fxch for shuffle.
442 * @param state the x87 state
443 * @param pos parameter for fxch
444 * @param block the block were fxch is inserted
446 * Creates a new fxch node and reroute the user of the old node
449 * @return the fxch node
451 static ir_node *x87_fxch_shuffle(x87_state *state, int pos, ir_node *block)
456 fxch = new_rd_ia32_fxch(NULL, get_irn_irg(block), block, mode_E);
457 attr = get_ia32_attr(fxch);
458 attr->x87[0] = &ia32_st_regs[pos];
459 attr->x87[2] = &ia32_st_regs[0];
463 x87_fxch(state, pos);
465 } /* x87_fxch_shuffle */
468 * Calculate the necessary permutations to reach dst_state.
470 * These permutations are done with fxch instructions and placed
471 * at the end of the block.
473 * Note that critical edges are removed here, so we need only
474 * a shuffle if the current block has only one successor.
476 * @param sim the simulator handle
477 * @param block the current block
478 * @param state the current x87 stack state, might be modified
479 * @param dst_block the destination block
480 * @param dst_state destination state
484 static x87_state *x87_shuffle(x87_simulator *sim, ir_node *block, x87_state *state, ir_node *dst_block, const x87_state *dst_state) {
485 int i, n_cycles, k, ri;
486 unsigned cycles[4], all_mask;
487 char cycle_idx[4][8];
488 ir_node *fxch, *before, *after;
490 assert(state->depth == dst_state->depth);
492 /* Some mathematics here:
493 If we have a cycle of length n that includes the tos,
494 we need n-1 exchange operations.
495 We can always add the tos and restore it, so we need
496 n+1 exchange operations for a cycle not containing the tos.
497 So, the maximum of needed operations is for a cycle of 7
498 not including the tos == 8.
499 This is the same number of ops we would need for using stores,
500 so exchange is cheaper (we save the loads).
501 On the other hand, we might need an additional exchange
502 in the next block to bring one operand on top, so the
503 number of ops in the first case is identical.
504 Further, no more than 4 cycles can exists (4 x 2).
506 all_mask = (1 << (state->depth)) - 1;
508 for (n_cycles = 0; all_mask; ++n_cycles) {
509 int src_idx, dst_idx;
511 /* find the first free slot */
512 for (i = 0; i < state->depth; ++i) {
513 if (all_mask & (1 << i)) {
514 all_mask &= ~(1 << i);
516 /* check if there are differences here */
517 if (x87_get_st_reg(state, i) != x87_get_st_reg(dst_state, i))
523 /* no more cycles found */
528 cycles[n_cycles] = (1 << i);
529 cycle_idx[n_cycles][k++] = i;
530 for (src_idx = i; ; src_idx = dst_idx) {
531 dst_idx = x87_on_stack(dst_state, x87_get_st_reg(state, src_idx));
533 if ((all_mask & (1 << dst_idx)) == 0)
536 cycle_idx[n_cycles][k++] = dst_idx;
537 cycles[n_cycles] |= (1 << dst_idx);
538 all_mask &= ~(1 << dst_idx);
540 cycle_idx[n_cycles][k] = -1;
544 /* no permutation needed */
548 /* Hmm: permutation needed */
549 DB((dbg, LEVEL_2, "\n%+F needs permutation: from\n", block));
550 DEBUG_ONLY(x87_dump_stack(state));
551 DB((dbg, LEVEL_2, " to\n"));
552 DEBUG_ONLY(x87_dump_stack(dst_state));
556 DB((dbg, LEVEL_2, "Need %d cycles\n", n_cycles));
557 for (ri = 0; ri < n_cycles; ++ri) {
558 DB((dbg, LEVEL_2, " Ring %d:\n ", ri));
559 for (k = 0; cycle_idx[ri][k] != -1; ++k)
560 DB((dbg, LEVEL_2, " st%d ->", cycle_idx[ri][k]));
561 DB((dbg, LEVEL_2, "\n"));
568 * Find the place node must be insert.
569 * We have only one successor block, so the last instruction should
572 before = sched_last(block);
573 assert(is_cfop(before));
575 /* now do the permutations */
576 for (ri = 0; ri < n_cycles; ++ri) {
577 if ((cycles[ri] & 1) == 0) {
578 /* this cycle does not include the tos */
579 fxch = x87_fxch_shuffle(state, cycle_idx[ri][0], block);
581 sched_add_after(after, fxch);
583 sched_add_before(before, fxch);
586 for (k = 1; cycle_idx[ri][k] != -1; ++k) {
587 fxch = x87_fxch_shuffle(state, cycle_idx[ri][k], block);
589 sched_add_after(after, fxch);
591 sched_add_before(before, fxch);
594 if ((cycles[ri] & 1) == 0) {
595 /* this cycle does not include the tos */
596 fxch = x87_fxch_shuffle(state, cycle_idx[ri][0], block);
597 sched_add_after(after, fxch);
604 * Create a fxch node before another node.
606 * @param state the x87 state
607 * @param n the node after the fxch
608 * @param pos exchange st(pos) with st(0)
609 * @param op_idx if >= 0, replace input op_idx of n with the fxch result
613 static ir_node *x87_create_fxch(x87_state *state, ir_node *n, int pos, int op_idx) {
616 ir_graph *irg = get_irn_irg(n);
617 ir_node *block = get_nodes_block(n);
619 x87_fxch(state, pos);
621 fxch = new_rd_ia32_fxch(NULL, irg, block, mode_E);
622 attr = get_ia32_attr(fxch);
623 attr->x87[0] = &ia32_st_regs[pos];
624 attr->x87[2] = &ia32_st_regs[0];
628 sched_add_before(n, fxch);
629 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fxch), attr->x87[0]->name, attr->x87[2]->name));
631 } /* x87_create_fxch */
634 * Create a fpush before node n.
636 * @param state the x87 state
637 * @param n the node after the fpush
638 * @param pos push st(pos) on stack
639 * @param op_idx replace input op_idx of n with the fpush result
641 static void x87_create_fpush(x87_state *state, ir_node *n, int pos, int op_idx) {
642 ir_node *fpush, *pred = get_irn_n(n, op_idx);
644 const arch_register_t *out = x87_get_irn_register(state->sim, pred);
646 x87_push_dbl(state, arch_register_get_index(out), pred);
648 fpush = new_rd_ia32_fpush(NULL, get_irn_irg(n), get_nodes_block(n), mode_E);
649 attr = get_ia32_attr(fpush);
650 attr->x87[0] = &ia32_st_regs[pos];
651 attr->x87[2] = &ia32_st_regs[0];
654 sched_add_before(n, fpush);
656 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fpush), attr->x87[0]->name, attr->x87[2]->name));
657 } /* x87_create_fpush */
660 * Create a fpop before node n.
662 * @param state the x87 state
663 * @param n the node after the fpop
664 * @param num pop 1 or 2 values
665 * @param pred node to use as predecessor of the fpop
667 * @return the fpop node
669 static ir_node *x87_create_fpop(x87_state *state, ir_node *n, int num, ir_node *pred) {
670 ir_node *fpop = pred;
677 fpop = new_rd_ia32_fpop(NULL, get_irn_irg(n), get_nodes_block(n), mode_E);
678 attr = get_ia32_attr(fpop);
679 attr->x87[0] = &ia32_st_regs[0];
680 attr->x87[1] = &ia32_st_regs[0];
681 attr->x87[2] = &ia32_st_regs[0];
683 sched_add_before(n, fpop);
684 DB((dbg, LEVEL_1, "<<< %s %s\n", get_irn_opname(fpop), attr->x87[0]->name));
690 } /* x87_create_fpop */
692 /* --------------------------------- liveness ------------------------------------------ */
695 * The liveness transfer function.
696 * Updates a live set over a single step from a given node to its predecessor.
697 * Everything defined at the node is removed from the set, the uses of the node get inserted.
699 * @param sim The simulator handle.
700 * @param irn The node at which liveness should be computed.
701 * @param live The bitset of registers live before @p irn. This set gets modified by updating it to
702 * the registers live after irn.
704 * @return The live bitset.
706 static vfp_liveness vfp_liveness_transfer(x87_simulator *sim, ir_node *irn, vfp_liveness live)
709 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
710 const arch_env_t *arch_env = sim->env;
	/* backward step: a value defined by irn is dead before irn,
	   so clear its register bit */
712 if (arch_irn_consider_in_reg_alloc(arch_env, cls, irn)) {
713 const arch_register_t *reg = x87_get_irn_register(sim, irn);
714 live &= ~(1 << arch_register_get_index(reg));
	/* every float operand used by irn is live before irn:
	   set the bit of each such vfp register */
717 for (i = 0, n = get_irn_arity(irn); i < n; ++i) {
718 ir_node *op = get_irn_n(irn, i);
720 if (mode_is_float(get_irn_mode(op)) && arch_irn_consider_in_reg_alloc(arch_env, cls, op)) {
721 const arch_register_t *reg = x87_get_irn_register(sim, op);
722 live |= 1 << arch_register_get_index(reg);
726 } /* vfp_liveness_transfer */
729 * Put all live virtual registers at the end of a block into a bitset.
731 * @param sim the simulator handle
732 * @param lv the liveness information
733 * @param bl the block
735 * @return The live bitset at the end of this block
737 static vfp_liveness vfp_liveness_end_of_block(x87_simulator *sim, const ir_node *block)
740 vfp_liveness live = 0;
741 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
742 const arch_env_t *arch_env = sim->env;
743 const be_lv_t *lv = sim->lv;
745 be_lv_foreach(lv, block, be_lv_state_end, i) {
746 const arch_register_t *reg;
747 const ir_node *node = be_lv_get_irn(lv, block, i);
748 if (!arch_irn_consider_in_reg_alloc(arch_env, cls, node))
751 reg = x87_get_irn_register(sim, node);
752 live |= 1 << arch_register_get_index(reg);
756 } /* vfp_liveness_end_of_block */
758 /** get the register mask from an arch_register */
759 #define REGMASK(reg) (1 << (arch_register_get_index(reg)))
762 * Return a bitset of argument registers which are live at the end of a node.
764 * @param sim the simulator handle
765 * @param pos the node
766 * @param kill kill mask for the output registers
768 * @return The live bitset.
770 static unsigned vfp_live_args_after(x87_simulator *sim, const ir_node *pos, unsigned kill)
772 unsigned idx = get_irn_idx(pos);
774 assert(idx < sim->n_idx);
775 return sim->live[idx] & ~kill;
776 } /* vfp_live_args_after */
779 * Calculate the liveness for a whole block and cache it.
781 * @param sim the simulator handle
782 * @param lv the liveness handle
783 * @param block the block
785 static void update_liveness(x87_simulator *sim, ir_node *block) {
786 vfp_liveness live = vfp_liveness_end_of_block(sim, block);
790 /* now iterate through the block backward and cache the results */
791 sched_foreach_reverse(block, irn) {
792 /* stop at the first Phi: this produces the live-in */
796 idx = get_irn_idx(irn);
797 sim->live[idx] = live;
799 live = vfp_liveness_transfer(sim, irn, live);
801 idx = get_irn_idx(block);
802 sim->live[idx] = live;
803 } /* update_liveness */
806 * Returns true if a register is live in a set.
808 * @param reg_idx the vfp register index
809 * @param live a live bitset
811 #define is_vfp_live(reg_idx, live) ((live) & (1 << (reg_idx)))
815 * Dump liveness info.
817 * @param live the live bitset
819 static void vfp_dump_live(vfp_liveness live) {
822 DB((dbg, LEVEL_2, "Live after: "));
823 for (i = 0; i < 8; ++i) {
824 if (live & (1 << i)) {
825 DB((dbg, LEVEL_2, "vf%d ", i));
828 DB((dbg, LEVEL_2, "\n"));
829 } /* vfp_dump_live */
830 #endif /* DEBUG_libfirm */
832 /* --------------------------------- simulators ---------------------------------------- */
834 #define XCHG(a, b) do { int t = (a); (a) = (b); (b) = t; } while (0)
837 * Simulate a virtual binop.
839 * @param state the x87 state
840 * @param n the node that should be simulated (and patched)
841 * @param tmpl the template containing the 4 possible x87 opcodes
843 static int sim_binop(x87_state *state, ir_node *n, const exchange_tmpl *tmpl) {
844 int op2_idx, op1_idx;
845 int out_idx, do_pop = 0;
848 x87_simulator *sim = state->sim;
849 const arch_register_t *op1 = x87_get_irn_register(sim, get_irn_n(n, BINOP_IDX_1));
850 const arch_register_t *op2 = x87_get_irn_register(sim, get_irn_n(n, BINOP_IDX_2));
851 const arch_register_t *out = x87_get_irn_register(sim, n);
852 int reg_index_1 = arch_register_get_index(op1);
853 int reg_index_2 = arch_register_get_index(op2);
854 vfp_liveness live = vfp_live_args_after(sim, n, REGMASK(out));
856 DB((dbg, LEVEL_1, ">>> %+F %s, %s -> %s\n", n,
857 arch_register_get_name(op1), arch_register_get_name(op2),
858 arch_register_get_name(out)));
859 DEBUG_ONLY(vfp_dump_live(live));
860 DB((dbg, LEVEL_1, "Stack before: "));
861 DEBUG_ONLY(x87_dump_stack(state));
863 op1_idx = x87_on_stack(state, reg_index_1);
864 assert(op1_idx >= 0);
866 if (reg_index_2 != REG_VFP_NOREG) {
867 /* second operand is a vfp register */
868 op2_idx = x87_on_stack(state, reg_index_2);
869 assert(op2_idx >= 0);
871 if (is_vfp_live(arch_register_get_index(op2), live)) {
872 /* Second operand is live. */
874 if (is_vfp_live(arch_register_get_index(op1), live)) {
875 /* Both operands are live: push the first one.
876 This works even for op1 == op2. */
877 x87_create_fpush(state, n, op1_idx, BINOP_IDX_2);
878 /* now do fxxx (tos=tos X op) */
882 dst = tmpl->normal_op;
884 /* Second live, first operand is dead here, bring it to tos. */
886 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
891 /* now do fxxx (tos=tos X op) */
893 dst = tmpl->normal_op;
897 /* Second operand is dead. */
898 if (is_vfp_live(arch_register_get_index(op1), live)) {
899 /* First operand is live: bring second to tos. */
901 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
906 /* now do fxxxr (tos = op X tos) */
908 dst = tmpl->reverse_op;
911 /* Both operands are dead here, pop them from the stack. */
914 /* Both are identically and on tos, no pop needed. */
915 /* here fxxx (tos = tos X tos) */
916 dst = tmpl->normal_op;
920 /* now do fxxxp (op = op X tos, pop) */
921 dst = tmpl->normal_pop_op;
926 else if (op1_idx == 0) {
927 assert(op1_idx != op2_idx);
928 /* now do fxxxrp (op = tos X op, pop) */
929 dst = tmpl->reverse_pop_op;
934 /* Bring the second on top. */
935 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
936 if (op1_idx == op2_idx) {
937 /* Both are identically and on tos now, no pop needed. */
940 /* use fxxx (tos = tos X tos) */
941 dst = tmpl->normal_op;
945 /* op2 is on tos now */
947 /* use fxxxp (op = op X tos, pop) */
948 dst = tmpl->normal_pop_op;
957 /* second operand is an address mode */
958 if (is_vfp_live(arch_register_get_index(op1), live)) {
959 /* first operand is live: push it here */
960 x87_create_fpush(state, n, op1_idx, BINOP_IDX_1);
962 /* use fxxx (tos = tos X mem) */
963 dst = tmpl->normal_op;
967 /* first operand is dead: bring it to tos */
969 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
973 /* use fxxxp (tos = tos X mem) */
974 dst = tmpl->normal_op;
979 x87_set_st(state, arch_register_get_index(out), x87_patch_insn(n, dst), out_idx);
983 /* patch the operation */
984 attr = get_ia32_attr(n);
985 attr->x87[0] = op1 = &ia32_st_regs[op1_idx];
986 if (reg_index_2 != REG_VFP_NOREG) {
987 attr->x87[1] = op2 = &ia32_st_regs[op2_idx];
989 attr->x87[2] = out = &ia32_st_regs[out_idx];
991 if (reg_index_2 != REG_VFP_NOREG) {
992 DB((dbg, LEVEL_1, "<<< %s %s, %s -> %s\n", get_irn_opname(n),
993 arch_register_get_name(op1), arch_register_get_name(op2),
994 arch_register_get_name(out)));
996 DB((dbg, LEVEL_1, "<<< %s %s, [AM] -> %s\n", get_irn_opname(n),
997 arch_register_get_name(op1),
998 arch_register_get_name(out)));
1005 * Simulate a virtual Unop.
1007 * @param state the x87 state
1008 * @param n the node that should be simulated (and patched)
1009 * @param op the x87 opcode that will replace n's opcode
1011 static int sim_unop(x87_state *state, ir_node *n, ir_op *op) {
1012 int op1_idx, out_idx;
1013 x87_simulator *sim = state->sim;
1014 const arch_register_t *op1 = x87_get_irn_register(sim, get_irn_n(n, UNOP_IDX));
1015 const arch_register_t *out = x87_get_irn_register(sim, n);
1017 unsigned live = vfp_live_args_after(sim, n, REGMASK(out));
1019 DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, out->name));
1020 DEBUG_ONLY(vfp_dump_live(live));
1022 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1024 if (is_vfp_live(arch_register_get_index(op1), live)) {
1025 /* push the operand here */
1026 x87_create_fpush(state, n, op1_idx, UNOP_IDX);
1030 /* operand is dead, bring it to tos */
1032 x87_create_fxch(state, n, op1_idx, UNOP_IDX);
1037 x87_set_tos(state, arch_register_get_index(out), x87_patch_insn(n, op));
1039 attr = get_ia32_attr(n);
1040 attr->x87[0] = op1 = &ia32_st_regs[0];
1041 attr->x87[2] = out = &ia32_st_regs[0];
1042 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), out->name));
1048 * Simulate a virtual Load instruction.
1050 * @param state the x87 state
1051 * @param n the node that should be simulated (and patched)
1052 * @param op the x87 opcode that will replace n's opcode
1054 static int sim_load(x87_state *state, ir_node *n, ir_op *op) {
1055 const arch_register_t *out = x87_get_irn_register(state->sim, n);
1058 DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, arch_register_get_name(out)));
1059 x87_push(state, arch_register_get_index(out), x87_patch_insn(n, op));
1060 assert(out == x87_get_irn_register(state->sim, n));
1061 attr = get_ia32_attr(n);
1062 attr->x87[2] = out = &ia32_st_regs[0];
1063 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), arch_register_get_name(out)));
1069 * Rewire all users of @p old_val to @new_val iff they are scheduled after @p store.
1071 * @param store The store
1072 * @param old_val The former value
1073 * @param new_val The new value
1075 static void collect_and_rewire_users(ir_node *store, ir_node *old_val, ir_node *new_val) {
1076 const ir_edge_t *edge, *ne;
	/* the safe edge iterator allows rewiring (edge mutation) during the walk */
1078 foreach_out_edge_safe(old_val, edge, ne) {
1079 ir_node *user = get_edge_src_irn(edge);
	/* skip the store itself: its value input must keep pointing at old_val */
1081 if (! user || user == store)
1084 /* if the user is scheduled after the store: rewire */
1085 if (sched_is_scheduled(user) && sched_comes_after(store, user)) {
1087 /* find the input of the user pointing to the old value */
	/* a user may reference old_val through several inputs; all are redirected */
1088 for (i = get_irn_arity(user) - 1; i >= 0; i--) {
1089 if (get_irn_n(user, i) == old_val)
1090 set_irn_n(user, i, new_val);
1094 } /* collect_and_rewire_users */
1097 * Simulate a virtual Store.
1099 * @param state the x87 state
1100 * @param n the node that should be simulated (and patched)
1101 * @param op the x87 store opcode
1102 * @param op_p the x87 store and pop opcode
1104 static int sim_store(x87_state *state, ir_node *n, ir_op *op, ir_op *op_p) {
1105 x87_simulator *sim = state->sim;
1106 ir_node *val = get_irn_n(n, STORE_VAL_IDX);
1107 const arch_register_t *op2 = x87_get_irn_register(sim, val);
1108 unsigned live = vfp_live_args_after(sim, n, 0);
1114 op2_idx = x87_on_stack(state, arch_register_get_index(op2));
1115 assert(op2_idx >= 0);
1117 DB((dbg, LEVEL_1, ">>> %+F %s ->\n", n, arch_register_get_name(op2)));
1119 mode = get_ia32_ls_mode(n);
1120 depth = x87_get_depth(state);
1122 if (is_vfp_live(arch_register_get_index(op2), live)) {
1124 Problem: fst doesn't support mode_E (spills), only fstp does
1126 - stack not full: push value and fstp
1127 - stack full: fstp value and load again
1129 if (mode == mode_E) {
1130 if (depth < N_x87_REGS) {
1131 /* ok, we have a free register: push + fstp */
1132 x87_create_fpush(state, n, op2_idx, STORE_VAL_IDX);
1134 x87_patch_insn(n, op_p);
1137 ir_node *vfld, *mem, *block, *rproj, *mproj;
1140 /* stack full here: need fstp + load */
1142 x87_patch_insn(n, op_p);
1144 block = get_nodes_block(n);
1145 irg = get_irn_irg(n);
1146 vfld = new_rd_ia32_vfld(NULL, irg, block, get_irn_n(n, 0), get_irn_n(n, 1), new_rd_NoMem(irg));
1148 /* copy all attributes */
1149 set_ia32_frame_ent(vfld, get_ia32_frame_ent(n));
1150 if (is_ia32_use_frame(n))
1151 set_ia32_use_frame(vfld);
1152 set_ia32_am_flavour(vfld, get_ia32_am_flavour(n));
1153 set_ia32_op_type(vfld, ia32_am_Source);
1154 add_ia32_am_offs(vfld, get_ia32_am_offs(n));
1155 set_ia32_am_sc(vfld, get_ia32_am_sc(n));
1156 set_ia32_ls_mode(vfld, get_ia32_ls_mode(n));
1158 rproj = new_r_Proj(irg, block, vfld, get_ia32_ls_mode(vfld), pn_ia32_vfld_res);
1159 mproj = new_r_Proj(irg, block, vfld, mode_M, pn_ia32_vfld_M);
1160 mem = get_irn_Proj_for_mode(n, mode_M);
1162 assert(mem && "Store memory not found");
1164 arch_set_irn_register(sim->env, rproj, op2);
1166 /* reroute all former users of the store memory to the load memory */
1167 edges_reroute(mem, mproj, irg);
1168 /* set the memory input of the load to the store memory */
1169 set_irn_n(vfld, 2, mem);
1171 sched_add_after(n, vfld);
1172 sched_add_after(vfld, rproj);
1174 /* rewire all users, scheduled after the store, to the loaded value */
1175 collect_and_rewire_users(n, val, rproj);
1181 /* we can only store the tos to memory */
1183 x87_create_fxch(state, n, op2_idx, STORE_VAL_IDX);
1185 /* mode != mode_E -> use normal fst */
1186 x87_patch_insn(n, op);
1190 /* we can only store the tos to memory */
1192 x87_create_fxch(state, n, op2_idx, STORE_VAL_IDX);
1195 x87_patch_insn(n, op_p);
1198 attr = get_ia32_attr(n);
1199 attr->x87[1] = op2 = &ia32_st_regs[0];
1200 DB((dbg, LEVEL_1, "<<< %s %s ->\n", get_irn_opname(n), arch_register_get_name(op2)));
1206 * Simulate a virtual Phi.
1207 * Just for cosmetic reasons change the mode of Phi nodes to mode_E.
1209 * @param state the x87 state
1210 * @param n the node that should be simulated (and patched)
1211 * @param env the architecture environment
1213 static int sim_Phi(x87_state *state, ir_node *n, const arch_env_t *env) {
1214 ir_mode *mode = get_irn_mode(n);
	/* purely cosmetic (see header comment): float Phis are switched to the
	   x87 extended precision mode_E; state/env are unused in the visible code */
1216 if (mode_is_float(mode))
1217 set_irn_mode(n, mode_E);
1223 #define _GEN_BINOP(op, rev) \
1224 static int sim_##op(x87_state *state, ir_node *n) { \
1225 exchange_tmpl tmpl = { op_ia32_##op, op_ia32_##rev, op_ia32_##op##p, op_ia32_##rev##p }; \
1226 return sim_binop(state, n, &tmpl); \
1229 #define GEN_BINOP(op) _GEN_BINOP(op, op)
1230 #define GEN_BINOPR(op) _GEN_BINOP(op, op##r)
1232 #define GEN_LOAD2(op, nop) \
1233 static int sim_##op(x87_state *state, ir_node *n) { \
1234 return sim_load(state, n, op_ia32_##nop); \
1237 #define GEN_LOAD(op) GEN_LOAD2(op, op)
1239 #define GEN_UNOP(op) \
1240 static int sim_##op(x87_state *state, ir_node *n) { \
1241 return sim_unop(state, n, op_ia32_##op); \
1244 #define GEN_STORE(op) \
1245 static int sim_##op(x87_state *state, ir_node *n) { \
1246 return sim_store(state, n, op_ia32_##op, op_ia32_##op##p); \
1266 GEN_LOAD2(fConst, fldConst)
/*
 * Case analysis overview (visible parts): the x87 can only compare
 * against st(0), so the code below splits on where the two operands sit
 * on the virtual stack and on their liveness after the compare:
 *   - both live            -> non-popping fcom/fcomr variants,
 *   - one dead             -> single-pop fcomp/fcomrp variants,
 *   - both dead            -> double-pop fcompp/fcomrpp variants,
 * creating fxch instructions as needed to move operands to st(0)/st(1).
 * NOTE(review): many bookkeeping lines (pop counting, index fixups after
 * fxch) are elided in this extract.
 */
1272 * Simulate a fCondJmp.
1274 * @param state the x87 state
1275 * @param n the node that should be simulated (and patched)
1277 static int sim_fCondJmp(x87_state *state, ir_node *n) {
1283 x87_simulator *sim = state->sim;
1284 const arch_register_t *op1 = x87_get_irn_register(sim, get_irn_n(n, BINOP_IDX_1));
1285 const arch_register_t *op2 = x87_get_irn_register(sim, get_irn_n(n, BINOP_IDX_2));
1286 int reg_index_1 = arch_register_get_index(op1);
1287 int reg_index_2 = arch_register_get_index(op2);
/* liveness after this node decides popping vs. non-popping compares */
1288 unsigned live = vfp_live_args_after(sim, n, 0);
1290 DB((dbg, LEVEL_1, ">>> %+F %s, %s\n", n,
1291 arch_register_get_name(op1), arch_register_get_name(op2)));
1292 DEBUG_ONLY(vfp_dump_live(live));
1293 DB((dbg, LEVEL_1, "Stack before: "));
1294 DEBUG_ONLY(x87_dump_stack(state));
1296 op1_idx = x87_on_stack(state, reg_index_1);
1297 assert(op1_idx >= 0);
1299 /* BEWARE: check for comp a,a cases, they might happen */
1300 if (reg_index_2 != REG_VFP_NOREG) {
1301 /* second operand is a vfp register */
1302 op2_idx = x87_on_stack(state, reg_index_2);
1303 assert(op2_idx >= 0);
1305 if (is_vfp_live(arch_register_get_index(op2), live)) {
1306 /* second operand is live */
1308 if (is_vfp_live(arch_register_get_index(op1), live)) {
1309 /* both operands are live */
1312 dst = op_ia32_fcomJmp;
1313 } else if (op2_idx == 0) {
/* op2 already at tos: use the reversed, non-popping compare */
1314 dst = op_ia32_fcomrJmp;
1316 /* bring the first one to tos */
1317 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1321 dst = op_ia32_fcomJmp;
1325 /* second live, first operand is dead here, bring it to tos.
1326 This means further, op1_idx != op2_idx. */
1327 assert(op1_idx != op2_idx);
1329 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
/* first operand dead -> single-pop compare */
1334 dst = op_ia32_fcompJmp;
1339 /* second operand is dead */
1340 if (is_vfp_live(arch_register_get_index(op1), live)) {
1341 /* first operand is live: bring second to tos.
1342 This means further, op1_idx != op2_idx. */
1343 assert(op1_idx != op2_idx);
1345 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1350 dst = op_ia32_fcomrpJmp;
1354 /* both operands are dead here, check first for identity. */
1355 if (op1_idx == op2_idx) {
1356 /* identically, one pop needed */
1358 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1362 dst = op_ia32_fcompJmp;
1365 /* different, move them to st and st(1) and pop both.
1366 The tricky part is to get one into st(1).*/
1367 else if (op2_idx == 1) {
1368 /* good, second operand is already in the right place, move the first */
1370 /* bring the first on top */
1371 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1374 dst = op_ia32_fcomppJmp;
1377 else if (op1_idx == 1) {
1378 /* good, first operand is already in the right place, move the second */
1380 /* bring the second on top */
1381 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1384 dst = op_ia32_fcomrppJmp;
1388 /* if one is already the TOS, we need two fxch */
1390 /* first one is TOS, move to st(1) */
1391 x87_create_fxch(state, n, 1, BINOP_IDX_1);
1393 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1395 dst = op_ia32_fcomrppJmp;
1398 else if (op2_idx == 0) {
1399 /* second one is TOS, move to st(1) */
1400 x87_create_fxch(state, n, 1, BINOP_IDX_2);
1402 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1404 dst = op_ia32_fcomrppJmp;
1408 /* none of them is either TOS or st(1), 3 fxch needed */
1409 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1410 x87_create_fxch(state, n, 1, BINOP_IDX_2);
1412 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1414 dst = op_ia32_fcomppJmp;
1422 /* second operand is an address mode */
1423 if (is_vfp_live(arch_register_get_index(op1), live)) {
1424 /* first operand is live: bring it to TOS */
1426 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1429 dst = op_ia32_fcomJmp;
1432 /* first operand is dead: bring it to tos */
1434 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1437 dst = op_ia32_fcompJmp;
/* replace the virtual opcode by the selected real compare opcode */
1442 x87_patch_insn(n, dst);
1443 assert(pop_cnt < 3);
1449 /* patch the operation */
1450 attr = get_ia32_attr(n);
1451 op1 = &ia32_st_regs[op1_idx];
1454 op2 = &ia32_st_regs[op2_idx];
1457 attr->x87[2] = NULL;
1460 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(n),
1461 arch_register_get_name(op1), arch_register_get_name(op2)));
1463 DB((dbg, LEVEL_1, "<<< %s %s, [AM]\n", get_irn_opname(n),
1464 arch_register_get_name(op1)));
1467 } /* sim_fCondJmp */
/*
 * Copy strategy (visible parts):
 *  1. copies of fp constants are not copied but re-materialized (fldz etc.),
 *  2. copies of the "unknown" register create a dummy value (fldz) or
 *     duplicate the TOS via fpushCopy,
 *  3. if the source is still live, a real duplicate is made (fpushCopy),
 *  4. if the destination already lives on the stack, the source is killed
 *     in place (fxch + Pop) so the destination slot takes over,
 *  5. otherwise the copy is purely virtual: rename the stack slot and
 *     exchange() the Copy node away.
 */
1470 * Simulate a be_Copy.
1472 * @param state the x87 state
1473 * @param n the node that should be simulated (and patched)
1475 static int sim_Copy(x87_state *state, ir_node *n) {
1476 ir_mode *mode = get_irn_mode(n);
/* only float copies concern the x87; others pass through untouched */
1478 if (mode_is_float(mode)) {
1479 x87_simulator *sim = state->sim;
1480 ir_node *pred = get_irn_n(n, 0);
1481 const arch_register_t *out = x87_get_irn_register(sim, n);
1482 const arch_register_t *op1 = x87_get_irn_register(sim, pred);
1483 ir_node *node, *next;
1485 int op1_idx, out_idx;
1486 unsigned live = vfp_live_args_after(sim, n, REGMASK(out));
1487 ir_node *(*cnstr)(dbg_info *, ir_graph *, ir_node *, ir_mode *);
1489 DB((dbg, LEVEL_1, ">>> %+F %s -> %s\n", n,
1490 arch_register_get_name(op1), arch_register_get_name(out)));
1491 DEBUG_ONLY(vfp_dump_live(live));
1493 /* Do not copy constants, recreate them. */
1494 switch (get_ia32_irn_opcode(pred)) {
1496 cnstr = new_rd_ia32_fldz;
1499 cnstr = new_rd_ia32_fld1;
1501 case iro_ia32_fldpi:
1502 cnstr = new_rd_ia32_fldpi;
1504 case iro_ia32_fldl2e:
1505 cnstr = new_rd_ia32_fldl2e;
1507 case iro_ia32_fldl2t:
1508 cnstr = new_rd_ia32_fldl2t;
1510 case iro_ia32_fldlg2:
1511 cnstr = new_rd_ia32_fldlg2;
1513 case iro_ia32_fldln2:
1514 cnstr = new_rd_ia32_fldln2;
1520 /* copy a constant: build a fresh constant-load node instead */
1521 node = (*cnstr)(get_irn_dbg_info(n), get_irn_irg(n), get_nodes_block(n), mode);
1522 arch_set_irn_register(sim->env, node, out);
/* the new constant occupies a fresh TOS slot */
1524 x87_push(state, arch_register_get_index(out), node);
1526 attr = get_ia32_attr(node);
1527 attr->x87[2] = out = &ia32_st_regs[0];
1529 next = sched_next(n);
1532 sched_add_before(next, node);
1533 DB((dbg, LEVEL_1, ">>> %+F -> %s\n", node, arch_register_get_name(out)));
1537 /* handle the infamous unknown value */
1538 if (arch_register_get_index(op1) == REG_VFP_UKNWN) {
1539 /* This happens before Phi nodes */
1540 if (x87_state_is_empty(state)) {
1541 /* create some value */
1542 x87_patch_insn(n, op_ia32_fldz);
1543 attr = get_ia32_attr(n);
1544 attr->x87[2] = out = &ia32_st_regs[0];
1545 DB((dbg, LEVEL_1, "<<< %+F -> %s\n", n,
1546 arch_register_get_name(out)));
1548 /* Just copy one. We need here an fpush that can hold
1549 a register, so use the fpushCopy. */
1550 node = new_rd_ia32_fpushCopy(get_irn_dbg_info(n), get_irn_irg(n), get_nodes_block(n), get_irn_n(n, 0), mode);
1551 arch_set_irn_register(sim->env, node, out);
1553 x87_push(state, arch_register_get_index(out), node);
1555 attr = get_ia32_attr(node);
1556 attr->x87[0] = op1 =
1557 attr->x87[2] = out = &ia32_st_regs[0];
1559 next = sched_next(n);
1562 sched_add_before(next, node);
1563 DB((dbg, LEVEL_1, "<<< %+F %s -> %s\n", node,
1564 arch_register_get_name(op1),
1565 arch_register_get_name(out)));
1570 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1572 if (is_vfp_live(arch_register_get_index(op1), live)) {
1573 /* Operand is still live, a real copy. We need here an fpush that
1574 can hold a register, so use the fpushCopy. */
1575 node = new_rd_ia32_fpushCopy(get_irn_dbg_info(n), get_irn_irg(n), get_nodes_block(n), get_irn_n(n, 0), mode);
1576 arch_set_irn_register(sim->env, node, out);
1578 x87_push(state, arch_register_get_index(out), node);
1580 attr = get_ia32_attr(node);
1581 attr->x87[0] = op1 = &ia32_st_regs[op1_idx];
1582 attr->x87[2] = out = &ia32_st_regs[0];
1584 next = sched_next(n);
1587 sched_add_before(next, node);
1588 DB((dbg, LEVEL_1, ">>> %+F %s -> %s\n", node, op1->name, out->name));
1591 out_idx = x87_on_stack(state, arch_register_get_index(out));
1593 if (out_idx >= 0 && out_idx != op1_idx) {
1594 /* op1 must be killed and placed where out is */
1596 /* best case, simple remove and rename */
1597 x87_patch_insn(n, op_ia32_Pop);
1598 attr = get_ia32_attr(n);
1599 attr->x87[0] = op1 = &ia32_st_regs[0];
/* popping shifts every deeper slot up by one, hence op1_idx - 1 */
1602 x87_set_st(state, arch_register_get_index(out), n, op1_idx - 1);
1605 /* move op1 to tos, store and pop it */
1607 x87_create_fxch(state, n, op1_idx, 0);
1610 x87_patch_insn(n, op_ia32_Pop);
1611 attr = get_ia32_attr(n);
1612 attr->x87[0] = op1 = &ia32_st_regs[out_idx];
1615 x87_set_st(state, arch_register_get_index(out), n, out_idx - 1);
1617 DB((dbg, LEVEL_1, ">>> %+F %s\n", n, op1->name));
1620 /* just a virtual copy: rename the stack slot, no code emitted */
1621 x87_set_st(state, arch_register_get_index(out), get_unop_op(n), op1_idx);
1623 DB((dbg, LEVEL_1, ">>> KILLED %s\n", get_irn_opname(n)));
1624 exchange(n, get_unop_op(n));
1632 * Simulate a be_Call.
1634 * @param state the x87 state
1635 * @param n the node that should be simulated
1636 * @param env the architecture environment
/* NOTE(review): like sim_Phi, this takes an extra env parameter compared
   to the sim_func typedef -- confirm how it is dispatched. */
1638 static int sim_Call(x87_state *state, ir_node *n, const arch_env_t *env) {
1639 ir_type *call_tp = be_Call_get_type(n);
1641 /* at the begin of a call the x87 state should be empty */
1642 assert(state->depth == 0 && "stack not empty before call");
1645 * If the called function returns a float, it is returned in st(0).
1646 * This even happens if the return value is NOT used.
1647 * Moreover, only one return result is supported.
1649 if (get_method_n_ress(call_tp) > 0) {
1650 ir_type *res_type = get_method_res_type(call_tp, 0);
1651 ir_mode *mode = get_type_mode(res_type);
/* model the callee leaving its float result in st(0) */
1653 if (mode && mode_is_float(mode)) {
1655 * TODO: what to push here? The result might be unused and currently
1656 * we have no possibility to detect this :-(
1658 x87_push(state, 0, n);
1666 * Simulate a be_Spill.
1668 * @param state the x87 state
1669 * @param n the node that should be simulated (and patched)
1670 * @param env the architecture environment
1672 * Should not happen, spills are lowered before the x87 simulator sees them.
/* Defensive stub: asserts, then falls back to treating the Spill as an
   fst (store) so non-assert builds still make progress. */
1674 static int sim_Spill(x87_state *state, ir_node *n) {
1675 assert(0 && "Spill not lowered");
1676 return sim_fst(state, n);
1680 * Simulate a be_Reload.
1682 * @param state the x87 state
1683 * @param n the node that should be simulated (and patched)
1684 * @param env the architecture environment
1686 * Should not happen, reloads are lowered before the x87 simulator sees them.
/* Defensive stub: asserts, then falls back to treating the Reload as an
   fld (load) so non-assert builds still make progress. */
1688 static int sim_Reload(x87_state *state, ir_node *n) {
1689 assert(0 && "Reload not lowered");
1690 return sim_fld(state, n);
1694 * Simulate a be_Return.
1696 * @param state the x87 state
1697 * @param n the node that should be simulated (and patched)
1698 * @param env the architecture environment
1700 static int sim_Return(x87_state *state, ir_node *n) {
1701 int n_res = be_Return_get_n_rets(n);
1702 int i, n_float_res = 0;
1704 /* only floating point return values must reside on the stack */
1705 for (i = 0; i < n_res; ++i) {
1706 ir_node *res = get_irn_n(n, be_pos_Return_val + i);
1708 if (mode_is_float(get_irn_mode(res)))
/* the simulated stack must hold exactly the float results at this point */
1711 assert(x87_get_depth(state) == n_float_res);
1713 /* pop them virtually: the hardware return protocol consumes st(0).. */
1714 for (i = n_float_res - 1; i >= 0; --i)
/** An (input register, output register) pair describing one Perm lane. */
1720 typedef struct _perm_data_t {
1721 const arch_register_t *in; /**< the input register of this lane */
1722 const arch_register_t *out; /**< the output register of this lane */
1726 * Simulate a be_Perm.
1728 * @param state the x87 state
1729 * @param irn the node that should be simulated (and patched)
1731 static int sim_Perm(x87_state *state, ir_node *irn) {
1733 x87_simulator *sim = state->sim;
1734 ir_node *pred = get_irn_n(irn, 0);
1736 const ir_edge_t *edge;
1738 /* handle only floating point Perms; integer Perms are none of our business */
1739 if (! mode_is_float(get_irn_mode(pred)))
1742 DB((dbg, LEVEL_1, ">>> %+F\n", irn));
1744 /* Perm is a pure virtual instruction on x87.
1745 All inputs must be on the FPU stack and are pairwise
1746 different from each other.
1747 So, all we need to do is to permute the stack state. */
1748 n = get_irn_arity(irn);
/* stack_pos[] is allocated on the C stack (alloca-style NEW_ARR_A) */
1749 NEW_ARR_A(int, stack_pos, n);
1751 /* collect old stack positions */
1752 for (i = 0; i < n; ++i) {
1753 const arch_register_t *inreg = x87_get_irn_register(sim, get_irn_n(irn, i));
1754 int idx = x87_on_stack(state, arch_register_get_index(inreg));
1756 assert(idx >= 0 && "Perm argument not on x87 stack");
1760 /* now do the permutation: rebind each Proj's output register to the
1761 stack slot its corresponding input occupied */
1761 foreach_out_edge(irn, edge) {
1762 ir_node *proj = get_edge_src_irn(edge);
1763 const arch_register_t *out = x87_get_irn_register(sim, proj);
1764 long num = get_Proj_proj(proj);
1766 assert(0 <= num && num < n && "More Proj's than Perm inputs");
1767 x87_set_st(state, arch_register_get_index(out), proj, stack_pos[(unsigned)num]);
1769 DB((dbg, LEVEL_1, "<<< %+F\n", irn));
1775 * Kill any dead registers at block start by popping them from the stack.
1777 * @param sim the simulator handle
1778 * @param block the current block
1779 * @param start_state the x87 state at the begin of the block
/* Computes a bitmask of dead stack slots, then pops them off the top
   (bringing deeper dead slots up with fxch first, since the x87 can only
   pop from st(0)). Uses ffreep-style single pops or a double-pop where
   two adjacent dead slots allow it. Returns a (possibly cloned) state. */
1781 static x87_state *x87_kill_deads(x87_simulator *sim, ir_node *block, x87_state *start_state) {
1782 x87_state *state = start_state;
1783 ir_node *first_insn = sched_first(block);
1784 ir_node *keep = NULL;
1785 unsigned live = vfp_live_args_after(sim, block, 0);
1787 int i, depth, num_pop;
/* pass 1: mark every stack slot whose vfp register is dead */
1790 depth = x87_get_depth(state);
1791 for (i = depth - 1; i >= 0; --i) {
1792 int reg = x87_get_st_reg(state, i);
1794 if (! is_vfp_live(reg, live))
1795 kill_mask |= (1 << i);
1799 /* create a new state, will be changed */
1800 state = x87_clone_state(sim, state);
1802 DB((dbg, LEVEL_1, "Killing deads:\n"));
1803 DEBUG_ONLY(vfp_dump_live(live));
1804 DEBUG_ONLY(x87_dump_stack(state));
1806 /* now kill registers */
1808 /* we can only kill from TOS, so bring them up */
1809 if (! (kill_mask & 1)) {
1810 /* search from behind, because we can do a double-pop */
1811 for (i = depth - 1; i >= 0; --i) {
1812 if (kill_mask & (1 << i)) {
1813 kill_mask &= ~(1 << i);
/* slot i is dead: swap it to TOS; -1 marks the slot as "no register" */
1820 x87_set_st(state, -1, keep, i);
1821 keep = x87_create_fxch(state, first_insn, i, -1);
1824 keep = x87_get_st_node(state, 0);
1826 if ((kill_mask & 3) == 3) {
1827 /* we can do a double-pop */
1831 /* only a single pop */
1836 kill_mask >>= num_pop;
1837 keep = x87_create_fpop(state, first_insn, num_pop, keep);
1842 } /* x87_kill_deads */
1845 * Run a simulation and fix all virtual instructions for a block.
1847 * @param sim the simulator handle
1848 * @param block the current block
1850 * Note: this function returns void (the former non-zero/zero "rerun"
1851 * return contract is stale); unprocessed successors are instead put on
 * the simulator worklist below.
1853 static void x87_simulate_block(x87_simulator *sim, ir_node *block) {
1855 blk_state *bl_state = x87_get_bl_state(sim, block);
1856 x87_state *state = bl_state->begin;
1857 const ir_edge_t *edge;
1858 ir_node *start_block;
1860 assert(state != NULL);
1861 // already processed?
1862 if(bl_state->end != NULL)
1865 update_liveness(sim, block);
1867 DB((dbg, LEVEL_1, "Simulate %+F\n", block));
1868 DB((dbg, LEVEL_2, "State at Block begin:\n "));
1869 DEBUG_ONLY(x87_dump_stack(state));
1871 /* at block begin, kill all dead registers */
1872 state = x87_kill_deads(sim, block, state);
1874 /* beware, n might be changed by the simulated function */
1875 for (n = sched_first(block); !sched_is_end(n); n = next) {
1878 ir_op *op = get_irn_op(n);
/* remember the successor first: sim funcs may insert nodes after n */
1880 next = sched_next(n);
1881 if (op->ops.generic == NULL)
/* NOTE(review): handlers installed via ASSOC that take an extra env
   parameter (sim_Phi, sim_Call) do not match this sim_func cast --
   confirm against the full file. */
1884 func = (sim_func)op->ops.generic;
1886 /* have work to do */
1887 if (state == bl_state->begin) {
1888 /* create a new state, will be changed (copy-on-write of the
 begin state, which may be shared with a predecessor) */
1889 state = x87_clone_state(sim, state);
1893 node_inserted = (*func)(state, n);
1896 sim_func might have added additional nodes after n,
1898 beware: n must not be changed by sim_func
1899 (i.e. removed from schedule) in this case
1902 next = sched_next(n);
1905 start_block = get_irg_start_block(get_irn_irg(block));
1907 /* check if the state must be shuffled */
1908 foreach_block_succ(block, edge) {
1909 ir_node *succ = get_edge_src_irn(edge);
1910 blk_state *succ_state;
1912 if(succ == start_block)
1915 succ_state = x87_get_bl_state(sim, succ);
1917 if (succ_state->begin == NULL) {
/* first predecessor reaching succ: our end state becomes its begin */
1918 succ_state->begin = state;
1919 waitq_put(sim->worklist, succ);
1921 /* There is already a begin state for the successor, bad.
1922 Do the necessary permutations.
1923 Note that critical edges are removed, so this is always possible. */
1924 x87_shuffle(sim, block, state, succ, succ_state->begin);
1927 bl_state->end = state;
1929 DB((dbg, LEVEL_2, "State at Block end:\n ")); DEBUG_ONLY(x87_dump_stack(state));
1930 } /* x87_simulate_block */
1933 * Create a new x87 simulator.
1935 * @param sim a simulator handle, will be initialized
1936 * @param irg the current graph
1937 * @param env the architecture environment
1939 static void x87_init_simulator(x87_simulator *sim, ir_graph *irg, const arch_env_t *env)
1941 obstack_init(&sim->obst);
1942 sim->blk_states = pmap_create();
/* liveness cache: one entry per node index of the graph */
1944 sim->n_idx = get_irg_last_idx(irg);
1945 sim->live = obstack_alloc(&sim->obst, sizeof(*sim->live) * sim->n_idx);
1947 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.x87");
1949 DB((dbg, LEVEL_1, "--------------------------------\n"
1950 "x87 Simulator started for %+F\n", irg));
1952 /* set the generic function pointer of instruction we must simulate */
1953 clear_irp_opcodes_generic_func();
/* ASSOC* install the sim_* handlers into each opcode's generic slot,
   so x87_simulate_block can dispatch via op->ops.generic. */
1955 #define ASSOC(op) (op_ ## op)->ops.generic = (op_func)(sim_##op)
1956 #define ASSOC_IA32(op) (op_ia32_v ## op)->ops.generic = (op_func)(sim_##op)
1957 #define ASSOC_BE(op) (op_be_ ## op)->ops.generic = (op_func)(sim_##op)
/* (lines 1958-1985 elided: the ASSOC invocations and #undef's) */
1975 ASSOC_IA32(fCondJmp);
1986 } /* x87_init_simulator */
1989 * Destroy a x87 simulator.
1991 * @param sim the simulator handle
/* Releases the block-state map and all obstack-allocated memory
   (states, liveness cache). The handle itself is caller-owned. */
1993 static void x87_destroy_simulator(x87_simulator *sim) {
1994 pmap_destroy(sim->blk_states);
1995 obstack_free(&sim->obst, NULL);
1996 DB((dbg, LEVEL_1, "x87 Simulator stopped\n\n"));
1997 } /* x87_destroy_simulator */
2000 * Run a simulation and fix all virtual instructions for a graph.
2002 * @param env the architecture environment
2003 * @param birg the backend graph (provides the current ir_graph)
2005 * Needs a block-schedule.
2007 void x87_simulate_graph(const arch_env_t *env, be_irg_t *birg) {
2008 ir_node *block, *start_block;
2009 blk_state *bl_state;
2011 ir_graph *irg = birg->irg;
2013 /* create the simulator */
2014 x87_init_simulator(&sim, irg, env);
2016 start_block = get_irg_start_block(irg);
2017 bl_state = x87_get_bl_state(&sim, start_block);
2019 /* start with the empty state */
2020 bl_state->begin = empty;
2023 sim.worklist = new_waitq();
2024 waitq_put(sim.worklist, start_block);
2026 be_invalidate_liveness(birg);
2027 be_assure_liveness(birg);
2031 /* Create the worklist for the schedule and calculate the liveness
2032 for all nodes. We must precalculate this info, because the
2033 simulator adds new nodes (and possible before Phi nodes) which
2034 let fail the old lazy calculation.
2035 On the other hand we reduce the computation amount due to
2036 precaching from O(n^2) to O(n) at the expense of O(n) cache memory.
/* NOTE(review): the loop below references `lv`, `worklist` and `blk_list`,
   none of which are declared in the visible code, and calls
   update_liveness with three arguments while line 1865 calls it with two.
   This looks like a stale fragment superseded by the sim.worklist loop
   that follows -- confirm against the full file / VCS history. */
2038 for (i = 0, n = ARR_LEN(blk_list); i < n; ++i) {
2039 update_liveness(&sim, lv, blk_list[i]);
2040 waitq_put(worklist, blk_list[i]);
/* main driver: process blocks until the worklist runs dry; successors
   are enqueued by x87_simulate_block as their begin states get fixed */
2046 block = waitq_get(sim.worklist);
2047 x87_simulate_block(&sim, block);
2048 } while (! pdeq_empty(sim.worklist));
2051 del_waitq(sim.worklist);
2052 x87_destroy_simulator(&sim);
2053 } /* x87_simulate_graph */