2 * This file implements the x87 support and virtual to stack
3 * register translation for the ia32 backend.
5 * @author: Michael Beck
18 #include "iredges_t.h"
28 #include "../belive_t.h"
29 #include "../besched_t.h"
30 #include "../benode_t.h"
31 #include "ia32_new_nodes.h"
32 #include "gen_ia32_new_nodes.h"
33 #include "gen_ia32_regalloc_if.h"
38 /* first and second binop index */
45 /* the store val index */
46 #define STORE_VAL_IDX 2
48 #define MASK_TOS(x) ((x) & (N_x87_REGS - 1))
50 /** the debug handle */
51 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
53 /* Forward declaration. */
54 typedef struct _x87_simulator x87_simulator;
57 * An exchange template.
58 * Note that our virtual functions have the same inputs
59 * and attributes as the real ones, so we can simple exchange
61 * Further, x87 supports inverse instructions, so we can handle them.
63 typedef struct _exchange_tmpl {
64 ir_op *normal_op; /**< the normal one */
65 ir_op *reverse_op; /**< the reverse one if exists */
66 ir_op *normal_pop_op; /**< the normal one with tos pop */
67 ir_op *reverse_pop_op; /**< the reverse one with tos pop */
71 * An entry on the simulated x87 stack.
73 typedef struct _st_entry {
74 int reg_idx; /**< the virtual register index of this stack value */
75 ir_node *node; /**< the node that produced this value */
81 typedef struct _x87_state {
82 st_entry st[N_x87_REGS]; /**< the register stack */
83 int depth; /**< the current stack depth */
84 int tos; /**< position of the tos */
85 x87_simulator *sim; /**< The simulator. */
88 /** An empty state, used for blocks without fp instructions. */
89 static x87_state _empty = { { {0, NULL}, }, 0, 0 };
90 static x87_state *empty = (x87_state *)&_empty;
92 /** The type of an instruction simulator function. */
93 typedef int (*sim_func)(x87_state *state, ir_node *n);
96 * A block state: Every block has a x87 state at the beginning and at the end.
98 typedef struct _blk_state {
99 x87_state *begin; /**< state at the begin or NULL if not assigned */
100 x87_state *end; /**< state at the end or NULL if not assigned */
103 #define PTR_TO_BLKSTATE(p) ((blk_state *)(p))
105 /** liveness bitset for vfp registers. */
106 typedef unsigned char vfp_liveness;
111 struct _x87_simulator {
112 struct obstack obst; /**< An obstack for fast allocating. */
113 pmap *blk_states; /**< Map blocks to states. */
114 const arch_env_t *arch_env; /**< The architecture environment. */
115 be_lv_t *lv; /**< intrablock liveness. */
116 vfp_liveness *live; /**< Liveness information. */
117 unsigned n_idx; /**< The cached get_irg_last_idx() result. */
118 waitq *worklist; /**< list of blocks to process. */
122 * Returns the current stack depth.
124 * @param state the x87 state
126 * @return the x87 stack depth
128 static int x87_get_depth(const x87_state *state) {
134 * Check if the state is empty.
136 * @param state the x87 state
138 * returns non-zero if the x87 stack is empty
static int x87_state_is_empty(const x87_state *state) {
	/* empty <=> nothing has been pushed onto the simulated register stack */
	return state->depth == 0;
146 * Return the virtual register index at st(pos).
148 * @param state the x87 state
149 * @param pos a stack position
151 * @return the vfp register index that produced the value at st(pos)
static int x87_get_st_reg(const x87_state *state, int pos) {
	assert(pos < state->depth);
	/* tos is a ring-buffer index into st[]; MASK_TOS wraps it modulo N_x87_REGS */
	return state->st[MASK_TOS(state->tos + pos)].reg_idx;
159 * Return the node at st(pos).
161 * @param state the x87 state
162 * @param pos a stack position
164 * @return the IR node that produced the value at st(pos)
166 static ir_node *x87_get_st_node(const x87_state *state, int pos) {
167 assert(pos < state->depth);
168 return state->st[MASK_TOS(state->tos + pos)].node;
169 } /* x87_get_st_node */
173 * Dump the stack for debugging.
175 * @param state the x87 state
static void x87_dump_stack(const x87_state *state) {
	/* print from the deepest stack entry down to st(0), so the
	 * tos ends up rightmost in the debug output */
	for (i = state->depth - 1; i >= 0; --i) {
		DB((dbg, LEVEL_2, "vf%d(%+F) ", x87_get_st_reg(state, i),
			x87_get_st_node(state, i)));
	DB((dbg, LEVEL_2, "<-- TOS\n"));
} /* x87_dump_stack */
186 #endif /* DEBUG_libfirm */
189 * Set a virtual register to st(pos).
191 * @param state the x87 state
192 * @param reg_idx the vfp register index that should be set
193 * @param node the IR node that produces the value of the vfp register
194 * @param pos the stack position where the new value should be entered
static void x87_set_st(x87_state *state, int reg_idx, ir_node *node, int pos) {
	assert(0 < state->depth);
	/* overwrite the entry at st(pos) in the ring buffer */
	state->st[MASK_TOS(state->tos + pos)].reg_idx = reg_idx;
	state->st[MASK_TOS(state->tos + pos)].node    = node;

	DB((dbg, LEVEL_2, "After SET_REG: "));
	DEBUG_ONLY(x87_dump_stack(state));
206 * Set the tos virtual register.
208 * @param state the x87 state
209 * @param reg_idx the vfp register index that should be set
210 * @param node the IR node that produces the value of the vfp register
static void x87_set_tos(x87_state *state, int reg_idx, ir_node *node) {
	/* convenience wrapper: st(0) is the top of stack */
	x87_set_st(state, reg_idx, node, 0);
218 * Flush the x87 stack.
220 * @param state the x87 state
222 static void x87_flush(x87_state *state) {
229 * Swap st(0) with st(pos).
231 * @param state the x87 state
232 * @param pos the stack position to change the tos with
static void x87_fxch(x87_state *state, int pos) {
	assert(pos < state->depth);

	/* three-way swap of the two ring-buffer entries via a temporary */
	entry = state->st[MASK_TOS(state->tos + pos)];
	state->st[MASK_TOS(state->tos + pos)] = state->st[MASK_TOS(state->tos)];
	state->st[MASK_TOS(state->tos)] = entry;

	DB((dbg, LEVEL_2, "After FXCH: ")); DEBUG_ONLY(x87_dump_stack(state));
246 * Convert a virtual register to the stack index.
248 * @param state the x87 state
249 * @param reg_idx the register vfp index
251 * @return the stack position where the register is stacked
252 * or -1 if the virtual register was not found
static int x87_on_stack(const x87_state *state, int reg_idx) {
	int i, tos = state->tos;

	/* linear scan over the (at most N_x87_REGS) occupied slots */
	for (i = 0; i < state->depth; ++i)
		if (state->st[MASK_TOS(tos + i)].reg_idx == reg_idx)
 * Push a virtual Register onto the stack, double pushes are allowed.
266 * @param state the x87 state
267 * @param reg_idx the register vfp index
268 * @param node the node that produces the value of the vfp register
static void x87_push_dbl(x87_state *state, int reg_idx, ir_node *node) {
	assert(state->depth < N_x87_REGS && "stack overrun");

	/* a push moves tos downwards (ring-buffer semantics) */
	state->tos = MASK_TOS(state->tos - 1);
	state->st[state->tos].reg_idx = reg_idx;
	state->st[state->tos].node    = node;

	DB((dbg, LEVEL_2, "After PUSH: ")); DEBUG_ONLY(x87_dump_stack(state));
 * Push a virtual Register onto the stack, double pushes are NOT allowed.
284 * @param state the x87 state
285 * @param reg_idx the register vfp index
286 * @param node the node that produces the value of the vfp register
 * Double pushes are rejected by an assertion; use x87_push_dbl() to allow them.
static void x87_push(x87_state *state, int reg_idx, ir_node *node) {
	/* unlike x87_push_dbl(), pushing a register that is already
	 * on the stack is a hard error here */
	assert(x87_on_stack(state, reg_idx) == -1 && "double push");

	x87_push_dbl(state, reg_idx, node);
296 * Pop a virtual Register from the stack.
static void x87_pop(x87_state *state) {
	assert(state->depth > 0 && "stack underrun");

	/* a pop moves tos upwards; the old entry is simply abandoned */
	state->tos = MASK_TOS(state->tos + 1);

	DB((dbg, LEVEL_2, "After POP: ")); DEBUG_ONLY(x87_dump_stack(state));
308 * Returns the block state of a block.
310 * @param sim the x87 simulator handle
311 * @param block the current block
313 * @return the block state
static blk_state *x87_get_bl_state(x87_simulator *sim, ir_node *block) {
	pmap_entry *entry = pmap_find(sim->blk_states, block);
		/* lazily create an unassigned state pair on first lookup */
		blk_state *bl_state = obstack_alloc(&sim->obst, sizeof(*bl_state));
		bl_state->begin = NULL;
		bl_state->end   = NULL;

		pmap_insert(sim->blk_states, block, bl_state);

	return PTR_TO_BLKSTATE(entry->value);
} /* x87_get_bl_state */
331 * Creates a new x87 state.
333 * @param sim the x87 simulator handle
335 * @return a new x87 state
static x87_state *x87_alloc_state(x87_simulator *sim) {
	/* allocated on the simulator obstack; freed with the whole obstack */
	x87_state *res = obstack_alloc(&sim->obst, sizeof(*res));
} /* x87_alloc_state */
346 * Create a new empty x87 state.
348 * @param sim the x87 simulator handle
350 * @return a new empty x87 state
static x87_state *x87_alloc_empty_state(x87_simulator *sim) {
	/* allocate a fresh state (initialization happens below) */
	x87_state *res = x87_alloc_state(sim);
} /* x87_alloc_empty_state */
363 * @param sim the x87 simulator handle
364 * @param src the x87 state that will be cloned
366 * @return a cloned copy of the src state
static x87_state *x87_clone_state(x87_simulator *sim, const x87_state *src) {
	x87_state *res = x87_alloc_state(sim);

	/* a flat copy suffices: st[] holds plain values and pointers */
	memcpy(res, src, sizeof(*res));
} /* x87_clone_state */
376 * Patch a virtual instruction into a x87 one and return
379 * @param n the IR node to patch
380 * @param op the x87 opcode to patch in
static ir_node *x87_patch_insn(ir_node *n, ir_op *op) {
	ir_mode *mode = get_irn_mode(n);

	if (mode == mode_T) {
		/* patch all Proj's: every float Proj gets mode_E */
		const ir_edge_t *edge;

		foreach_out_edge(n, edge) {
			ir_node *proj = get_edge_src_irn(edge);

			mode = get_irn_mode(proj);
			if (mode_is_float(mode)) {
				set_irn_mode(proj, mode_E);
	/* non-tuple float result: change the node's mode directly */
	else if (mode_is_float(mode))
		set_irn_mode(n, mode_E);
} /* x87_patch_insn */
409 * Returns the first Proj of a mode_T node having a given mode.
411 * @param n the mode_T node
412 * @param m the desired mode of the Proj
413 * @return The first Proj of mode @p m found or NULL.
static ir_node *get_irn_Proj_for_mode(ir_node *n, ir_mode *m) {
	const ir_edge_t *edge;

	assert(get_irn_mode(n) == mode_T && "Need mode_T node");

	/* scan the out edges for the first Proj with the requested mode */
	foreach_out_edge(n, edge) {
		ir_node *proj = get_edge_src_irn(edge);
		if (get_irn_mode(proj) == m)
} /* get_irn_Proj_for_mode */
430 * Wrap the arch_* function here so we can check for errors.
static INLINE const arch_register_t *x87_get_irn_register(x87_simulator *sim, const ir_node *irn) {
	const arch_register_t *res;

	res = arch_get_irn_register(sim->arch_env, irn);
	/* the simulator must only ever see virtual fp registers */
	assert(res->reg_class->regs == ia32_vfp_regs);
440 /* -------------- x87 perm --------------- */
443 * Creates a fxch for shuffle.
445 * @param state the x87 state
446 * @param pos parameter for fxch
447 * @param block the block were fxch is inserted
449 * Creates a new fxch node and reroute the user of the old node
452 * @return the fxch node
static ir_node *x87_fxch_shuffle(x87_state *state, int pos, ir_node *block)
	fxch = new_rd_ia32_fxch(NULL, get_irn_irg(block), block, mode_E);
	attr = get_ia32_attr(fxch);
	/* fxch exchanges st(pos) with st(0) */
	attr->x87[0] = &ia32_st_regs[pos];
	attr->x87[2] = &ia32_st_regs[0];

	/* mirror the exchange in the simulated state */
	x87_fxch(state, pos);
} /* x87_fxch_shuffle */
471 * Calculate the necessary permutations to reach dst_state.
473 * These permutations are done with fxch instructions and placed
474 * at the end of the block.
476 * Note that critical edges are removed here, so we need only
477 * a shuffle if the current block has only one successor.
479 * @param sim the simulator handle
480 * @param block the current block
481 * @param state the current x87 stack state, might be modified
482 * @param dst_block the destination block
483 * @param dst_state destination state
static x87_state *x87_shuffle(x87_simulator *sim, ir_node *block, x87_state *state, ir_node *dst_block, const x87_state *dst_state) {
	int      i, n_cycles, k, ri;
	unsigned cycles[4], all_mask;
	char     cycle_idx[4][8];
	ir_node  *fxch, *before, *after;

	assert(state->depth == dst_state->depth);

	/* Some mathematics here:
	   If we have a cycle of length n that includes the tos,
	   we need n-1 exchange operations.
	   We can always add the tos and restore it, so we need
	   n+1 exchange operations for a cycle not containing the tos.
	   So, the maximum of needed operations is for a cycle of 7
	   not including the tos == 8.
	   This is the same number of ops we would need for using stores,
	   so exchange is cheaper (we save the loads).
	   On the other hand, we might need an additional exchange
	   in the next block to bring one operand on top, so the
	   number of ops in the first case is identical.
	   Further, no more than 4 cycles can exist (4 x 2). */
	all_mask = (1 << (state->depth)) - 1;

	/* decompose the permutation state -> dst_state into disjoint cycles */
	for (n_cycles = 0; all_mask; ++n_cycles) {
		int src_idx, dst_idx;

		/* find the first free slot */
		for (i = 0; i < state->depth; ++i) {
			if (all_mask & (1 << i)) {
				all_mask &= ~(1 << i);

				/* check if there are differences here */
				if (x87_get_st_reg(state, i) != x87_get_st_reg(dst_state, i))

		/* no more cycles found */

		/* follow the cycle starting at slot i, recording its members */
		cycles[n_cycles] = (1 << i);
		cycle_idx[n_cycles][k++] = i;
		for (src_idx = i; ; src_idx = dst_idx) {
			dst_idx = x87_on_stack(dst_state, x87_get_st_reg(state, src_idx));

			if ((all_mask & (1 << dst_idx)) == 0)

			cycle_idx[n_cycles][k++] = dst_idx;
			cycles[n_cycles] |=  (1 << dst_idx);
			all_mask       &= ~(1 << dst_idx);
		/* -1 terminates a cycle's index list */
		cycle_idx[n_cycles][k] = -1;

	/* no permutation needed */

	/* Hmm: permutation needed */
	DB((dbg, LEVEL_2, "\n%+F needs permutation: from\n", block));
	DEBUG_ONLY(x87_dump_stack(state));
	DB((dbg, LEVEL_2, "                  to\n"));
	DEBUG_ONLY(x87_dump_stack(dst_state));

	DB((dbg, LEVEL_2, "Need %d cycles\n", n_cycles));
	for (ri = 0; ri < n_cycles; ++ri) {
		DB((dbg, LEVEL_2, " Ring %d:\n ", ri));
		for (k = 0; cycle_idx[ri][k] != -1; ++k)
			DB((dbg, LEVEL_2, " st%d ->", cycle_idx[ri][k]));
		DB((dbg, LEVEL_2, "\n"));

	/* Find the place where the node must be inserted.
	 * We have only one successor block, so the last instruction should
	 * be a control-flow op (critical edges were removed earlier). */
	before = sched_last(block);
	assert(is_cfop(before));

	/* now do the permutations */
	for (ri = 0; ri < n_cycles; ++ri) {
		if ((cycles[ri] & 1) == 0) {
			/* this cycle does not include the tos: bring its first
			   member to the tos before rotating the cycle */
			fxch = x87_fxch_shuffle(state, cycle_idx[ri][0], block);
			sched_add_after(after, fxch);
			sched_add_before(before, fxch);
		/* rotate the remaining members of the cycle */
		for (k = 1; cycle_idx[ri][k] != -1; ++k) {
			fxch = x87_fxch_shuffle(state, cycle_idx[ri][k], block);
			sched_add_after(after, fxch);
			sched_add_before(before, fxch);
		if ((cycles[ri] & 1) == 0) {
			/* this cycle does not include the tos: restore the tos */
			fxch = x87_fxch_shuffle(state, cycle_idx[ri][0], block);
			sched_add_after(after, fxch);
607 * Create a fxch node before another node.
609 * @param state the x87 state
610 * @param n the node after the fxch
611 * @param pos exchange st(pos) with st(0)
612 * @param op_idx if >= 0, replace input op_idx of n with the fxch result
static ir_node *x87_create_fxch(x87_state *state, ir_node *n, int pos, int op_idx) {
	ir_graph *irg   = get_irn_irg(n);
	ir_node  *block = get_nodes_block(n);

	/* update the simulated state first */
	x87_fxch(state, pos);

	fxch = new_rd_ia32_fxch(NULL, irg, block, mode_E);
	attr = get_ia32_attr(fxch);
	attr->x87[0] = &ia32_st_regs[pos];
	attr->x87[2] = &ia32_st_regs[0];

	/* schedule the fxch immediately before n */
	sched_add_before(n, fxch);
	DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fxch), attr->x87[0]->name, attr->x87[2]->name));
} /* x87_create_fxch */
637 * Create a fpush before node n.
639 * @param state the x87 state
640 * @param n the node after the fpush
641 * @param pos push st(pos) on stack
642 * @param op_idx replace input op_idx of n with the fpush result
static void x87_create_fpush(x87_state *state, ir_node *n, int pos, int op_idx) {
	ir_node               *fpush, *pred = get_irn_n(n, op_idx);
	const arch_register_t *out = x87_get_irn_register(state->sim, pred);

	/* duplicate pushes of the same vfp register are allowed here */
	x87_push_dbl(state, arch_register_get_index(out), pred);

	fpush = new_rd_ia32_fpush(NULL, get_irn_irg(n), get_nodes_block(n), mode_E);
	attr  = get_ia32_attr(fpush);
	attr->x87[0] = &ia32_st_regs[pos];
	attr->x87[2] = &ia32_st_regs[0];

	sched_add_before(n, fpush);

	DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fpush), attr->x87[0]->name, attr->x87[2]->name));
} /* x87_create_fpush */
663 * Create a fpop before node n.
665 * @param state the x87 state
666 * @param n the node after the fpop
667 * @param num pop 1 or 2 values
668 * @param pred node to use as predecessor of the fpop
670 * @return the fpop node
static ir_node *x87_create_fpop(x87_state *state, ir_node *n, int num, ir_node *pred) {
	ir_node *fpop = pred;

	fpop = new_rd_ia32_fpop(NULL, get_irn_irg(n), get_nodes_block(n), mode_E);
	attr = get_ia32_attr(fpop);
	/* fpop always discards st(0) */
	attr->x87[0] = &ia32_st_regs[0];
	attr->x87[1] = &ia32_st_regs[0];
	attr->x87[2] = &ia32_st_regs[0];

	sched_add_before(n, fpop);
	DB((dbg, LEVEL_1, "<<< %s %s\n", get_irn_opname(fpop), attr->x87[0]->name));
} /* x87_create_fpop */
697 * Creates an fldz before node n
699 * @param state the x87 state
700 * @param n the node after the fldz
702 * @return the fldz node
static ir_node *x87_create_fldz(x87_state *state, ir_node *n, int regidx) {
	ir_graph *irg = get_irn_irg(n);
	ir_node *block = get_nodes_block(n);

	fldz = new_rd_ia32_fldz(NULL, irg, block, mode_E);

	sched_add_before(n, fldz);
	DB((dbg, LEVEL_1, "<<< %s\n", get_irn_opname(fldz)));

	/* the constant becomes the new tos under register index regidx */
	x87_push(state, regidx, fldz);
720 /* --------------------------------- liveness ------------------------------------------ */
723 * The liveness transfer function.
724 * Updates a live set over a single step from a given node to its predecessor.
725 * Everything defined at the node is removed from the set, the uses of the node get inserted.
727 * @param sim The simulator handle.
728 * @param irn The node at which liveness should be computed.
729 * @param live The bitset of registers live before @p irn. This set gets modified by updating it to
730 * the registers live after irn.
732 * @return The live bitset.
static vfp_liveness vfp_liveness_transfer(x87_simulator *sim, ir_node *irn, vfp_liveness live)
	const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
	const arch_env_t *arch_env = sim->arch_env;

	/* the value defined by irn dies above irn: clear its bit */
	if (arch_irn_consider_in_reg_alloc(arch_env, cls, irn)) {
		const arch_register_t *reg = x87_get_irn_register(sim, irn);
		live &= ~(1 << arch_register_get_index(reg));

	/* every float operand used by irn becomes live before irn */
	for (i = 0, n = get_irn_arity(irn); i < n; ++i) {
		ir_node *op = get_irn_n(irn, i);

		if (mode_is_float(get_irn_mode(op)) && arch_irn_consider_in_reg_alloc(arch_env, cls, op)) {
			const arch_register_t *reg = x87_get_irn_register(sim, op);
			live |= 1 << arch_register_get_index(reg);
} /* vfp_liveness_transfer */
757 * Put all live virtual registers at the end of a block into a bitset.
759 * @param sim the simulator handle
760 * @param lv the liveness information
761 * @param bl the block
763 * @return The live bitset at the end of this block
static vfp_liveness vfp_liveness_end_of_block(x87_simulator *sim, const ir_node *block)
	vfp_liveness live = 0;
	const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
	const arch_env_t *arch_env = sim->arch_env;
	const be_lv_t *lv = sim->lv;

	/* collect one bit per vfp register live-out of this block */
	be_lv_foreach(lv, block, be_lv_state_end, i) {
		const arch_register_t *reg;
		const ir_node *node = be_lv_get_irn(lv, block, i);
		if (!arch_irn_consider_in_reg_alloc(arch_env, cls, node))

		reg = x87_get_irn_register(sim, node);
		live |= 1 << arch_register_get_index(reg);
} /* vfp_liveness_end_of_block */
786 /** get the register mask from an arch_register */
787 #define REGMASK(reg) (1 << (arch_register_get_index(reg)))
790 * Return a bitset of argument registers which are live at the end of a node.
792 * @param sim the simulator handle
793 * @param pos the node
794 * @param kill kill mask for the output registers
796 * @return The live bitset.
static unsigned vfp_live_args_after(x87_simulator *sim, const ir_node *pos, unsigned kill)
	unsigned idx = get_irn_idx(pos);

	/* liveness was cached per node index by update_liveness() */
	assert(idx < sim->n_idx);
	return sim->live[idx] & ~kill;
} /* vfp_live_args_after */
807 * Calculate the liveness for a whole block and cache it.
809 * @param sim the simulator handle
810 * @param lv the liveness handle
811 * @param block the block
static void update_liveness(x87_simulator *sim, ir_node *block) {
	/* start from the live-out set of the block */
	vfp_liveness live = vfp_liveness_end_of_block(sim, block);

	/* now iterate through the block backward and cache the results */
	sched_foreach_reverse(block, irn) {
		/* stop at the first Phi: this produces the live-in */

		idx = get_irn_idx(irn);
		sim->live[idx] = live;

		live = vfp_liveness_transfer(sim, irn, live);
	/* the block's own index caches the live-in set */
	idx = get_irn_idx(block);
	sim->live[idx] = live;
} /* update_liveness */
834 * Returns true if a register is live in a set.
836 * @param reg_idx the vfp register index
837 * @param live a live bitset
839 #define is_vfp_live(reg_idx, live) ((live) & (1 << (reg_idx)))
843 * Dump liveness info.
845 * @param live the live bitset
static void vfp_dump_live(vfp_liveness live) {
	DB((dbg, LEVEL_2, "Live after: "));
	/* 8 bits: one per virtual fp register vf0..vf7 */
	for (i = 0; i < 8; ++i) {
		if (live & (1 << i)) {
			DB((dbg, LEVEL_2, "vf%d ", i));
	DB((dbg, LEVEL_2, "\n"));
} /* vfp_dump_live */
858 #endif /* DEBUG_libfirm */
860 /* --------------------------------- simulators ---------------------------------------- */
862 #define XCHG(a, b) do { int t = (a); (a) = (b); (b) = t; } while (0)
865 * Simulate a virtual binop.
867 * @param state the x87 state
868 * @param n the node that should be simulated (and patched)
869 * @param tmpl the template containing the 4 possible x87 opcodes
static int sim_binop(x87_state *state, ir_node *n, const exchange_tmpl *tmpl) {
	int op2_idx, op1_idx;
	int out_idx, do_pop = 0;
	ir_node *patched_insn;
	x87_simulator         *sim = state->sim;
	const arch_register_t *op1 = x87_get_irn_register(sim, get_irn_n(n, BINOP_IDX_1));
	const arch_register_t *op2 = x87_get_irn_register(sim, get_irn_n(n, BINOP_IDX_2));
	const arch_register_t *out = x87_get_irn_register(sim, n);
	int reg_index_1 = arch_register_get_index(op1);
	int reg_index_2 = arch_register_get_index(op2);
	vfp_liveness live = vfp_live_args_after(sim, n, REGMASK(out));

	DB((dbg, LEVEL_1, ">>> %+F %s, %s -> %s\n", n,
		arch_register_get_name(op1), arch_register_get_name(op2),
		arch_register_get_name(out)));
	DEBUG_ONLY(vfp_dump_live(live));
	DB((dbg, LEVEL_1, "Stack before: "));
	DEBUG_ONLY(x87_dump_stack(state));

	op1_idx = x87_on_stack(state, reg_index_1);
	assert(op1_idx >= 0);

	if (reg_index_2 != REG_VFP_NOREG) {
		/* second operand is a vfp register */
		op2_idx = x87_on_stack(state, reg_index_2);
		assert(op2_idx >= 0);

		if (is_vfp_live(arch_register_get_index(op2), live)) {
			/* Second operand is live. */

			if (is_vfp_live(arch_register_get_index(op1), live)) {
				/* Both operands are live: push the first one.
				   This works even for op1 == op2. */
				x87_create_fpush(state, n, op1_idx, BINOP_IDX_2);
				/* now do fxxx (tos=tos X op) */

				dst = tmpl->normal_op;

				/* Second live, first operand is dead here, bring it to tos. */

				x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);

				/* now do fxxx (tos=tos X op) */

				dst = tmpl->normal_op;

			/* Second operand is dead. */
			if (is_vfp_live(arch_register_get_index(op1), live)) {
				/* First operand is live: bring second to tos. */

				x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);

				/* now do fxxxr (tos = op X tos) */

				dst = tmpl->reverse_op;

				/* Both operands are dead here, pop them from the stack. */

				/* Both are identical and on tos, no pop needed. */
				/* here fxxx (tos = tos X tos) */
				dst = tmpl->normal_op;

				/* now do fxxxp (op = op X tos, pop) */
				dst = tmpl->normal_pop_op;

			} else if (op1_idx == 0) {
				assert(op1_idx != op2_idx);
				/* now do fxxxrp (op = tos X op, pop) */
				dst = tmpl->reverse_pop_op;

				/* Bring the second on top. */
				x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
				if (op1_idx == op2_idx) {
					/* Both are identical and on tos now, no pop needed. */

					/* use fxxx (tos = tos X tos) */
					dst = tmpl->normal_op;

				/* op2 is on tos now */

				/* use fxxxp (op = op X tos, pop) */
				dst = tmpl->normal_pop_op;

		/* second operand is an address mode */
		if (is_vfp_live(arch_register_get_index(op1), live)) {
			/* first operand is live: push it here */
			x87_create_fpush(state, n, op1_idx, BINOP_IDX_1);

			/* use fxxx (tos = tos X mem) */
			dst = tmpl->normal_op;

			/* first operand is dead: bring it to tos */
			x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);

			/* use fxxx (tos = tos X mem) */
			dst = tmpl->normal_op;

	/* replace the virtual op by the selected concrete x87 op */
	patched_insn = x87_patch_insn(n, dst);
	x87_set_st(state, arch_register_get_index(out), patched_insn, out_idx);

	/* patch the operation */
	attr = get_ia32_attr(n);
	attr->x87[0] = op1 = &ia32_st_regs[op1_idx];
	if (reg_index_2 != REG_VFP_NOREG) {
		attr->x87[1] = op2 = &ia32_st_regs[op2_idx];
	attr->x87[2] = out = &ia32_st_regs[out_idx];

	if (reg_index_2 != REG_VFP_NOREG) {
		DB((dbg, LEVEL_1, "<<< %s %s, %s -> %s\n", get_irn_opname(n),
			arch_register_get_name(op1), arch_register_get_name(op2),
			arch_register_get_name(out)));
		DB((dbg, LEVEL_1, "<<< %s %s, [AM] -> %s\n", get_irn_opname(n),
			arch_register_get_name(op1),
			arch_register_get_name(out)));
1028 * Simulate a virtual Unop.
1030 * @param state the x87 state
1031 * @param n the node that should be simulated (and patched)
1032 * @param op the x87 opcode that will replace n's opcode
static int sim_unop(x87_state *state, ir_node *n, ir_op *op) {
	int op1_idx, out_idx;
	x87_simulator         *sim = state->sim;
	const arch_register_t *op1 = x87_get_irn_register(sim, get_irn_n(n, UNOP_IDX));
	const arch_register_t *out = x87_get_irn_register(sim, n);
	unsigned live = vfp_live_args_after(sim, n, REGMASK(out));

	DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, out->name));
	DEBUG_ONLY(vfp_dump_live(live));

	op1_idx = x87_on_stack(state, arch_register_get_index(op1));

	if (is_vfp_live(arch_register_get_index(op1), live)) {
		/* operand stays live: push a copy so the original survives */
		x87_create_fpush(state, n, op1_idx, UNOP_IDX);

		/* operand is dead, bring it to tos */

		x87_create_fxch(state, n, op1_idx, UNOP_IDX);

	/* the unop consumes and produces the tos */
	x87_set_tos(state, arch_register_get_index(out), x87_patch_insn(n, op));

	attr = get_ia32_attr(n);
	attr->x87[0] = op1 = &ia32_st_regs[0];
	attr->x87[2] = out = &ia32_st_regs[0];
	DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), out->name));
1071 * Simulate a virtual Load instruction.
1073 * @param state the x87 state
1074 * @param n the node that should be simulated (and patched)
1075 * @param op the x87 opcode that will replace n's opcode
static int sim_load(x87_state *state, ir_node *n, ir_op *op) {
	const arch_register_t *out = x87_get_irn_register(state->sim, n);

	DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, arch_register_get_name(out)));
	/* a load always pushes its result onto the simulated stack */
	x87_push(state, arch_register_get_index(out), x87_patch_insn(n, op));
	assert(out == x87_get_irn_register(state->sim, n));
	attr = get_ia32_attr(n);
	attr->x87[2] = out = &ia32_st_regs[0];
	DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), arch_register_get_name(out)));
 * Rewire all users of @p old_val to @p new_val iff they are scheduled after @p store.
1094 * @param store The store
1095 * @param old_val The former value
1096 * @param new_val The new value
static void collect_and_rewire_users(ir_node *store, ir_node *old_val, ir_node *new_val) {
	const ir_edge_t *edge, *ne;

	/* safe iteration: edges are modified while walking */
	foreach_out_edge_safe(old_val, edge, ne) {
		ir_node *user = get_edge_src_irn(edge);

		if (! user || user == store)

		/* if the user is scheduled after the store: rewire */
		if (sched_is_scheduled(user) && sched_comes_after(store, user)) {

			/* find the input of the user pointing to the old value */
			for (i = get_irn_arity(user) - 1; i >= 0; i--) {
				if (get_irn_n(user, i) == old_val)
					set_irn_n(user, i, new_val);
} /* collect_and_rewire_users */
1120 * Simulate a virtual Store.
1122 * @param state the x87 state
1123 * @param n the node that should be simulated (and patched)
1124 * @param op the x87 store opcode
1125 * @param op_p the x87 store and pop opcode
static int sim_store(x87_state *state, ir_node *n, ir_op *op, ir_op *op_p) {
	x87_simulator         *sim = state->sim;
	ir_node               *val = get_irn_n(n, STORE_VAL_IDX);
	const arch_register_t *op2 = x87_get_irn_register(sim, val);
	unsigned              live = vfp_live_args_after(sim, n, 0);
	int                   op2_reg_idx, op2_idx, depth;
	int                   live_after_node;

	op2_reg_idx = arch_register_get_index(op2);
	if (op2_reg_idx == REG_VFP_UKNWN) {
		// just take any value from stack
		if(state->depth > 0) {
			DEBUG_ONLY(op2 = NULL);
			live_after_node = 1;
			// stack is empty: produce a new value which we will consume immediately
			x87_create_fldz(state, n, op2_reg_idx);
			live_after_node = 0;
			op2_idx = x87_on_stack(state, op2_reg_idx);
			assert(op2_idx >= 0);
		op2_idx = x87_on_stack(state, op2_reg_idx);
		live_after_node = is_vfp_live(arch_register_get_index(op2), live);
		assert(op2_idx >= 0);
		DB((dbg, LEVEL_1, ">>> %+F %s ->\n", n, arch_register_get_name(op2)));

	mode  = get_ia32_ls_mode(n);
	depth = x87_get_depth(state);

	if (live_after_node) {
		/* Problem: fst doesn't support mode_E (spills), only fstp does
		    - stack not full: push value and fstp
		    - stack full: fstp value and load again */
		if (mode == mode_E) {
			if (depth < N_x87_REGS) {
				/* ok, we have a free register: push + fstp */
				x87_create_fpush(state, n, op2_idx, STORE_VAL_IDX);

				x87_patch_insn(n, op_p);

				ir_node *vfld, *mem, *block, *rproj, *mproj;

				/* stack full here: need fstp + load */

				x87_patch_insn(n, op_p);

				block = get_nodes_block(n);
				irg   = get_irn_irg(n);
				vfld  = new_rd_ia32_vfld(NULL, irg, block, get_irn_n(n, 0), get_irn_n(n, 1), new_rd_NoMem(irg));

				/* copy all attributes */
				set_ia32_frame_ent(vfld, get_ia32_frame_ent(n));
				if (is_ia32_use_frame(n))
					set_ia32_use_frame(vfld);
				set_ia32_am_flavour(vfld, get_ia32_am_flavour(n));
				set_ia32_op_type(vfld, ia32_am_Source);
				add_ia32_am_offs_int(vfld, get_ia32_am_offs_int(n));
				set_ia32_am_sc(vfld, get_ia32_am_sc(n));
				set_ia32_ls_mode(vfld, get_ia32_ls_mode(n));

				rproj = new_r_Proj(irg, block, vfld, get_ia32_ls_mode(vfld), pn_ia32_vfld_res);
				mproj = new_r_Proj(irg, block, vfld, mode_M, pn_ia32_vfld_M);
				mem   = get_irn_Proj_for_mode(n, mode_M);

				assert(mem && "Store memory not found");

				arch_set_irn_register(sim->arch_env, rproj, op2);

				/* reroute all former users of the store memory to the load memory */
				edges_reroute(mem, mproj, irg);

				/* set the memory input of the load to the store memory */
				set_irn_n(vfld, 2, mem);

				sched_add_after(n, vfld);
				sched_add_after(vfld, rproj);

				/* rewire all users, scheduled after the store, to the loaded value */
				collect_and_rewire_users(n, val, rproj);

			/* we can only store the tos to memory */

				x87_create_fxch(state, n, op2_idx, STORE_VAL_IDX);

			/* mode != mode_E -> use normal fst */
			x87_patch_insn(n, op);

		/* we can only store the tos to memory */

			x87_create_fxch(state, n, op2_idx, STORE_VAL_IDX);

		/* value is dead: store with pop */
		x87_patch_insn(n, op_p);

	attr = get_ia32_attr(n);
	attr->x87[1] = op2 = &ia32_st_regs[0];
	DB((dbg, LEVEL_1, "<<< %s %s ->\n", get_irn_opname(n), arch_register_get_name(op2)));
1246 * Simulate a virtual Phi.
1247 * Just for cosmetic reasons change the mode of Phi nodes to mode_E.
1249 * @param state the x87 state
1250 * @param n the node that should be simulated (and patched)
1251 * @param arch_env the architecture environment
static int sim_Phi(x87_state *state, ir_node *n, const arch_env_t *arch_env) {
	ir_mode *mode = get_irn_mode(n);

	/* float Phis are switched to mode_E (cosmetic only, see above) */
	if (mode_is_float(mode))
		set_irn_mode(n, mode_E);
/*
 * Generator macros for the per-opcode simulator functions: each one expands
 * to a sim_<op>() wrapper that forwards to the generic sim_binop/sim_load/
 * sim_unop/sim_store routine, passing the matching ia32 opcode(s).
 * _GEN_BINOP additionally supplies the reverse and pop variants needed by
 * the exchange template (see exchange_tmpl above).
 */
1262 #define _GEN_BINOP(op, rev) \
1263 static int sim_##op(x87_state *state, ir_node *n) { \
1264 exchange_tmpl tmpl = { op_ia32_##op, op_ia32_##rev, op_ia32_##op##p, op_ia32_##rev##p }; \
1265 return sim_binop(state, n, &tmpl); \
1268 #define GEN_BINOP(op) _GEN_BINOP(op, op)
1269 #define GEN_BINOPR(op) _GEN_BINOP(op, op##r)
1271 #define GEN_LOAD2(op, nop) \
1272 static int sim_##op(x87_state *state, ir_node *n) { \
1273 return sim_load(state, n, op_ia32_##nop); \
1276 #define GEN_LOAD(op) GEN_LOAD2(op, op)
1278 #define GEN_UNOP(op) \
1279 static int sim_##op(x87_state *state, ir_node *n) { \
1280 return sim_unop(state, n, op_ia32_##op); \
1283 #define GEN_STORE(op) \
1284 static int sim_##op(x87_state *state, ir_node *n) { \
1285 return sim_store(state, n, op_ia32_##op, op_ia32_##op##p); \
1310 * Simulate a fCondJmp.
1312 * @param state the x87 state
1313 * @param n the node that should be simulated (and patched)
1315 static int sim_fCondJmp(x87_state *state, ir_node *n) {
1321 x87_simulator *sim = state->sim;
1322 const arch_register_t *op1 = x87_get_irn_register(sim, get_irn_n(n, BINOP_IDX_1));
1323 const arch_register_t *op2 = x87_get_irn_register(sim, get_irn_n(n, BINOP_IDX_2));
1324 int reg_index_1 = arch_register_get_index(op1);
1325 int reg_index_2 = arch_register_get_index(op2);
1326 unsigned live = vfp_live_args_after(sim, n, 0);
1328 DB((dbg, LEVEL_1, ">>> %+F %s, %s\n", n,
1329 arch_register_get_name(op1), arch_register_get_name(op2)));
1330 DEBUG_ONLY(vfp_dump_live(live));
1331 DB((dbg, LEVEL_1, "Stack before: "));
1332 DEBUG_ONLY(x87_dump_stack(state));
/* the first operand must already live on the simulated stack */
1334 op1_idx = x87_on_stack(state, reg_index_1);
1335 assert(op1_idx >= 0);
1337 /* BEWARE: check for comp a,a cases, they might happen */
1338 if (reg_index_2 != REG_VFP_NOREG) {
1339 /* second operand is a vfp register */
1340 op2_idx = x87_on_stack(state, reg_index_2);
1341 assert(op2_idx >= 0);
/* The case analysis below selects among fcom/fcomr and their pop/pop-pop
   variants depending on which operands are live after the compare and
   which of them already sit at TOS / st(1).  Dead operands must end up on
   top so the compare can pop them. */
1343 if (is_vfp_live(arch_register_get_index(op2), live)) {
1344 /* second operand is live */
1346 if (is_vfp_live(arch_register_get_index(op1), live)) {
1347 /* both operands are live */
1350 /* res = tos X op */
1351 dst = op_ia32_fcomJmp;
1352 } else if (op2_idx == 0) {
1353 /* res = op X tos */
1354 dst = op_ia32_fcomrJmp;
1356 /* bring the first one to tos */
1357 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1361 /* res = tos X op */
1362 dst = op_ia32_fcomJmp;
1365 /* second live, first operand is dead here, bring it to tos.
1366 This means further, op1_idx != op2_idx. */
1367 assert(op1_idx != op2_idx);
1369 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1374 /* res = tos X op, pop */
1375 dst = op_ia32_fcompJmp;
1379 /* second operand is dead */
1380 if (is_vfp_live(arch_register_get_index(op1), live)) {
1381 /* first operand is live: bring second to tos.
1382 This means further, op1_idx != op2_idx. */
1383 assert(op1_idx != op2_idx);
1385 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1390 /* res = op X tos, pop */
1391 dst = op_ia32_fcomrpJmp;
1394 /* both operands are dead here, check first for identity. */
1395 if (op1_idx == op2_idx) {
1396 /* identically, one pop needed */
1398 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1402 /* res = tos X op, pop */
1403 dst = op_ia32_fcompJmp;
1406 /* different, move them to st and st(1) and pop both.
1407 The tricky part is to get one into st(1).*/
1408 else if (op2_idx == 1) {
1409 /* good, second operand is already in the right place, move the first */
1411 /* bring the first on top */
1412 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1413 assert(op2_idx != 0);
1416 /* res = tos X op, pop, pop */
1417 dst = op_ia32_fcomppJmp;
1419 } else if (op1_idx == 1) {
1420 /* good, first operand is already in the right place, move the second */
1422 /* bring the second on top */
1423 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1424 assert(op1_idx != 0);
1427 dst = op_ia32_fcomrppJmp;
1430 /* if one is already the TOS, we need two fxch */
1432 /* first one is TOS, move to st(1) */
1433 x87_create_fxch(state, n, 1, BINOP_IDX_1);
1434 assert(op2_idx != 1);
1436 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1438 /* res = op X tos, pop, pop */
1439 dst = op_ia32_fcomrppJmp;
1441 } else if (op2_idx == 0) {
1442 /* second one is TOS, move to st(1) */
1443 x87_create_fxch(state, n, 1, BINOP_IDX_2);
1444 assert(op1_idx != 1);
1446 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1448 /* res = tos X op, pop, pop */
1449 dst = op_ia32_fcomppJmp;
1452 /* none of them is either TOS or st(1), 3 fxch needed */
1453 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1454 assert(op1_idx != 0);
1455 x87_create_fxch(state, n, 1, BINOP_IDX_2);
1457 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1459 /* res = tos X op, pop, pop */
1460 dst = op_ia32_fcomppJmp;
1467 /* second operand is an address mode */
1468 if (is_vfp_live(arch_register_get_index(op1), live)) {
1469 /* first operand is live: bring it to TOS */
1471 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1474 dst = op_ia32_fcomJmp;
1476 /* first operand is dead: bring it to tos */
1478 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1481 dst = op_ia32_fcompJmp;
/* rewrite the virtual compare into the selected real x87 opcode */
1486 x87_patch_insn(n, dst);
1487 assert(pop_cnt < 3);
1493 /* patch the operation */
1494 attr = get_ia32_attr(n);
1495 op1 = &ia32_st_regs[op1_idx];
1498 op2 = &ia32_st_regs[op2_idx];
/* a compare produces no stack result, hence no output register */
1501 attr->x87[2] = NULL;
1504 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(n),
1505 arch_register_get_name(op1), arch_register_get_name(op2)));
1507 DB((dbg, LEVEL_1, "<<< %s %s, [AM]\n", get_irn_opname(n),
1508 arch_register_get_name(op1)));
1511 } /* sim_fCondJmp */
/*
 * Create the x87 replacement for a be_Copy: constants are recreated with
 * the matching fld* instruction instead of being copied; everything else
 * becomes an fpushCopy that duplicates the source stack slot.  The new
 * node is pushed onto the simulated stack and gets st(0) as output.
 */
1513 static ir_node *create_Copy(x87_state *state, ir_node *n) {
1514 x87_simulator *sim = state->sim;
1515 ir_graph *irg = get_irn_irg(n);
1516 dbg_info *n_dbg = get_irn_dbg_info(n);
1517 ir_mode *mode = get_irn_mode(n);
1518 ir_node *block = get_nodes_block(n);
1519 ir_node *pred = get_irn_n(n, 0);
1520 ir_node *(*cnstr)(dbg_info *, ir_graph *, ir_node *, ir_mode *) = NULL;
1522 const arch_register_t *out;
1523 const arch_register_t *op1;
1526 /* Do not copy constants, recreate them. */
1527 switch (get_ia32_irn_opcode(pred)) {
1528 case iro_ia32_Unknown_VFP:
1530 cnstr = new_rd_ia32_fldz;
1533 cnstr = new_rd_ia32_fld1;
1535 case iro_ia32_fldpi:
1536 cnstr = new_rd_ia32_fldpi;
1538 case iro_ia32_fldl2e:
1539 cnstr = new_rd_ia32_fldl2e;
1541 case iro_ia32_fldl2t:
1542 cnstr = new_rd_ia32_fldl2t;
1544 case iro_ia32_fldlg2:
1545 cnstr = new_rd_ia32_fldlg2;
1547 case iro_ia32_fldln2:
1548 cnstr = new_rd_ia32_fldln2;
1552 out = x87_get_irn_register(sim, n);
1553 op1 = x87_get_irn_register(sim, pred);
1556 /* copy a constant: materialize it again instead of duplicating the slot */
1557 res = (*cnstr)(n_dbg, irg, block, mode);
1559 x87_push(state, arch_register_get_index(out), res);
1561 attr = get_ia32_attr(res);
1562 attr->x87[2] = out = &ia32_st_regs[0];
/* non-constant: duplicate the source slot with an fpushCopy */
1564 int op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1566 res = new_rd_ia32_fpushCopy(n_dbg, irg, block, pred, mode);
1568 x87_push(state, arch_register_get_index(out), res);
1570 attr = get_ia32_attr(res);
1571 attr->x87[0] = op1 = &ia32_st_regs[op1_idx];
1572 attr->x87[2] = out = &ia32_st_regs[0];
1574 arch_set_irn_register(sim->arch_env, res, out);
1580 * Simulate a be_Copy.
1582 * @param state the x87 state
1583 * @param n the node that should be simulated (and patched)
1585 static int sim_Copy(x87_state *state, ir_node *n) {
1588 const arch_register_t *out;
1589 const arch_register_t *op1;
1590 ir_node *node, *next;
1592 int op1_idx, out_idx;
1595 ir_mode *mode = get_irn_mode(n);
/* integer copies are handled elsewhere; only float copies concern x87 */
1597 if (!mode_is_float(mode))
1601 pred = get_irn_n(n, 0);
1602 out = x87_get_irn_register(sim, n);
1603 op1 = x87_get_irn_register(sim, pred);
1604 live = vfp_live_args_after(sim, n, REGMASK(out));
1606 DB((dbg, LEVEL_1, ">>> %+F %s -> %s\n", n,
1607 arch_register_get_name(op1), arch_register_get_name(out)));
1608 DEBUG_ONLY(vfp_dump_live(live));
1610 /* handle the infamous unknown value */
1611 if (arch_register_get_index(op1) == REG_VFP_UKNWN) {
1612 /* Operand is still live, a real copy. We need here an fpush that can
1613 hold a register, so use the fpushCopy or recreate constants */
1614 node = create_Copy(state, n);
1616 assert(is_ia32_fldz(node));
1617 next = sched_next(n);
1620 sched_add_before(next, node);
1622 DB((dbg, LEVEL_1, "<<< %+F %s -> %s\n", node, op1->name,
1623 arch_get_irn_register(sim->arch_env, node)->name));
1627 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1629 if (is_vfp_live(arch_register_get_index(op1), live)) {
1630 /* Operand is still live, a real copy. We need here an fpush that can
1631 hold a register, so use the fpushCopy or recreate constants */
1632 node = create_Copy(state, n);
1634 next = sched_next(n);
1637 sched_add_before(next, node);
1638 DB((dbg, LEVEL_1, "<<< %+F %s -> %s\n", node, op1->name,
1639 arch_get_irn_register(sim->arch_env, node)->name));
/* operand is dead: no duplication needed, rename/kill the slot instead */
1641 out_idx = x87_on_stack(state, arch_register_get_index(out));
1643 if (out_idx >= 0 && out_idx != op1_idx) {
1644 /* Matze: out already on stack? how can this happen? */
1647 /* op1 must be killed and placed where out is */
1649 /* best case, simple remove and rename */
1650 x87_patch_insn(n, op_ia32_Pop);
1651 attr = get_ia32_attr(n);
1652 attr->x87[0] = op1 = &ia32_st_regs[0];
1655 x87_set_st(state, arch_register_get_index(out), n, op1_idx - 1);
1657 /* move op1 to tos, store and pop it */
1659 x87_create_fxch(state, n, op1_idx, 0);
1662 x87_patch_insn(n, op_ia32_Pop);
1663 attr = get_ia32_attr(n);
1664 attr->x87[0] = op1 = &ia32_st_regs[out_idx];
1667 x87_set_st(state, arch_register_get_index(out), n, out_idx - 1);
1669 DB((dbg, LEVEL_1, "<<< %+F %s\n", n, op1->name));
1671 /* just a virtual copy: rebind the stack slot to the new register
1672 and remove the Copy node entirely */
1672 x87_set_st(state, arch_register_get_index(out), get_unop_op(n), op1_idx);
1674 DB((dbg, LEVEL_1, "<<< KILLED %s\n", get_irn_opname(n)));
1675 exchange(n, get_unop_op(n));
1683 * Simulate a be_Call.
1685 * @param state the x87 state
1686 * @param n the node that should be simulated
1687 * @param arch_env the architecture environment
1689 static int sim_Call(x87_state *state, ir_node *n, const arch_env_t *arch_env) {
1690 ir_type *call_tp = be_Call_get_type(n);
1692 /* at the begin of a call the x87 state should be empty */
1693 assert(state->depth == 0 && "stack not empty before call");
1696 * If the called function returns a float, it is returned in st(0).
1697 * This even happens if the return value is NOT used.
1698 * Moreover, only one return result is supported.
1700 if (get_method_n_ress(call_tp) > 0) {
1701 ir_type *res_type = get_method_res_type(call_tp, 0);
1702 ir_mode *mode = get_type_mode(res_type);
/* mode may be NULL for compound return types, which never land in st(0) */
1704 if (mode && mode_is_float(mode)) {
1706 * TODO: what to push here? The result might be unused and currently
1707 * we have no possibility to detect this :-(
1709 x87_push(state, 0, n);
1717 * Simulate a be_Spill.
1719 * @param state the x87 state
1720 * @param n the node that should be simulated (and patched)
1722 * Should not happen: spills are lowered before the x87 simulator sees them.
1724 static int sim_Spill(x87_state *state, ir_node *n) {
/* spills must have been lowered to fst before the simulator runs;
   fall through to the store simulation just in case */
1725 assert(0 && "Spill not lowered");
1726 return sim_fst(state, n);
1730 * Simulate a be_Reload.
1732 * @param state the x87 state
1733 * @param n the node that should be simulated (and patched)
1735 * Should not happen: reloads are lowered before the x87 simulator sees them.
1737 static int sim_Reload(x87_state *state, ir_node *n) {
/* reloads must have been lowered to fld before the simulator runs;
   fall through to the load simulation just in case */
1738 assert(0 && "Reload not lowered");
1739 return sim_fld(state, n);
1743 * Simulate a be_Return.
1745 * @param state the x87 state
1746 * @param n the node that should be simulated (and patched)
1748 static int sim_Return(x87_state *state, ir_node *n) {
1749 int n_res = be_Return_get_n_rets(n);
1750 int i, n_float_res = 0;
1752 /* only floating point return values must reside on the stack */
1753 for (i = 0; i < n_res; ++i) {
1754 ir_node *res = get_irn_n(n, be_pos_Return_val + i);
1756 if (mode_is_float(get_irn_mode(res)))
/* at a return, exactly the float results may remain on the x87 stack */
1759 assert(x87_get_depth(state) == n_float_res);
1761 /* pop them virtually: the caller consumes st(0)..st(n_float_res-1) */
1762 for (i = n_float_res - 1; i >= 0; --i)
1768 typedef struct _perm_data_t {
1769 const arch_register_t *in; /**< input register of the permutation */
1770 const arch_register_t *out; /**< output register of the permutation */
1774 * Simulate a be_Perm.
1776 * @param state the x87 state
1777 * @param irn the node that should be simulated (and patched)
1779 static int sim_Perm(x87_state *state, ir_node *irn) {
1781 x87_simulator *sim = state->sim;
1782 ir_node *pred = get_irn_n(irn, 0);
1784 const ir_edge_t *edge;
1786 /* handle only floating point Perms */
1787 if (! mode_is_float(get_irn_mode(pred)))
1790 DB((dbg, LEVEL_1, ">>> %+F\n", irn));
1792 /* Perm is a pure virtual instruction on x87.
1793 All inputs must be on the FPU stack and are pairwise
1794 different from each other.
1795 So, all we need to do is to permute the stack state. */
1796 n = get_irn_arity(irn);
1797 NEW_ARR_A(int, stack_pos, n);
1799 /* collect old stack positions */
1800 for (i = 0; i < n; ++i) {
1801 const arch_register_t *inreg = x87_get_irn_register(sim, get_irn_n(irn, i));
1802 int idx = x87_on_stack(state, arch_register_get_index(inreg));
1804 assert(idx >= 0 && "Perm argument not on x87 stack");
1808 /* now do the permutation: rebind each output Proj to the stack slot
1809 of the corresponding input — no real instructions are emitted */
1809 foreach_out_edge(irn, edge) {
1810 ir_node *proj = get_edge_src_irn(edge);
1811 const arch_register_t *out = x87_get_irn_register(sim, proj);
1812 long num = get_Proj_proj(proj);
1814 assert(0 <= num && num < n && "More Proj's than Perm inputs");
1815 x87_set_st(state, arch_register_get_index(out), proj, stack_pos[(unsigned)num]);
1817 DB((dbg, LEVEL_1, "<<< %+F\n", irn));
1823 * Kill any dead registers at block start by popping them from the stack.
1825 * @param sim the simulator handle
1826 * @param block the current block
1827 * @param start_state the x87 state at the begin of the block
1829 static x87_state *x87_kill_deads(x87_simulator *sim, ir_node *block, x87_state *start_state) {
1830 x87_state *state = start_state;
1831 ir_node *first_insn = sched_first(block);
1832 ir_node *keep = NULL;
1833 unsigned live = vfp_live_args_after(sim, block, 0);
1835 int i, depth, num_pop;
/* compute a bitmask of stack slots whose register is dead at block entry */
1838 depth = x87_get_depth(state);
1839 for (i = depth - 1; i >= 0; --i) {
1840 int reg = x87_get_st_reg(state, i);
1842 if (! is_vfp_live(reg, live))
1843 kill_mask |= (1 << i);
1847 /* create a new state, will be changed */
1848 state = x87_clone_state(sim, state);
1850 DB((dbg, LEVEL_1, "Killing deads:\n"));
1851 DEBUG_ONLY(vfp_dump_live(live));
1852 DEBUG_ONLY(x87_dump_stack(state));
1854 /* now kill registers */
1856 /* we can only kill from TOS, so bring them up */
1857 if (! (kill_mask & 1)) {
1858 /* search from behind, because we can do a double-pop */
1859 for (i = depth - 1; i >= 0; --i) {
1860 if (kill_mask & (1 << i)) {
1861 kill_mask &= ~(1 << i);
/* bring the dead value to TOS with an fxch inserted before the
   first instruction of the block */
1868 x87_set_st(state, -1, keep, i);
1869 keep = x87_create_fxch(state, first_insn, i, -1);
1872 keep = x87_get_st_node(state, 0);
1874 if ((kill_mask & 3) == 3) {
1875 /* we can do a double-pop */
1879 /* only a single pop */
1884 kill_mask >>= num_pop;
1885 keep = x87_create_fpop(state, first_insn, num_pop, keep);
1890 } /* x87_kill_deads */
1893 * Run a simulation and fix all virtual instructions for a block.
1895 * @param sim the simulator handle
1896 * @param block the current block
1898 * Note: this function has no return value; the computed end state is
1899 * stored in the block state (bl_state->end) for use by successors.
1901 static void x87_simulate_block(x87_simulator *sim, ir_node *block) {
1903 blk_state *bl_state = x87_get_bl_state(sim, block);
1904 x87_state *state = bl_state->begin;
1905 const ir_edge_t *edge;
1906 ir_node *start_block;
1908 assert(state != NULL);
1909 // already processed?
1910 if(bl_state->end != NULL)
1913 //update_liveness(sim, block);
1915 DB((dbg, LEVEL_1, "Simulate %+F\n", block));
1916 DB((dbg, LEVEL_2, "State at Block begin:\n "));
1917 DEBUG_ONLY(x87_dump_stack(state));
1919 /* at block begin, kill all dead registers */
1920 state = x87_kill_deads(sim, block, state);
1922 /* beware, n might change */
1923 for (n = sched_first(block); !sched_is_end(n); n = next) {
1926 ir_op *op = get_irn_op(n);
1928 next = sched_next(n);
/* only ops registered via ASSOC* (see x87_init_simulator) need work */
1929 if (op->ops.generic == NULL)
1932 func = (sim_func)op->ops.generic;
1934 /* have work to do */
1935 if (state == bl_state->begin) {
1936 /* copy-on-write: never modify the shared begin state */
1937 state = x87_clone_state(sim, state);
1941 node_inserted = (*func)(state, n);
1944 sim_func might have added additional nodes after n,
1946 beware: n must not be changed by sim_func
1947 (i.e. removed from schedule) in this case
1950 next = sched_next(n);
1953 start_block = get_irg_start_block(get_irn_irg(block));
1955 /* check if the state must be shuffled */
1956 foreach_block_succ(block, edge) {
1957 ir_node *succ = get_edge_src_irn(edge);
1958 blk_state *succ_state;
1960 if(succ == start_block)
1963 succ_state = x87_get_bl_state(sim, succ);
1965 if (succ_state->begin == NULL) {
/* first predecessor to finish: our end state becomes the
   successor's begin state */
1966 succ_state->begin = state;
1967 waitq_put(sim->worklist, succ);
1969 /* There is already a begin state for the successor, bad.
1970 Do the necessary permutations.
1971 Note that critical edges are removed, so this is always possible. */
1972 x87_shuffle(sim, block, state, succ, succ_state->begin);
1975 bl_state->end = state;
1977 DB((dbg, LEVEL_2, "State at Block end:\n ")); DEBUG_ONLY(x87_dump_stack(state));
1978 } /* x87_simulate_block */
1981 * Create a new x87 simulator.
1983 * @param sim a simulator handle, will be initialized
1984 * @param irg the current graph
1985 * @param arch_env the architecture environment
1987 static void x87_init_simulator(x87_simulator *sim, ir_graph *irg,
1988 const arch_env_t *arch_env)
1990 obstack_init(&sim->obst);
1991 sim->blk_states = pmap_create();
1992 sim->arch_env = arch_env;
1993 sim->n_idx = get_irg_last_idx(irg);
/* liveness bitsets are indexed by node index, hence n_idx entries */
1994 sim->live = obstack_alloc(&sim->obst, sizeof(*sim->live) * sim->n_idx);
1996 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.x87");
1998 DB((dbg, LEVEL_1, "--------------------------------\n"
1999 "x87 Simulator started for %+F\n", irg));
2001 /* set the generic function pointer of instruction we must simulate */
2002 clear_irp_opcodes_generic_func();
/* the ASSOC* macros hook each sim_<op>() into the matching opcode's
   generic function slot, dispatched from x87_simulate_block */
2004 #define ASSOC(op) (op_ ## op)->ops.generic = (op_func)(sim_##op)
2005 #define ASSOC_IA32(op) (op_ia32_v ## op)->ops.generic = (op_func)(sim_##op)
2006 #define ASSOC_BE(op) (op_be_ ## op)->ops.generic = (op_func)(sim_##op)
2023 ASSOC_IA32(fCondJmp);
2034 } /* x87_init_simulator */
2037 * Destroy a x87 simulator.
2039 * @param sim the simulator handle
2041 static void x87_destroy_simulator(x87_simulator *sim) {
2042 pmap_destroy(sim->blk_states);
2043 obstack_free(&sim->obst, NULL);
2044 DB((dbg, LEVEL_1, "x87 Simulator stopped\n\n"));
2045 } /* x87_destroy_simulator */
/* Block-walker callback: precompute liveness information for one block. */
2047 static void update_liveness_walker(ir_node *block, void *data)
2049 x87_simulator *sim = data;
2050 update_liveness(sim, block);
2054 * Run a simulation and fix all virtual instructions for a graph.
2056 * @param env the architecture environment
2057 * @param irg the current graph
2059 * Needs a block-schedule.
2061 void x87_simulate_graph(const arch_env_t *arch_env, be_irg_t *birg) {
2062 ir_node *block, *start_block;
2063 blk_state *bl_state;
2065 ir_graph *irg = birg->irg;
2067 /* create the simulator */
2068 x87_init_simulator(&sim, irg, arch_env);
2070 start_block = get_irg_start_block(irg);
2071 bl_state = x87_get_bl_state(&sim, start_block);
2073 /* start with the empty state */
2074 bl_state->begin = empty;
/* worklist of blocks whose begin state is known but which are not
   yet simulated; seeded with the start block */
2077 sim.worklist = new_waitq();
2078 waitq_put(sim.worklist, start_block);
2080 be_invalidate_liveness(birg);
2081 be_assure_liveness(birg);
2084 /* Calculate the liveness for all nodes. We must precalculate this info,
2085 * because the simulator adds new nodes (possible before Phi nodes) which
2086 * would let a lazy calculation fail.
2087 * On the other hand we reduce the computation amount due to
2088 * precaching from O(n^2) to O(n) at the expense of O(n) cache memory.
2090 irg_block_walk_graph(irg, update_liveness_walker, NULL, &sim);
/* drain the worklist; x87_simulate_block enqueues successors */
2094 block = waitq_get(sim.worklist);
2095 x87_simulate_block(&sim, block);
2096 } while (! pdeq_empty(sim.worklist));
2099 del_waitq(sim.worklist);
2100 x87_destroy_simulator(&sim);
2101 } /* x87_simulate_graph */