2 * This file implements the x87 support and virtual to stack
3 * register translation for the ia32 backend.
5 * @author: Michael Beck
18 #include "iredges_t.h"
29 #include "../belive_t.h"
30 #include "../besched_t.h"
31 #include "../benode_t.h"
32 #include "ia32_new_nodes.h"
33 #include "gen_ia32_new_nodes.h"
34 #include "gen_ia32_regalloc_if.h"
39 /* first and second binop index */
46 /* the store val index */
47 #define STORE_VAL_IDX 2
/* maps a logical stack position into the circular st[] array; works because N_x87_REGS is a power of two */
49 #define MASK_TOS(x) ((x) & (N_x87_REGS - 1))
51 /** the debug handle */
52 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
54 /* Forward declaration. */
55 typedef struct _x87_simulator x87_simulator;
58 * An exchange template.
59 * Note that our virtual functions have the same inputs
60 * and attributes as the real ones, so we can simple exchange
62 * Further, x87 supports inverse instructions, so we can handle them.
64 typedef struct _exchange_tmpl {
65 ir_op *normal_op; /**< the normal one */
66 ir_op *reverse_op; /**< the reverse one if exists */
67 ir_op *normal_pop_op; /**< the normal one with tos pop */
68 ir_op *reverse_pop_op; /**< the reverse one with tos pop */
72 * An entry on the simulated x87 stack.
74 typedef struct _st_entry {
75 int reg_idx; /**< the virtual register index of this stack value */
76 ir_node *node; /**< the node that produced this value */
82 typedef struct _x87_state {
83 st_entry st[N_x87_REGS]; /**< the register stack */
84 int depth; /**< the current stack depth */
85 int tos; /**< position of the tos */
86 x87_simulator *sim; /**< The simulator. */
89 /** An empty state, used for blocks without fp instructions. */
90 static x87_state _empty = { { {0, NULL}, }, 0, 0 };
91 static x87_state *empty = (x87_state *)&_empty;
94 NO_NODE_ADDED = 0, /**< No node was added. */
95 NODE_ADDED = 1 /**< A node was added by the simulator in the schedule. */
99 * The type of an instruction simulator function.
101 * @param state the x87 state
102 * @param n the node to be simulated
104 * @return NODE_ADDED if a node was added AFTER n in schedule,
107 typedef int (*sim_func)(x87_state *state, ir_node *n);
110 * A block state: Every block has a x87 state at the beginning and at the end.
112 typedef struct _blk_state {
113 x87_state *begin; /**< state at the begin or NULL if not assigned */
114 x87_state *end; /**< state at the end or NULL if not assigned */
117 #define PTR_TO_BLKSTATE(p) ((blk_state *)(p))
119 /** liveness bitset for vfp registers. */
120 typedef unsigned char vfp_liveness;
125 struct _x87_simulator {
126 struct obstack obst; /**< An obstack for fast allocating. */
127 pmap *blk_states; /**< Map blocks to states. */
128 const arch_env_t *arch_env; /**< The architecture environment. */
129 be_lv_t *lv; /**< intrablock liveness. */
130 vfp_liveness *live; /**< Liveness information. */
131 unsigned n_idx; /**< The cached get_irg_last_idx() result. */
132 waitq *worklist; /**< Worklist of blocks that must be processed. */
136 * Returns the current stack depth.
138 * @param state the x87 state
140 * @return the x87 stack depth
142 static int x87_get_depth(const x87_state *state) {
	/* the depth field counts the values currently pushed on the simulated stack */
144 } /* x87_get_depth */
147 * Return the virtual register index at st(pos).
149 * @param state the x87 state
150 * @param pos a stack position
152 * @return the vfp register index that produced the value at st(pos)
154 static int x87_get_st_reg(const x87_state *state, int pos) {
155 assert(pos < state->depth);
156 return state->st[MASK_TOS(state->tos + pos)].reg_idx;
157 } /* x87_get_st_reg */
160 * Return the node at st(pos).
162 * @param state the x87 state
163 * @param pos a stack position
165 * @return the IR node that produced the value at st(pos)
167 static ir_node *x87_get_st_node(const x87_state *state, int pos) {
168 assert(pos < state->depth);
169 return state->st[MASK_TOS(state->tos + pos)].node;
170 } /* x87_get_st_node */
174 * Dump the stack for debugging.
176 * @param state the x87 state
178 static void x87_dump_stack(const x87_state *state) {
	/* walk from the deepest slot down to st(0) so "<-- TOS" marks the stack top */
181 	for (i = state->depth - 1; i >= 0; --i) {
182 		DB((dbg, LEVEL_2, "vf%d(%+F) ", x87_get_st_reg(state, i),
183 		    x87_get_st_node(state, i)));
185 	DB((dbg, LEVEL_2, "<-- TOS\n"));
186 } /* x87_dump_stack */
187 #endif /* DEBUG_libfirm */
190 * Set a virtual register to st(pos).
192 * @param state the x87 state
193 * @param reg_idx the vfp register index that should be set
194 * @param node the IR node that produces the value of the vfp register
195 * @param pos the stack position where the new value should be entered
197 static void x87_set_st(x87_state *state, int reg_idx, ir_node *node, int pos) {
198 	assert(0 < state->depth);
	/* pos is relative to the tos; MASK_TOS wraps it into the circular buffer */
199 	state->st[MASK_TOS(state->tos + pos)].reg_idx = reg_idx;
200 	state->st[MASK_TOS(state->tos + pos)].node    = node;
202 	DB((dbg, LEVEL_2, "After SET_REG: "));
203 	DEBUG_ONLY(x87_dump_stack(state));
207 * Set the tos virtual register.
209 * @param state the x87 state
210 * @param reg_idx the vfp register index that should be set
211 * @param node the IR node that produces the value of the vfp register
213 static void x87_set_tos(x87_state *state, int reg_idx, ir_node *node) {
	/* convenience wrapper: st(0) is the top-of-stack slot */
214 	x87_set_st(state, reg_idx, node, 0);
218 * Swap st(0) with st(pos).
220 * @param state the x87 state
221 * @param pos the stack position to change the tos with
223 static void x87_fxch(x87_state *state, int pos) {
225 	assert(pos < state->depth);
	/* swap the two st_entry records; this mirrors what a real fxch instruction does */
227 	entry = state->st[MASK_TOS(state->tos + pos)];
228 	state->st[MASK_TOS(state->tos + pos)] = state->st[MASK_TOS(state->tos)];
229 	state->st[MASK_TOS(state->tos)] = entry;
231 	DB((dbg, LEVEL_2, "After FXCH: ")); DEBUG_ONLY(x87_dump_stack(state));
235 * Convert a virtual register to the stack index.
237 * @param state the x87 state
238 * @param reg_idx the register vfp index
240 * @return the stack position where the register is stacked
241 * or -1 if the virtual register was not found
243 static int x87_on_stack(const x87_state *state, int reg_idx) {
244 	int i, tos = state->tos;
	/* linear scan is fine: the stack holds at most N_x87_REGS entries */
246 	for (i = 0; i < state->depth; ++i)
247 		if (state->st[MASK_TOS(tos + i)].reg_idx == reg_idx)
253 * Push a virtual Register onto the stack, double pushed allowed.
255 * @param state the x87 state
256 * @param reg_idx the register vfp index
257 * @param node the node that produces the value of the vfp register
259 static void x87_push_dbl(x87_state *state, int reg_idx, ir_node *node) {
260 	assert(state->depth < N_x87_REGS && "stack overrun");
	/* the x87 stack grows downwards: decrementing tos opens a new slot */
263 	state->tos = MASK_TOS(state->tos - 1);
264 	state->st[state->tos].reg_idx = reg_idx;
265 	state->st[state->tos].node    = node;
267 	DB((dbg, LEVEL_2, "After PUSH: ")); DEBUG_ONLY(x87_dump_stack(state));
271 * Push a virtual Register onto the stack, double pushes are NOT allowed.
273 * @param state the x87 state
274 * @param reg_idx the register vfp index
275 * @param node the node that produces the value of the vfp register
276 * @param dbl_push if != 0 double pushes are allowed
278 static void x87_push(x87_state *state, int reg_idx, ir_node *node) {
	/* each vfp register may live only once on the stack here; delegate to the dbl variant */
279 	assert(x87_on_stack(state, reg_idx) == -1 && "double push");
281 	x87_push_dbl(state, reg_idx, node);
285 * Pop a virtual Register from the stack.
287 * @param state the x87 state
289 static void x87_pop(x87_state *state) {
290 	assert(state->depth > 0 && "stack underrun");
	/* popping moves the tos upwards (inverse of push) */
293 	state->tos = MASK_TOS(state->tos + 1);
295 	DB((dbg, LEVEL_2, "After POP: ")); DEBUG_ONLY(x87_dump_stack(state));
299 * Returns the block state of a block.
301 * @param sim the x87 simulator handle
302 * @param block the current block
304 * @return the block state
306 static blk_state *x87_get_bl_state(x87_simulator *sim, ir_node *block) {
307 	pmap_entry *entry = pmap_find(sim->blk_states, block);
	/* lazily allocate an empty begin/end record on first lookup and cache it in the map */
310 		blk_state *bl_state = obstack_alloc(&sim->obst, sizeof(*bl_state));
311 		bl_state->begin = NULL;
312 		bl_state->end   = NULL;
314 		pmap_insert(sim->blk_states, block, bl_state);
318 	return PTR_TO_BLKSTATE(entry->value);
319 } /* x87_get_bl_state */
322 * Creates a new x87 state.
324 * @param sim the x87 simulator handle
326 * @return a new x87 state
328 static x87_state *x87_alloc_state(x87_simulator *sim) {
	/* states live on the simulator's obstack and are all released together */
329 	x87_state *res = obstack_alloc(&sim->obst, sizeof(*res));
333 } /* x87_alloc_state */
338 * @param sim the x87 simulator handle
339 * @param src the x87 state that will be cloned
341 * @return a cloned copy of the src state
343 static x87_state *x87_clone_state(x87_simulator *sim, const x87_state *src) {
344 	x87_state *res = x87_alloc_state(sim);
	/* a flat copy suffices: x87_state holds only plain values and pointers, no internal references */
346 	memcpy(res, src, sizeof(*res));
348 } /* x87_clone_state */
351 * Patch a virtual instruction into a x87 one and return
352 * the node representing the result value.
354 * @param n the IR node to patch
355 * @param op the x87 opcode to patch in
357 static ir_node *x87_patch_insn(ir_node *n, ir_op *op) {
358 	ir_mode *mode = get_irn_mode(n);
363 	if (mode == mode_T) {
364 		/* patch all Proj's */
365 		const ir_edge_t *edge;
367 		foreach_out_edge(n, edge) {
368 			ir_node *proj = get_edge_src_irn(edge);
370 			mode = get_irn_mode(proj);
371 			if (mode_is_float(mode)) {
				/* x87 registers hold extended precision, hence mode_E */
373 				set_irn_mode(proj, mode_E);
377 	} else if (mode_is_float(mode))
378 		set_irn_mode(n, mode_E);
380 } /* x87_patch_insn */
383 * Returns the first Proj of a mode_T node having a given mode.
385 * @param n the mode_T node
386 * @param m the desired mode of the Proj
387 * @return The first Proj of mode @p m found or NULL.
389 static ir_node *get_irn_Proj_for_mode(ir_node *n, ir_mode *m) {
390 	const ir_edge_t *edge;
392 	assert(get_irn_mode(n) == mode_T && "Need mode_T node");
	/* Proj order over out-edges is not fixed, so scan all users */
394 	foreach_out_edge(n, edge) {
395 		ir_node *proj = get_edge_src_irn(edge);
396 		if (get_irn_mode(proj) == m)
401 } /* get_irn_Proj_for_mode */
404 * Wrap the arch_* function here so we can check for errors.
406 static INLINE const arch_register_t *x87_get_irn_register(x87_simulator *sim, const ir_node *irn) {
407 	const arch_register_t *res;
409 	res = arch_get_irn_register(sim->arch_env, irn);
	/* the simulator must only ever see virtual fp (vfp) registers */
410 	assert(res->reg_class->regs == ia32_vfp_regs);
412 } /* x87_get_irn_register */
414 /* -------------- x87 perm --------------- */
417 * Creates a fxch for shuffle.
419 * @param state the x87 state
420 * @param pos parameter for fxch
421 * @param block the block were fxch is inserted
423 * Creates a new fxch node and reroute the user of the old node
426 * @return the fxch node
428 static ir_node *x87_fxch_shuffle(x87_state *state, int pos, ir_node *block) {
432 	fxch = new_rd_ia32_fxch(NULL, get_irn_irg(block), block, mode_E);
433 	attr = get_ia32_attr(fxch);
	/* annotate the real x87 registers: exchange st(pos) with st(0) */
434 	attr->x87[0] = &ia32_st_regs[pos];
435 	attr->x87[2] = &ia32_st_regs[0];
	/* keep the simulated state in sync with the emitted instruction */
439 	x87_fxch(state, pos);
441 } /* x87_fxch_shuffle */
444 * Calculate the necessary permutations to reach dst_state.
446 * These permutations are done with fxch instructions and placed
447 * at the end of the block.
449 * Note that critical edges are removed here, so we need only
450 * a shuffle if the current block has only one successor.
452 * @param sim the simulator handle
453 * @param block the current block
454 * @param state the current x87 stack state, might be modified
455 * @param dst_block the destination block
456 * @param dst_state destination state
460 static x87_state *x87_shuffle(x87_simulator *sim, ir_node *block, x87_state *state, ir_node *dst_block, const x87_state *dst_state) {
461 	int i, n_cycles, k, ri;
462 	unsigned cycles[4], all_mask;
463 	char cycle_idx[4][8];
464 	ir_node *fxch, *before, *after;
466 	assert(state->depth == dst_state->depth);
468 	/* Some mathematics here:
469 	   If we have a cycle of length n that includes the tos,
470 	   we need n-1 exchange operations.
471 	   We can always add the tos and restore it, so we need
472 	   n+1 exchange operations for a cycle not containing the tos.
473 	   So, the maximum of needed operations is for a cycle of 7
474 	   not including the tos == 8.
475 	   This is the same number of ops we would need for using stores,
476 	   so exchange is cheaper (we save the loads).
477 	   On the other hand, we might need an additional exchange
478 	   in the next block to bring one operand on top, so the
479 	   number of ops in the first case is identical.
480 	   Further, no more than 4 cycles can exists (4 x 2).
	/* decompose the permutation between state and dst_state into disjoint cycles */
482 	all_mask = (1 << (state->depth)) - 1;
484 	for (n_cycles = 0; all_mask; ++n_cycles) {
485 		int src_idx, dst_idx;
487 		/* find the first free slot */
488 		for (i = 0; i < state->depth; ++i) {
489 			if (all_mask & (1 << i)) {
490 				all_mask &= ~(1 << i);
492 				/* check if there are differences here */
493 				if (x87_get_st_reg(state, i) != x87_get_st_reg(dst_state, i))
499 			/* no more cycles found */
		/* follow the cycle starting at slot i until it closes */
504 		cycles[n_cycles] = (1 << i);
505 		cycle_idx[n_cycles][k++] = i;
506 		for (src_idx = i; ; src_idx = dst_idx) {
507 			dst_idx = x87_on_stack(dst_state, x87_get_st_reg(state, src_idx));
509 			if ((all_mask & (1 << dst_idx)) == 0)
512 			cycle_idx[n_cycles][k++] = dst_idx;
513 			cycles[n_cycles] |=  (1 << dst_idx);
514 			all_mask       &= ~(1 << dst_idx);
516 		cycle_idx[n_cycles][k] = -1;
520 		/* no permutation needed */
524 	/* Hmm: permutation needed */
525 	DB((dbg, LEVEL_2, "\n%+F needs permutation: from\n", block));
526 	DEBUG_ONLY(x87_dump_stack(state));
527 	DB((dbg, LEVEL_2, "                  to\n"));
528 	DEBUG_ONLY(x87_dump_stack(dst_state));
532 	DB((dbg, LEVEL_2, "Need %d cycles\n", n_cycles));
533 	for (ri = 0; ri < n_cycles; ++ri) {
534 		DB((dbg, LEVEL_2, " Ring %d:\n ", ri));
535 		for (k = 0; cycle_idx[ri][k] != -1; ++k)
536 			DB((dbg, LEVEL_2, " st%d ->", cycle_idx[ri][k]));
537 		DB((dbg, LEVEL_2, "\n"));
544 	 * Find the place node must be insert.
545 	 * We have only one successor block, so the last instruction should
548 	before = sched_last(block);
549 	assert(is_cfop(before));
551 	/* now do the permutations */
552 	for (ri = 0; ri < n_cycles; ++ri) {
553 		if ((cycles[ri] & 1) == 0) {
554 			/* this cycle does not include the tos */
			/* bring a cycle member to the tos first so plain fxch steps suffice */
555 			fxch = x87_fxch_shuffle(state, cycle_idx[ri][0], block);
557 				sched_add_after(after, fxch);
559 				sched_add_before(before, fxch);
562 		for (k = 1; cycle_idx[ri][k] != -1; ++k) {
563 			fxch = x87_fxch_shuffle(state, cycle_idx[ri][k], block);
565 				sched_add_after(after, fxch);
567 				sched_add_before(before, fxch);
570 		if ((cycles[ri] & 1) == 0) {
571 			/* this cycle does not include the tos */
			/* final exchange restores the element we borrowed as tos */
572 			fxch = x87_fxch_shuffle(state, cycle_idx[ri][0], block);
573 			sched_add_after(after, fxch);
580 * Create a fxch node before another node.
582 * @param state the x87 state
583 * @param n the node after the fxch
584 * @param pos exchange st(pos) with st(0)
585 * @param op_idx if >= 0, replace input op_idx of n with the fxch result
589 static ir_node *x87_create_fxch(x87_state *state, ir_node *n, int pos, int op_idx) {
592 	ir_graph *irg = get_irn_irg(n);
593 	ir_node *block = get_nodes_block(n);
	/* update the simulated stack before emitting the instruction */
595 	x87_fxch(state, pos);
597 	fxch = new_rd_ia32_fxch(NULL, irg, block, mode_E);
598 	attr = get_ia32_attr(fxch);
599 	attr->x87[0] = &ia32_st_regs[pos];
600 	attr->x87[2] = &ia32_st_regs[0];
604 	sched_add_before(n, fxch);
605 	DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fxch), attr->x87[0]->name, attr->x87[2]->name));
607 } /* x87_create_fxch */
610 * Create a fpush before node n.
612 * @param state the x87 state
613 * @param n the node after the fpush
614 * @param pos push st(pos) on stack
615 * @param op_idx replace input op_idx of n with the fpush result
617 static void x87_create_fpush(x87_state *state, ir_node *n, int pos, int op_idx) {
618 	ir_node *fpush, *pred = get_irn_n(n, op_idx);
620 	const arch_register_t *out = x87_get_irn_register(state->sim, pred);
	/* a push duplicates the value: the same vfp register then lives twice on the stack */
622 	x87_push_dbl(state, arch_register_get_index(out), pred);
624 	fpush = new_rd_ia32_fpush(NULL, get_irn_irg(n), get_nodes_block(n), mode_E);
625 	attr  = get_ia32_attr(fpush);
626 	attr->x87[0] = &ia32_st_regs[pos];
627 	attr->x87[2] = &ia32_st_regs[0];
630 	sched_add_before(n, fpush);
632 	DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fpush), attr->x87[0]->name, attr->x87[2]->name));
633 } /* x87_create_fpush */
636 * Create a fpop before node n.
638 * @param state the x87 state
639 * @param n the node after the fpop
640 * @param num pop 1 or 2 values
641 * @param pred node to use as predecessor of the fpop
643 * @return the fpop node
645 static ir_node *x87_create_fpop(x87_state *state, ir_node *n, int num, ir_node *pred) {
646 	ir_node *fpop = pred;
	/* fpop always discards st(0); all three annotation slots point at st(0) */
653 		fpop = new_rd_ia32_fpop(NULL, get_irn_irg(n), get_nodes_block(n), mode_E);
654 		attr = get_ia32_attr(fpop);
655 		attr->x87[0] = &ia32_st_regs[0];
656 		attr->x87[1] = &ia32_st_regs[0];
657 		attr->x87[2] = &ia32_st_regs[0];
660 		sched_add_before(n, fpop);
661 		DB((dbg, LEVEL_1, "<<< %s %s\n", get_irn_opname(fpop), attr->x87[0]->name));
667 } /* x87_create_fpop */
670 * Creates an fldz before node n
672 * @param state the x87 state
673 * @param n the node after the fldz
675 * @return the fldz node
677 static ir_node *x87_create_fldz(x87_state *state, ir_node *n, int regidx) {
678 	ir_graph *irg = get_irn_irg(n);
679 	ir_node *block = get_nodes_block(n);
682 	fldz = new_rd_ia32_fldz(NULL, irg, block, mode_E);
684 	sched_add_before(n, fldz);
685 	DB((dbg, LEVEL_1, "<<< %s\n", get_irn_opname(fldz)));
	/* the loaded constant becomes the new tos under the given virtual register index */
688 	x87_push(state, regidx, fldz);
693 /* --------------------------------- liveness ------------------------------------------ */
696 * The liveness transfer function.
697 * Updates a live set over a single step from a given node to its predecessor.
698 * Everything defined at the node is removed from the set, the uses of the node get inserted.
700 * @param sim The simulator handle.
701 * @param irn The node at which liveness should be computed.
702 * @param live The bitset of registers live before @p irn. This set gets modified by updating it to
703 * the registers live after irn.
705 * @return The live bitset.
707 static vfp_liveness vfp_liveness_transfer(x87_simulator *sim, ir_node *irn, vfp_liveness live)
710 	const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
711 	const arch_env_t *arch_env = sim->arch_env;
	/* a definition kills its register ... */
713 	if (arch_irn_consider_in_reg_alloc(arch_env, cls, irn)) {
714 		const arch_register_t *reg = x87_get_irn_register(sim, irn);
715 		live &= ~(1 << arch_register_get_index(reg));
	/* ... while every float use makes its register live */
718 	for (i = 0, n = get_irn_arity(irn); i < n; ++i) {
719 		ir_node *op = get_irn_n(irn, i);
721 		if (mode_is_float(get_irn_mode(op)) && arch_irn_consider_in_reg_alloc(arch_env, cls, op)) {
722 			const arch_register_t *reg = x87_get_irn_register(sim, op);
723 			live |= 1 << arch_register_get_index(reg);
727 } /* vfp_liveness_transfer */
730 * Put all live virtual registers at the end of a block into a bitset.
732 * @param sim the simulator handle
733 * @param lv the liveness information
734 * @param bl the block
736 * @return The live bitset at the end of this block
738 static vfp_liveness vfp_liveness_end_of_block(x87_simulator *sim, const ir_node *block)
741 	vfp_liveness live = 0;
742 	const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
743 	const arch_env_t *arch_env = sim->arch_env;
744 	const be_lv_t *lv = sim->lv;
	/* collect one bit per vfp register that is live-out of this block */
746 	be_lv_foreach(lv, block, be_lv_state_end, i) {
747 		const arch_register_t *reg;
748 		const ir_node *node = be_lv_get_irn(lv, block, i);
749 		if (!arch_irn_consider_in_reg_alloc(arch_env, cls, node))
752 		reg = x87_get_irn_register(sim, node);
753 		live |= 1 << arch_register_get_index(reg);
757 } /* vfp_liveness_end_of_block */
759 /** get the register mask from an arch_register */
760 #define REGMASK(reg) (1 << (arch_register_get_index(reg)))
763 * Return a bitset of argument registers which are live at the end of a node.
765 * @param sim the simulator handle
766 * @param pos the node
767 * @param kill kill mask for the output registers
769 * @return The live bitset.
771 static unsigned vfp_live_args_after(x87_simulator *sim, const ir_node *pos, unsigned kill)
773 	unsigned idx = get_irn_idx(pos);
775 	assert(idx < sim->n_idx);
	/* per-node liveness was cached by update_liveness(); mask out the killed outputs */
776 	return sim->live[idx] & ~kill;
777 } /* vfp_live_args_after */
780 * Calculate the liveness for a whole block and cache it.
782 * @param sim the simulator handle
783 * @param lv the liveness handle
784 * @param block the block
786 static void update_liveness(x87_simulator *sim, ir_node *block) {
787 	vfp_liveness live = vfp_liveness_end_of_block(sim, block);
791 	/* now iterate through the block backward and cache the results */
792 	sched_foreach_reverse(block, irn) {
793 		/* stop at the first Phi: this produces the live-in */
		/* record the liveness AFTER irn, then transfer over irn */
797 		idx = get_irn_idx(irn);
798 		sim->live[idx] = live;
800 		live = vfp_liveness_transfer(sim, irn, live);
	/* the block's own index caches the block live-in set */
802 	idx = get_irn_idx(block);
803 	sim->live[idx] = live;
804 } /* update_liveness */
807 * Returns true if a register is live in a set.
809 * @param reg_idx the vfp register index
810 * @param live a live bitset
812 #define is_vfp_live(reg_idx, live) ((live) & (1 << (reg_idx)))
816 * Dump liveness info.
818 * @param live the live bitset
820 static void vfp_dump_live(vfp_liveness live) {
823 	DB((dbg, LEVEL_2, "Live after: "));
	/* one bit per virtual fp register vf0..vf7 */
824 	for (i = 0; i < 8; ++i) {
825 		if (live & (1 << i)) {
826 			DB((dbg, LEVEL_2, "vf%d ", i));
829 	DB((dbg, LEVEL_2, "\n"));
830 } /* vfp_dump_live */
831 #endif /* DEBUG_libfirm */
833 /* --------------------------------- simulators ---------------------------------------- */
835 #define XCHG(a, b) do { int t = (a); (a) = (b); (b) = t; } while (0)
838 * Simulate a virtual binop.
840 * @param state the x87 state
841 * @param n the node that should be simulated (and patched)
842 * @param tmpl the template containing the 4 possible x87 opcodes
844 * @return NO_NODE_ADDED
846 static int sim_binop(x87_state *state, ir_node *n, const exchange_tmpl *tmpl) {
847 	int op2_idx = 0, op1_idx;
848 	int out_idx, do_pop = 0;
850 	ir_node *patched_insn;
852 	x87_simulator         *sim = state->sim;
853 	const arch_register_t *op1 = x87_get_irn_register(sim, get_irn_n(n, BINOP_IDX_1));
854 	const arch_register_t *op2 = x87_get_irn_register(sim, get_irn_n(n, BINOP_IDX_2));
855 	const arch_register_t *out = x87_get_irn_register(sim, n);
856 	int reg_index_1 = arch_register_get_index(op1);
857 	int reg_index_2 = arch_register_get_index(op2);
	/* liveness after n decides whether operands may be consumed (pop variants) */
858 	vfp_liveness           live = vfp_live_args_after(sim, n, REGMASK(out));
860 	DB((dbg, LEVEL_1, ">>> %+F %s, %s -> %s\n", n,
861 		arch_register_get_name(op1), arch_register_get_name(op2),
862 		arch_register_get_name(out)));
863 	DEBUG_ONLY(vfp_dump_live(live));
864 	DB((dbg, LEVEL_1, "Stack before: "));
865 	DEBUG_ONLY(x87_dump_stack(state));
867 	op1_idx = x87_on_stack(state, reg_index_1);
868 	assert(op1_idx >= 0);
870 	if (reg_index_2 != REG_VFP_NOREG) {
871 		/* second operand is a vfp register */
872 		op2_idx = x87_on_stack(state, reg_index_2);
873 		assert(op2_idx >= 0);
875 		if (is_vfp_live(arch_register_get_index(op2), live)) {
876 			/* Second operand is live. */
878 			if (is_vfp_live(arch_register_get_index(op1), live)) {
879 				/* Both operands are live: push the first one.
880 				   This works even for op1 == op2. */
881 				x87_create_fpush(state, n, op1_idx, BINOP_IDX_2);
882 				/* now do fxxx (tos=tos X op) */
886 				dst = tmpl->normal_op;
888 				/* Second live, first operand is dead here, bring it to tos. */
890 					x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
895 				/* now do fxxx (tos=tos X op) */
897 				dst = tmpl->normal_op;
900 			/* Second operand is dead. */
901 			if (is_vfp_live(arch_register_get_index(op1), live)) {
902 				/* First operand is live: bring second to tos. */
904 					x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
909 				/* now do fxxxr (tos = op X tos) */
911 				dst = tmpl->reverse_op;
913 				/* Both operands are dead here, pop them from the stack. */
916 						/* Both are identical and on tos, no pop needed. */
917 						/* here fxxx (tos = tos X tos) */
918 						dst = tmpl->normal_op;
921 						/* now do fxxxp (op = op X tos, pop) */
922 						dst = tmpl->normal_pop_op;
926 				} else if (op1_idx == 0) {
927 					assert(op1_idx != op2_idx);
928 					/* now do fxxxrp (op = tos X op, pop) */
929 					dst = tmpl->reverse_pop_op;
933 					/* Bring the second on top. */
934 					x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
935 					if (op1_idx == op2_idx) {
936 						/* Both are identical and on tos now, no pop needed. */
939 						/* use fxxx (tos = tos X tos) */
940 						dst = tmpl->normal_op;
943 						/* op2 is on tos now */
945 						/* use fxxxp (op = op X tos, pop) */
946 						dst = tmpl->normal_pop_op;
954 		/* second operand is an address mode */
955 		if (is_vfp_live(arch_register_get_index(op1), live)) {
956 			/* first operand is live: push it here */
957 			x87_create_fpush(state, n, op1_idx, BINOP_IDX_1);
959 			/* use fxxx (tos = tos X mem) */
960 			dst = tmpl->normal_op;
963 			/* first operand is dead: bring it to tos */
965 				x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
969 			/* use fxxx (tos = tos X mem) */
970 			dst = tmpl->normal_op;
	/* patch the virtual opcode to the chosen real x87 one and record the result slot */
975 	patched_insn = x87_patch_insn(n, dst);
976 	x87_set_st(state, arch_register_get_index(out), patched_insn, out_idx);
981 	/* patch the operation */
982 	attr = get_ia32_attr(n);
983 	attr->x87[0] = op1 = &ia32_st_regs[op1_idx];
984 	if (reg_index_2 != REG_VFP_NOREG) {
985 		attr->x87[1] = op2 = &ia32_st_regs[op2_idx];
987 	attr->x87[2] = out = &ia32_st_regs[out_idx];
989 	if (reg_index_2 != REG_VFP_NOREG) {
990 		DB((dbg, LEVEL_1, "<<< %s %s, %s -> %s\n", get_irn_opname(n),
991 			arch_register_get_name(op1), arch_register_get_name(op2),
992 			arch_register_get_name(out)));
994 		DB((dbg, LEVEL_1, "<<< %s %s, [AM] -> %s\n", get_irn_opname(n),
995 			arch_register_get_name(op1),
996 			arch_register_get_name(out)));
999 	return NO_NODE_ADDED;
1003 * Simulate a virtual Unop.
1005 * @param state the x87 state
1006 * @param n the node that should be simulated (and patched)
1007 * @param op the x87 opcode that will replace n's opcode
1009 * @return NO_NODE_ADDED
1011 static int sim_unop(x87_state *state, ir_node *n, ir_op *op) {
1012 	int op1_idx, out_idx;
1013 	x87_simulator         *sim = state->sim;
1014 	const arch_register_t *op1 = x87_get_irn_register(sim, get_irn_n(n, UNOP_IDX));
1015 	const arch_register_t *out = x87_get_irn_register(sim, n);
1017 	unsigned live = vfp_live_args_after(sim, n, REGMASK(out));
1019 	DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, out->name));
1020 	DEBUG_ONLY(vfp_dump_live(live));
1022 	op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1024 	if (is_vfp_live(arch_register_get_index(op1), live)) {
1025 		/* push the operand here */
1026 		x87_create_fpush(state, n, op1_idx, UNOP_IDX);
1030 		/* operand is dead, bring it to tos */
1032 			x87_create_fxch(state, n, op1_idx, UNOP_IDX);
	/* x87 unary instructions operate on and overwrite st(0) */
1037 	x87_set_tos(state, arch_register_get_index(out), x87_patch_insn(n, op));
1039 	attr = get_ia32_attr(n);
1040 	attr->x87[0] = op1 = &ia32_st_regs[0];
1041 	attr->x87[2] = out = &ia32_st_regs[0];
1042 	DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), out->name));
1044 	return NO_NODE_ADDED;
1048 * Simulate a virtual Load instruction.
1050 * @param state the x87 state
1051 * @param n the node that should be simulated (and patched)
1052 * @param op the x87 opcode that will replace n's opcode
1054 * @return NO_NODE_ADDED
1056 static int sim_load(x87_state *state, ir_node *n, ir_op *op) {
1057 	const arch_register_t *out = x87_get_irn_register(state->sim, n);
1060 	DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, arch_register_get_name(out)));
	/* a load pushes its result: it becomes the new st(0) */
1061 	x87_push(state, arch_register_get_index(out), x87_patch_insn(n, op));
1062 	assert(out == x87_get_irn_register(state->sim, n));
1063 	attr = get_ia32_attr(n);
1064 	attr->x87[2] = out = &ia32_st_regs[0];
1065 	DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), arch_register_get_name(out)));
1067 	return NO_NODE_ADDED;
1071 * Rewire all users of @p old_val to @new_val iff they are scheduled after @p store.
1073 * @param store The store
1074 * @param old_val The former value
1075 * @param new_val The new value
1077 static void collect_and_rewire_users(ir_node *store, ir_node *old_val, ir_node *new_val) {
1078 	const ir_edge_t *edge, *ne;
	/* use the safe edge iterator: set_irn_n() below modifies the out-edge list */
1080 	foreach_out_edge_safe(old_val, edge, ne) {
1081 		ir_node *user = get_edge_src_irn(edge);
1083 		if (! user || user == store)
1086 		/* if the user is scheduled after the store: rewire */
1087 		if (sched_is_scheduled(user) && sched_comes_after(store, user)) {
1089 			/* find the input of the user pointing to the old value */
1090 			for (i = get_irn_arity(user) - 1; i >= 0; i--) {
1091 				if (get_irn_n(user, i) == old_val)
1092 					set_irn_n(user, i, new_val);
1096 } /* collect_and_rewire_users */
1099 * Simulate a virtual Store.
1101 * @param state the x87 state
1102 * @param n the node that should be simulated (and patched)
1103 * @param op the x87 store opcode
1104 * @param op_p the x87 store and pop opcode
1106 static int sim_store(x87_state *state, ir_node *n, ir_op *op, ir_op *op_p) {
1107 	x87_simulator         *sim = state->sim;
1108 	ir_node               *val = get_irn_n(n, STORE_VAL_IDX);
1109 	const arch_register_t *op2 = x87_get_irn_register(sim, val);
1110 	unsigned              live = vfp_live_args_after(sim, n, 0);
1111 	int                   insn = NO_NODE_ADDED;
1113 	int op2_reg_idx, op2_idx, depth;
1114 	int live_after_node;
1117 	op2_reg_idx = arch_register_get_index(op2);
1118 	if (op2_reg_idx == REG_VFP_UKNWN) {
1119 		/* just take any value from stack */
1120 		if(state->depth > 0) {
1122 			DEBUG_ONLY(op2 = NULL);
1123 			live_after_node = 1;
1125 			/* produce a new value which we will consume immediately */
1126 			x87_create_fldz(state, n, op2_reg_idx);
1127 			live_after_node = 0;
1128 			op2_idx = x87_on_stack(state, op2_reg_idx);
1129 			assert(op2_idx >= 0);
1132 		op2_idx = x87_on_stack(state, op2_reg_idx);
1133 		live_after_node = is_vfp_live(arch_register_get_index(op2), live);
1134 		assert(op2_idx >= 0);
1135 		DB((dbg, LEVEL_1, ">>> %+F %s ->\n", n, arch_register_get_name(op2)));
1138 	mode  = get_ia32_ls_mode(n);
1139 	depth = x87_get_depth(state);
1141 	if (live_after_node) {
1143 		   Problem: fst doesn't support mode_E (spills), only fstp does
1145 		   - stack not full: push value and fstp
1146 		   - stack full: fstp value and load again
1148 		if (mode == mode_E) {
1149 			if (depth < N_x87_REGS) {
1150 				/* ok, we have a free register: push + fstp */
1151 				x87_create_fpush(state, n, op2_idx, STORE_VAL_IDX);
1153 				x87_patch_insn(n, op_p);
1155 				ir_node *vfld, *mem, *block, *rproj, *mproj;
1158 				/* stack full here: need fstp + load */
1160 				x87_patch_insn(n, op_p);
1162 				block = get_nodes_block(n);
1163 				irg   = get_irn_irg(n);
				/* build a reload of the just-stored value right after the store */
1164 				vfld  = new_rd_ia32_vfld(NULL, irg, block, get_irn_n(n, 0), get_irn_n(n, 1), new_rd_NoMem(irg));
1166 				/* copy all attributes */
1167 				set_ia32_frame_ent(vfld, get_ia32_frame_ent(n));
1168 				if (is_ia32_use_frame(n))
1169 					set_ia32_use_frame(vfld);
1170 				set_ia32_am_flavour(vfld, get_ia32_am_flavour(n));
1171 				set_ia32_op_type(vfld, ia32_am_Source);
1172 				add_ia32_am_offs_int(vfld, get_ia32_am_offs_int(n));
1173 				set_ia32_am_sc(vfld, get_ia32_am_sc(n));
1174 				set_ia32_ls_mode(vfld, get_ia32_ls_mode(n));
1176 				rproj = new_r_Proj(irg, block, vfld, get_ia32_ls_mode(vfld), pn_ia32_vfld_res);
1177 				mproj = new_r_Proj(irg, block, vfld, mode_M, pn_ia32_vfld_M);
1178 				mem   = get_irn_Proj_for_mode(n, mode_M);
1180 				assert(mem && "Store memory not found");
1182 				arch_set_irn_register(sim->arch_env, rproj, op2);
1184 				/* reroute all former users of the store memory to the load memory */
1185 				edges_reroute(mem, mproj, irg);
1186 				/* set the memory input of the load to the store memory */
1187 				set_irn_n(vfld, 2, mem);
1189 				sched_add_after(n, vfld);
1190 				sched_add_after(vfld, rproj);
1192 				/* rewire all users, scheduled after the store, to the loaded value */
1193 				collect_and_rewire_users(n, val, rproj);
1198 			/* we can only store the tos to memory */
1200 				x87_create_fxch(state, n, op2_idx, STORE_VAL_IDX);
1202 			/* mode != mode_E -> use normal fst */
1203 			x87_patch_insn(n, op);
1206 		/* we can only store the tos to memory */
1208 			x87_create_fxch(state, n, op2_idx, STORE_VAL_IDX);
		/* value is dead after the store: use the pop variant to drop it from the stack */
1211 		x87_patch_insn(n, op_p);
1214 	attr = get_ia32_attr(n);
1215 	attr->x87[1] = op2 = &ia32_st_regs[0];
1216 	DB((dbg, LEVEL_1, "<<< %s %s ->\n", get_irn_opname(n), arch_register_get_name(op2)));
1222 * Simulate a virtual Phi.
1223 * Just for cosmetic reasons change the mode of Phi nodes to mode_E.
1225 * @param state the x87 state
1226 * @param n the node that should be simulated (and patched)
1227 * @param arch_env the architecture environment
1229 * @return NO_NODE_ADDED
1231 static int sim_Phi(x87_state *state, ir_node *n, const arch_env_t *arch_env) {
1232 	ir_mode *mode = get_irn_mode(n);
	/* x87 values travel in extended precision, hence mode_E */
1234 	if (mode_is_float(mode))
1235 		set_irn_mode(n, mode_E);
1237 	return NO_NODE_ADDED;
/* Generator macros: instantiate a sim_<op>() wrapper per virtual opcode that
 * forwards to the generic binop/unop/load/store simulators. */
1240 #define _GEN_BINOP(op, rev) \
1241 static int sim_##op(x87_state *state, ir_node *n) { \
1242 	exchange_tmpl tmpl = { op_ia32_##op, op_ia32_##rev, op_ia32_##op##p, op_ia32_##rev##p }; \
1243 	return sim_binop(state, n, &tmpl); \
1246 #define GEN_BINOP(op)   _GEN_BINOP(op, op)
1247 #define GEN_BINOPR(op)	_GEN_BINOP(op, op##r)
1249 #define GEN_LOAD2(op, nop) \
1250 static int sim_##op(x87_state *state, ir_node *n) { \
1251 	return sim_load(state, n, op_ia32_##nop); \
1254 #define GEN_LOAD(op)	GEN_LOAD2(op, op)
1256 #define GEN_UNOP(op) \
1257 static int sim_##op(x87_state *state, ir_node *n) { \
1258 	return sim_unop(state, n, op_ia32_##op); \
1261 #define GEN_STORE(op) \
1262 static int sim_##op(x87_state *state, ir_node *n) { \
1263 	return sim_store(state, n, op_ia32_##op, op_ia32_##op##p); \
1288 * Simulate a fCondJmp.
1290 * @param state the x87 state
1291 * @param n the node that should be simulated (and patched)
1293 * @return NO_NODE_ADDED
 *
 * Strategy: select one of the fcom/fcomr compare-and-jump variants
 * (optionally with one or two pops folded in) according to where the
 * two operands sit on the simulated fp stack and whether they remain
 * live after the compare. fxch nodes are inserted before n to bring
 * operands into st(0)/st(1) as required; dead operands are popped by
 * the compare instruction itself.
1295 static int sim_fCondJmp(x87_state *state, ir_node *n) {
1301 x87_simulator *sim = state->sim;
1302 const arch_register_t *op1 = x87_get_irn_register(sim, get_irn_n(n, BINOP_IDX_1));
1303 const arch_register_t *op2 = x87_get_irn_register(sim, get_irn_n(n, BINOP_IDX_2));
1304 int reg_index_1 = arch_register_get_index(op1);
1305 int reg_index_2 = arch_register_get_index(op2);
1306 unsigned live = vfp_live_args_after(sim, n, 0);
1308 DB((dbg, LEVEL_1, ">>> %+F %s, %s\n", n,
1309 arch_register_get_name(op1), arch_register_get_name(op2)));
1310 DEBUG_ONLY(vfp_dump_live(live));
1311 DB((dbg, LEVEL_1, "Stack before: "));
1312 DEBUG_ONLY(x87_dump_stack(state));
/* the first operand must already live on the simulated stack */
1314 op1_idx = x87_on_stack(state, reg_index_1);
1315 assert(op1_idx >= 0);
1317 /* BEWARE: check for comp a,a cases, they might happen */
1318 if (reg_index_2 != REG_VFP_NOREG) {
1319 /* second operand is a vfp register */
1320 op2_idx = x87_on_stack(state, reg_index_2);
1321 assert(op2_idx >= 0);
1323 if (is_vfp_live(arch_register_get_index(op2), live)) {
1324 /* second operand is live */
1326 if (is_vfp_live(arch_register_get_index(op1), live)) {
1327 /* both operands are live: no pops, only choose direction */
1330 /* res = tos X op */
1331 dst = op_ia32_fcomJmp;
1332 } else if (op2_idx == 0) {
1333 /* res = op X tos */
1334 dst = op_ia32_fcomrJmp;
1336 /* bring the first one to tos */
1337 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1341 /* res = tos X op */
1342 dst = op_ia32_fcomJmp;
1345 /* second live, first operand is dead here, bring it to tos.
1346 This means further, op1_idx != op2_idx. */
1347 assert(op1_idx != op2_idx);
1349 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1354 /* res = tos X op, pop */
1355 dst = op_ia32_fcompJmp;
1359 /* second operand is dead */
1360 if (is_vfp_live(arch_register_get_index(op1), live)) {
1361 /* first operand is live: bring second to tos.
1362 This means further, op1_idx != op2_idx. */
1363 assert(op1_idx != op2_idx);
1365 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1370 /* res = op X tos, pop */
1371 dst = op_ia32_fcomrpJmp;
1374 /* both operands are dead here, check first for identity. */
1375 if (op1_idx == op2_idx) {
1376 /* identical, only one pop needed */
1378 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1382 /* res = tos X op, pop */
1383 dst = op_ia32_fcompJmp;
1386 /* different, move them to st and st(1) and pop both.
1387 The tricky part is to get one into st(1).*/
1388 else if (op2_idx == 1) {
1389 /* good, second operand is already in the right place, move the first */
1391 /* bring the first on top */
1392 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1393 assert(op2_idx != 0);
1396 /* res = tos X op, pop, pop */
1397 dst = op_ia32_fcomppJmp;
1399 } else if (op1_idx == 1) {
1400 /* good, first operand is already in the right place, move the second */
1402 /* bring the first on top */
1403 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1404 assert(op1_idx != 0);
1407 dst = op_ia32_fcomrppJmp;
1410 /* if one is already the TOS, we need two fxch */
1412 /* first one is TOS, move to st(1) */
1413 x87_create_fxch(state, n, 1, BINOP_IDX_1);
1414 assert(op2_idx != 1);
1416 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1418 /* res = op X tos, pop, pop */
1419 dst = op_ia32_fcomrppJmp;
1421 } else if (op2_idx == 0) {
1422 /* second one is TOS, move to st(1) */
1423 x87_create_fxch(state, n, 1, BINOP_IDX_2);
1424 assert(op1_idx != 1);
1426 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1428 /* res = tos X op, pop, pop */
1429 dst = op_ia32_fcomppJmp;
1432 /* none of them is either TOS or st(1), 3 fxch needed */
1433 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1434 assert(op1_idx != 0);
1435 x87_create_fxch(state, n, 1, BINOP_IDX_2);
1437 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1439 /* res = tos X op, pop, pop */
1440 dst = op_ia32_fcomppJmp;
1447 /* second operand is an address mode */
1448 if (is_vfp_live(arch_register_get_index(op1), live)) {
1449 /* first operand is live: bring it to TOS */
1451 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1454 dst = op_ia32_fcomJmp;
1456 /* first operand is dead: bring it to tos */
1458 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1461 dst = op_ia32_fcompJmp;
/* replace the virtual opcode by the selected concrete one; at most
   two operands can be popped by the compare (see assert below) */
1466 x87_patch_insn(n, dst);
1467 assert(pop_cnt < 3);
1473 /* patch the operation: write the concrete st-registers into the attr */
1474 attr = get_ia32_attr(n);
1475 op1 = &ia32_st_regs[op1_idx];
1478 op2 = &ia32_st_regs[op2_idx];
/* a compare produces no fp result register */
1481 attr->x87[2] = NULL;
1484 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(n),
1485 arch_register_get_name(op1), arch_register_get_name(op2)));
1487 DB((dbg, LEVEL_1, "<<< %s %s, [AM]\n", get_irn_opname(n),
1488 arch_register_get_name(op1)));
1490 return NO_NODE_ADDED;
1491 } /* sim_fCondJmp */
1494 * Create a copy of a node. Recreate the node if it's a constant.
1496 * @param state the x87 state
1497 * @param n the node to be copied
1499 * @return the copy of n
 *
 * Constants (fldz, fld1, fldpi, ...) are rematerialized with a fresh
 * load-constant node instead of being duplicated on the stack; any
 * other value is duplicated with an fpushCopy. In both cases the
 * result is pushed onto the simulated stack and lands in st(0).
1501 static ir_node *create_Copy(x87_state *state, ir_node *n) {
1502 x87_simulator *sim = state->sim;
1503 ir_graph *irg = get_irn_irg(n);
1504 dbg_info *n_dbg = get_irn_dbg_info(n);
1505 ir_mode *mode = get_irn_mode(n);
1506 ir_node *block = get_nodes_block(n);
1507 ir_node *pred = get_irn_n(n, 0);
/* constructor to use when the copied value is a known constant */
1508 ir_node *(*cnstr)(dbg_info *, ir_graph *, ir_node *, ir_mode *) = NULL;
1510 const arch_register_t *out;
1511 const arch_register_t *op1;
1514 /* Do not copy constants, recreate them. */
1515 switch (get_ia32_irn_opcode(pred)) {
1516 case iro_ia32_Unknown_VFP:
1518 cnstr = new_rd_ia32_fldz;
1521 cnstr = new_rd_ia32_fld1;
1523 case iro_ia32_fldpi:
1524 cnstr = new_rd_ia32_fldpi;
1526 case iro_ia32_fldl2e:
1527 cnstr = new_rd_ia32_fldl2e;
1529 case iro_ia32_fldl2t:
1530 cnstr = new_rd_ia32_fldl2t;
1532 case iro_ia32_fldlg2:
1533 cnstr = new_rd_ia32_fldlg2;
1535 case iro_ia32_fldln2:
1536 cnstr = new_rd_ia32_fldln2;
1542 out = x87_get_irn_register(sim, n);
1543 op1 = x87_get_irn_register(sim, pred);
1545 if (cnstr != NULL) {
1546 /* copy a constant: emit a fresh load-constant instead */
1547 res = (*cnstr)(n_dbg, irg, block, mode);
1549 x87_push(state, arch_register_get_index(out), res);
/* result is produced in st(0) */
1551 attr = get_ia32_attr(res);
1552 attr->x87[2] = &ia32_st_regs[0];
/* general case: duplicate the operand with an fpushCopy */
1554 int op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1556 res = new_rd_ia32_fpushCopy(n_dbg, irg, block, pred, mode);
1558 x87_push(state, arch_register_get_index(out), res);
/* source register and result (st(0)) of the push-copy */
1560 attr = get_ia32_attr(res);
1561 attr->x87[0] = &ia32_st_regs[op1_idx];
1562 attr->x87[2] = &ia32_st_regs[0];
1564 arch_set_irn_register(sim->arch_env, res, out);
1570 * Simulate a be_Copy.
1572 * @param state the x87 state
1573 * @param n the node that should be simulated (and patched)
1575 * @return NO_NODE_ADDED
 *
 * Three cases: (1) source still live -> real duplication via
 * create_Copy; (2) source dead but destination slot occupied ->
 * kill/rename via a patched Pop; (3) otherwise the copy is purely
 * virtual and only the simulated stack is renamed.
1577 static int sim_Copy(x87_state *state, ir_node *n) {
1580 const arch_register_t *out;
1581 const arch_register_t *op1;
1582 ir_node *node, *next;
1584 int op1_idx, out_idx;
1587 ir_mode *mode = get_irn_mode(n);
/* only fp copies concern the x87 simulator */
1589 if (!mode_is_float(mode))
1593 pred = get_irn_n(n, 0);
1594 out = x87_get_irn_register(sim, n);
1595 op1 = x87_get_irn_register(sim, pred);
1596 live = vfp_live_args_after(sim, n, REGMASK(out));
1598 DB((dbg, LEVEL_1, ">>> %+F %s -> %s\n", n,
1599 arch_register_get_name(op1), arch_register_get_name(out)));
1600 DEBUG_ONLY(vfp_dump_live(live));
1602 /* handle the infamous unknown value */
1603 if (arch_register_get_index(op1) == REG_VFP_UKNWN) {
1604 /* Operand is still live, a real copy. We need here an fpush that can
1605 hold a register, so use the fpushCopy or recreate constants */
1606 node = create_Copy(state, n);
/* an unknown source is rematerialized as fldz (see create_Copy) */
1608 assert(is_ia32_fldz(node));
1609 next = sched_next(n);
1612 sched_add_before(next, node);
1614 DB((dbg, LEVEL_1, "<<< %+F %s -> %s\n", node, op1->name,
1615 arch_get_irn_register(sim->arch_env, node)->name));
1616 return NO_NODE_ADDED;
1619 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1621 if (is_vfp_live(arch_register_get_index(op1), live)) {
1622 /* Operand is still live, a real copy. We need here an fpush that can
1623 hold a register, so use the fpushCopy or recreate constants */
1624 node = create_Copy(state, n);
1626 next = sched_next(n);
1629 sched_add_before(next, node);
1630 DB((dbg, LEVEL_1, "<<< %+F %s -> %s\n", node, op1->name,
1631 arch_get_irn_register(sim->arch_env, node)->name));
/* operand is dead: either kill it in place or just rename */
1633 out_idx = x87_on_stack(state, arch_register_get_index(out));
1635 if (out_idx >= 0 && out_idx != op1_idx) {
1636 /* Matze: out already on stack? how can this happen? */
1639 /* op1 must be killed and placed where out is */
1641 /* best case, simple remove and rename */
1642 x87_patch_insn(n, op_ia32_Pop);
1643 attr = get_ia32_attr(n);
1644 attr->x87[0] = op1 = &ia32_st_regs[0];
1647 x87_set_st(state, arch_register_get_index(out), n, op1_idx - 1);
1649 /* move op1 to tos, store and pop it */
1651 x87_create_fxch(state, n, op1_idx, 0);
1654 x87_patch_insn(n, op_ia32_Pop);
1655 attr = get_ia32_attr(n);
1656 attr->x87[0] = op1 = &ia32_st_regs[out_idx];
1659 x87_set_st(state, arch_register_get_index(out), n, out_idx - 1);
1661 DB((dbg, LEVEL_1, "<<< %+F %s\n", n, op1->name));
1663 /* just a virtual copy: rename the stack slot to out */
1664 x87_set_st(state, arch_register_get_index(out), get_unop_op(n), op1_idx);
1665 /* don't remove the node to keep the verifier quiet :),
1666 the emitter won't emit any code for the node */
1669 DB((dbg, LEVEL_1, "<<< KILLED %s\n", get_irn_opname(n)));
1670 exchange(n, get_unop_op(n));
1674 return NO_NODE_ADDED;
1678 * Returns the result proj of the call, or NULL if the result is not used
 *
 * A result whose only consumer is a be_Keep counts as unused: the Keep
 * exists solely to keep the node alive for the register allocator.
1680 static ir_node *get_call_result_proj(ir_node *call) {
1681 const ir_edge_t *edge;
1682 ir_node *resproj = NULL;
1684 /* search the result proj */
1685 foreach_out_edge(call, edge) {
1686 ir_node *proj = get_edge_src_irn(edge);
1687 long pn = get_Proj_proj(proj);
1689 if (pn == pn_be_Call_first_res) {
1694 if (resproj == NULL) {
1698 /* the result proj is connected to a Keep and maybe other nodes */
1699 foreach_out_edge(resproj, edge) {
1700 ir_node *pred = get_edge_src_irn(edge);
/* any non-Keep user means the result is really used */
1701 if (!be_is_Keep(pred)) {
1706 /* only be_Keep found, so result is not used */
1708 } /* get_call_result_proj */
1711 * Simulate a be_Call.
1713 * @param state the x87 state
1714 * @param n the node that should be simulated
1715 * @param arch_env the architecture environment
1717 * @return NO_NODE_ADDED
1719 static int sim_Call(x87_state *state, ir_node *n, const arch_env_t *arch_env) {
1720 ir_type *call_tp = be_Call_get_type(n);
1724 const arch_register_t *reg;
1726 /* at the begin of a call the x87 state should be empty */
1727 assert(state->depth == 0 && "stack not empty before call");
/* no result at all: nothing to model */
1729 if (get_method_n_ress(call_tp) <= 0)
1730 return NO_NODE_ADDED;
1733 * If the called function returns a float, it is returned in st(0).
1734 * This even happens if the return value is NOT used.
1735 * Moreover, only one return result is supported.
1737 res_type = get_method_res_type(call_tp, 0);
1738 mode = get_type_mode(res_type);
1740 if (mode == NULL || !mode_is_float(mode))
1741 return NO_NODE_ADDED;
1743 resproj = get_call_result_proj(n);
1744 if (resproj == NULL)
1745 return NO_NODE_ADDED;
/* model the callee's push of the return value into st(0) */
1747 reg = x87_get_irn_register(state->sim, resproj);
1748 x87_push(state, arch_register_get_index(reg), resproj);
1750 return NO_NODE_ADDED;
1754 * Simulate a be_Spill.
1756 * @param state the x87 state
1757 * @param n the node that should be simulated (and patched)
1759 * Should not happen, spills are lowered before the x87 simulator sees them.
1761 static int sim_Spill(x87_state *state, ir_node *n) {
/* defensive: treat a stray spill like a store if asserts are disabled */
1762 assert(0 && "Spill not lowered");
1763 return sim_fst(state, n);
1767 * Simulate a be_Reload.
1769 * @param state the x87 state
1770 * @param n the node that should be simulated (and patched)
1772 * Should not happen, reloads are lowered before the x87 simulator sees them.
1774 static int sim_Reload(x87_state *state, ir_node *n) {
/* defensive: treat a stray reload like a load if asserts are disabled */
1775 assert(0 && "Reload not lowered");
1776 return sim_fld(state, n);
1780 * Simulate a be_Return.
1782 * @param state the x87 state
1783 * @param n the node that should be simulated (and patched)
1785 * @return NO_NODE_ADDED
1787 static int sim_Return(x87_state *state, ir_node *n) {
1788 int n_res = be_Return_get_n_rets(n);
1789 int i, n_float_res = 0;
1791 /* only floating point return values must reside on the stack */
1792 for (i = 0; i < n_res; ++i) {
1793 ir_node *res = get_irn_n(n, be_pos_Return_val + i);
1795 if (mode_is_float(get_irn_mode(res)))
/* everything left on the stack at a return must be a float result */
1798 assert(x87_get_depth(state) == n_float_res);
1800 /* pop them virtually; the caller consumes st(0)..st(n-1) */
1801 for (i = n_float_res - 1; i >= 0; --i)
1804 return NO_NODE_ADDED;
/** An (input register, output register) pair used when handling Perms. */
1807 typedef struct _perm_data_t {
1808 const arch_register_t *in;
1809 const arch_register_t *out;
1813 * Simulate a be_Perm.
1815 * @param state the x87 state
1816 * @param irn the node that should be simulated (and patched)
1818 * @return NO_NODE_ADDED
1820 static int sim_Perm(x87_state *state, ir_node *irn) {
1822 x87_simulator *sim = state->sim;
1823 ir_node *pred = get_irn_n(irn, 0);
1825 const ir_edge_t *edge;
1827 /* handle only floating point Perms */
1828 if (! mode_is_float(get_irn_mode(pred)))
1829 return NO_NODE_ADDED;
1831 DB((dbg, LEVEL_1, ">>> %+F\n", irn));
1833 /* Perm is a pure virtual instruction on x87.
1834 All inputs must be on the FPU stack and are pairwise
1835 different from each other.
1836 So, all we need to do is to permute the stack state. */
1837 n = get_irn_arity(irn);
1838 NEW_ARR_A(int, stack_pos, n);
1840 /* collect old stack positions */
1841 for (i = 0; i < n; ++i) {
1842 const arch_register_t *inreg = x87_get_irn_register(sim, get_irn_n(irn, i));
1843 int idx = x87_on_stack(state, arch_register_get_index(inreg));
1845 assert(idx >= 0 && "Perm argument not on x87 stack");
1849 /* now do the permutation: rebind each output Proj to the stack slot
   its corresponding input occupied */
1850 foreach_out_edge(irn, edge) {
1851 ir_node *proj = get_edge_src_irn(edge);
1852 const arch_register_t *out = x87_get_irn_register(sim, proj);
1853 long num = get_Proj_proj(proj);
1855 assert(0 <= num && num < n && "More Proj's than Perm inputs");
1856 x87_set_st(state, arch_register_get_index(out), proj, stack_pos[(unsigned)num]);
1858 DB((dbg, LEVEL_1, "<<< %+F\n", irn));
1860 return NO_NODE_ADDED;
1864 * Kill any dead registers at block start by popping them from the stack.
1866 * @param sim the simulator handle
1867 * @param block the current block
1868 * @param start_state the x87 state at the begin of the block
1870 * @return the x87 state after dead registers are killed
 *
 * Builds a bitmask of dead stack slots, then pops them off via fpop
 * nodes inserted before the first scheduled instruction of the block.
 * Since the x87 can only pop from the TOS, fxch nodes are inserted to
 * rotate dead values to the top first.
1872 static x87_state *x87_kill_deads(x87_simulator *sim, ir_node *block, x87_state *start_state) {
1873 x87_state *state = start_state;
1874 ir_node *first_insn = sched_first(block);
1875 ir_node *keep = NULL;
1876 unsigned live = vfp_live_args_after(sim, block, 0);
1878 int i, depth, num_pop;
/* bit i of kill_mask set <=> stack slot st(i) holds a dead value */
1881 depth = x87_get_depth(state);
1882 for (i = depth - 1; i >= 0; --i) {
1883 int reg = x87_get_st_reg(state, i);
1885 if (! is_vfp_live(reg, live))
1886 kill_mask |= (1 << i);
1890 /* create a new state, will be changed */
1891 state = x87_clone_state(sim, state);
1893 DB((dbg, LEVEL_1, "Killing deads:\n"));
1894 DEBUG_ONLY(vfp_dump_live(live));
1895 DEBUG_ONLY(x87_dump_stack(state));
1897 /* now kill registers */
1899 /* we can only kill from TOS, so bring them up */
1900 if (! (kill_mask & 1)) {
1901 /* search from behind, because we can do a double-pop */
1902 for (i = depth - 1; i >= 0; --i) {
1903 if (kill_mask & (1 << i)) {
1904 kill_mask &= ~(1 << i);
/* mark slot i as free and rotate the dead value up to the TOS */
1911 x87_set_st(state, -1, keep, i);
1912 keep = x87_create_fxch(state, first_insn, i, -1);
1915 keep = x87_get_st_node(state, 0);
1917 if ((kill_mask & 3) == 3) {
1918 /* we can do a double-pop */
1922 /* only a single pop */
1927 kill_mask >>= num_pop;
1928 keep = x87_create_fpop(state, first_insn, num_pop, keep);
1933 } /* x87_kill_deads */
1936 * Run a simulation and fix all virtual instructions for a block.
1938 * @param sim the simulator handle
1939 * @param block the current block
 *
 * Walks the block's schedule, dispatching each node to the sim_*
 * handler registered in its op's generic function pointer. The block's
 * end state is propagated to the successors: either adopted as their
 * begin state (and the successor queued for simulation), or matched
 * against an existing begin state by inserting shuffle code.
1941 static void x87_simulate_block(x87_simulator *sim, ir_node *block) {
1943 blk_state *bl_state = x87_get_bl_state(sim, block);
1944 x87_state *state = bl_state->begin;
1945 const ir_edge_t *edge;
1946 ir_node *start_block;
1948 assert(state != NULL);
1949 /* already processed? */
1950 if (bl_state->end != NULL)
1953 DB((dbg, LEVEL_1, "Simulate %+F\n", block));
1954 DB((dbg, LEVEL_2, "State at Block begin:\n "));
1955 DEBUG_ONLY(x87_dump_stack(state));
1957 /* at block begin, kill all dead registers */
1958 state = x87_kill_deads(sim, block, state);
1960 /* beware, n might change */
1961 for (n = sched_first(block); !sched_is_end(n); n = next) {
1964 ir_op *op = get_irn_op(n);
/* fetch the successor now: the handler may insert nodes after n */
1966 next = sched_next(n);
/* ops without a registered handler need no simulation */
1967 if (op->ops.generic == NULL)
1970 func = (sim_func)op->ops.generic;
1972 /* have work to do */
1973 if (state == bl_state->begin) {
1974 /* create a new state, will be changed */
1975 state = x87_clone_state(sim, state);
1979 node_inserted = (*func)(state, n);
1982 sim_func might have added an additional node after n,
1984 beware: n must not be changed by sim_func
1985 (i.e. removed from schedule) in this case
1987 if (node_inserted != NO_NODE_ADDED)
1988 next = sched_next(n);
1991 start_block = get_irg_start_block(get_irn_irg(block));
1993 /* check if the state must be shuffled */
1994 foreach_block_succ(block, edge) {
1995 ir_node *succ = get_edge_src_irn(edge);
1996 blk_state *succ_state;
/* the start block has no meaningful predecessor state */
1998 if (succ == start_block)
2001 succ_state = x87_get_bl_state(sim, succ);
2003 if (succ_state->begin == NULL) {
2004 succ_state->begin = state;
2005 waitq_put(sim->worklist, succ);
2007 /* There is already a begin state for the successor, bad.
2008 Do the necessary permutations.
2009 Note that critical edges are removed, so this is always possible:
2010 If the successor has more than one possible input, then it must
2013 x87_shuffle(sim, block, state, succ, succ_state->begin);
2016 bl_state->end = state;
2018 DB((dbg, LEVEL_2, "State at Block end:\n ")); DEBUG_ONLY(x87_dump_stack(state));
2019 } /* x87_simulate_block */
2022 * Create a new x87 simulator.
2024 * @param sim a simulator handle, will be initialized
2025 * @param irg the current graph
2026 * @param arch_env the architecture environment
2028 static void x87_init_simulator(x87_simulator *sim, ir_graph *irg,
2029 const arch_env_t *arch_env)
2031 obstack_init(&sim->obst);
2032 sim->blk_states = pmap_create();
2033 sim->arch_env = arch_env;
/* liveness cache: one entry per node index in the graph */
2034 sim->n_idx = get_irg_last_idx(irg);
2035 sim->live = obstack_alloc(&sim->obst, sizeof(*sim->live) * sim->n_idx);
2037 DB((dbg, LEVEL_1, "--------------------------------\n"
2038 "x87 Simulator started for %+F\n", irg));
2040 /* set the generic function pointer of instructions we must simulate */
2041 clear_irp_opcodes_generic_func();
/* ASSOC* hook a sim_* handler into the op's generic function pointer,
   for plain firm ops, virtual ia32 ops, and backend (be_) ops. */
2043 #define ASSOC(op) (op_ ## op)->ops.generic = (op_func)(sim_##op)
2044 #define ASSOC_IA32(op) (op_ia32_v ## op)->ops.generic = (op_func)(sim_##op)
2045 #define ASSOC_BE(op) (op_be_ ## op)->ops.generic = (op_func)(sim_##op)
2062 ASSOC_IA32(fCondJmp);
2073 } /* x87_init_simulator */
2076 * Destroy a x87 simulator.
2078 * @param sim the simulator handle
 *
 * Frees the block-state map and all obstack-allocated data (states,
 * liveness cache) in one go.
2080 static void x87_destroy_simulator(x87_simulator *sim) {
2081 pmap_destroy(sim->blk_states);
2082 obstack_free(&sim->obst, NULL);
2083 DB((dbg, LEVEL_1, "x87 Simulator stopped\n\n"));
2084 } /* x87_destroy_simulator */
2087 * Pre-block walker: calculate the liveness information for the block
2088 * and store it into the sim->live cache.
2090 static void update_liveness_walker(ir_node *block, void *data) {
2091 x87_simulator *sim = data;
2092 update_liveness(sim, block);
2093 } /* update_liveness_walker */
2096 * Run a simulation and fix all virtual instructions for a graph.
2098 * @param arch_env the architecture environment
2099 * @param birg the be irg wrapping the current graph
2101 * Needs a block-schedule.
2103 void x87_simulate_graph(const arch_env_t *arch_env, be_irg_t *birg) {
2104 ir_node *block, *start_block;
2105 blk_state *bl_state;
2107 ir_graph *irg = be_get_birg_irg(birg);
2109 /* create the simulator */
2110 x87_init_simulator(&sim, irg, arch_env);
2112 start_block = get_irg_start_block(irg);
2113 bl_state = x87_get_bl_state(&sim, start_block);
2115 /* start with the empty state */
2116 bl_state->begin = empty;
/* worklist of blocks whose begin state is known but that are not yet
   simulated; seeded with the start block */
2119 sim.worklist = new_waitq();
2120 waitq_put(sim.worklist, start_block);
2122 be_invalidate_liveness(birg);
2123 be_assure_liveness(birg);
2124 sim.lv = be_get_birg_liveness(birg);
2126 /* Calculate the liveness for all nodes. We must precalculate this info,
2127 * because the simulator adds new nodes (possibly before Phi nodes) which
2128 * would let a lazy calculation fail.
2129 * On the other hand we reduce the computation amount due to
2130 * precaching from O(n^2) to O(n) at the expense of O(n) cache memory.
2132 irg_block_walk_graph(irg, update_liveness_walker, NULL, &sim);
/* drain the worklist; x87_simulate_block may queue successors */
2136 block = waitq_get(sim.worklist);
2137 x87_simulate_block(&sim, block);
2138 } while (! waitq_empty(sim.worklist));
2141 del_waitq(sim.worklist);
2142 x87_destroy_simulator(&sim);
2143 } /* x87_simulate_graph */
/** Register the debug module handle used by all DB() output in this file. */
2145 void ia32_init_x87(void) {
2146 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.x87");
2147 } /* ia32_init_x87 */