2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the x87 support and virtual to stack
23 * register translation for the ia32 backend.
24 * @author Michael Beck
34 #include "iredges_t.h"
46 #include "../belive_t.h"
47 #include "../besched_t.h"
48 #include "../benode_t.h"
49 #include "bearch_ia32_t.h"
50 #include "ia32_new_nodes.h"
51 #include "gen_ia32_new_nodes.h"
52 #include "gen_ia32_regalloc_if.h"
54 #include "ia32_architecture.h"
/* Wrap a stack index into [0, N_x87_REGS): the "& (N - 1)" trick only
 * implements modulo when N_x87_REGS is a power of two (8 for the x87). */
61 #define MASK_TOS(x) ((x) & (N_x87_REGS - 1))
63 /** the debug handle */
64 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
66 /* Forward declaration. */
67 typedef struct _x87_simulator x87_simulator;
/* Template bundling the four x87 opcode variants of one virtual binop:
 * normal, reverse, and the TOS-popping form of each. sim_binop() picks one
 * depending on operand liveness and stack positions.
 * NOTE(review): this extract is missing interleaved lines (e.g. the
 * struct-closing "} exchange_tmpl;"); code below left byte-identical. */
70 * An exchange template.
71 * Note that our virtual functions have the same inputs
72 * and attributes as the real ones, so we can simple exchange
74 * Further, x87 supports inverse instructions, so we can handle them.
76 typedef struct _exchange_tmpl {
77 ir_op *normal_op; /**< the normal one */
78 ir_op *reverse_op; /**< the reverse one if exists */
79 ir_op *normal_pop_op; /**< the normal one with tos pop */
80 ir_op *reverse_pop_op; /**< the reverse one with tos pop */
/* One slot of the simulated register stack: which vfp register lives there
 * and which node produced the value. */
84 * An entry on the simulated x87 stack.
86 typedef struct _st_entry {
87 int reg_idx; /**< the virtual register index of this stack value */
88 ir_node *node; /**< the node that produced this value */
/* The simulated x87 stack: a circular buffer of N_x87_REGS entries, the
 * current depth, the TOS index, and a back pointer to the simulator. */
94 typedef struct _x87_state {
95 st_entry st[N_x87_REGS]; /**< the register stack */
96 int depth; /**< the current stack depth */
97 int tos; /**< position of the tos */
98 x87_simulator *sim; /**< The simulator. */
101 /** An empty state, used for blocks without fp instructions. */
102 static x87_state _empty = { { {0, NULL}, }, 0, 0, NULL };
103 static x87_state *empty = (x87_state *)&_empty;
/* Result codes returned by the sim_* functions so the driver knows whether
 * the schedule gained a node after the simulated one.
 * NOTE(review): the enum's opening line is missing from this extract. */
106 NO_NODE_ADDED = 0, /**< No node was added. */
107 NODE_ADDED = 1 /**< A node was added by the simulator in the schedule. */
111 * The type of an instruction simulator function.
113 * @param state the x87 state
114 * @param n the node to be simulated
116 * @return NODE_ADDED if a node was added AFTER n in schedule,
119 typedef int (*sim_func)(x87_state *state, ir_node *n);
122 * A block state: Every block has a x87 state at the beginning and at the end.
124 typedef struct _blk_state {
125 x87_state *begin; /**< state at the begin or NULL if not assigned */
126 x87_state *end; /**< state at the end or NULL if not assigned */
129 #define PTR_TO_BLKSTATE(p) ((blk_state *)(p))
131 /** liveness bitset for vfp registers. */
132 typedef unsigned char vfp_liveness;
/* Per-run state of the simulator: allocator, per-block state map, cached
 * liveness, and the worklist of blocks still to process.
 * NOTE(review): the closing brace line is missing from this extract. */
137 struct _x87_simulator {
138 struct obstack obst; /**< An obstack for fast allocating. */
139 pmap *blk_states; /**< Map blocks to states. */
140 be_lv_t *lv; /**< intrablock liveness. */
141 vfp_liveness *live; /**< Liveness information. */
142 unsigned n_idx; /**< The cached get_irg_last_idx() result. */
143 waitq *worklist; /**< Worklist of blocks that must be processed. */
144 ia32_isa_t *isa; /**< the ISA object */
148 * Returns the current stack depth.
150 * @param state the x87 state
152 * @return the x87 stack depth
154 static int x87_get_depth(const x87_state *state)
/* NOTE(review): body line(s) missing from this extract; presumably
 * "return state->depth;" — confirm against the full source. */
157 } /* x87_get_depth */
160 * Return the virtual register index at st(pos).
162 * @param state the x87 state
163 * @param pos a stack position
165 * @return the vfp register index that produced the value at st(pos)
167 static int x87_get_st_reg(const x87_state *state, int pos)
/* pos is relative to the TOS; MASK_TOS wraps the circular-buffer index. */
169 assert(pos < state->depth);
170 return state->st[MASK_TOS(state->tos + pos)].reg_idx;
171 } /* x87_get_st_reg */
175 * Return the node at st(pos).
177 * @param state the x87 state
178 * @param pos a stack position
180 * @return the IR node that produced the value at st(pos)
182 static ir_node *x87_get_st_node(const x87_state *state, int pos)
184 assert(pos < state->depth);
185 return state->st[MASK_TOS(state->tos + pos)].node;
186 } /* x87_get_st_node */
189 * Dump the stack for debugging.
191 * @param state the x87 state
193 static void x87_dump_stack(const x87_state *state)
/* Prints entries from the deepest slot down to st(0); the entry printed
 * last is the TOS. Only compiled in DEBUG_libfirm builds (see #endif). */
197 for (i = state->depth - 1; i >= 0; --i) {
198 DB((dbg, LEVEL_2, "vf%d(%+F) ", x87_get_st_reg(state, i),
199 x87_get_st_node(state, i)));
201 DB((dbg, LEVEL_2, "<-- TOS\n"));
202 } /* x87_dump_stack */
203 #endif /* DEBUG_libfirm */
206 * Set a virtual register to st(pos).
208 * @param state the x87 state
209 * @param reg_idx the vfp register index that should be set
210 * @param node the IR node that produces the value of the vfp register
211 * @param pos the stack position where the new value should be entered
213 static void x87_set_st(x87_state *state, int reg_idx, ir_node *node, int pos)
215 assert(0 < state->depth);
216 state->st[MASK_TOS(state->tos + pos)].reg_idx = reg_idx;
217 state->st[MASK_TOS(state->tos + pos)].node = node;
219 DB((dbg, LEVEL_2, "After SET_REG: "));
220 DEBUG_ONLY(x87_dump_stack(state));
/* Convenience wrapper: write reg_idx/node directly to st(0). */
224 * Set the tos virtual register.
226 * @param state the x87 state
227 * @param reg_idx the vfp register index that should be set
228 * @param node the IR node that produces the value of the vfp register
230 static void x87_set_tos(x87_state *state, int reg_idx, ir_node *node)
232 x87_set_st(state, reg_idx, node, 0);
/* Simulated fxch: swap the st(0) and st(pos) entries in the state only —
 * no IR node is created here (see x87_create_fxch for that). */
236 * Swap st(0) with st(pos).
238 * @param state the x87 state
239 * @param pos the stack position to change the tos with
241 static void x87_fxch(x87_state *state, int pos)
244 assert(pos < state->depth);
246 entry = state->st[MASK_TOS(state->tos + pos)];
247 state->st[MASK_TOS(state->tos + pos)] = state->st[MASK_TOS(state->tos)];
248 state->st[MASK_TOS(state->tos)] = entry;
250 DB((dbg, LEVEL_2, "After FXCH: ")); DEBUG_ONLY(x87_dump_stack(state));
254 * Convert a virtual register to the stack index.
256 * @param state the x87 state
257 * @param reg_idx the register vfp index
259 * @return the stack position where the register is stacked
260 * or -1 if the virtual register was not found
262 static int x87_on_stack(const x87_state *state, int reg_idx)
264 int i, tos = state->tos;
/* Linear scan of the occupied slots, TOS-relative. */
266 for (i = 0; i < state->depth; ++i)
267 if (state->st[MASK_TOS(tos + i)].reg_idx == reg_idx)
/* NOTE(review): the "return i;" / "return -1;" lines are missing from this
 * extract — confirm against the full source. */
273 * Push a virtual Register onto the stack, double pushed allowed.
275 * @param state the x87 state
276 * @param reg_idx the register vfp index
277 * @param node the node that produces the value of the vfp register
279 static void x87_push_dbl(x87_state *state, int reg_idx, ir_node *node)
281 assert(state->depth < N_x87_REGS && "stack overrun");
/* Pushing moves the TOS "downwards" (tos - 1) in the circular buffer. */
284 state->tos = MASK_TOS(state->tos - 1);
285 state->st[state->tos].reg_idx = reg_idx;
286 state->st[state->tos].node = node;
288 DB((dbg, LEVEL_2, "After PUSH: ")); DEBUG_ONLY(x87_dump_stack(state));
292 * Push a virtual Register onto the stack, double pushes are NOT allowed.
294 * @param state the x87 state
295 * @param reg_idx the register vfp index
296 * @param node the node that produces the value of the vfp register
/* NOTE(review): the @param dbl_push line below is stale — x87_push takes
 * no such parameter; the non-double-push invariant is asserted instead. */
297 * @param dbl_push if != 0 double pushes are allowed
299 static void x87_push(x87_state *state, int reg_idx, ir_node *node)
301 assert(x87_on_stack(state, reg_idx) == -1 && "double push");
303 x87_push_dbl(state, reg_idx, node);
307 * Pop a virtual Register from the stack.
309 * @param state the x87 state
311 static void x87_pop(x87_state *state)
313 assert(state->depth > 0 && "stack underrun");
/* Popping only moves the TOS pointer; the stale entry stays in the array. */
316 state->tos = MASK_TOS(state->tos + 1);
318 DB((dbg, LEVEL_2, "After POP: ")); DEBUG_ONLY(x87_dump_stack(state));
322 * Empty the fpu stack
324 * @param state the x87 state
326 static void x87_emms(x87_state *state)
/* NOTE(review): body missing from this extract — presumably resets
 * depth/tos; confirm against the full source. */
333 * Returns the block state of a block.
335 * @param sim the x87 simulator handle
336 * @param block the current block
338 * @return the block state
340 static blk_state *x87_get_bl_state(x87_simulator *sim, ir_node *block)
342 pmap_entry *entry = pmap_find(sim->blk_states, block);
/* Lazily create and cache an empty begin/end state pair for the block.
 * NOTE(review): the "if (! entry)" guard line is missing from this
 * extract; the allocation below only runs when the lookup failed. */
345 blk_state *bl_state = obstack_alloc(&sim->obst, sizeof(*bl_state));
346 bl_state->begin = NULL;
347 bl_state->end = NULL;
349 pmap_insert(sim->blk_states, block, bl_state);
353 return PTR_TO_BLKSTATE(entry->value);
354 } /* x87_get_bl_state */
357 * Creates a new x87 state.
359 * @param sim the x87 simulator handle
361 * @return a new x87 state
363 static x87_state *x87_alloc_state(x87_simulator *sim)
365 x87_state *res = obstack_alloc(&sim->obst, sizeof(*res));
369 } /* x87_alloc_state */
/* Allocate a fresh state on the obstack and copy src into it. */
374 * @param sim the x87 simulator handle
375 * @param src the x87 state that will be cloned
377 * @return a cloned copy of the src state
379 static x87_state *x87_clone_state(x87_simulator *sim, const x87_state *src)
381 x87_state *res = x87_alloc_state(sim);
383 memcpy(res, src, sizeof(*res));
385 } /* x87_clone_state */
388 * Patch a virtual instruction into a x87 one and return
389 * the node representing the result value.
391 * @param n the IR node to patch
392 * @param op the x87 opcode to patch in
394 static ir_node *x87_patch_insn(ir_node *n, ir_op *op)
396 ir_mode *mode = get_irn_mode(n);
/* For mode_T nodes retarget every float Proj to mode_E (the 80-bit x87
 * register mode); otherwise patch the node's own mode directly.
 * NOTE(review): the set_irn_op()/result-tracking lines are missing from
 * this extract. */
401 if (mode == mode_T) {
402 /* patch all Proj's */
403 const ir_edge_t *edge;
405 foreach_out_edge(n, edge) {
406 ir_node *proj = get_edge_src_irn(edge);
408 mode = get_irn_mode(proj);
409 if (mode_is_float(mode)) {
411 set_irn_mode(proj, mode_E);
415 } else if (mode_is_float(mode))
416 set_irn_mode(n, mode_E);
418 } /* x87_patch_insn */
421 * Returns the first Proj of a mode_T node having a given mode.
423 * @param n the mode_T node
424 * @param m the desired mode of the Proj
425 * @return The first Proj of mode @p m found or NULL.
427 static ir_node *get_irn_Proj_for_mode(ir_node *n, ir_mode *m)
429 const ir_edge_t *edge;
431 assert(get_irn_mode(n) == mode_T && "Need mode_T node");
433 foreach_out_edge(n, edge) {
434 ir_node *proj = get_edge_src_irn(edge);
435 if (get_irn_mode(proj) == m)
440 } /* get_irn_Proj_for_mode */
443 * Wrap the arch_* function here so we can check for errors.
445 static INLINE const arch_register_t *x87_get_irn_register(const ir_node *irn)
447 const arch_register_t *res = arch_get_irn_register(irn);
/* Every node handled by this simulator must carry a vfp-class register. */
449 assert(res->reg_class->regs == ia32_vfp_regs);
451 } /* x87_get_irn_register */
453 /* -------------- x87 perm --------------- */
456 * Creates a fxch for shuffle.
458 * @param state the x87 state
459 * @param pos parameter for fxch
460 * @param block the block were fxch is inserted
462 * Creates a new fxch node and reroute the user of the old node
465 * @return the fxch node
467 static ir_node *x87_fxch_shuffle(x87_state *state, int pos, ir_node *block)
470 ia32_x87_attr_t *attr;
472 fxch = new_rd_ia32_fxch(NULL, get_irn_irg(block), block);
473 attr = get_ia32_x87_attr(fxch);
/* x87[0] = st(pos) and x87[2] = st(0): the two registers being swapped. */
474 attr->x87[0] = &ia32_st_regs[pos];
475 attr->x87[2] = &ia32_st_regs[0];
/* Mirror the exchange in the simulated state; the caller schedules fxch. */
479 x87_fxch(state, pos);
481 } /* x87_fxch_shuffle */
484 * Calculate the necessary permutations to reach dst_state.
486 * These permutations are done with fxch instructions and placed
487 * at the end of the block.
489 * Note that critical edges are removed here, so we need only
490 * a shuffle if the current block has only one successor.
492 * @param sim the simulator handle
493 * @param block the current block
494 * @param state the current x87 stack state, might be modified
495 * @param dst_block the destination block
496 * @param dst_state destination state
/* Algorithm: decompose the permutation (current stack -> dst stack) into
 * disjoint cycles (at most 4 on an 8-slot stack), then realize each cycle
 * with a sequence of fxch instructions scheduled before the block's final
 * control-flow op. A cycle not containing st(0) is bracketed by an extra
 * fxch that temporarily brings one of its members to the TOS.
 * NOTE(review): this extract is missing interleaved lines (returns,
 * braces, the "after" bookkeeping); code left byte-identical. */
500 static x87_state *x87_shuffle(x87_simulator *sim, ir_node *block,
501 x87_state *state, ir_node *dst_block,
502 const x87_state *dst_state)
504 int i, n_cycles, k, ri;
505 unsigned cycles[4], all_mask;
506 char cycle_idx[4][8];
507 ir_node *fxch, *before, *after;
511 assert(state->depth == dst_state->depth);
513 /* Some mathematics here:
514 If we have a cycle of length n that includes the tos,
515 we need n-1 exchange operations.
516 We can always add the tos and restore it, so we need
517 n+1 exchange operations for a cycle not containing the tos.
518 So, the maximum of needed operations is for a cycle of 7
519 not including the tos == 8.
520 This is the same number of ops we would need for using stores,
521 so exchange is cheaper (we save the loads).
522 On the other hand, we might need an additional exchange
523 in the next block to bring one operand on top, so the
524 number of ops in the first case is identical.
525 Further, no more than 4 cycles can exists (4 x 2).
527 all_mask = (1 << (state->depth)) - 1;
/* Cycle detection: all_mask tracks stack slots not yet assigned to a
 * cycle; each iteration extracts one complete cycle. */
529 for (n_cycles = 0; all_mask; ++n_cycles) {
530 int src_idx, dst_idx;
532 /* find the first free slot */
533 for (i = 0; i < state->depth; ++i) {
534 if (all_mask & (1 << i)) {
535 all_mask &= ~(1 << i);
537 /* check if there are differences here */
538 if (x87_get_st_reg(state, i) != x87_get_st_reg(dst_state, i))
544 /* no more cycles found */
549 cycles[n_cycles] = (1 << i);
550 cycle_idx[n_cycles][k++] = i;
/* Follow the permutation from slot i until it closes the cycle. */
551 for (src_idx = i; ; src_idx = dst_idx) {
552 dst_idx = x87_on_stack(dst_state, x87_get_st_reg(state, src_idx));
554 if ((all_mask & (1 << dst_idx)) == 0)
557 cycle_idx[n_cycles][k++] = dst_idx;
558 cycles[n_cycles] |= (1 << dst_idx);
559 all_mask &= ~(1 << dst_idx);
561 cycle_idx[n_cycles][k] = -1;
565 /* no permutation needed */
569 /* Hmm: permutation needed */
570 DB((dbg, LEVEL_2, "\n%+F needs permutation: from\n", block));
571 DEBUG_ONLY(x87_dump_stack(state));
572 DB((dbg, LEVEL_2, " to\n"));
573 DEBUG_ONLY(x87_dump_stack(dst_state));
577 DB((dbg, LEVEL_2, "Need %d cycles\n", n_cycles));
578 for (ri = 0; ri < n_cycles; ++ri) {
579 DB((dbg, LEVEL_2, " Ring %d:\n ", ri));
580 for (k = 0; cycle_idx[ri][k] != -1; ++k)
581 DB((dbg, LEVEL_2, " st%d ->", cycle_idx[ri][k]));
582 DB((dbg, LEVEL_2, "\n"));
589 * Find the place node must be insert.
590 * We have only one successor block, so the last instruction should
593 before = sched_last(block);
594 assert(is_cfop(before));
596 /* now do the permutations */
597 for (ri = 0; ri < n_cycles; ++ri) {
598 if ((cycles[ri] & 1) == 0) {
599 /* this cycle does not include the tos */
600 fxch = x87_fxch_shuffle(state, cycle_idx[ri][0], block);
602 sched_add_after(after, fxch);
604 sched_add_before(before, fxch);
607 for (k = 1; cycle_idx[ri][k] != -1; ++k) {
608 fxch = x87_fxch_shuffle(state, cycle_idx[ri][k], block);
610 sched_add_after(after, fxch);
612 sched_add_before(before, fxch);
/* Closing fxch: restore the original TOS for a cycle that did not
 * include st(0) (bit 0 of the cycle mask clear). */
615 if ((cycles[ri] & 1) == 0) {
616 /* this cycle does not include the tos */
617 fxch = x87_fxch_shuffle(state, cycle_idx[ri][0], block);
618 sched_add_after(after, fxch);
625 * Create a fxch node before another node.
627 * @param state the x87 state
628 * @param n the node after the fxch
629 * @param pos exchange st(pos) with st(0)
633 static ir_node *x87_create_fxch(x87_state *state, ir_node *n, int pos)
636 ia32_x87_attr_t *attr;
637 ir_graph *irg = get_irn_irg(n);
638 ir_node *block = get_nodes_block(n);
/* Update the simulated state first, then materialize and schedule the
 * real fxch node directly before n. */
640 x87_fxch(state, pos);
642 fxch = new_rd_ia32_fxch(NULL, irg, block);
643 attr = get_ia32_x87_attr(fxch);
644 attr->x87[0] = &ia32_st_regs[pos];
645 attr->x87[2] = &ia32_st_regs[0];
649 sched_add_before(n, fxch);
650 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fxch), attr->x87[0]->name, attr->x87[2]->name));
652 } /* x87_create_fxch */
655 * Create a fpush before node n.
657 * @param state the x87 state
658 * @param n the node after the fpush
659 * @param pos push st(pos) on stack
660 * @param op_idx replace input op_idx of n with the fpush result
662 static void x87_create_fpush(x87_state *state, ir_node *n, int pos, int op_idx)
664 ir_node *fpush, *pred = get_irn_n(n, op_idx);
665 ia32_x87_attr_t *attr;
666 const arch_register_t *out = x87_get_irn_register(pred);
/* Deliberate double push: the same vfp register then occupies two slots
 * of the simulated stack (the duplicate is consumed by n). */
668 x87_push_dbl(state, arch_register_get_index(out), pred);
670 fpush = new_rd_ia32_fpush(NULL, get_irn_irg(n), get_nodes_block(n));
671 attr = get_ia32_x87_attr(fpush);
672 attr->x87[0] = &ia32_st_regs[pos];
673 attr->x87[2] = &ia32_st_regs[0];
676 sched_add_before(n, fpush);
678 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fpush), attr->x87[0]->name, attr->x87[2]->name));
679 } /* x87_create_fpush */
682 * Create a fpop before node n.
684 * @param state the x87 state
685 * @param n the node after the fpop
686 * @param num pop 1 or 2 values
688 * @return the fpop node
690 static ir_node *x87_create_fpop(x87_state *state, ir_node *n, int num)
692 ir_node *fpop = NULL;
693 ia32_x87_attr_t *attr;
/* Emit either an ffreep or a plain fpop per configuration flag; all three
 * x87 register attributes point at st(0) in both cases. */
698 if (ia32_cg_config.use_ffreep)
699 fpop = new_rd_ia32_ffreep(NULL, get_irn_irg(n), get_nodes_block(n));
701 fpop = new_rd_ia32_fpop(NULL, get_irn_irg(n), get_nodes_block(n));
702 attr = get_ia32_x87_attr(fpop);
703 attr->x87[0] = &ia32_st_regs[0];
704 attr->x87[1] = &ia32_st_regs[0];
705 attr->x87[2] = &ia32_st_regs[0];
708 sched_add_before(n, fpop);
709 DB((dbg, LEVEL_1, "<<< %s %s\n", get_irn_opname(fpop), attr->x87[0]->name));
/* NOTE(review): the x87_pop() call and the "num" loop are missing from
 * this extract — confirm against the full source. */
714 } /* x87_create_fpop */
717 * Creates an fldz before node n
719 * @param state the x87 state
720 * @param n the node after the fldz
722 * @return the fldz node
724 static ir_node *x87_create_fldz(x87_state *state, ir_node *n, int regidx)
726 ir_graph *irg = get_irn_irg(n);
727 ir_node *block = get_nodes_block(n);
730 fldz = new_rd_ia32_fldz(NULL, irg, block, mode_E);
732 sched_add_before(n, fldz);
733 DB((dbg, LEVEL_1, "<<< %s\n", get_irn_opname(fldz)));
/* Record the freshly produced zero under regidx (no double push). */
736 x87_push(state, regidx, fldz);
741 /* --------------------------------- liveness ------------------------------------------ */
744 * The liveness transfer function.
745 * Updates a live set over a single step from a given node to its predecessor.
746 * Everything defined at the node is removed from the set, the uses of the node get inserted.
748 * @param irn The node at which liveness should be computed.
749 * @param live The bitset of registers live before @p irn. This set gets modified by updating it to
750 * the registers live after irn.
752 * @return The live bitset.
754 static vfp_liveness vfp_liveness_transfer(ir_node *irn, vfp_liveness live)
757 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
/* Kill: clear the bits of all vfp registers defined by irn (for mode_T
 * nodes, defined by its Projs). */
759 if (get_irn_mode(irn) == mode_T) {
760 const ir_edge_t *edge;
762 foreach_out_edge(irn, edge) {
763 ir_node *proj = get_edge_src_irn(edge);
765 if (arch_irn_consider_in_reg_alloc(cls, proj)) {
766 const arch_register_t *reg = x87_get_irn_register(proj);
767 live &= ~(1 << arch_register_get_index(reg));
772 if (arch_irn_consider_in_reg_alloc(cls, irn)) {
773 const arch_register_t *reg = x87_get_irn_register(irn);
774 live &= ~(1 << arch_register_get_index(reg));
/* Gen: set the bits of every float vfp operand used by irn. */
777 for (i = 0, n = get_irn_arity(irn); i < n; ++i) {
778 ir_node *op = get_irn_n(irn, i);
780 if (mode_is_float(get_irn_mode(op)) &&
781 arch_irn_consider_in_reg_alloc(cls, op)) {
782 const arch_register_t *reg = x87_get_irn_register(op);
783 live |= 1 << arch_register_get_index(reg);
787 } /* vfp_liveness_transfer */
790 * Put all live virtual registers at the end of a block into a bitset.
792 * @param sim the simulator handle
793 * @param lv the liveness information
794 * @param bl the block
796 * @return The live bitset at the end of this block
798 static vfp_liveness vfp_liveness_end_of_block(x87_simulator *sim, const ir_node *block)
801 vfp_liveness live = 0;
802 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
803 const be_lv_t *lv = sim->lv;
/* Collect every vfp-class node live at block end into one bitmask. */
805 be_lv_foreach(lv, block, be_lv_state_end, i) {
806 const arch_register_t *reg;
807 const ir_node *node = be_lv_get_irn(lv, block, i);
808 if (!arch_irn_consider_in_reg_alloc(cls, node))
811 reg = x87_get_irn_register(node);
812 live |= 1 << arch_register_get_index(reg);
816 } /* vfp_liveness_end_of_block */
818 /** get the register mask from an arch_register */
819 #define REGMASK(reg) (1 << (arch_register_get_index(reg)))
822 * Return a bitset of argument registers which are live at the end of a node.
824 * @param sim the simulator handle
825 * @param pos the node
826 * @param kill kill mask for the output registers
828 * @return The live bitset.
830 static unsigned vfp_live_args_after(x87_simulator *sim, const ir_node *pos, unsigned kill)
832 unsigned idx = get_irn_idx(pos);
834 assert(idx < sim->n_idx);
/* sim->live is the per-node-index cache filled by update_liveness(). */
835 return sim->live[idx] & ~kill;
836 } /* vfp_live_args_after */
839 * Calculate the liveness for a whole block and cache it.
841 * @param sim the simulator handle
842 * @param lv the liveness handle
843 * @param block the block
845 static void update_liveness(x87_simulator *sim, ir_node *block)
847 vfp_liveness live = vfp_liveness_end_of_block(sim, block);
851 /* now iterate through the block backward and cache the results */
852 sched_foreach_reverse(block, irn) {
853 /* stop at the first Phi: this produces the live-in */
/* Cache the live-set valid *after* each instruction, then step the
 * transfer function backwards across it. */
857 idx = get_irn_idx(irn);
858 sim->live[idx] = live;
860 live = vfp_liveness_transfer(irn, live);
/* The block's own index stores its live-in set. */
862 idx = get_irn_idx(block);
863 sim->live[idx] = live;
864 } /* update_liveness */
867 * Returns true if a register is live in a set.
869 * @param reg_idx the vfp register index
870 * @param live a live bitset
872 #define is_vfp_live(reg_idx, live) ((live) & (1 << (reg_idx)))
876 * Dump liveness info.
878 * @param live the live bitset
880 static void vfp_dump_live(vfp_liveness live)
/* Prints the name of each of the 8 vfp registers whose bit is set.
 * Debug-build only (see the #endif below). */
884 DB((dbg, LEVEL_2, "Live after: "));
885 for (i = 0; i < 8; ++i) {
886 if (live & (1 << i)) {
887 DB((dbg, LEVEL_2, "vf%d ", i));
890 DB((dbg, LEVEL_2, "\n"));
891 } /* vfp_dump_live */
892 #endif /* DEBUG_libfirm */
894 /* --------------------------------- simulators ---------------------------------------- */
896 #define XCHG(a, b) do { int t = (a); (a) = (b); (b) = t; } while (0)
908 * Simulate a virtual binop.
910 * @param state the x87 state
911 * @param n the node that should be simulated (and patched)
912 * @param tmpl the template containing the 4 possible x87 opcodes
914 * @return NO_NODE_ADDED
/* Strategy: depending on which operands are still live after n and where
 * they sit on the simulated stack, insert fpush/fxch fixups and select one
 * of the four template opcodes (normal / reverse, popping / non-popping),
 * then patch n in place and record the result position in the state.
 * NOTE(review): this extract is missing interleaved lines (several
 * out_idx/do_pop assignments, braces); code left byte-identical. */
916 static int sim_binop(x87_state *state, ir_node *n, const exchange_tmpl *tmpl)
918 int op2_idx = 0, op1_idx;
919 int out_idx, do_pop = 0;
920 ia32_x87_attr_t *attr;
922 ir_node *patched_insn;
924 x87_simulator *sim = state->sim;
925 ir_node *op1 = get_irn_n(n, n_ia32_binary_left);
926 ir_node *op2 = get_irn_n(n, n_ia32_binary_right);
927 const arch_register_t *op1_reg = x87_get_irn_register(op1);
928 const arch_register_t *op2_reg = x87_get_irn_register(op2);
929 const arch_register_t *out = x87_get_irn_register(n);
930 int reg_index_1 = arch_register_get_index(op1_reg);
931 int reg_index_2 = arch_register_get_index(op2_reg);
932 vfp_liveness live = vfp_live_args_after(sim, n, REGMASK(out));
936 DB((dbg, LEVEL_1, ">>> %+F %s, %s -> %s\n", n,
937 arch_register_get_name(op1_reg), arch_register_get_name(op2_reg),
938 arch_register_get_name(out)));
939 DEBUG_ONLY(vfp_dump_live(live));
940 DB((dbg, LEVEL_1, "Stack before: "));
941 DEBUG_ONLY(x87_dump_stack(state));
943 if (reg_index_1 == REG_VFP_UKNWN) {
947 op1_idx = x87_on_stack(state, reg_index_1);
948 assert(op1_idx >= 0);
949 op1_live_after = is_vfp_live(arch_register_get_index(op1_reg), live);
952 attr = get_ia32_x87_attr(n);
953 permuted = attr->attr.data.ins_permuted;
/* Case 1: second operand is a real vfp register (not address mode). */
955 if (reg_index_2 != REG_VFP_NOREG) {
958 if (reg_index_2 == REG_VFP_UKNWN) {
962 /* second operand is a vfp register */
963 op2_idx = x87_on_stack(state, reg_index_2);
964 assert(op2_idx >= 0);
966 = is_vfp_live(arch_register_get_index(op2_reg), live);
969 if (op2_live_after) {
970 /* Second operand is live. */
972 if (op1_live_after) {
973 /* Both operands are live: push the first one.
974 This works even for op1 == op2. */
975 x87_create_fpush(state, n, op1_idx, n_ia32_binary_right);
976 /* now do fxxx (tos=tos X op) */
980 dst = tmpl->normal_op;
982 /* Second live, first operand is dead here, bring it to tos. */
984 x87_create_fxch(state, n, op1_idx);
989 /* now do fxxx (tos=tos X op) */
991 dst = tmpl->normal_op;
994 /* Second operand is dead. */
995 if (op1_live_after) {
996 /* First operand is live: bring second to tos. */
998 x87_create_fxch(state, n, op2_idx);
1003 /* now do fxxxr (tos = op X tos) */
1005 dst = tmpl->reverse_op;
1007 /* Both operands are dead here, pop them from the stack. */
1010 /* Both are identically and on tos, no pop needed. */
1011 /* here fxxx (tos = tos X tos) */
1012 dst = tmpl->normal_op;
1015 /* now do fxxxp (op = op X tos, pop) */
1016 dst = tmpl->normal_pop_op;
1020 } else if (op1_idx == 0) {
1021 assert(op1_idx != op2_idx);
1022 /* now do fxxxrp (op = tos X op, pop) */
1023 dst = tmpl->reverse_pop_op;
1027 /* Bring the second on top. */
1028 x87_create_fxch(state, n, op2_idx);
1029 if (op1_idx == op2_idx) {
1030 /* Both are identically and on tos now, no pop needed. */
1033 /* use fxxx (tos = tos X tos) */
1034 dst = tmpl->normal_op;
1037 /* op2 is on tos now */
1039 /* use fxxxp (op = op X tos, pop) */
1040 dst = tmpl->normal_pop_op;
/* Case 2: second operand comes from memory (address mode). */
1048 /* second operand is an address mode */
1049 if (op1_live_after) {
1050 /* first operand is live: push it here */
1051 x87_create_fpush(state, n, op1_idx, n_ia32_binary_left);
1054 /* first operand is dead: bring it to tos */
1056 x87_create_fxch(state, n, op1_idx);
1061 /* use fxxx (tos = tos X mem) */
1062 dst = permuted ? tmpl->reverse_op : tmpl->normal_op;
/* Patch n to the chosen opcode and record its result slot. */
1066 patched_insn = x87_patch_insn(n, dst);
1067 x87_set_st(state, arch_register_get_index(out), patched_insn, out_idx);
1072 /* patch the operation */
1073 attr->x87[0] = op1_reg = &ia32_st_regs[op1_idx];
1074 if (reg_index_2 != REG_VFP_NOREG) {
1075 attr->x87[1] = op2_reg = &ia32_st_regs[op2_idx];
1077 attr->x87[2] = out = &ia32_st_regs[out_idx];
1079 if (reg_index_2 != REG_VFP_NOREG) {
1080 DB((dbg, LEVEL_1, "<<< %s %s, %s -> %s\n", get_irn_opname(n),
1081 arch_register_get_name(op1_reg), arch_register_get_name(op2_reg),
1082 arch_register_get_name(out)));
1084 DB((dbg, LEVEL_1, "<<< %s %s, [AM] -> %s\n", get_irn_opname(n),
1085 arch_register_get_name(op1_reg),
1086 arch_register_get_name(out)));
1089 return NO_NODE_ADDED;
1093 * Simulate a virtual Unop.
1095 * @param state the x87 state
1096 * @param n the node that should be simulated (and patched)
1097 * @param op the x87 opcode that will replace n's opcode
1099 * @return NO_NODE_ADDED
1101 static int sim_unop(x87_state *state, ir_node *n, ir_op *op)
1103 int op1_idx, out_idx;
1104 x87_simulator *sim = state->sim;
1105 const arch_register_t *op1 = x87_get_irn_register(get_irn_n(n, UNOP_IDX));
1106 const arch_register_t *out = x87_get_irn_register(n);
1107 ia32_x87_attr_t *attr;
1108 unsigned live = vfp_live_args_after(sim, n, REGMASK(out));
1110 DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, out->name));
1111 DEBUG_ONLY(vfp_dump_live(live));
1113 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
/* x87 unops operate on st(0) only: keep a live operand by pushing a
 * duplicate, or bring a dead operand to the TOS with an fxch. */
1115 if (is_vfp_live(arch_register_get_index(op1), live)) {
1116 /* push the operand here */
1117 x87_create_fpush(state, n, op1_idx, UNOP_IDX);
1121 /* operand is dead, bring it to tos */
1123 x87_create_fxch(state, n, op1_idx);
/* Result replaces st(0); patch n to the real x87 opcode. */
1128 x87_set_tos(state, arch_register_get_index(out), x87_patch_insn(n, op));
1130 attr = get_ia32_x87_attr(n);
1131 attr->x87[0] = op1 = &ia32_st_regs[0];
1132 attr->x87[2] = out = &ia32_st_regs[0];
1133 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), out->name));
1135 return NO_NODE_ADDED;
1139 * Simulate a virtual Load instruction.
1141 * @param state the x87 state
1142 * @param n the node that should be simulated (and patched)
1143 * @param op the x87 opcode that will replace n's opcode
1145 * @return NO_NODE_ADDED
1147 static int sim_load(x87_state *state, ir_node *n, ir_op *op)
1149 const arch_register_t *out = x87_get_irn_register(n);
1150 ia32_x87_attr_t *attr;
1152 DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, arch_register_get_name(out)));
/* A load always pushes its result onto the x87 stack, so the simulated
 * state gets a push (no double push) of the patched node. */
1153 x87_push(state, arch_register_get_index(out), x87_patch_insn(n, op));
1154 assert(out == x87_get_irn_register(n));
1155 attr = get_ia32_x87_attr(n);
1156 attr->x87[2] = out = &ia32_st_regs[0];
1157 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), arch_register_get_name(out)));
1159 return NO_NODE_ADDED;
1163 * Rewire all users of @p old_val to @new_val iff they are scheduled after @p store.
1165 * @param store The store
1166 * @param old_val The former value
1167 * @param new_val The new value
1169 static void collect_and_rewire_users(ir_node *store, ir_node *old_val, ir_node *new_val)
1171 const ir_edge_t *edge, *ne;
/* The _safe edge iterator is required: set_irn_n() below mutates the
 * out-edge list of old_val while we walk it. */
1173 foreach_out_edge_safe(old_val, edge, ne) {
1174 ir_node *user = get_edge_src_irn(edge);
1176 if (! user || user == store)
1179 /* if the user is scheduled after the store: rewire */
1180 if (sched_is_scheduled(user) && sched_comes_after(store, user)) {
1182 /* find the input of the user pointing to the old value */
/* All matching inputs are rewired (no break): a user may reference
 * old_val more than once. */
1183 for (i = get_irn_arity(user) - 1; i >= 0; i--) {
1184 if (get_irn_n(user, i) == old_val)
1185 set_irn_n(user, i, new_val);
1189 } /* collect_and_rewire_users */
1192 * Simulate a virtual Store.
1194 * @param state the x87 state
1195 * @param n the node that should be simulated (and patched)
1196 * @param op the x87 store opcode
1197 * @param op_p the x87 store and pop opcode
/* Core difficulty handled here: a plain fst cannot store the full 80-bit
 * register (only fstp can), so when the stored value stays live the code
 * either duplicates it first (fpush + fstp) or, with a full stack, stores
 * with fstp and immediately reloads via a new vfld, rerouting all later
 * users to the reloaded value.
 * NOTE(review): this extract is missing interleaved lines (braces, some
 * bookkeeping); code left byte-identical. */
1199 static int sim_store(x87_state *state, ir_node *n, ir_op *op, ir_op *op_p)
1201 ir_node *val = get_irn_n(n, n_ia32_vfst_val);
1202 const arch_register_t *op2 = x87_get_irn_register(val);
1203 unsigned live = vfp_live_args_after(state->sim, n, 0);
1204 int insn = NO_NODE_ADDED;
1205 ia32_x87_attr_t *attr;
1206 int op2_reg_idx, op2_idx, depth;
1207 int live_after_node;
1210 op2_reg_idx = arch_register_get_index(op2);
1211 if (op2_reg_idx == REG_VFP_UKNWN) {
1212 /* just take any value from stack */
1213 if (state->depth > 0) {
1215 DEBUG_ONLY(op2 = NULL);
1216 live_after_node = 1;
1218 /* produce a new value which we will consume immediately */
1219 x87_create_fldz(state, n, op2_reg_idx);
1220 live_after_node = 0;
1221 op2_idx = x87_on_stack(state, op2_reg_idx);
1222 assert(op2_idx >= 0);
1225 op2_idx = x87_on_stack(state, op2_reg_idx);
1226 live_after_node = is_vfp_live(arch_register_get_index(op2), live);
1227 DB((dbg, LEVEL_1, ">>> %+F %s ->\n", n, arch_register_get_name(op2)));
1228 assert(op2_idx >= 0);
1231 mode = get_ia32_ls_mode(n);
1232 depth = x87_get_depth(state);
1234 if (live_after_node) {
1236 Problem: fst doesn't support mode_E (spills), only fstp does
1238 - stack not full: push value and fstp
1239 - stack full: fstp value and load again
1240 Note that we cannot test on mode_E, because floats might be 96bit ...
1242 if (get_mode_size_bits(mode) > 64 || mode == mode_Ls) {
1243 if (depth < N_x87_REGS) {
1244 /* ok, we have a free register: push + fstp */
1245 x87_create_fpush(state, n, op2_idx, n_ia32_vfst_val);
1247 x87_patch_insn(n, op_p);
1249 ir_node *vfld, *mem, *block, *rproj, *mproj;
1252 /* stack full here: need fstp + load */
1254 x87_patch_insn(n, op_p);
1256 block = get_nodes_block(n);
1257 irg = get_irn_irg(n);
1258 vfld = new_rd_ia32_vfld(NULL, irg, block, get_irn_n(n, 0), get_irn_n(n, 1), new_rd_NoMem(irg), get_ia32_ls_mode(n));
1260 /* copy all attributes */
1261 set_ia32_frame_ent(vfld, get_ia32_frame_ent(n));
1262 if (is_ia32_use_frame(n))
1263 set_ia32_use_frame(vfld);
1264 set_ia32_op_type(vfld, ia32_AddrModeS);
1265 add_ia32_am_offs_int(vfld, get_ia32_am_offs_int(n));
1266 set_ia32_am_sc(vfld, get_ia32_am_sc(n));
1267 set_ia32_ls_mode(vfld, get_ia32_ls_mode(n));
1269 rproj = new_r_Proj(irg, block, vfld, get_ia32_ls_mode(vfld), pn_ia32_vfld_res);
1270 mproj = new_r_Proj(irg, block, vfld, mode_M, pn_ia32_vfld_M);
1271 mem = get_irn_Proj_for_mode(n, mode_M);
1273 assert(mem && "Store memory not found");
1275 arch_set_irn_register(rproj, op2);
1277 /* reroute all former users of the store memory to the load memory */
1278 edges_reroute(mem, mproj, irg);
1279 /* set the memory input of the load to the store memory */
1280 set_irn_n(vfld, n_ia32_vfld_mem, mem);
1282 sched_add_after(n, vfld);
1283 sched_add_after(vfld, rproj);
1285 /* rewire all users, scheduled after the store, to the loaded value */
1286 collect_and_rewire_users(n, val, rproj);
1291 /* we can only store the tos to memory */
1293 x87_create_fxch(state, n, op2_idx);
1295 /* mode != mode_E -> use normal fst */
1296 x87_patch_insn(n, op);
1299 /* we can only store the tos to memory */
1301 x87_create_fxch(state, n, op2_idx);
/* Value is dead after the store: use the popping variant. */
1304 x87_patch_insn(n, op_p);
1307 attr = get_ia32_x87_attr(n);
1308 attr->x87[1] = op2 = &ia32_st_regs[0];
1309 DB((dbg, LEVEL_1, "<<< %s %s ->\n", get_irn_opname(n), arch_register_get_name(op2)));
/**
 * Macro factory for the per-opcode simulator wrappers.
 * Each expansion defines a sim_<op>() that forwards to the generic
 * simulator (sim_binop / sim_load / sim_unop / sim_store) with the
 * concrete ia32 opcode(s) filled in.
 * NOTE(review): the closing lines of these macro bodies are not visible
 * in this excerpt.
 */
1314 #define _GEN_BINOP(op, rev) \
1315 static int sim_##op(x87_state *state, ir_node *n) { \
1316 exchange_tmpl tmpl = { op_ia32_##op, op_ia32_##rev, op_ia32_##op##p, op_ia32_##rev##p }; \
1317 return sim_binop(state, n, &tmpl); \
/* GEN_BINOP: reverse variant equals the normal op; GEN_BINOPR: has a distinct <op>r reverse form */
1320 #define GEN_BINOP(op) _GEN_BINOP(op, op)
1321 #define GEN_BINOPR(op) _GEN_BINOP(op, op##r)
/* GEN_LOAD2: simulate <op> by patching it to the real opcode <nop> */
1323 #define GEN_LOAD2(op, nop) \
1324 static int sim_##op(x87_state *state, ir_node *n) { \
1325 return sim_load(state, n, op_ia32_##nop); \
1328 #define GEN_LOAD(op) GEN_LOAD2(op, op)
/* GEN_UNOP: unary operation on the tos */
1330 #define GEN_UNOP(op) \
1331 static int sim_##op(x87_state *state, ir_node *n) { \
1332 return sim_unop(state, n, op_ia32_##op); \
/* GEN_STORE: store with both non-popping and popping (<op>p) variants */
1335 #define GEN_STORE(op) \
1336 static int sim_##op(x87_state *state, ir_node *n) { \
1337 return sim_store(state, n, op_ia32_##op, op_ia32_##op##p); \
/**
 * Simulate a virtual fisttp (integer store with truncation, always pops).
 * Brings the value to the tos (via fxch if necessary) and patches the
 * node to the real ia32 fisttp.
 *
 * @param state  the x87 state
 * @param n      the node that should be simulated (and patched)
 */
1364 static int sim_fisttp(x87_state *state, ir_node *n)
1366 ir_node *val = get_irn_n(n, n_ia32_vfst_val);
1367 const arch_register_t *op2 = x87_get_irn_register(val);
1368 int insn = NO_NODE_ADDED;
1369 ia32_x87_attr_t *attr;
1370 int op2_reg_idx, op2_idx, depth;
1372 op2_reg_idx = arch_register_get_index(op2);
/* the operand may be the "unknown" placeholder register */
1373 if (op2_reg_idx == REG_VFP_UKNWN) {
1374 /* just take any value from stack */
1375 if (state->depth > 0) {
1377 DEBUG_ONLY(op2 = NULL);
1379 /* produce a new value which we will consume immediately */
1380 x87_create_fldz(state, n, op2_reg_idx);
1381 op2_idx = x87_on_stack(state, op2_reg_idx);
1382 assert(op2_idx >= 0);
/* normal case: look up the operand's current stack slot */
1385 op2_idx = x87_on_stack(state, op2_reg_idx);
1386 DB((dbg, LEVEL_1, ">>> %+F %s ->\n", n, arch_register_get_name(op2)));
1387 assert(op2_idx >= 0);
1390 depth = x87_get_depth(state);
1392 /* Note: although the value is still live here, it is destroyed because
1393 of the pop. The register allocator is aware of that and introduced a copy
1394 if the value must be alive. */
1396 /* we can only store the tos to memory */
1398 x87_create_fxch(state, n, op2_idx);
1401 x87_patch_insn(n, op_ia32_fisttp);
1403 attr = get_ia32_x87_attr(n);
/* after the (possible) exchange the operand lives in st(0) */
1404 attr->x87[1] = op2 = &ia32_st_regs[0];
1405 DB((dbg, LEVEL_1, "<<< %s %s ->\n", get_irn_opname(n), arch_register_get_name(op2)));
/**
 * Simulate a virtual FtstFnstsw (test tos against zero, store status word).
 * The operand is brought to the tos, the node is patched to the real
 * ia32 FtstFnstsw, and the value is popped afterwards if it is dead.
 *
 * @param state  the x87 state
 * @param n      the node that should be simulated (and patched)
 *
 * @return NO_NODE_ADDED
 */
1410 static int sim_FtstFnstsw(x87_state *state, ir_node *n)
1412 x87_simulator *sim = state->sim;
1413 ia32_x87_attr_t *attr = get_ia32_x87_attr(n);
1414 ir_node *op1_node = get_irn_n(n, n_ia32_vFtstFnstsw_left);
1415 const arch_register_t *reg1 = x87_get_irn_register(op1_node);
1416 int reg_index_1 = arch_register_get_index(reg1);
1417 int op1_idx = x87_on_stack(state, reg_index_1);
1418 unsigned live = vfp_live_args_after(sim, n, 0);
1420 DB((dbg, LEVEL_1, ">>> %+F %s\n", n, arch_register_get_name(reg1)));
1421 DEBUG_ONLY(vfp_dump_live(live));
1422 DB((dbg, LEVEL_1, "Stack before: "));
1423 DEBUG_ONLY(x87_dump_stack(state));
1424 assert(op1_idx >= 0);
1427 /* bring the value to tos */
1428 x87_create_fxch(state, n, op1_idx);
1432 /* patch the operation */
1433 x87_patch_insn(n, op_ia32_FtstFnstsw);
/* NOTE(review): op1_idx is presumably reset to 0 on a line elided from
   this excerpt (after the fxch the value is in st(0)) — confirm. */
1434 reg1 = &ia32_st_regs[op1_idx];
1435 attr->x87[0] = reg1;
1436 attr->x87[1] = NULL;
1437 attr->x87[2] = NULL;
/* operand dead after the test: pop it */
1439 if (!is_vfp_live(reg_index_1, live)) {
1440 x87_create_fpop(state, sched_next(n), 1);
1444 return NO_NODE_ADDED;
/**
 * Simulate a virtual Fucom/Fucomi (unordered floating point compare).
 * Rearranges the x87 stack with fxch so the operands end up in the
 * positions the real instruction expects, pops dead operands, and
 * patches the node to the concrete Fucom[p[p]]Fnstsw / Fucom[p]i form
 * depending on how many pops are needed.
 *
 * @param state  the x87 state
 * @param n      the node that should be simulated (and patched)
 */
1451 static int sim_Fucom(x87_state *state, ir_node *n)
1455 ia32_x87_attr_t *attr = get_ia32_x87_attr(n);
1457 x87_simulator *sim = state->sim;
1458 ir_node *op1_node = get_irn_n(n, n_ia32_vFucomFnstsw_left);
1459 ir_node *op2_node = get_irn_n(n, n_ia32_vFucomFnstsw_right);
1460 const arch_register_t *op1 = x87_get_irn_register(op1_node);
1461 const arch_register_t *op2 = x87_get_irn_register(op2_node);
1462 int reg_index_1 = arch_register_get_index(op1);
1463 int reg_index_2 = arch_register_get_index(op2);
1464 unsigned live = vfp_live_args_after(sim, n, 0);
/* tracks whether the compare operands ended up swapped */
1465 int permuted = attr->attr.data.ins_permuted;
1468 int node_added = NO_NODE_ADDED;
1470 DB((dbg, LEVEL_1, ">>> %+F %s, %s\n", n,
1471 arch_register_get_name(op1), arch_register_get_name(op2)));
1472 DEBUG_ONLY(vfp_dump_live(live));
1473 DB((dbg, LEVEL_1, "Stack before: "));
1474 DEBUG_ONLY(x87_dump_stack(state));
1476 op1_idx = x87_on_stack(state, reg_index_1);
1477 assert(op1_idx >= 0);
1479 /* BEWARE: check for comp a,a cases, they might happen */
1480 if (reg_index_2 != REG_VFP_NOREG) {
1481 /* second operand is a vfp register */
1482 op2_idx = x87_on_stack(state, reg_index_2);
1483 assert(op2_idx >= 0);
1485 if (is_vfp_live(reg_index_2, live)) {
1486 /* second operand is live */
1488 if (is_vfp_live(reg_index_1, live)) {
1489 /* both operands are live */
1492 /* res = tos X op */
1493 } else if (op2_idx == 0) {
1494 /* res = op X tos */
1495 permuted = !permuted;
1498 /* bring the first one to tos */
1499 x87_create_fxch(state, n, op1_idx);
1503 /* res = tos X op */
1506 /* second live, first operand is dead here, bring it to tos.
1507 This means further, op1_idx != op2_idx. */
1508 assert(op1_idx != op2_idx);
1510 x87_create_fxch(state, n, op1_idx);
1515 /* res = tos X op, pop */
1519 /* second operand is dead */
1520 if (is_vfp_live(reg_index_1, live)) {
1521 /* first operand is live: bring second to tos.
1522 This means further, op1_idx != op2_idx. */
1523 assert(op1_idx != op2_idx);
1525 x87_create_fxch(state, n, op2_idx);
1530 /* res = op X tos, pop */
1532 permuted = !permuted;
1535 /* both operands are dead here, check first for identity. */
1536 if (op1_idx == op2_idx) {
1537 /* identically, one pop needed */
1539 x87_create_fxch(state, n, op1_idx);
1543 /* res = tos X op, pop */
1546 /* different, move them to st and st(1) and pop both.
1547 The tricky part is to get one into st(1).*/
1548 else if (op2_idx == 1) {
1549 /* good, second operand is already in the right place, move the first */
1551 /* bring the first on top */
1552 x87_create_fxch(state, n, op1_idx);
1553 assert(op2_idx != 0);
1556 /* res = tos X op, pop, pop */
1558 } else if (op1_idx == 1) {
1559 /* good, first operand is already in the right place, move the second */
1561 /* bring the first on top */
1562 x87_create_fxch(state, n, op2_idx);
1563 assert(op1_idx != 0);
1566 /* res = op X tos, pop, pop */
1567 permuted = !permuted;
1571 /* if one is already the TOS, we need two fxch */
1573 /* first one is TOS, move to st(1) */
1574 x87_create_fxch(state, n, 1);
1575 assert(op2_idx != 1);
1577 x87_create_fxch(state, n, op2_idx);
1579 /* res = op X tos, pop, pop */
1581 permuted = !permuted;
1583 } else if (op2_idx == 0) {
1584 /* second one is TOS, move to st(1) */
1585 x87_create_fxch(state, n, 1);
1586 assert(op1_idx != 1);
1588 x87_create_fxch(state, n, op1_idx);
1590 /* res = tos X op, pop, pop */
1593 /* none of them is either TOS or st(1), 3 fxch needed */
1594 x87_create_fxch(state, n, op2_idx);
1595 assert(op1_idx != 0);
1596 x87_create_fxch(state, n, 1);
1598 x87_create_fxch(state, n, op1_idx);
1600 /* res = tos X op, pop, pop */
1607 /* second operand is an address mode */
1608 if (is_vfp_live(reg_index_1, live)) {
1609 /* first operand is live: bring it to TOS */
1611 x87_create_fxch(state, n, op1_idx);
1615 /* first operand is dead: bring it to tos */
1617 x87_create_fxch(state, n, op1_idx);
1624 /* patch the operation */
/* select the real opcode by the number of pops required
   (switch headers elided from this excerpt) */
1625 if (is_ia32_vFucomFnstsw(n)) {
1629 case 0: dst = op_ia32_FucomFnstsw; break;
1630 case 1: dst = op_ia32_FucompFnstsw; break;
1631 case 2: dst = op_ia32_FucomppFnstsw; break;
1632 default: panic("invalid popcount in sim_Fucom");
1635 for (i = 0; i < pops; ++i) {
1638 } else if (is_ia32_vFucomi(n)) {
1640 case 0: dst = op_ia32_Fucomi; break;
1641 case 1: dst = op_ia32_Fucompi; x87_pop(state); break;
/* two pops: Fucompi pops once, an explicit fpop handles the second */
1643 dst = op_ia32_Fucompi;
1645 x87_create_fpop(state, sched_next(n), 1);
1646 node_added = NODE_ADDED;
1648 default: panic("invalid popcount in sim_Fucom");
1651 panic("invalid operation %+F in sim_FucomFnstsw", n);
1654 x87_patch_insn(n, dst);
/* write the physical st registers into the x87 attribute */
1661 op1 = &ia32_st_regs[op1_idx];
1664 op2 = &ia32_st_regs[op2_idx];
1667 attr->x87[2] = NULL;
1668 attr->attr.data.ins_permuted = permuted;
1671 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(n),
1672 arch_register_get_name(op1), arch_register_get_name(op2)));
1674 DB((dbg, LEVEL_1, "<<< %s %s, [AM]\n", get_irn_opname(n),
1675 arch_register_get_name(op1)));
/**
 * Simulate a be_Keep node: pop any vfp operand that is on the x87 stack
 * but no longer live after the Keep.
 *
 * @param state  the x87 state
 * @param node   the node that should be simulated
 *
 * @return NODE_ADDED if an fpop was inserted, NO_NODE_ADDED otherwise
 */
1681 static int sim_Keep(x87_state *state, ir_node *node)
1684 const arch_register_t *op_reg;
1689 int node_added = NO_NODE_ADDED;
1691 DB((dbg, LEVEL_1, ">>> %+F\n", node));
1693 arity = get_irn_arity(node);
1694 for (i = 0; i < arity; ++i) {
1695 op = get_irn_n(node, i);
1696 op_reg = arch_get_irn_register(op);
/* only vfp operands are of interest here */
1697 if (arch_register_get_class(op_reg) != &ia32_reg_classes[CLASS_ia32_vfp])
1700 reg_id = arch_register_get_index(op_reg);
1701 live = vfp_live_args_after(state->sim, node, 0);
1703 op_stack_idx = x87_on_stack(state, reg_id);
/* on stack but dead: kill it with an fpop after the Keep */
1704 if (op_stack_idx >= 0 && !is_vfp_live(reg_id, live)) {
1705 x87_create_fpop(state, sched_next(node), 1);
1706 node_added = NODE_ADDED;
1710 DB((dbg, LEVEL_1, "Stack after: "));
1711 DEBUG_ONLY(x87_dump_stack(state));
/**
 * Keep a float node alive by attaching a be_Keep consumer directly
 * after it in the schedule (prevents it from becoming edge-dead).
 *
 * @param node  the (scheduled) float node to keep alive
 */
1716 static void keep_float_node_alive(ir_node *node)
1722 const arch_register_class_t *cls;
1724 irg = get_irn_irg(node);
1725 block = get_nodes_block(node);
1726 cls = arch_get_irn_reg_class(node, -1);
1728 keep = be_new_Keep(cls, irg, block, 1, in);
/* the node must already be in the schedule so the Keep can follow it */
1730 assert(sched_is_scheduled(node));
1731 sched_add_after(node, keep);
/**
 * Create a copy of a node. If the predecessor is an x87 load-constant
 * (fldz, fld1, fldpi, ...), recreate the constant instead of copying;
 * otherwise emit an fpushCopy. The result is pushed onto the simulated
 * stack and given the output register of n.
 *
 * @param state  the x87 state
 * @param n      the node to be copied
 *
 * @return the copy of n
 */
1742 static ir_node *create_Copy(x87_state *state, ir_node *n)
1744 ir_graph *irg = get_irn_irg(n);
1745 dbg_info *n_dbg = get_irn_dbg_info(n);
1746 ir_mode *mode = get_irn_mode(n);
1747 ir_node *block = get_nodes_block(n);
1748 ir_node *pred = get_irn_n(n, 0);
/* constructor for recreating a load-constant, NULL if pred is no constant */
1749 ir_node *(*cnstr)(dbg_info *, ir_graph *, ir_node *, ir_mode *) = NULL;
1751 const arch_register_t *out;
1752 const arch_register_t *op1;
1753 ia32_x87_attr_t *attr;
1755 /* Do not copy constants, recreate them. */
1756 switch (get_ia32_irn_opcode(pred)) {
1757 case iro_ia32_Unknown_VFP:
1759 cnstr = new_rd_ia32_fldz;
1762 cnstr = new_rd_ia32_fld1;
1764 case iro_ia32_fldpi:
1765 cnstr = new_rd_ia32_fldpi;
1767 case iro_ia32_fldl2e:
1768 cnstr = new_rd_ia32_fldl2e;
1770 case iro_ia32_fldl2t:
1771 cnstr = new_rd_ia32_fldl2t;
1773 case iro_ia32_fldlg2:
1774 cnstr = new_rd_ia32_fldlg2;
1776 case iro_ia32_fldln2:
1777 cnstr = new_rd_ia32_fldln2;
1783 out = x87_get_irn_register(n);
1784 op1 = x87_get_irn_register(pred);
1786 if (cnstr != NULL) {
1787 /* copy a constant */
1788 res = (*cnstr)(n_dbg, irg, block, mode);
1790 x87_push(state, arch_register_get_index(out), res);
1792 attr = get_ia32_x87_attr(res);
1793 attr->x87[2] = &ia32_st_regs[0];
/* non-constant: push a copy of the operand's stack slot */
1795 int op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1797 res = new_rd_ia32_fpushCopy(n_dbg, irg, block, pred, mode);
1799 x87_push(state, arch_register_get_index(out), res);
1801 attr = get_ia32_x87_attr(res);
1802 attr->x87[0] = &ia32_st_regs[op1_idx];
1803 attr->x87[2] = &ia32_st_regs[0];
1805 arch_set_irn_register(res, out);
/**
 * Simulate a be_Copy.
 * Non-vfp copies are ignored. If the source is still live, a real copy
 * (fpushCopy or recreated constant) is emitted; otherwise the copy is
 * either purely virtual (stack rename) or turned into a Pop when the
 * output slot is already occupied.
 *
 * @param state  the x87 state
 * @param n      the node that should be simulated (and patched)
 *
 * @return NO_NODE_ADDED
 */
1818 static int sim_Copy(x87_state *state, ir_node *n)
1821 const arch_register_t *out;
1822 const arch_register_t *op1;
1823 const arch_register_class_t *cls;
1824 ir_node *node, *next;
1825 ia32_x87_attr_t *attr;
1826 int op1_idx, out_idx;
/* ignore copies of non-vfp registers */
1829 cls = arch_get_irn_reg_class(n, -1);
1830 if (cls->regs != ia32_vfp_regs)
1833 pred = get_irn_n(n, 0);
1834 out = x87_get_irn_register(n);
1835 op1 = x87_get_irn_register(pred);
1836 live = vfp_live_args_after(state->sim, n, REGMASK(out));
1838 DB((dbg, LEVEL_1, ">>> %+F %s -> %s\n", n,
1839 arch_register_get_name(op1), arch_register_get_name(out)));
1840 DEBUG_ONLY(vfp_dump_live(live));
1842 /* handle the infamous unknown value */
1843 if (arch_register_get_index(op1) == REG_VFP_UKNWN) {
1844 /* Operand is still live, a real copy. We need here an fpush that can
1845 hold a a register, so use the fpushCopy or recreate constants */
1846 node = create_Copy(state, n);
1848 assert(is_ia32_fldz(node));
1849 next = sched_next(n);
1852 sched_add_before(next, node);
1854 DB((dbg, LEVEL_1, "<<< %+F %s -> %s\n", node, op1->name,
1855 arch_get_irn_register(node)->name));
1856 return NO_NODE_ADDED;
1859 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1861 if (is_vfp_live(arch_register_get_index(op1), live)) {
1862 ir_node *pred = get_irn_n(n, 0);
1864 /* Operand is still live, a real copy. We need here an fpush that can
1865 hold a a register, so use the fpushCopy or recreate constants */
1866 node = create_Copy(state, n);
1868 /* We have to make sure the old value doesn't go dead (which can happen
1869 * when we recreate constants). As the simulator expected that value in
1870 * the pred blocks. This is unfortunate as removing it would save us 1
1871 * instruction, but we would have to rerun all the simulation to get
1874 next = sched_next(n);
1877 sched_add_before(next, node);
/* pred lost its last user: attach a Keep so it stays alive */
1879 if (get_irn_n_edges(pred) == 0) {
1880 keep_float_node_alive(pred);
1883 DB((dbg, LEVEL_1, "<<< %+F %s -> ?\n", node, op1->name));
1885 out_idx = x87_on_stack(state, arch_register_get_index(out));
1887 if (out_idx >= 0 && out_idx != op1_idx) {
1888 /* Matze: out already on stack? how can this happen? */
1891 /* op1 must be killed and placed where out is */
1893 /* best case, simple remove and rename */
1894 x87_patch_insn(n, op_ia32_Pop);
1895 attr = get_ia32_x87_attr(n);
1896 attr->x87[0] = op1 = &ia32_st_regs[0];
1899 x87_set_st(state, arch_register_get_index(out), n, op1_idx - 1);
1901 /* move op1 to tos, store and pop it */
1903 x87_create_fxch(state, n, op1_idx);
1906 x87_patch_insn(n, op_ia32_Pop);
1907 attr = get_ia32_x87_attr(n);
1908 attr->x87[0] = op1 = &ia32_st_regs[out_idx];
1911 x87_set_st(state, arch_register_get_index(out), n, out_idx - 1);
1913 DB((dbg, LEVEL_1, "<<< %+F %s\n", n, op1->name));
1915 /* just a virtual copy */
1916 x87_set_st(state, arch_register_get_index(out), get_unop_op(n), op1_idx);
1917 /* don't remove the node to keep the verifier quiet :),
1918 the emitter won't emit any code for the node */
1921 DB((dbg, LEVEL_1, "<<< KILLED %s\n", get_irn_opname(n)));
1922 exchange(n, get_unop_op(n));
1926 return NO_NODE_ADDED;
/**
 * Returns the floating point result proj (pn_ia32_Call_vf0) of a call,
 * found by scanning the call's out edges.
 *
 * @param call  the ia32 Call node
 */
1932 static ir_node *get_call_result_proj(ir_node *call)
1934 const ir_edge_t *edge;
1936 /* search the result proj */
1937 foreach_out_edge(call, edge) {
1938 ir_node *proj = get_edge_src_irn(edge);
1939 long pn = get_Proj_proj(proj);
1941 if (pn == pn_ia32_Call_vf0) {
1947 } /* get_call_result_proj */
/**
 * Simulate a ia32_Call.
 * The x87 stack must be empty at a call. If the callee returns a float,
 * the result arrives in st(0), so the result proj is pushed onto the
 * simulated stack.
 *
 * @param state  the x87 state
 * @param n      the node that should be simulated
 *
 * @return NO_NODE_ADDED
 */
1957 static int sim_Call(x87_state *state, ir_node *n)
1959 ir_type *call_tp = get_ia32_call_attr_const(n)->call_tp;
1963 const arch_register_t *reg;
1965 DB((dbg, LEVEL_1, ">>> %+F\n", n));
1967 /* at the begin of a call the x87 state should be empty */
1968 assert(state->depth == 0 && "stack not empty before call");
1970 if (get_method_n_ress(call_tp) <= 0)
1974 * If the called function returns a float, it is returned in st(0).
1975 * This even happens if the return value is NOT used.
1976 * Moreover, only one return result is supported.
1978 res_type = get_method_res_type(call_tp, 0);
1979 mode = get_type_mode(res_type);
/* non-float results do not touch the x87 stack */
1981 if (mode == NULL || !mode_is_float(mode))
1984 resproj = get_call_result_proj(n);
1985 assert(resproj != NULL);
1987 reg = x87_get_irn_register(resproj);
1988 x87_push(state, arch_register_get_index(reg), resproj);
1991 DB((dbg, LEVEL_1, "Stack after: "));
1992 DEBUG_ONLY(x87_dump_stack(state));
1994 return NO_NODE_ADDED;
/**
 * Simulate a be_Spill.
 * Should not happen: spills are lowered before the x87 simulator sees
 * them, so this only asserts and falls back to the fst simulation.
 *
 * @param state  the x87 state
 * @param n      the node that should be simulated (and patched)
 */
2005 static int sim_Spill(x87_state *state, ir_node *n)
2007 assert(0 && "Spill not lowered");
2008 return sim_fst(state, n);
/**
 * Simulate a be_Reload.
 * Should not happen: reloads are lowered before the x87 simulator sees
 * them, so this only asserts and falls back to the fld simulation.
 *
 * @param state  the x87 state
 * @param n      the node that should be simulated (and patched)
 */
2019 static int sim_Reload(x87_state *state, ir_node *n)
2021 assert(0 && "Reload not lowered");
2022 return sim_fld(state, n);
/**
 * Simulate a be_Return.
 * Counts the floating point return values; the simulated stack depth
 * must match, and the values are popped virtually (they stay in the
 * fpu stack for the caller).
 *
 * @param state  the x87 state
 * @param n      the node that should be simulated (and patched)
 *
 * @return NO_NODE_ADDED
 */
2033 static int sim_Return(x87_state *state, ir_node *n)
2035 int n_res = be_Return_get_n_rets(n);
2036 int i, n_float_res = 0;
2038 /* only floating point return values must reside on stack */
2039 for (i = 0; i < n_res; ++i) {
2040 ir_node *res = get_irn_n(n, be_pos_Return_val + i);
2042 if (mode_is_float(get_irn_mode(res)))
2045 assert(x87_get_depth(state) == n_float_res);
2047 /* pop them virtually */
2048 for (i = n_float_res - 1; i >= 0; --i)
2051 return NO_NODE_ADDED;
/** Input/output register pair of a Perm (helper for sim_Perm). */
2054 typedef struct _perm_data_t {
2055 const arch_register_t *in;  /**< input register of the pair */
2056 const arch_register_t *out; /**< output register of the pair */
/**
 * Simulate a be_Perm.
 * On x87 a Perm is purely virtual: all inputs are on the fpu stack and
 * pairwise different, so only the simulated stack slots are renamed —
 * no code is emitted.
 *
 * @param state  the x87 state
 * @param irn    the node that should be simulated (and patched)
 *
 * @return NO_NODE_ADDED
 */
2067 static int sim_Perm(x87_state *state, ir_node *irn)
2070 ir_node *pred = get_irn_n(irn, 0);
2072 const ir_edge_t *edge;
2074 /* handle only floating point Perms */
2075 if (! mode_is_float(get_irn_mode(pred)))
2076 return NO_NODE_ADDED;
2078 DB((dbg, LEVEL_1, ">>> %+F\n", irn));
2080 /* Perm is a pure virtual instruction on x87.
2081 All inputs must be on the FPU stack and are pairwise
2082 different from each other.
2083 So, all we need to do is to permutate the stack state. */
2084 n = get_irn_arity(irn);
2085 NEW_ARR_A(int, stack_pos, n);
2087 /* collect old stack positions */
2088 for (i = 0; i < n; ++i) {
2089 const arch_register_t *inreg = x87_get_irn_register(get_irn_n(irn, i));
2090 int idx = x87_on_stack(state, arch_register_get_index(inreg));
2092 assert(idx >= 0 && "Perm argument not on x87 stack");
2096 /* now do the permutation */
2097 foreach_out_edge(irn, edge) {
2098 ir_node *proj = get_edge_src_irn(edge);
2099 const arch_register_t *out = x87_get_irn_register(proj);
2100 long num = get_Proj_proj(proj);
2102 assert(0 <= num && num < n && "More Proj's than Perm inputs");
2103 x87_set_st(state, arch_register_get_index(out), proj, stack_pos[(unsigned)num]);
2105 DB((dbg, LEVEL_1, "<<< %+F\n", irn));
2107 return NO_NODE_ADDED;
/**
 * Simulate a be_Barrier.
 * Replaces every Unknown_VFP input by a materialized fldz scheduled
 * before the barrier, so a real value sits on the x87 stack.
 *
 * @param state  the x87 state
 * @param node   the node that should be simulated (and patched)
 *
 * @return NO_NODE_ADDED
 */
2110 static int sim_Barrier(x87_state *state, ir_node *node)
2114 /* materialize unknown if needed */
2115 arity = get_irn_arity(node);
2116 for (i = 0; i < arity; ++i) {
2117 const arch_register_t *reg;
2120 ia32_x87_attr_t *attr;
2121 ir_node *in = get_irn_n(node, i);
2123 if (!is_ia32_Unknown_VFP(in))
2126 /* TODO: not completely correct... */
2127 reg = &ia32_vfp_regs[REG_VFP_UKNWN];
2130 block = get_nodes_block(node);
2131 zero = new_rd_ia32_fldz(NULL, current_ir_graph, block, mode_E);
2132 x87_push(state, arch_register_get_index(reg), zero);
2134 attr = get_ia32_x87_attr(zero);
2135 attr->x87[2] = &ia32_st_regs[0];
2137 sched_add_before(node, zero);
/* rewire the barrier input to the materialized zero */
2139 set_irn_n(node, i, zero);
2142 return NO_NODE_ADDED;
/**
 * Kill any dead registers at block start by popping them from the stack.
 * Builds a kill mask of dead stack slots; if everything dies and
 * FEMMS/EMMS is available, the whole stack is cleared with one
 * instruction, otherwise dead values are brought to the tos and popped
 * (using double-pops where possible).
 *
 * @param sim          the simulator handle
 * @param block        the current block
 * @param start_state  the x87 state at the begin of the block
 *
 * @return the x87 state after dead register killed
 */
2155 static x87_state *x87_kill_deads(x87_simulator *sim, ir_node *block, x87_state *start_state)
2157 x87_state *state = start_state;
2158 ir_node *first_insn = sched_first(block);
2159 ir_node *keep = NULL;
2160 unsigned live = vfp_live_args_after(sim, block, 0);
2162 int i, depth, num_pop;
/* mark every stack slot whose value is dead at block entry */
2165 depth = x87_get_depth(state);
2166 for (i = depth - 1; i >= 0; --i) {
2167 int reg = x87_get_st_reg(state, i);
2169 if (! is_vfp_live(reg, live))
2170 kill_mask |= (1 << i);
2174 /* create a new state, will be changed */
2175 state = x87_clone_state(sim, state);
2177 DB((dbg, LEVEL_1, "Killing deads:\n"));
2178 DEBUG_ONLY(vfp_dump_live(live));
2179 DEBUG_ONLY(x87_dump_stack(state));
2181 if (kill_mask != 0 && live == 0) {
2182 /* special case: kill all registers */
2183 if (ia32_cg_config.use_femms || ia32_cg_config.use_emms) {
2184 if (ia32_cg_config.use_femms) {
2185 /* use FEMMS on AMD processors to clear all */
2186 keep = new_rd_ia32_femms(NULL, get_irn_irg(block), block);
2188 /* use EMMS to clear all */
2189 keep = new_rd_ia32_emms(NULL, get_irn_irg(block), block);
2191 sched_add_before(first_insn, keep);
2197 /* now kill registers */
2199 /* we can only kill from TOS, so bring them up */
2200 if (! (kill_mask & 1)) {
2201 /* search from behind, because we can to a double-pop */
2202 for (i = depth - 1; i >= 0; --i) {
2203 if (kill_mask & (1 << i)) {
2204 kill_mask &= ~(1 << i);
2211 x87_set_st(state, -1, keep, i);
2212 x87_create_fxch(state, first_insn, i);
2215 if ((kill_mask & 3) == 3) {
2216 /* we can do a double-pop */
2220 /* only a single pop */
2225 kill_mask >>= num_pop;
2226 keep = x87_create_fpop(state, first_insn, num_pop);
2231 } /* x87_kill_deads */
/**
 * If we have PhiEs with unknown operands then we have to make sure that
 * some value is actually put onto the stack: for each Phi whose input at
 * the given predecessor position is Unknown_VFP, an fldz is created at
 * the end of the predecessor block and wired in.
 *
 * @param state       the x87 state
 * @param block       the block containing the Phis
 * @param pred_block  the predecessor block to fix
 * @param pos         the predecessor position
 */
2237 static void fix_unknown_phis(x87_state *state, ir_node *block,
2238 ir_node *pred_block, int pos)
2242 sched_foreach(block, node) {
2244 const arch_register_t *reg;
2245 ia32_x87_attr_t *attr;
2250 op = get_Phi_pred(node, pos);
2251 if (!is_ia32_Unknown_VFP(op))
2254 reg = arch_get_irn_register(node);
2256 /* create a zero at end of pred block */
2257 zero = new_rd_ia32_fldz(NULL, current_ir_graph, pred_block, mode_E);
2258 x87_push(state, arch_register_get_index(reg), zero);
2260 attr = get_ia32_x87_attr(zero);
2261 attr->x87[2] = &ia32_st_regs[0];
2263 assert(is_ia32_fldz(zero));
/* the fldz must execute before the control flow leaves pred_block */
2264 sched_add_before(sched_last(pred_block), zero);
2266 set_Phi_pred(node, pos, zero);
/**
 * Run a simulation and fix all virtual instructions for a block.
 * Kills dead registers at block entry, runs the registered sim_func of
 * every scheduled node, then propagates the end state to all successors
 * (enqueueing unvisited ones, shuffling the stack for already-visited
 * ones).
 *
 * @param sim    the simulator handle
 * @param block  the current block
 */
2276 static void x87_simulate_block(x87_simulator *sim, ir_node *block)
2279 blk_state *bl_state = x87_get_bl_state(sim, block);
2280 x87_state *state = bl_state->begin;
2281 const ir_edge_t *edge;
2282 ir_node *start_block;
2284 assert(state != NULL);
2285 /* already processed? */
2286 if (bl_state->end != NULL)
2289 DB((dbg, LEVEL_1, "Simulate %+F\n", block));
2290 DB((dbg, LEVEL_2, "State at Block begin:\n "));
2291 DEBUG_ONLY(x87_dump_stack(state));
2293 /* at block begin, kill all dead registers */
2294 state = x87_kill_deads(sim, block, state);
2295 /* create a new state, will be changed */
2296 state = x87_clone_state(sim, state);
2298 /* beware, n might change */
2299 for (n = sched_first(block); !sched_is_end(n); n = next) {
2302 ir_op *op = get_irn_op(n);
2304 next = sched_next(n);
/* nodes without a registered sim_func need no simulation */
2305 if (op->ops.generic == NULL)
2308 func = (sim_func)op->ops.generic;
2311 node_inserted = (*func)(state, n);
2314 sim_func might have added an additional node after n,
2316 beware: n must not be changed by sim_func
2317 (i.e. removed from schedule) in this case
2319 if (node_inserted != NO_NODE_ADDED)
2320 next = sched_next(n);
2323 start_block = get_irg_start_block(get_irn_irg(block));
2325 DB((dbg, LEVEL_2, "State at Block end:\n ")); DEBUG_ONLY(x87_dump_stack(state));
2327 /* check if the state must be shuffled */
2328 foreach_block_succ(block, edge) {
2329 ir_node *succ = get_edge_src_irn(edge);
2330 blk_state *succ_state;
2332 if (succ == start_block)
2335 succ_state = x87_get_bl_state(sim, succ);
2337 fix_unknown_phis(state, succ, block, get_edge_src_pos(edge));
2339 if (succ_state->begin == NULL) {
2340 DB((dbg, LEVEL_2, "Set begin state for succ %+F:\n", succ));
2341 DEBUG_ONLY(x87_dump_stack(state));
2342 succ_state->begin = state;
2344 waitq_put(sim->worklist, succ);
2346 DB((dbg, LEVEL_2, "succ %+F already has a state, shuffling\n", succ));
2347 /* There is already a begin state for the successor, bad.
2348 Do the necessary permutations.
2349 Note that critical edges are removed, so this is always possible:
2350 If the successor has more than one possible input, then it must
2353 x87_shuffle(sim, block, state, succ, succ_state->begin);
2356 bl_state->end = state;
2357 } /* x87_simulate_block */
/**
 * Register a simulator function for an opcode by storing it in the
 * op's generic function pointer (must not be set twice).
 *
 * @param op    the opcode to simulate
 * @param func  the simulator function
 */
2359 static void register_sim(ir_op *op, sim_func func)
2361 assert(op->ops.generic == NULL);
2362 op->ops.generic = (op_func) func;
/**
 * Create a new x87 simulator: initialize its obstack, block-state map
 * and liveness cache, then register a sim_func for every virtual
 * instruction that must be translated.
 *
 * @param sim  a simulator handle, will be initialized
 * @param irg  the current graph
 */
2371 static void x87_init_simulator(x87_simulator *sim, ir_graph *irg)
2373 obstack_init(&sim->obst);
2374 sim->blk_states = pmap_create();
2375 sim->n_idx = get_irg_last_idx(irg);
2376 sim->live = obstack_alloc(&sim->obst, sizeof(*sim->live) * sim->n_idx);
2378 DB((dbg, LEVEL_1, "--------------------------------\n"
2379 "x87 Simulator started for %+F\n", irg));
2381 /* set the generic function pointer of instruction we must simulate */
2382 clear_irp_opcodes_generic_func();
2384 register_sim(op_ia32_Call, sim_Call);
2385 register_sim(op_ia32_vfld, sim_fld);
2386 register_sim(op_ia32_vfild, sim_fild);
2387 register_sim(op_ia32_vfld1, sim_fld1);
2388 register_sim(op_ia32_vfldz, sim_fldz);
2389 register_sim(op_ia32_vfadd, sim_fadd);
2390 register_sim(op_ia32_vfsub, sim_fsub);
2391 register_sim(op_ia32_vfmul, sim_fmul);
2392 register_sim(op_ia32_vfdiv, sim_fdiv);
2393 register_sim(op_ia32_vfprem, sim_fprem);
2394 register_sim(op_ia32_vfabs, sim_fabs);
2395 register_sim(op_ia32_vfchs, sim_fchs);
2396 register_sim(op_ia32_vfist, sim_fist);
2397 register_sim(op_ia32_vfisttp, sim_fisttp);
2398 register_sim(op_ia32_vfst, sim_fst);
2399 register_sim(op_ia32_vFtstFnstsw, sim_FtstFnstsw);
2400 register_sim(op_ia32_vFucomFnstsw, sim_Fucom);
2401 register_sim(op_ia32_vFucomi, sim_Fucom);
2402 register_sim(op_be_Copy, sim_Copy);
2403 register_sim(op_be_Spill, sim_Spill);
2404 register_sim(op_be_Reload, sim_Reload);
2405 register_sim(op_be_Return, sim_Return);
2406 register_sim(op_be_Perm, sim_Perm);
2407 register_sim(op_be_Keep, sim_Keep);
2408 register_sim(op_be_Barrier, sim_Barrier);
2409 } /* x87_init_simulator */
/**
 * Destroy a x87 simulator: free the block-state map and the obstack
 * (which also releases the liveness cache allocated on it).
 *
 * @param sim  the simulator handle
 */
2416 static void x87_destroy_simulator(x87_simulator *sim)
2418 pmap_destroy(sim->blk_states);
2419 obstack_free(&sim->obst, NULL);
2420 DB((dbg, LEVEL_1, "x87 Simulator stopped\n\n"));
2421 } /* x87_destroy_simulator */
/**
 * Pre-block walker: calculate the liveness information for the block
 * and store it into the sim->live cache.
 *
 * @param block  the visited block
 * @param data   the simulator handle (x87_simulator *)
 */
2427 static void update_liveness_walker(ir_node *block, void *data)
2429 x87_simulator *sim = data;
2430 update_liveness(sim, block);
2431 } /* update_liveness_walker */
/**
 * Run the x87 simulator on a whole graph: precompute liveness for all
 * blocks, then process blocks from a worklist starting at the start
 * block (with an empty x87 stack) until a fixpoint is reached.
 *
 * @param birg  the backend graph to simulate
 */
2433 void x87_simulate_graph(be_irg_t *birg)
2435 /* TODO improve code quality (less executed fxch) by using execfreqs */
2437 ir_node *block, *start_block;
2438 blk_state *bl_state;
2440 ir_graph *irg = be_get_birg_irg(birg);
2442 /* create the simulator */
2443 x87_init_simulator(&sim, irg);
2445 start_block = get_irg_start_block(irg);
2446 bl_state = x87_get_bl_state(&sim, start_block);
2448 /* start with the empty state */
2449 bl_state->begin = empty;
2452 sim.worklist = new_waitq();
2453 waitq_put(sim.worklist, start_block);
2455 be_assure_liveness(birg);
2456 sim.lv = be_get_birg_liveness(birg);
2457 // sim.lv = be_liveness(be_get_birg_irg(birg));
2458 be_liveness_assure_sets(sim.lv);
2460 /* Calculate the liveness for all nodes. We must precalculate this info,
2461 * because the simulator adds new nodes (possible before Phi nodes) which
2462 * would let a lazy calculation fail.
2463 * On the other hand we reduce the computation amount due to
2464 * precaching from O(n^2) to O(n) at the expense of O(n) cache memory.
2466 irg_block_walk_graph(irg, update_liveness_walker, NULL, &sim);
/* process the worklist until all reachable blocks have an end state */
2470 block = waitq_get(sim.worklist);
2471 x87_simulate_block(&sim, block);
2472 } while (! waitq_empty(sim.worklist));
2475 del_waitq(sim.worklist);
2476 x87_destroy_simulator(&sim);
2477 } /* x87_simulate_graph */
/** Initialize the x87 module: register its debug handle. */
2479 void ia32_init_x87(void)
2481 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.x87");
2482 } /* ia32_init_x87 */