2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
22 * @brief This file implements the x87 support and virtual to stack
23 * register translation for the ia32 backend.
24 * @author Michael Beck
36 #include "iredges_t.h"
48 #include "../belive_t.h"
49 #include "../besched_t.h"
50 #include "../benode_t.h"
51 #include "bearch_ia32_t.h"
52 #include "ia32_new_nodes.h"
53 #include "gen_ia32_new_nodes.h"
54 #include "gen_ia32_regalloc_if.h"
56 #include "ia32_architecture.h"
/** Mask a (tos-relative) stack position into the circular range 0 .. N_x87_REGS-1. */
#define MASK_TOS(x) ((x) & (N_x87_REGS - 1))
65 /** the debug handle */
66 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
68 /* Forward declaration. */
typedef struct _x87_simulator x87_simulator;
72 * An exchange template.
73 * Note that our virtual functions have the same inputs
74 * and attributes as the real ones, so we can simple exchange
76 * Further, x87 supports inverse instructions, so we can handle them.
78 typedef struct _exchange_tmpl {
79 ir_op *normal_op; /**< the normal one */
80 ir_op *reverse_op; /**< the reverse one if exists */
81 ir_op *normal_pop_op; /**< the normal one with tos pop */
82 ir_op *reverse_pop_op; /**< the reverse one with tos pop */
86 * An entry on the simulated x87 stack.
88 typedef struct _st_entry {
89 int reg_idx; /**< the virtual register index of this stack value */
90 ir_node *node; /**< the node that produced this value */
96 typedef struct _x87_state {
97 st_entry st[N_x87_REGS]; /**< the register stack */
98 int depth; /**< the current stack depth */
99 int tos; /**< position of the tos */
100 x87_simulator *sim; /**< The simulator. */
103 /** An empty state, used for blocks without fp instructions. */
104 static x87_state _empty = { { {0, NULL}, }, 0, 0, NULL };
105 static x87_state *empty = (x87_state *)&_empty;
/** Return values of the instruction simulator functions. */
enum {
	NO_NODE_ADDED = 0,  /**< No node was added. */
	NODE_ADDED    = 1   /**< A node was added by the simulator in the schedule. */
};
113 * The type of an instruction simulator function.
115 * @param state the x87 state
116 * @param n the node to be simulated
118 * @return NODE_ADDED if a node was added AFTER n in schedule,
121 typedef int (*sim_func)(x87_state *state, ir_node *n);
124 * A block state: Every block has a x87 state at the beginning and at the end.
126 typedef struct _blk_state {
127 x87_state *begin; /**< state at the begin or NULL if not assigned */
128 x87_state *end; /**< state at the end or NULL if not assigned */
/** Convert a pmap value pointer back into a blk_state pointer. */
#define PTR_TO_BLKSTATE(p) ((blk_state *)(p))
133 /** liveness bitset for vfp registers. */
typedef unsigned char vfp_liveness;
139 struct _x87_simulator {
140 struct obstack obst; /**< An obstack for fast allocating. */
141 pmap *blk_states; /**< Map blocks to states. */
142 be_lv_t *lv; /**< intrablock liveness. */
143 vfp_liveness *live; /**< Liveness information. */
144 unsigned n_idx; /**< The cached get_irg_last_idx() result. */
145 waitq *worklist; /**< Worklist of blocks that must be processed. */
146 ia32_isa_t *isa; /**< the ISA object */
150 * Returns the current stack depth.
152 * @param state the x87 state
154 * @return the x87 stack depth
156 static int x87_get_depth(const x87_state *state)
159 } /* x87_get_depth */
162 * Return the virtual register index at st(pos).
164 * @param state the x87 state
165 * @param pos a stack position
167 * @return the vfp register index that produced the value at st(pos)
169 static int x87_get_st_reg(const x87_state *state, int pos)
171 assert(pos < state->depth);
172 return state->st[MASK_TOS(state->tos + pos)].reg_idx;
173 } /* x87_get_st_reg */
177 * Return the node at st(pos).
179 * @param state the x87 state
180 * @param pos a stack position
182 * @return the IR node that produced the value at st(pos)
184 static ir_node *x87_get_st_node(const x87_state *state, int pos)
186 assert(pos < state->depth);
187 return state->st[MASK_TOS(state->tos + pos)].node;
188 } /* x87_get_st_node */
191 * Dump the stack for debugging.
193 * @param state the x87 state
#ifdef DEBUG_libfirm
static void x87_dump_stack(const x87_state *state)
{
	int i;

	/* print from the bottom of the simulated stack up to the tos */
	for (i = state->depth - 1; i >= 0; --i) {
		DB((dbg, LEVEL_2, "vf%d(%+F) ", x87_get_st_reg(state, i),
		    x87_get_st_node(state, i)));
	}
	DB((dbg, LEVEL_2, "<-- TOS\n"));
} /* x87_dump_stack */
#endif /* DEBUG_libfirm */
208 * Set a virtual register to st(pos).
210 * @param state the x87 state
211 * @param reg_idx the vfp register index that should be set
212 * @param node the IR node that produces the value of the vfp register
213 * @param pos the stack position where the new value should be entered
215 static void x87_set_st(x87_state *state, int reg_idx, ir_node *node, int pos)
217 assert(0 < state->depth);
218 state->st[MASK_TOS(state->tos + pos)].reg_idx = reg_idx;
219 state->st[MASK_TOS(state->tos + pos)].node = node;
221 DB((dbg, LEVEL_2, "After SET_REG: "));
222 DEBUG_ONLY(x87_dump_stack(state));
226 * Set the tos virtual register.
228 * @param state the x87 state
229 * @param reg_idx the vfp register index that should be set
230 * @param node the IR node that produces the value of the vfp register
232 static void x87_set_tos(x87_state *state, int reg_idx, ir_node *node)
234 x87_set_st(state, reg_idx, node, 0);
238 * Swap st(0) with st(pos).
240 * @param state the x87 state
241 * @param pos the stack position to change the tos with
243 static void x87_fxch(x87_state *state, int pos)
246 assert(pos < state->depth);
248 entry = state->st[MASK_TOS(state->tos + pos)];
249 state->st[MASK_TOS(state->tos + pos)] = state->st[MASK_TOS(state->tos)];
250 state->st[MASK_TOS(state->tos)] = entry;
252 DB((dbg, LEVEL_2, "After FXCH: ")); DEBUG_ONLY(x87_dump_stack(state));
256 * Convert a virtual register to the stack index.
258 * @param state the x87 state
259 * @param reg_idx the register vfp index
261 * @return the stack position where the register is stacked
262 * or -1 if the virtual register was not found
264 static int x87_on_stack(const x87_state *state, int reg_idx)
266 int i, tos = state->tos;
268 for (i = 0; i < state->depth; ++i)
269 if (state->st[MASK_TOS(tos + i)].reg_idx == reg_idx)
275 * Push a virtual Register onto the stack, double pushed allowed.
277 * @param state the x87 state
278 * @param reg_idx the register vfp index
279 * @param node the node that produces the value of the vfp register
281 static void x87_push_dbl(x87_state *state, int reg_idx, ir_node *node)
283 assert(state->depth < N_x87_REGS && "stack overrun");
286 state->tos = MASK_TOS(state->tos - 1);
287 state->st[state->tos].reg_idx = reg_idx;
288 state->st[state->tos].node = node;
290 DB((dbg, LEVEL_2, "After PUSH: ")); DEBUG_ONLY(x87_dump_stack(state));
294 * Push a virtual Register onto the stack, double pushes are NOT allowed.
296 * @param state the x87 state
297 * @param reg_idx the register vfp index
298 * @param node the node that produces the value of the vfp register
299 * @param dbl_push if != 0 double pushes are allowed
301 static void x87_push(x87_state *state, int reg_idx, ir_node *node)
303 assert(x87_on_stack(state, reg_idx) == -1 && "double push");
305 x87_push_dbl(state, reg_idx, node);
309 * Pop a virtual Register from the stack.
311 * @param state the x87 state
313 static void x87_pop(x87_state *state)
315 assert(state->depth > 0 && "stack underrun");
318 state->tos = MASK_TOS(state->tos + 1);
320 DB((dbg, LEVEL_2, "After POP: ")); DEBUG_ONLY(x87_dump_stack(state));
324 * Empty the fpu stack
326 * @param state the x87 state
328 static void x87_emms(x87_state *state)
335 * Returns the block state of a block.
337 * @param sim the x87 simulator handle
338 * @param block the current block
340 * @return the block state
342 static blk_state *x87_get_bl_state(x87_simulator *sim, ir_node *block)
344 pmap_entry *entry = pmap_find(sim->blk_states, block);
347 blk_state *bl_state = obstack_alloc(&sim->obst, sizeof(*bl_state));
348 bl_state->begin = NULL;
349 bl_state->end = NULL;
351 pmap_insert(sim->blk_states, block, bl_state);
355 return PTR_TO_BLKSTATE(entry->value);
356 } /* x87_get_bl_state */
359 * Creates a new x87 state.
361 * @param sim the x87 simulator handle
363 * @return a new x87 state
365 static x87_state *x87_alloc_state(x87_simulator *sim)
367 x87_state *res = obstack_alloc(&sim->obst, sizeof(*res));
371 } /* x87_alloc_state */
376 * @param sim the x87 simulator handle
377 * @param src the x87 state that will be cloned
379 * @return a cloned copy of the src state
381 static x87_state *x87_clone_state(x87_simulator *sim, const x87_state *src)
383 x87_state *res = x87_alloc_state(sim);
385 memcpy(res, src, sizeof(*res));
387 } /* x87_clone_state */
390 * Patch a virtual instruction into a x87 one and return
391 * the node representing the result value.
393 * @param n the IR node to patch
394 * @param op the x87 opcode to patch in
/* NOTE(review): this listing is line-sampled -- the opening brace, the
 * declaration of the returned result node, the set_irn_op() call and
 * several closing braces/returns of x87_patch_insn are not visible here.
 * The comments below describe only the visible lines. */
396 static ir_node *x87_patch_insn(ir_node *n, ir_op *op)
398 	ir_mode *mode = get_irn_mode(n);
403 	if (mode == mode_T) {
404 		/* patch all Proj's */
405 		const ir_edge_t *edge;
407 		foreach_out_edge(n, edge) {
408 			ir_node *proj = get_edge_src_irn(edge);
410 			mode = get_irn_mode(proj);
411 			if (mode_is_float(mode)) {
/* float results are forced to the x87's 80-bit extended mode_E */
413 				set_irn_mode(proj, mode_E);
417 	} else if (mode_is_float(mode))
418 		set_irn_mode(n, mode_E);
420 } /* x87_patch_insn */
423 * Returns the first Proj of a mode_T node having a given mode.
425 * @param n the mode_T node
426 * @param m the desired mode of the Proj
427 * @return The first Proj of mode @p m found or NULL.
429 static ir_node *get_irn_Proj_for_mode(ir_node *n, ir_mode *m)
431 const ir_edge_t *edge;
433 assert(get_irn_mode(n) == mode_T && "Need mode_T node");
435 foreach_out_edge(n, edge) {
436 ir_node *proj = get_edge_src_irn(edge);
437 if (get_irn_mode(proj) == m)
442 } /* get_irn_Proj_for_mode */
445 * Wrap the arch_* function here so we can check for errors.
447 static INLINE const arch_register_t *x87_get_irn_register(const ir_node *irn)
449 const arch_register_t *res = arch_get_irn_register(irn);
451 assert(res->reg_class->regs == ia32_vfp_regs);
453 } /* x87_get_irn_register */
455 /* -------------- x87 perm --------------- */
458 * Creates a fxch for shuffle.
460 * @param state the x87 state
461 * @param pos parameter for fxch
462 * @param block the block were fxch is inserted
464 * Creates a new fxch node and reroute the user of the old node
467 * @return the fxch node
/* NOTE(review): line-sampled listing -- the opening brace, the declaration
 * of 'fxch', the edge-rerouting/keep statements and the return of the new
 * node are not visible here. Comments describe the visible lines only. */
469 static ir_node *x87_fxch_shuffle(x87_state *state, int pos, ir_node *block)
472 ia32_x87_attr_t *attr;
474 fxch = new_rd_ia32_fxch(NULL, get_irn_irg(block), block);
475 attr = get_ia32_x87_attr(fxch);
/* encode the exchange pair: st(pos) <-> st(0) */
476 attr->x87[0] = &ia32_st_regs[pos];
477 attr->x87[2] = &ia32_st_regs[0];
/* mirror the exchange in the simulated stack state */
481 x87_fxch(state, pos);
483 } /* x87_fxch_shuffle */
486 * Calculate the necessary permutations to reach dst_state.
488 * These permutations are done with fxch instructions and placed
489 * at the end of the block.
491 * Note that critical edges are removed here, so we need only
492 * a shuffle if the current block has only one successor.
494 * @param sim the simulator handle
495 * @param block the current block
496 * @param state the current x87 stack state, might be modified
497 * @param dst_block the destination block
498 * @param dst_state destination state
/* NOTE(review): line-sampled listing -- the opening brace, several loop
 * bodies (break/continue statements, closing braces), the handling of the
 * one-vs-many successor case ('after' computation) and the final return of
 * the state are not visible here. Comments describe the visible lines only. */
502 static x87_state *x87_shuffle(x87_simulator *sim, ir_node *block,
503 x87_state *state, ir_node *dst_block,
504 const x87_state *dst_state)
506 int i, n_cycles, k, ri;
507 unsigned cycles[4], all_mask;
508 char cycle_idx[4][8];
509 ir_node *fxch, *before, *after;
513 assert(state->depth == dst_state->depth);
515 /* Some mathematics here:
516 If we have a cycle of length n that includes the tos,
517 we need n-1 exchange operations.
518 We can always add the tos and restore it, so we need
519 n+1 exchange operations for a cycle not containing the tos.
520 So, the maximum of needed operations is for a cycle of 7
521 not including the tos == 8.
522 This is the same number of ops we would need for using stores,
523 so exchange is cheaper (we save the loads).
524 On the other hand, we might need an additional exchange
525 in the next block to bring one operand on top, so the
526 number of ops in the first case is identical.
527 Further, no more than 4 cycles can exists (4 x 2).
/* bitmask of stack slots not yet assigned to a permutation cycle */
529 all_mask = (1 << (state->depth)) - 1;
531 for (n_cycles = 0; all_mask; ++n_cycles) {
532 int src_idx, dst_idx;
534 /* find the first free slot */
535 for (i = 0; i < state->depth; ++i) {
536 if (all_mask & (1 << i)) {
537 all_mask &= ~(1 << i);
539 /* check if there are differences here */
540 if (x87_get_st_reg(state, i) != x87_get_st_reg(dst_state, i))
546 /* no more cycles found */
/* follow the permutation starting at slot i until it closes */
551 cycles[n_cycles] = (1 << i);
552 cycle_idx[n_cycles][k++] = i;
553 for (src_idx = i; ; src_idx = dst_idx) {
554 dst_idx = x87_on_stack(dst_state, x87_get_st_reg(state, src_idx));
556 if ((all_mask & (1 << dst_idx)) == 0)
559 cycle_idx[n_cycles][k++] = dst_idx;
560 cycles[n_cycles] |= (1 << dst_idx);
561 all_mask &= ~(1 << dst_idx);
/* -1 terminates each cycle's index list */
563 cycle_idx[n_cycles][k] = -1;
567 /* no permutation needed */
571 /* Hmm: permutation needed */
572 DB((dbg, LEVEL_2, "\n%+F needs permutation: from\n", block));
573 DEBUG_ONLY(x87_dump_stack(state));
574 DB((dbg, LEVEL_2, " to\n"));
575 DEBUG_ONLY(x87_dump_stack(dst_state));
579 DB((dbg, LEVEL_2, "Need %d cycles\n", n_cycles));
580 for (ri = 0; ri < n_cycles; ++ri) {
581 DB((dbg, LEVEL_2, " Ring %d:\n ", ri));
582 for (k = 0; cycle_idx[ri][k] != -1; ++k)
583 DB((dbg, LEVEL_2, " st%d ->", cycle_idx[ri][k]));
584 DB((dbg, LEVEL_2, "\n"));
591 * Find the place node must be insert.
592 * We have only one successor block, so the last instruction should
595 before = sched_last(block);
596 assert(is_cfop(before));
598 /* now do the permutations */
599 for (ri = 0; ri < n_cycles; ++ri) {
600 if ((cycles[ri] & 1) == 0) {
601 /* this cycle does not include the tos */
602 fxch = x87_fxch_shuffle(state, cycle_idx[ri][0], block);
604 sched_add_after(after, fxch);
606 sched_add_before(before, fxch);
609 for (k = 1; cycle_idx[ri][k] != -1; ++k) {
610 fxch = x87_fxch_shuffle(state, cycle_idx[ri][k], block);
612 sched_add_after(after, fxch);
614 sched_add_before(before, fxch);
/* cycle without tos: restore the tos element with a final exchange */
617 if ((cycles[ri] & 1) == 0) {
618 /* this cycle does not include the tos */
619 fxch = x87_fxch_shuffle(state, cycle_idx[ri][0], block);
620 sched_add_after(after, fxch);
627 * Create a fxch node before another node.
629 * @param state the x87 state
630 * @param n the node after the fxch
631 * @param pos exchange st(pos) with st(0)
/* NOTE(review): line-sampled listing -- the opening brace, the declaration
 * of 'fxch', the edge rerouting/keep statements and the return of the new
 * node are not visible here. Comments describe the visible lines only. */
635 static ir_node *x87_create_fxch(x87_state *state, ir_node *n, int pos)
638 ia32_x87_attr_t *attr;
639 ir_graph *irg = get_irn_irg(n);
640 ir_node *block = get_nodes_block(n);
/* update the simulated stack first, then emit the real fxch */
642 x87_fxch(state, pos);
644 fxch = new_rd_ia32_fxch(NULL, irg, block);
645 attr = get_ia32_x87_attr(fxch);
646 attr->x87[0] = &ia32_st_regs[pos];
647 attr->x87[2] = &ia32_st_regs[0];
651 sched_add_before(n, fxch);
652 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fxch), attr->x87[0]->name, attr->x87[2]->name));
654 } /* x87_create_fxch */
657 * Create a fpush before node n.
659 * @param state the x87 state
660 * @param n the node after the fpush
661 * @param pos push st(pos) on stack
662 * @param op_idx replace input op_idx of n with the fpush result
/* NOTE(review): line-sampled listing -- the opening brace and the
 * statement(s) between attribute setup and scheduling (presumably the
 * keep/rerouting of input op_idx -- TODO confirm) are not visible here. */
664 static void x87_create_fpush(x87_state *state, ir_node *n, int pos, int op_idx)
666 ir_node *fpush, *pred = get_irn_n(n, op_idx);
667 ia32_x87_attr_t *attr;
668 const arch_register_t *out = x87_get_irn_register(pred);
/* duplicate pushes are allowed here: the same value may appear twice */
670 x87_push_dbl(state, arch_register_get_index(out), pred);
672 fpush = new_rd_ia32_fpush(NULL, get_irn_irg(n), get_nodes_block(n));
673 attr = get_ia32_x87_attr(fpush);
674 attr->x87[0] = &ia32_st_regs[pos];
675 attr->x87[2] = &ia32_st_regs[0];
678 sched_add_before(n, fpush);
680 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fpush), attr->x87[0]->name, attr->x87[2]->name));
681 } /* x87_create_fpush */
684 * Create a fpop before node n.
686 * @param state the x87 state
687 * @param n the node after the fpop
688 * @param num pop 1 or 2 values
690 * @return the fpop node
/* NOTE(review): line-sampled listing -- the opening brace, the loop over
 * 'num' pops (including the x87_pop() call and the else keyword before the
 * plain fpop case) and the final return are not visible here. */
692 static ir_node *x87_create_fpop(x87_state *state, ir_node *n, int num)
694 ir_node *fpop = NULL;
695 ia32_x87_attr_t *attr;
/* ffreep (3DNow!/Athlon) is cheaper than fxch+fstp when available */
700 if (ia32_cg_config.use_ffreep)
701 fpop = new_rd_ia32_ffreep(NULL, get_irn_irg(n), get_nodes_block(n));
703 fpop = new_rd_ia32_fpop(NULL, get_irn_irg(n), get_nodes_block(n));
704 attr = get_ia32_x87_attr(fpop);
705 attr->x87[0] = &ia32_st_regs[0];
706 attr->x87[1] = &ia32_st_regs[0];
707 attr->x87[2] = &ia32_st_regs[0];
710 sched_add_before(n, fpop);
711 DB((dbg, LEVEL_1, "<<< %s %s\n", get_irn_opname(fpop), attr->x87[0]->name));
716 } /* x87_create_fpop */
719 * Creates an fldz before node n
721 * @param state the x87 state
722 * @param n the node after the fldz
724 * @return the fldz node
/* NOTE(review): line-sampled listing -- the opening brace, the declaration
 * of 'fldz', the statement(s) after the DB() call (presumably register
 * assignment/keep -- TODO confirm), the return and closing brace are not
 * visible here. */
726 static ir_node *x87_create_fldz(x87_state *state, ir_node *n, int regidx)
728 ir_graph *irg = get_irn_irg(n);
729 ir_node *block = get_nodes_block(n);
/* x87 values are always produced in the 80-bit extended mode_E */
732 fldz = new_rd_ia32_fldz(NULL, irg, block, mode_E);
734 sched_add_before(n, fldz);
735 DB((dbg, LEVEL_1, "<<< %s\n", get_irn_opname(fldz)));
/* record the constant on the simulated stack under regidx */
738 x87_push(state, regidx, fldz);
743 /* --------------------------------- liveness ------------------------------------------ */
746 * The liveness transfer function.
747 * Updates a live set over a single step from a given node to its predecessor.
748 * Everything defined at the node is removed from the set, the uses of the node get inserted.
750 * @param irn The node at which liveness should be computed.
751 * @param live The bitset of registers live before @p irn. This set gets modified by updating it to
752 * the registers live after irn.
754 * @return The live bitset.
/* NOTE(review): line-sampled listing -- the opening brace, the declaration
 * of loop variables 'i' and 'n', and several closing braces plus the
 * 'return live;' statement are not visible here. */
756 static vfp_liveness vfp_liveness_transfer(ir_node *irn, vfp_liveness live)
759 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
/* a mode_T node defines its values via Proj nodes: kill each defined reg */
761 if (get_irn_mode(irn) == mode_T) {
762 const ir_edge_t *edge;
764 foreach_out_edge(irn, edge) {
765 ir_node *proj = get_edge_src_irn(edge);
767 if (arch_irn_consider_in_reg_alloc(cls, proj)) {
768 const arch_register_t *reg = x87_get_irn_register(proj);
769 live &= ~(1 << arch_register_get_index(reg));
/* single-result node: kill its own output register */
774 if (arch_irn_consider_in_reg_alloc(cls, irn)) {
775 const arch_register_t *reg = x87_get_irn_register(irn);
776 live &= ~(1 << arch_register_get_index(reg));
/* every float operand used by irn becomes live before it */
779 for (i = 0, n = get_irn_arity(irn); i < n; ++i) {
780 ir_node *op = get_irn_n(irn, i);
782 if (mode_is_float(get_irn_mode(op)) &&
783 arch_irn_consider_in_reg_alloc(cls, op)) {
784 const arch_register_t *reg = x87_get_irn_register(op);
785 live |= 1 << arch_register_get_index(reg);
789 } /* vfp_liveness_transfer */
792 * Put all live virtual registers at the end of a block into a bitset.
794 * @param sim the simulator handle
795 * @param lv the liveness information
796 * @param bl the block
798 * @return The live bitset at the end of this block
/* NOTE(review): line-sampled listing -- the opening brace, the declaration
 * of the iteration variable 'i', the 'continue;' after the reg-alloc test,
 * the loop's closing brace and the 'return live;' are not visible here. */
800 static vfp_liveness vfp_liveness_end_of_block(x87_simulator *sim, const ir_node *block)
803 vfp_liveness live = 0;
804 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
805 const be_lv_t *lv = sim->lv;
/* collect every vfp register that is live-out of this block */
807 be_lv_foreach(lv, block, be_lv_state_end, i) {
808 const arch_register_t *reg;
809 const ir_node *node = be_lv_get_irn(lv, block, i);
810 if (!arch_irn_consider_in_reg_alloc(cls, node))
813 reg = x87_get_irn_register(node);
814 live |= 1 << arch_register_get_index(reg);
818 } /* vfp_liveness_end_of_block */
820 /** get the register mask from an arch_register */
#define REGMASK(reg) (1 << (arch_register_get_index(reg)))
824 * Return a bitset of argument registers which are live at the end of a node.
826 * @param sim the simulator handle
827 * @param pos the node
828 * @param kill kill mask for the output registers
830 * @return The live bitset.
832 static unsigned vfp_live_args_after(x87_simulator *sim, const ir_node *pos, unsigned kill)
834 unsigned idx = get_irn_idx(pos);
836 assert(idx < sim->n_idx);
837 return sim->live[idx] & ~kill;
838 } /* vfp_live_args_after */
841 * Calculate the liveness for a whole block and cache it.
843 * @param sim the simulator handle
844 * @param lv the liveness handle
845 * @param block the block
847 static void update_liveness(x87_simulator *sim, ir_node *block)
849 vfp_liveness live = vfp_liveness_end_of_block(sim, block);
853 /* now iterate through the block backward and cache the results */
854 sched_foreach_reverse(block, irn) {
855 /* stop at the first Phi: this produces the live-in */
859 idx = get_irn_idx(irn);
860 sim->live[idx] = live;
862 live = vfp_liveness_transfer(irn, live);
864 idx = get_irn_idx(block);
865 sim->live[idx] = live;
866 } /* update_liveness */
869 * Returns true if a register is live in a set.
871 * @param reg_idx the vfp register index
872 * @param live a live bitset
#define is_vfp_live(reg_idx, live) ((live) & (1 << (reg_idx)))
878 * Dump liveness info.
880 * @param live the live bitset
#ifdef DEBUG_libfirm
static void vfp_dump_live(vfp_liveness live)
{
	int i;

	DB((dbg, LEVEL_2, "Live after: "));
	/* at most 8 virtual fp registers exist */
	for (i = 0; i < 8; ++i) {
		if (live & (1 << i)) {
			DB((dbg, LEVEL_2, "vf%d ", i));
		}
	}
	DB((dbg, LEVEL_2, "\n"));
} /* vfp_dump_live */
#endif /* DEBUG_libfirm */
896 /* --------------------------------- simulators ---------------------------------------- */
/** Exchange two int lvalues. */
#define XCHG(a, b) do { int t = (a); (a) = (b); (b) = t; } while (0)
910 * Simulate a virtual binop.
912 * @param state the x87 state
913 * @param n the node that should be simulated (and patched)
914 * @param tmpl the template containing the 4 possible x87 opcodes
916 * @return NO_NODE_ADDED
/* NOTE(review): line-sampled listing -- the opening brace, several local
 * declarations (dst, permuted, op1_live_after/op2_live_after), the bodies
 * of the REG_VFP_UKNWN branches, numerous else/closing braces and the
 * fxch/out_idx bookkeeping between the visible lines are not visible here.
 * Comments describe the visible lines only. */
918 static int sim_binop(x87_state *state, ir_node *n, const exchange_tmpl *tmpl)
920 int op2_idx = 0, op1_idx;
921 int out_idx, do_pop = 0;
922 ia32_x87_attr_t *attr;
924 ir_node *patched_insn;
926 x87_simulator *sim = state->sim;
927 ir_node *op1 = get_irn_n(n, n_ia32_binary_left);
928 ir_node *op2 = get_irn_n(n, n_ia32_binary_right);
929 const arch_register_t *op1_reg = x87_get_irn_register(op1);
930 const arch_register_t *op2_reg = x87_get_irn_register(op2);
931 const arch_register_t *out = x87_get_irn_register(n);
932 int reg_index_1 = arch_register_get_index(op1_reg);
933 int reg_index_2 = arch_register_get_index(op2_reg);
934 vfp_liveness live = vfp_live_args_after(sim, n, REGMASK(out));
938 DB((dbg, LEVEL_1, ">>> %+F %s, %s -> %s\n", n,
939 arch_register_get_name(op1_reg), arch_register_get_name(op2_reg),
940 arch_register_get_name(out)));
941 DEBUG_ONLY(vfp_dump_live(live));
942 DB((dbg, LEVEL_1, "Stack before: "));
943 DEBUG_ONLY(x87_dump_stack(state));
945 if (reg_index_1 == REG_VFP_UKNWN) {
949 op1_idx = x87_on_stack(state, reg_index_1);
950 assert(op1_idx >= 0);
951 op1_live_after = is_vfp_live(arch_register_get_index(op1_reg), live);
954 attr = get_ia32_x87_attr(n);
955 permuted = attr->attr.data.ins_permuted;
/* reg_index_2 == REG_VFP_NOREG means the second operand is in memory (AM) */
957 if (reg_index_2 != REG_VFP_NOREG) {
960 if (reg_index_2 == REG_VFP_UKNWN) {
964 /* second operand is a vfp register */
965 op2_idx = x87_on_stack(state, reg_index_2);
966 assert(op2_idx >= 0);
968 = is_vfp_live(arch_register_get_index(op2_reg), live);
971 if (op2_live_after) {
972 /* Second operand is live. */
974 if (op1_live_after) {
975 /* Both operands are live: push the first one.
976 This works even for op1 == op2. */
977 x87_create_fpush(state, n, op1_idx, n_ia32_binary_right);
978 /* now do fxxx (tos=tos X op) */
982 dst = tmpl->normal_op;
984 /* Second live, first operand is dead here, bring it to tos. */
986 x87_create_fxch(state, n, op1_idx);
991 /* now do fxxx (tos=tos X op) */
993 dst = tmpl->normal_op;
996 /* Second operand is dead. */
997 if (op1_live_after) {
998 /* First operand is live: bring second to tos. */
1000 x87_create_fxch(state, n, op2_idx);
1005 /* now do fxxxr (tos = op X tos) */
1007 dst = tmpl->reverse_op;
1009 /* Both operands are dead here, pop them from the stack. */
1012 /* Both are identically and on tos, no pop needed. */
1013 /* here fxxx (tos = tos X tos) */
1014 dst = tmpl->normal_op;
1017 /* now do fxxxp (op = op X tos, pop) */
1018 dst = tmpl->normal_pop_op;
1022 } else if (op1_idx == 0) {
1023 assert(op1_idx != op2_idx);
1024 /* now do fxxxrp (op = tos X op, pop) */
1025 dst = tmpl->reverse_pop_op;
1029 /* Bring the second on top. */
1030 x87_create_fxch(state, n, op2_idx);
1031 if (op1_idx == op2_idx) {
1032 /* Both are identically and on tos now, no pop needed. */
1035 /* use fxxx (tos = tos X tos) */
1036 dst = tmpl->normal_op;
1039 /* op2 is on tos now */
1041 /* use fxxxp (op = op X tos, pop) */
1042 dst = tmpl->normal_pop_op;
1050 /* second operand is an address mode */
1051 if (op1_live_after) {
1052 /* first operand is live: push it here */
1053 x87_create_fpush(state, n, op1_idx, n_ia32_binary_left);
1056 /* first operand is dead: bring it to tos */
1058 x87_create_fxch(state, n, op1_idx);
1063 /* use fxxx (tos = tos X mem) */
1064 dst = permuted ? tmpl->reverse_op : tmpl->normal_op;
/* replace the virtual op with the chosen x87 opcode and update the stack */
1068 patched_insn = x87_patch_insn(n, dst);
1069 x87_set_st(state, arch_register_get_index(out), patched_insn, out_idx);
1074 /* patch the operation */
1075 attr->x87[0] = op1_reg = &ia32_st_regs[op1_idx];
1076 if (reg_index_2 != REG_VFP_NOREG) {
1077 attr->x87[1] = op2_reg = &ia32_st_regs[op2_idx];
1079 attr->x87[2] = out = &ia32_st_regs[out_idx];
1081 if (reg_index_2 != REG_VFP_NOREG) {
1082 DB((dbg, LEVEL_1, "<<< %s %s, %s -> %s\n", get_irn_opname(n),
1083 arch_register_get_name(op1_reg), arch_register_get_name(op2_reg),
1084 arch_register_get_name(out)));
1086 DB((dbg, LEVEL_1, "<<< %s %s, [AM] -> %s\n", get_irn_opname(n),
1087 arch_register_get_name(op1_reg),
1088 arch_register_get_name(out)));
1091 return NO_NODE_ADDED;
1095 * Simulate a virtual Unop.
1097 * @param state the x87 state
1098 * @param n the node that should be simulated (and patched)
1099 * @param op the x87 opcode that will replace n's opcode
1101 * @return NO_NODE_ADDED
/* NOTE(review): line-sampled listing -- the opening brace, the resets of
 * op1_idx after fpush/fxch, the else branches/closing braces and the
 * assignment of out_idx are not visible here. Comments describe the
 * visible lines only. */
1103 static int sim_unop(x87_state *state, ir_node *n, ir_op *op)
1105 int op1_idx, out_idx;
1106 x87_simulator *sim = state->sim;
1107 const arch_register_t *op1 = x87_get_irn_register(get_irn_n(n, UNOP_IDX));
1108 const arch_register_t *out = x87_get_irn_register(n);
1109 ia32_x87_attr_t *attr;
1110 unsigned live = vfp_live_args_after(sim, n, REGMASK(out));
1112 DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, out->name));
1113 DEBUG_ONLY(vfp_dump_live(live));
1115 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
/* a live operand must be preserved: duplicate it on the tos */
1117 if (is_vfp_live(arch_register_get_index(op1), live)) {
1118 /* push the operand here */
1119 x87_create_fpush(state, n, op1_idx, UNOP_IDX);
1123 /* operand is dead, bring it to tos */
1125 x87_create_fxch(state, n, op1_idx);
/* unops always operate on and write to the tos */
1130 x87_set_tos(state, arch_register_get_index(out), x87_patch_insn(n, op));
1132 attr = get_ia32_x87_attr(n);
1133 attr->x87[0] = op1 = &ia32_st_regs[0];
1134 attr->x87[2] = out = &ia32_st_regs[0];
1135 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), out->name));
1137 return NO_NODE_ADDED;
1141 * Simulate a virtual Load instruction.
1143 * @param state the x87 state
1144 * @param n the node that should be simulated (and patched)
1145 * @param op the x87 opcode that will replace n's opcode
1147 * @return NO_NODE_ADDED
1149 static int sim_load(x87_state *state, ir_node *n, ir_op *op)
1151 const arch_register_t *out = x87_get_irn_register(n);
1152 ia32_x87_attr_t *attr;
1154 DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, arch_register_get_name(out)));
1155 x87_push(state, arch_register_get_index(out), x87_patch_insn(n, op));
1156 assert(out == x87_get_irn_register(n));
1157 attr = get_ia32_x87_attr(n);
1158 attr->x87[2] = out = &ia32_st_regs[0];
1159 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), arch_register_get_name(out)));
1161 return NO_NODE_ADDED;
1165 * Rewire all users of @p old_val to @new_val iff they are scheduled after @p store.
1167 * @param store The store
1168 * @param old_val The former value
1169 * @param new_val The new value
1171 static void collect_and_rewire_users(ir_node *store, ir_node *old_val, ir_node *new_val)
1173 const ir_edge_t *edge, *ne;
1175 foreach_out_edge_safe(old_val, edge, ne) {
1176 ir_node *user = get_edge_src_irn(edge);
1178 if (! user || user == store)
1181 /* if the user is scheduled after the store: rewire */
1182 if (sched_is_scheduled(user) && sched_comes_after(store, user)) {
1184 /* find the input of the user pointing to the old value */
1185 for (i = get_irn_arity(user) - 1; i >= 0; i--) {
1186 if (get_irn_n(user, i) == old_val)
1187 set_irn_n(user, i, new_val);
1191 } /* collect_and_rewire_users */
1194 * Simulate a virtual Store.
1196 * @param state the x87 state
1197 * @param n the node that should be simulated (and patched)
1198 * @param op the x87 store opcode
1199 * @param op_p the x87 store and pop opcode
/* NOTE(review): line-sampled listing -- the opening brace, the declarations
 * of 'mode'/'irg', several else keywords and closing braces, the do_pop /
 * x87_pop bookkeeping and the final return are not visible here. Comments
 * describe the visible lines only. */
1201 static int sim_store(x87_state *state, ir_node *n, ir_op *op, ir_op *op_p)
1203 ir_node *val = get_irn_n(n, n_ia32_vfst_val);
1204 const arch_register_t *op2 = x87_get_irn_register(val);
1205 unsigned live = vfp_live_args_after(state->sim, n, 0);
1206 int insn = NO_NODE_ADDED;
1207 ia32_x87_attr_t *attr;
1208 int op2_reg_idx, op2_idx, depth;
1209 int live_after_node;
1212 op2_reg_idx = arch_register_get_index(op2);
1213 if (op2_reg_idx == REG_VFP_UKNWN) {
1214 /* just take any value from stack */
1215 if (state->depth > 0) {
1217 DEBUG_ONLY(op2 = NULL);
1218 live_after_node = 1;
1220 /* produce a new value which we will consume immediately */
1221 x87_create_fldz(state, n, op2_reg_idx);
1222 live_after_node = 0;
1223 op2_idx = x87_on_stack(state, op2_reg_idx);
1224 assert(op2_idx >= 0);
1227 op2_idx = x87_on_stack(state, op2_reg_idx);
1228 live_after_node = is_vfp_live(arch_register_get_index(op2), live);
1229 DB((dbg, LEVEL_1, ">>> %+F %s ->\n", n, arch_register_get_name(op2)));
1230 assert(op2_idx >= 0);
1233 mode = get_ia32_ls_mode(n);
1234 depth = x87_get_depth(state);
1236 if (live_after_node) {
1238 Problem: fst doesn't support mode_E (spills), only fstp does
1240 - stack not full: push value and fstp
1241 - stack full: fstp value and load again
1242 Note that we cannot test on mode_E, because floats might be 96bit ...
1244 if (get_mode_size_bits(mode) > 64 || mode == mode_Ls) {
1245 if (depth < N_x87_REGS) {
1246 /* ok, we have a free register: push + fstp */
1247 x87_create_fpush(state, n, op2_idx, n_ia32_vfst_val);
1249 x87_patch_insn(n, op_p);
1251 ir_node *vfld, *mem, *block, *rproj, *mproj;
1254 /* stack full here: need fstp + load */
1256 x87_patch_insn(n, op_p);
1258 block = get_nodes_block(n);
1259 irg = get_irn_irg(n);
/* synthesize a reload of the just-stored value from the same address */
1260 vfld = new_rd_ia32_vfld(NULL, irg, block, get_irn_n(n, 0), get_irn_n(n, 1), new_rd_NoMem(irg), get_ia32_ls_mode(n));
1262 /* copy all attributes */
1263 set_ia32_frame_ent(vfld, get_ia32_frame_ent(n));
1264 if (is_ia32_use_frame(n))
1265 set_ia32_use_frame(vfld);
1266 set_ia32_op_type(vfld, ia32_AddrModeS);
1267 add_ia32_am_offs_int(vfld, get_ia32_am_offs_int(n));
1268 set_ia32_am_sc(vfld, get_ia32_am_sc(n));
1269 set_ia32_ls_mode(vfld, get_ia32_ls_mode(n));
1271 rproj = new_r_Proj(irg, block, vfld, get_ia32_ls_mode(vfld), pn_ia32_vfld_res);
1272 mproj = new_r_Proj(irg, block, vfld, mode_M, pn_ia32_vfld_M);
1273 mem = get_irn_Proj_for_mode(n, mode_M);
1275 assert(mem && "Store memory not found");
1277 arch_set_irn_register(rproj, op2);
1279 /* reroute all former users of the store memory to the load memory */
1280 edges_reroute(mem, mproj, irg);
1281 /* set the memory input of the load to the store memory */
1282 set_irn_n(vfld, n_ia32_vfld_mem, mem);
1284 sched_add_after(n, vfld);
1285 sched_add_after(vfld, rproj);
1287 /* rewire all users, scheduled after the store, to the loaded value */
1288 collect_and_rewire_users(n, val, rproj);
1293 /* we can only store the tos to memory */
1295 x87_create_fxch(state, n, op2_idx);
1297 /* mode != mode_E -> use normal fst */
1298 x87_patch_insn(n, op);
1301 /* we can only store the tos to memory */
1303 x87_create_fxch(state, n, op2_idx);
/* value is dead after the store: use the popping variant */
1306 x87_patch_insn(n, op_p);
1309 attr = get_ia32_x87_attr(n);
1310 attr->x87[1] = op2 = &ia32_st_regs[0];
1311 DB((dbg, LEVEL_1, "<<< %s %s ->\n", get_irn_opname(n), arch_register_get_name(op2)));
/*
 * Generator macros for the trivial simulator wrappers: each expands to a
 * sim_<op>() function that delegates to the generic routine (sim_binop /
 * sim_load / sim_unop / sim_store), passing the matching real ia32
 * opcode(s) — including the reverse and/or TOS-popping variants where the
 * instruction family has them.
 */
1316 #define _GEN_BINOP(op, rev) \
1317 static int sim_##op(x87_state *state, ir_node *n) { \
1318 exchange_tmpl tmpl = { op_ia32_##op, op_ia32_##rev, op_ia32_##op##p, op_ia32_##rev##p }; \
1319 return sim_binop(state, n, &tmpl); \
1322 #define GEN_BINOP(op) _GEN_BINOP(op, op)
1323 #define GEN_BINOPR(op) _GEN_BINOP(op, op##r)
1325 #define GEN_LOAD2(op, nop) \
1326 static int sim_##op(x87_state *state, ir_node *n) { \
1327 return sim_load(state, n, op_ia32_##nop); \
1330 #define GEN_LOAD(op) GEN_LOAD2(op, op)
1332 #define GEN_UNOP(op) \
1333 static int sim_##op(x87_state *state, ir_node *n) { \
1334 return sim_unop(state, n, op_ia32_##op); \
1337 #define GEN_STORE(op) \
1338 static int sim_##op(x87_state *state, ir_node *n) { \
1339 return sim_store(state, n, op_ia32_##op, op_ia32_##op##p); \
/**
1361 * Simulate a virtual fisttp.
1363 * @param state the x87 state
1364 * @param n the node that should be simulated (and patched)
 *
 * The operand is brought to st(0) (via fxch when necessary) and the node
 * is patched into the real fisttp, which always pops the TOS.
 */
1366 static int sim_fisttp(x87_state *state, ir_node *n)
1368 ir_node *val = get_irn_n(n, n_ia32_vfst_val);
1369 const arch_register_t *op2 = x87_get_irn_register(val);
1370 int insn = NO_NODE_ADDED;
1371 ia32_x87_attr_t *attr;
1372 int op2_reg_idx, op2_idx, depth;
1374 op2_reg_idx = arch_register_get_index(op2);
1375 if (op2_reg_idx == REG_VFP_UKNWN) {
1376 /* just take any value from stack */
1377 if (state->depth > 0) {
1379 DEBUG_ONLY(op2 = NULL);
1381 /* produce a new value which we will consume immediately */
1382 x87_create_fldz(state, n, op2_reg_idx);
1383 op2_idx = x87_on_stack(state, op2_reg_idx);
1384 assert(op2_idx >= 0);
1387 op2_idx = x87_on_stack(state, op2_reg_idx);
1388 DB((dbg, LEVEL_1, ">>> %+F %s ->\n", n, arch_register_get_name(op2)));
1389 assert(op2_idx >= 0);
1392 depth = x87_get_depth(state);
1394 /* Note: although the value is still live here, it is destroyed because
1395 of the pop. The register allocator is aware of that and introduced a copy
1396 if the value must be alive. */
1398 /* we can only store the tos to memory */
1400 x87_create_fxch(state, n, op2_idx);
1403 x87_patch_insn(n, op_ia32_fisttp);
/* after patching, the real operand is always st(0) */
1405 attr = get_ia32_x87_attr(n);
1406 attr->x87[1] = op2 = &ia32_st_regs[0];
1407 DB((dbg, LEVEL_1, "<<< %s %s ->\n", get_irn_opname(n), arch_register_get_name(op2)));
/**
 * Simulate a virtual Ftst+Fnstsw combination: bring the operand to st(0)
 * (fxch if needed), patch the node to the real FtstFnstsw, and pop the
 * operand afterwards when it is no longer live.
 *
 * @param state the x87 state
 * @param n the node that should be simulated (and patched)
 *
 * @return NO_NODE_ADDED
 */
1412 static int sim_FtstFnstsw(x87_state *state, ir_node *n)
1414 x87_simulator *sim = state->sim;
1415 ia32_x87_attr_t *attr = get_ia32_x87_attr(n);
1416 ir_node *op1_node = get_irn_n(n, n_ia32_vFtstFnstsw_left);
1417 const arch_register_t *reg1 = x87_get_irn_register(op1_node);
1418 int reg_index_1 = arch_register_get_index(reg1);
1419 int op1_idx = x87_on_stack(state, reg_index_1);
1420 unsigned live = vfp_live_args_after(sim, n, 0);
1422 DB((dbg, LEVEL_1, ">>> %+F %s\n", n, arch_register_get_name(reg1)));
1423 DEBUG_ONLY(vfp_dump_live(live));
1424 DB((dbg, LEVEL_1, "Stack before: "));
1425 DEBUG_ONLY(x87_dump_stack(state));
1426 assert(op1_idx >= 0);
1429 /* bring the value to tos */
1430 x87_create_fxch(state, n, op1_idx);
1434 /* patch the operation */
1435 x87_patch_insn(n, op_ia32_FtstFnstsw);
1436 reg1 = &ia32_st_regs[op1_idx];
1437 attr->x87[0] = reg1;
1438 attr->x87[1] = NULL;
1439 attr->x87[2] = NULL;
/* operand dead after the test: schedule a pop right behind the node */
1441 if (!is_vfp_live(reg_index_1, live)) {
1442 x87_create_fpop(state, sched_next(n), 1);
1446 return NO_NODE_ADDED;
/**
 * Simulate the virtual Fucom/Fucomi compare family.
 *
 * Arranges the two compare operands on the x87 stack (using fxch moves),
 * pops the operands that are dead after the compare, and patches the node
 * to the matching real instruction variant (plain / single-pop /
 * double-pop). `permuted` tracks whether operand order was swapped so the
 * condition-code consumer can compensate.
 *
1450 * @param state the x87 state
1451 * @param n the node that should be simulated (and patched)
 */
1453 static int sim_Fucom(x87_state *state, ir_node *n)
1457 ia32_x87_attr_t *attr = get_ia32_x87_attr(n);
1459 x87_simulator *sim = state->sim;
1460 ir_node *op1_node = get_irn_n(n, n_ia32_vFucomFnstsw_left);
1461 ir_node *op2_node = get_irn_n(n, n_ia32_vFucomFnstsw_right);
1462 const arch_register_t *op1 = x87_get_irn_register(op1_node);
1463 const arch_register_t *op2 = x87_get_irn_register(op2_node);
1464 int reg_index_1 = arch_register_get_index(op1);
1465 int reg_index_2 = arch_register_get_index(op2);
1466 unsigned live = vfp_live_args_after(sim, n, 0);
1467 int permuted = attr->attr.data.ins_permuted;
1470 int node_added = NO_NODE_ADDED;
1472 DB((dbg, LEVEL_1, ">>> %+F %s, %s\n", n,
1473 arch_register_get_name(op1), arch_register_get_name(op2)));
1474 DEBUG_ONLY(vfp_dump_live(live));
1475 DB((dbg, LEVEL_1, "Stack before: "));
1476 DEBUG_ONLY(x87_dump_stack(state));
1478 op1_idx = x87_on_stack(state, reg_index_1);
1479 assert(op1_idx >= 0);
1481 /* BEWARE: check for comp a,a cases, they might happen */
1482 if (reg_index_2 != REG_VFP_NOREG) {
1483 /* second operand is a vfp register */
1484 op2_idx = x87_on_stack(state, reg_index_2);
1485 assert(op2_idx >= 0);
1487 if (is_vfp_live(reg_index_2, live)) {
1488 /* second operand is live */
1490 if (is_vfp_live(reg_index_1, live)) {
/* both live: no pops; just ensure one operand is at TOS */
1491 /* both operands are live */
1494 /* res = tos X op */
1495 } else if (op2_idx == 0) {
1496 /* res = op X tos */
1497 permuted = !permuted;
1500 /* bring the first one to tos */
1501 x87_create_fxch(state, n, op1_idx);
1505 /* res = tos X op */
1508 /* second live, first operand is dead here, bring it to tos.
1509 This means further, op1_idx != op2_idx. */
1510 assert(op1_idx != op2_idx);
1512 x87_create_fxch(state, n, op1_idx);
1517 /* res = tos X op, pop */
1521 /* second operand is dead */
1522 if (is_vfp_live(reg_index_1, live)) {
1523 /* first operand is live: bring second to tos.
1524 This means further, op1_idx != op2_idx. */
1525 assert(op1_idx != op2_idx);
1527 x87_create_fxch(state, n, op2_idx);
1532 /* res = op X tos, pop */
1534 permuted = !permuted;
1537 /* both operands are dead here, check first for identity. */
1538 if (op1_idx == op2_idx) {
1539 /* identically, one pop needed */
1541 x87_create_fxch(state, n, op1_idx);
1545 /* res = tos X op, pop */
1548 /* different, move them to st and st(1) and pop both.
1549 The tricky part is to get one into st(1).*/
1550 else if (op2_idx == 1) {
1551 /* good, second operand is already in the right place, move the first */
1553 /* bring the first on top */
1554 x87_create_fxch(state, n, op1_idx);
1555 assert(op2_idx != 0);
1558 /* res = tos X op, pop, pop */
1560 } else if (op1_idx == 1) {
1561 /* good, first operand is already in the right place, move the second */
1563 /* bring the first on top */
1564 x87_create_fxch(state, n, op2_idx);
1565 assert(op1_idx != 0);
1568 /* res = op X tos, pop, pop */
1569 permuted = !permuted;
1573 /* if one is already the TOS, we need two fxch */
1575 /* first one is TOS, move to st(1) */
1576 x87_create_fxch(state, n, 1);
1577 assert(op2_idx != 1);
1579 x87_create_fxch(state, n, op2_idx);
1581 /* res = op X tos, pop, pop */
1583 permuted = !permuted;
1585 } else if (op2_idx == 0) {
1586 /* second one is TOS, move to st(1) */
1587 x87_create_fxch(state, n, 1);
1588 assert(op1_idx != 1);
1590 x87_create_fxch(state, n, op1_idx);
1592 /* res = tos X op, pop, pop */
1595 /* none of them is either TOS or st(1), 3 fxch needed */
1596 x87_create_fxch(state, n, op2_idx);
1597 assert(op1_idx != 0);
1598 x87_create_fxch(state, n, 1);
1600 x87_create_fxch(state, n, op1_idx);
1602 /* res = tos X op, pop, pop */
1609 /* second operand is an address mode */
1610 if (is_vfp_live(reg_index_1, live)) {
1611 /* first operand is live: bring it to TOS */
1613 x87_create_fxch(state, n, op1_idx);
1617 /* first operand is dead: bring it to tos */
1619 x87_create_fxch(state, n, op1_idx);
/* select the real opcode according to how many pops are needed */
1626 /* patch the operation */
1627 if (is_ia32_vFucomFnstsw(n)) {
1631 case 0: dst = op_ia32_FucomFnstsw; break;
1632 case 1: dst = op_ia32_FucompFnstsw; break;
1633 case 2: dst = op_ia32_FucomppFnstsw; break;
1634 default: panic("invalid popcount in sim_Fucom");
1637 for (i = 0; i < pops; ++i) {
1640 } else if (is_ia32_vFucomi(n)) {
1642 case 0: dst = op_ia32_Fucomi; break;
1643 case 1: dst = op_ia32_Fucompi; x87_pop(state); break;
/* Fucomi has no double-pop form: use Fucompi plus an extra fpop node */
1645 dst = op_ia32_Fucompi;
1647 x87_create_fpop(state, sched_next(n), 1);
1648 node_added = NODE_ADDED;
1650 default: panic("invalid popcount in sim_Fucom");
1653 panic("invalid operation %+F in sim_FucomFnstsw", n);
1656 x87_patch_insn(n, dst);
1663 op1 = &ia32_st_regs[op1_idx];
1666 op2 = &ia32_st_regs[op2_idx];
1669 attr->x87[2] = NULL;
1670 attr->attr.data.ins_permuted = permuted;
1673 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(n),
1674 arch_register_get_name(op1), arch_register_get_name(op2)));
1676 DB((dbg, LEVEL_1, "<<< %s %s, [AM]\n", get_irn_opname(n),
1677 arch_register_get_name(op1)));
/**
 * Simulate a be_Keep: for every vfp operand of the Keep that is still on
 * the simulated stack but no longer live afterwards, schedule an fpop so
 * the value does not linger on the x87 stack.
 *
 * @param state the x87 state
 * @param node the Keep node
 *
 * @return NODE_ADDED if any fpop was inserted, NO_NODE_ADDED otherwise
 */
1683 static int sim_Keep(x87_state *state, ir_node *node)
1686 const arch_register_t *op_reg;
1691 int node_added = NO_NODE_ADDED;
1693 DB((dbg, LEVEL_1, ">>> %+F\n", node));
1695 arity = get_irn_arity(node);
1696 for (i = 0; i < arity; ++i) {
1697 op = get_irn_n(node, i);
1698 op_reg = arch_get_irn_register(op);
/* only vfp-class operands are relevant for the x87 stack */
1699 if (arch_register_get_class(op_reg) != &ia32_reg_classes[CLASS_ia32_vfp])
1702 reg_id = arch_register_get_index(op_reg);
1703 live = vfp_live_args_after(state->sim, node, 0);
1705 op_stack_idx = x87_on_stack(state, reg_id);
1706 if (op_stack_idx >= 0 && !is_vfp_live(reg_id, live)) {
1707 x87_create_fpop(state, sched_next(node), 1);
1708 node_added = NODE_ADDED;
1712 DB((dbg, LEVEL_1, "Stack after: "));
1713 DEBUG_ONLY(x87_dump_stack(state));
/**
 * Keep a float node alive by creating a be_Keep consumer for it and
 * scheduling that Keep directly after the node.
 *
 * @param node the (already scheduled) node that must stay alive
 */
1718 static void keep_float_node_alive(ir_node *node)
1724 const arch_register_class_t *cls;
1726 irg = get_irn_irg(node);
1727 block = get_nodes_block(node);
1728 cls = arch_get_irn_reg_class(node, -1);
1730 keep = be_new_Keep(cls, irg, block, 1, in);
1732 assert(sched_is_scheduled(node));
1733 sched_add_after(node, keep);
/**
1737 * Create a copy of a node. Recreate the node if it's a constant.
1739 * @param state the x87 state
1740 * @param n the node to be copied
1742 * @return the copy of n
 *
 * Constants (fldz, fld1, fldpi, ...) are cheaper to re-materialize than to
 * duplicate on the stack, so for those the matching constructor is called
 * again; everything else is copied with an fpushCopy of the operand.
 */
1744 static ir_node *create_Copy(x87_state *state, ir_node *n)
1746 ir_graph *irg = get_irn_irg(n);
1747 dbg_info *n_dbg = get_irn_dbg_info(n);
1748 ir_mode *mode = get_irn_mode(n);
1749 ir_node *block = get_nodes_block(n);
1750 ir_node *pred = get_irn_n(n, 0);
1751 ir_node *(*cnstr)(dbg_info *, ir_graph *, ir_node *, ir_mode *) = NULL;
1753 const arch_register_t *out;
1754 const arch_register_t *op1;
1755 ia32_x87_attr_t *attr;
1757 /* Do not copy constants, recreate them. */
1758 switch (get_ia32_irn_opcode(pred)) {
1759 case iro_ia32_Unknown_VFP:
1761 cnstr = new_rd_ia32_fldz;
1764 cnstr = new_rd_ia32_fld1;
1766 case iro_ia32_fldpi:
1767 cnstr = new_rd_ia32_fldpi;
1769 case iro_ia32_fldl2e:
1770 cnstr = new_rd_ia32_fldl2e;
1772 case iro_ia32_fldl2t:
1773 cnstr = new_rd_ia32_fldl2t;
1775 case iro_ia32_fldlg2:
1776 cnstr = new_rd_ia32_fldlg2;
1778 case iro_ia32_fldln2:
1779 cnstr = new_rd_ia32_fldln2;
1785 out = x87_get_irn_register(n);
1786 op1 = x87_get_irn_register(pred);
1788 if (cnstr != NULL) {
1789 /* copy a constant */
1790 res = (*cnstr)(n_dbg, irg, block, mode);
1792 x87_push(state, arch_register_get_index(out), res);
1794 attr = get_ia32_x87_attr(res);
1795 attr->x87[2] = &ia32_st_regs[0];
1797 int op1_idx = x87_on_stack(state, arch_register_get_index(op1));
/* general case: duplicate the operand's stack slot with fpushCopy */
1799 res = new_rd_ia32_fpushCopy(n_dbg, irg, block, pred, mode);
1801 x87_push(state, arch_register_get_index(out), res);
1803 attr = get_ia32_x87_attr(res);
1804 attr->x87[0] = &ia32_st_regs[op1_idx];
1805 attr->x87[2] = &ia32_st_regs[0];
1807 arch_set_irn_register(res, out);
/**
1813 * Simulate a be_Copy.
1815 * @param state the x87 state
1816 * @param n the node that should be simulated (and patched)
1818 * @return NO_NODE_ADDED
 *
 * If the source value is still live a real duplicate (fpushCopy or
 * re-materialized constant) is created; if it is dead the Copy collapses
 * into either a stack rename (virtual copy) or a Pop that kills the old
 * slot where the output should be.
 */
1820 static int sim_Copy(x87_state *state, ir_node *n)
1823 const arch_register_t *out;
1824 const arch_register_t *op1;
1825 const arch_register_class_t *cls;
1826 ir_node *node, *next;
1827 ia32_x87_attr_t *attr;
1828 int op1_idx, out_idx;
/* only vfp copies concern the x87 simulator */
1831 cls = arch_get_irn_reg_class(n, -1);
1832 if (cls->regs != ia32_vfp_regs)
1835 pred = get_irn_n(n, 0);
1836 out = x87_get_irn_register(n);
1837 op1 = x87_get_irn_register(pred);
1838 live = vfp_live_args_after(state->sim, n, REGMASK(out));
1840 DB((dbg, LEVEL_1, ">>> %+F %s -> %s\n", n,
1841 arch_register_get_name(op1), arch_register_get_name(out)));
1842 DEBUG_ONLY(vfp_dump_live(live));
1844 /* handle the infamous unknown value */
1845 if (arch_register_get_index(op1) == REG_VFP_UKNWN) {
1846 /* Operand is still live, a real copy. We need here an fpush that can
1847 hold a a register, so use the fpushCopy or recreate constants */
1848 node = create_Copy(state, n);
1850 assert(is_ia32_fldz(node));
1851 next = sched_next(n);
1854 sched_add_before(next, node);
1856 DB((dbg, LEVEL_1, "<<< %+F %s -> %s\n", node, op1->name,
1857 arch_get_irn_register(node)->name));
1858 return NO_NODE_ADDED;
1861 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1863 if (is_vfp_live(arch_register_get_index(op1), live)) {
1864 ir_node *pred = get_irn_n(n, 0);
1866 /* Operand is still live, a real copy. We need here an fpush that can
1867 hold a a register, so use the fpushCopy or recreate constants */
1868 node = create_Copy(state, n);
1870 /* We have to make sure the old value doesn't go dead (which can happen
1871 * when we recreate constants). As the simulator expected that value in
1872 * the pred blocks. This is unfortunate as removing it would save us 1
1873 * instruction, but we would have to rerun all the simulation to get
1876 next = sched_next(n);
1879 sched_add_before(next, node);
1881 if (get_irn_n_edges(pred) == 0) {
1882 keep_float_node_alive(pred);
1885 DB((dbg, LEVEL_1, "<<< %+F %s -> ?\n", node, op1->name));
1887 out_idx = x87_on_stack(state, arch_register_get_index(out));
1889 if (out_idx >= 0 && out_idx != op1_idx) {
1890 /* Matze: out already on stack? how can this happen? */
1893 /* op1 must be killed and placed where out is */
1895 /* best case, simple remove and rename */
1896 x87_patch_insn(n, op_ia32_Pop);
1897 attr = get_ia32_x87_attr(n);
1898 attr->x87[0] = op1 = &ia32_st_regs[0];
1901 x87_set_st(state, arch_register_get_index(out), n, op1_idx - 1);
1903 /* move op1 to tos, store and pop it */
1905 x87_create_fxch(state, n, op1_idx);
1908 x87_patch_insn(n, op_ia32_Pop);
1909 attr = get_ia32_x87_attr(n);
1910 attr->x87[0] = op1 = &ia32_st_regs[out_idx];
1913 x87_set_st(state, arch_register_get_index(out), n, out_idx - 1);
1915 DB((dbg, LEVEL_1, "<<< %+F %s\n", n, op1->name));
1917 /* just a virtual copy */
1918 x87_set_st(state, arch_register_get_index(out), get_unop_op(n), op1_idx);
1919 /* don't remove the node to keep the verifier quiet :),
1920 the emitter won't emit any code for the node */
1923 DB((dbg, LEVEL_1, "<<< KILLED %s\n", get_irn_opname(n)));
1924 exchange(n, get_unop_op(n));
1928 return NO_NODE_ADDED;
/**
1932 * Returns the result proj of the call
 *
 * @param call the ia32 Call node
 * @return the Proj carrying the vfp result (pn_ia32_Call_vf0)
 */
1934 static ir_node *get_call_result_proj(ir_node *call)
1936 const ir_edge_t *edge;
1938 /* search the result proj */
1939 foreach_out_edge(call, edge) {
1940 ir_node *proj = get_edge_src_irn(edge);
1941 long pn = get_Proj_proj(proj);
1943 if (pn == pn_ia32_Call_vf0) {
1949 } /* get_call_result_proj */
/**
1952 * Simulate a ia32_Call.
1954 * @param state the x87 state
1955 * @param n the node that should be simulated
1957 * @return NO_NODE_ADDED
 *
 * The x87 stack must be empty before a call; if the callee returns a
 * float, the result appears in st(0), so the result Proj is pushed.
 */
1959 static int sim_Call(x87_state *state, ir_node *n)
1961 ir_type *call_tp = get_ia32_call_attr_const(n)->call_tp;
1965 const arch_register_t *reg;
1967 DB((dbg, LEVEL_1, ">>> %+F\n", n));
1969 /* at the begin of a call the x87 state should be empty */
1970 assert(state->depth == 0 && "stack not empty before call");
1972 if (get_method_n_ress(call_tp) <= 0)
1976 * If the called function returns a float, it is returned in st(0).
1977 * This even happens if the return value is NOT used.
1978 * Moreover, only one return result is supported.
1980 res_type = get_method_res_type(call_tp, 0);
1981 mode = get_type_mode(res_type);
1983 if (mode == NULL || !mode_is_float(mode))
1986 resproj = get_call_result_proj(n);
1987 assert(resproj != NULL);
1989 reg = x87_get_irn_register(resproj);
1990 x87_push(state, arch_register_get_index(reg), resproj);
1993 DB((dbg, LEVEL_1, "Stack after: "));
1994 DEBUG_ONLY(x87_dump_stack(state));
1996 return NO_NODE_ADDED;
/**
2000 * Simulate a be_Spill.
2002 * @param state the x87 state
2003 * @param n the node that should be simulated (and patched)
2005 * Should not happen, spills are lowered before x87 simulator see them.
 */
2007 static int sim_Spill(x87_state *state, ir_node *n)
2009 assert(0 && "Spill not lowered");
2010 return sim_fst(state, n);
/**
2014 * Simulate a be_Reload.
2016 * @param state the x87 state
2017 * @param n the node that should be simulated (and patched)
2019 * Should not happen, reloads are lowered before x87 simulator see them.
 */
2021 static int sim_Reload(x87_state *state, ir_node *n)
2023 assert(0 && "Reload not lowered");
2024 return sim_fld(state, n);
/**
2028 * Simulate a be_Return.
2030 * @param state the x87 state
2031 * @param n the node that should be simulated (and patched)
2033 * @return NO_NODE_ADDED
 *
 * Float return values are left on the x87 stack by the ABI; they are only
 * popped virtually here so the simulated state ends empty.
 */
2035 static int sim_Return(x87_state *state, ir_node *n)
2037 int n_res = be_Return_get_n_rets(n);
2038 int i, n_float_res = 0;
2040 /* only floating point return values must reside on stack */
2041 for (i = 0; i < n_res; ++i) {
2042 ir_node *res = get_irn_n(n, be_pos_Return_val + i);
2044 if (mode_is_float(get_irn_mode(res)))
2047 assert(x87_get_depth(state) == n_float_res);
2049 /* pop them virtually */
2050 for (i = n_float_res - 1; i >= 0; --i)
2053 return NO_NODE_ADDED;
/** One lane of a be_Perm: which input register maps to which output. */
2056 typedef struct _perm_data_t {
2057 const arch_register_t *in; /**< input register of the lane */
2058 const arch_register_t *out; /**< output register of the lane */
/**
2062 * Simulate a be_Perm.
2064 * @param state the x87 state
2065 * @param irn the node that should be simulated (and patched)
2067 * @return NO_NODE_ADDED
 *
 * A Perm of vfp registers needs no real code on x87: all inputs already
 * live on the FPU stack, so only the simulated stack state is permuted.
 */
2069 static int sim_Perm(x87_state *state, ir_node *irn)
2072 ir_node *pred = get_irn_n(irn, 0);
2074 const ir_edge_t *edge;
2076 /* handle only floating point Perms */
2077 if (! mode_is_float(get_irn_mode(pred)))
2078 return NO_NODE_ADDED;
2080 DB((dbg, LEVEL_1, ">>> %+F\n", irn));
2082 /* Perm is a pure virtual instruction on x87.
2083 All inputs must be on the FPU stack and are pairwise
2084 different from each other.
2085 So, all we need to do is to permutate the stack state. */
2086 n = get_irn_arity(irn);
2087 NEW_ARR_A(int, stack_pos, n);
2089 /* collect old stack positions */
2090 for (i = 0; i < n; ++i) {
2091 const arch_register_t *inreg = x87_get_irn_register(get_irn_n(irn, i));
2092 int idx = x87_on_stack(state, arch_register_get_index(inreg));
2094 assert(idx >= 0 && "Perm argument not on x87 stack");
2098 /* now do the permutation */
2099 foreach_out_edge(irn, edge) {
2100 ir_node *proj = get_edge_src_irn(edge);
2101 const arch_register_t *out = x87_get_irn_register(proj);
2102 long num = get_Proj_proj(proj);
2104 assert(0 <= num && num < n && "More Proj's than Perm inputs");
2105 x87_set_st(state, arch_register_get_index(out), proj, stack_pos[(unsigned)num]);
2107 DB((dbg, LEVEL_1, "<<< %+F\n", irn));
2109 return NO_NODE_ADDED;
/**
 * Simulate a be_Barrier: materialize any Unknown_VFP inputs as real fldz
 * nodes so a defined value actually exists on the stack at the barrier.
 *
 * @param state the x87 state
 * @param node the Barrier node
 *
 * @return NO_NODE_ADDED
 */
2112 static int sim_Barrier(x87_state *state, ir_node *node)
2116 /* materialize unknown if needed */
2117 arity = get_irn_arity(node);
2118 for (i = 0; i < arity; ++i) {
2119 const arch_register_t *reg;
2122 ia32_x87_attr_t *attr;
2123 ir_node *in = get_irn_n(node, i);
2125 if (!is_ia32_Unknown_VFP(in))
2128 /* TODO: not completely correct... */
2129 reg = &ia32_vfp_regs[REG_VFP_UKNWN];
2132 block = get_nodes_block(node);
2133 zero = new_rd_ia32_fldz(NULL, current_ir_graph, block, mode_E);
2134 x87_push(state, arch_register_get_index(reg), zero);
2136 attr = get_ia32_x87_attr(zero);
2137 attr->x87[2] = &ia32_st_regs[0];
2139 sched_add_before(node, zero);
/* the Barrier now consumes the fldz instead of the Unknown */
2141 set_irn_n(node, i, zero);
2144 return NO_NODE_ADDED;
/**
2149 * Kill any dead registers at block start by popping them from the stack.
2151 * @param sim the simulator handle
2152 * @param block the current block
2153 * @param start_state the x87 state at the begin of the block
2155 * @return the x87 state after dead register killed
 *
 * If everything is dead, femms/emms can clear the whole stack at once;
 * otherwise dead entries are rotated to TOS with fxch and popped
 * (double-pops when two dead values are adjacent at the top).
 */
2157 static x87_state *x87_kill_deads(x87_simulator *sim, ir_node *block, x87_state *start_state)
2159 x87_state *state = start_state;
2160 ir_node *first_insn = sched_first(block);
2161 ir_node *keep = NULL;
2162 unsigned live = vfp_live_args_after(sim, block, 0);
2164 int i, depth, num_pop;
/* build a bitmask of dead stack slots (bit i = st(i) is dead) */
2167 depth = x87_get_depth(state);
2168 for (i = depth - 1; i >= 0; --i) {
2169 int reg = x87_get_st_reg(state, i);
2171 if (! is_vfp_live(reg, live))
2172 kill_mask |= (1 << i);
2176 /* create a new state, will be changed */
2177 state = x87_clone_state(sim, state);
2179 DB((dbg, LEVEL_1, "Killing deads:\n"));
2180 DEBUG_ONLY(vfp_dump_live(live));
2181 DEBUG_ONLY(x87_dump_stack(state));
2183 if (kill_mask != 0 && live == 0) {
2184 /* special case: kill all registers */
2185 if (ia32_cg_config.use_femms || ia32_cg_config.use_emms) {
2186 if (ia32_cg_config.use_femms) {
2187 /* use FEMMS on AMD processors to clear all */
2188 keep = new_rd_ia32_femms(NULL, get_irn_irg(block), block);
2190 /* use EMMS to clear all */
2191 keep = new_rd_ia32_emms(NULL, get_irn_irg(block), block);
2193 sched_add_before(first_insn, keep);
2199 /* now kill registers */
2201 /* we can only kill from TOS, so bring them up */
2202 if (! (kill_mask & 1)) {
2203 /* search from behind, because we can to a double-pop */
2204 for (i = depth - 1; i >= 0; --i) {
2205 if (kill_mask & (1 << i)) {
2206 kill_mask &= ~(1 << i);
2213 x87_set_st(state, -1, keep, i);
2214 x87_create_fxch(state, first_insn, i);
2217 if ((kill_mask & 3) == 3) {
2218 /* we can do a double-pop */
2222 /* only a single pop */
2227 kill_mask >>= num_pop;
2228 keep = x87_create_fpop(state, first_insn, num_pop);
2233 } /* x87_kill_deads */
/**
2236 * If we have PhiEs with unknown operands then we have to make sure that some
2237 * value is actually put onto the stack.
 *
 * @param state the x87 state
 * @param block block containing the Phis
 * @param pred_block predecessor block where the fldz is inserted
 * @param pos predecessor position of the Phi operand to fix
 */
2239 static void fix_unknown_phis(x87_state *state, ir_node *block,
2240 ir_node *pred_block, int pos)
2244 sched_foreach(block, node) {
2246 const arch_register_t *reg;
2247 ia32_x87_attr_t *attr;
2252 op = get_Phi_pred(node, pos);
2253 if (!is_ia32_Unknown_VFP(op))
2256 reg = arch_get_irn_register(node);
2258 /* create a zero at end of pred block */
2259 zero = new_rd_ia32_fldz(NULL, current_ir_graph, pred_block, mode_E);
2260 x87_push(state, arch_register_get_index(reg), zero);
2262 attr = get_ia32_x87_attr(zero);
2263 attr->x87[2] = &ia32_st_regs[0];
2265 assert(is_ia32_fldz(zero));
2266 sched_add_before(sched_last(pred_block), zero);
2268 set_Phi_pred(node, pos, zero);
/**
2273 * Run a simulation and fix all virtual instructions for a block.
2275 * @param sim the simulator handle
2276 * @param block the current block
 *
 * Kills dead registers at block entry, runs the per-opcode sim_* callback
 * for every scheduled node, then propagates the end state to successors
 * (queueing unvisited ones, shuffling the stack for already-visited ones).
 */
2278 static void x87_simulate_block(x87_simulator *sim, ir_node *block)
2281 blk_state *bl_state = x87_get_bl_state(sim, block);
2282 x87_state *state = bl_state->begin;
2283 const ir_edge_t *edge;
2284 ir_node *start_block;
2286 assert(state != NULL);
2287 /* already processed? */
2288 if (bl_state->end != NULL)
2291 DB((dbg, LEVEL_1, "Simulate %+F\n", block));
2292 DB((dbg, LEVEL_2, "State at Block begin:\n "));
2293 DEBUG_ONLY(x87_dump_stack(state));
2295 /* at block begin, kill all dead registers */
2296 state = x87_kill_deads(sim, block, state);
2297 /* create a new state, will be changed */
2298 state = x87_clone_state(sim, state);
2300 /* beware, n might change */
2301 for (n = sched_first(block); !sched_is_end(n); n = next) {
2304 ir_op *op = get_irn_op(n);
2306 next = sched_next(n);
/* only opcodes that registered a sim_* callback are simulated */
2307 if (op->ops.generic == NULL)
2310 func = (sim_func)op->ops.generic;
2313 node_inserted = (*func)(state, n);
2316 sim_func might have added an additional node after n,
2318 beware: n must not be changed by sim_func
2319 (i.e. removed from schedule) in this case
2321 if (node_inserted != NO_NODE_ADDED)
2322 next = sched_next(n);
2325 start_block = get_irg_start_block(get_irn_irg(block));
2327 DB((dbg, LEVEL_2, "State at Block end:\n ")); DEBUG_ONLY(x87_dump_stack(state));
2329 /* check if the state must be shuffled */
2330 foreach_block_succ(block, edge) {
2331 ir_node *succ = get_edge_src_irn(edge);
2332 blk_state *succ_state;
2334 if (succ == start_block)
2337 succ_state = x87_get_bl_state(sim, succ);
2339 fix_unknown_phis(state, succ, block, get_edge_src_pos(edge));
2341 if (succ_state->begin == NULL) {
2342 DB((dbg, LEVEL_2, "Set begin state for succ %+F:\n", succ));
2343 DEBUG_ONLY(x87_dump_stack(state));
2344 succ_state->begin = state;
2346 waitq_put(sim->worklist, succ);
2348 DB((dbg, LEVEL_2, "succ %+F already has a state, shuffling\n", succ));
2349 /* There is already a begin state for the successor, bad.
2350 Do the necessary permutations.
2351 Note that critical edges are removed, so this is always possible:
2352 If the successor has more than one possible input, then it must
2355 x87_shuffle(sim, block, state, succ, succ_state->begin);
2358 bl_state->end = state;
2359 } /* x87_simulate_block */
/**
 * Register the simulator callback for an opcode via its generic function
 * pointer. Each opcode may be registered at most once (asserted).
 */
2361 static void register_sim(ir_op *op, sim_func func)
2363 assert(op->ops.generic == NULL);
2364 op->ops.generic = (op_func) func;
/**
2368 * Create a new x87 simulator.
2370 * @param sim a simulator handle, will be initialized
2371 * @param irg the current graph
 *
 * Allocates the per-node liveness cache and registers the sim_* callback
 * for every virtual x87 / backend opcode that needs simulation.
 */
2373 static void x87_init_simulator(x87_simulator *sim, ir_graph *irg)
2375 obstack_init(&sim->obst);
2376 sim->blk_states = pmap_create();
2377 sim->n_idx = get_irg_last_idx(irg);
2378 sim->live = obstack_alloc(&sim->obst, sizeof(*sim->live) * sim->n_idx);
2380 DB((dbg, LEVEL_1, "--------------------------------\n"
2381 "x87 Simulator started for %+F\n", irg));
2383 /* set the generic function pointer of instruction we must simulate */
2384 clear_irp_opcodes_generic_func();
2386 register_sim(op_ia32_Call, sim_Call);
2387 register_sim(op_ia32_vfld, sim_fld);
2388 register_sim(op_ia32_vfild, sim_fild);
2389 register_sim(op_ia32_vfld1, sim_fld1);
2390 register_sim(op_ia32_vfldz, sim_fldz);
2391 register_sim(op_ia32_vfadd, sim_fadd);
2392 register_sim(op_ia32_vfsub, sim_fsub);
2393 register_sim(op_ia32_vfmul, sim_fmul);
2394 register_sim(op_ia32_vfdiv, sim_fdiv);
2395 register_sim(op_ia32_vfprem, sim_fprem);
2396 register_sim(op_ia32_vfabs, sim_fabs);
2397 register_sim(op_ia32_vfchs, sim_fchs);
2398 register_sim(op_ia32_vfist, sim_fist);
2399 register_sim(op_ia32_vfisttp, sim_fisttp);
2400 register_sim(op_ia32_vfst, sim_fst);
2401 register_sim(op_ia32_vFtstFnstsw, sim_FtstFnstsw);
2402 register_sim(op_ia32_vFucomFnstsw, sim_Fucom);
2403 register_sim(op_ia32_vFucomi, sim_Fucom);
2404 register_sim(op_be_Copy, sim_Copy);
2405 register_sim(op_be_Spill, sim_Spill);
2406 register_sim(op_be_Reload, sim_Reload);
2407 register_sim(op_be_Return, sim_Return);
2408 register_sim(op_be_Perm, sim_Perm);
2409 register_sim(op_be_Keep, sim_Keep);
2410 register_sim(op_be_Barrier, sim_Barrier);
2411 } /* x87_init_simulator */
/**
2414 * Destroy a x87 simulator.
2416 * @param sim the simulator handle
 *
 * Frees the block-state map and all obstack-allocated simulator data.
 */
2418 static void x87_destroy_simulator(x87_simulator *sim)
2420 pmap_destroy(sim->blk_states);
2421 obstack_free(&sim->obst, NULL);
2422 DB((dbg, LEVEL_1, "x87 Simulator stopped\n\n"));
2423 } /* x87_destroy_simulator */
/**
2426 * Pre-block walker: calculate the liveness information for the block
2427 * and store it into the sim->live cache.
 *
 * @param block the block being visited
 * @param data the x87_simulator handle (walker environment)
 */
2429 static void update_liveness_walker(ir_node *block, void *data)
2431 x87_simulator *sim = data;
2432 update_liveness(sim, block);
2433 } /* update_liveness_walker */
/**
 * Run the x87 simulation for a whole graph: initialize the simulator,
 * precompute liveness for every block, then process blocks from a
 * worklist starting at the start block (empty stack state).
 *
 * @param birg the backend graph to simulate
 */
2435 void x87_simulate_graph(be_irg_t *birg)
2437 ir_node *block, *start_block;
2438 blk_state *bl_state;
2440 ir_graph *irg = be_get_birg_irg(birg);
2442 /* create the simulator */
2443 x87_init_simulator(&sim, irg);
2445 start_block = get_irg_start_block(irg);
2446 bl_state = x87_get_bl_state(&sim, start_block);
2448 /* start with the empty state */
2449 bl_state->begin = empty;
2452 sim.worklist = new_waitq();
2453 waitq_put(sim.worklist, start_block);
2455 be_assure_liveness(birg);
2456 sim.lv = be_get_birg_liveness(birg);
2457 // sim.lv = be_liveness(be_get_birg_irg(birg));
2458 be_liveness_assure_sets(sim.lv);
2460 /* Calculate the liveness for all nodes. We must precalculate this info,
2461 * because the simulator adds new nodes (possible before Phi nodes) which
2462 * would let a lazy calculation fail.
2463 * On the other hand we reduce the computation amount due to
2464 * precaching from O(n^2) to O(n) at the expense of O(n) cache memory.
2466 irg_block_walk_graph(irg, update_liveness_walker, NULL, &sim);
/* drain the worklist: simulate until every reachable block has an end state */
2470 block = waitq_get(sim.worklist);
2471 x87_simulate_block(&sim, block);
2472 } while (! waitq_empty(sim.worklist));
2475 del_waitq(sim.worklist);
2476 x87_destroy_simulator(&sim);
2477 } /* x87_simulate_graph */
/** Register the debug module handle for the x87 simulator. */
2479 void ia32_init_x87(void)
2481 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.x87");
2482 } /* ia32_init_x87 */