2 * Copyright (C) 1995-2010 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the x87 support and virtual to stack
23 * register translation for the ia32 backend.
24 * @author Michael Beck
33 #include "iredges_t.h"
48 #include "bearch_ia32_t.h"
49 #include "ia32_new_nodes.h"
50 #include "gen_ia32_new_nodes.h"
51 #include "gen_ia32_regalloc_if.h"
53 #include "ia32_architecture.h"
55 /** the debug handle */
56 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
58 /* Forward declaration. */
59 typedef struct x87_simulator x87_simulator;
62 * An exchange template.
63 * Note that our virtual functions have the same inputs
64 * and attributes as the real ones, so we can simple exchange
66 * Further, x87 supports inverse instructions, so we can handle them.
68 typedef struct exchange_tmpl {
69 ir_op *normal_op; /**< the normal one */
70 ir_op *reverse_op; /**< the reverse one if exists */
71 ir_op *normal_pop_op; /**< the normal one with tos pop */
72 ir_op *reverse_pop_op; /**< the reverse one with tos pop */
76 * An entry on the simulated x87 stack.
78 typedef struct st_entry {
79 int reg_idx; /**< the virtual register index of this stack value */
80 ir_node *node; /**< the node that produced this value */
86 typedef struct x87_state {
87 st_entry st[N_ia32_st_REGS]; /**< the register stack */
88 int depth; /**< the current stack depth */
89 x87_simulator *sim; /**< The simulator. */
92 /** An empty state, used for blocks without fp instructions. */
93 static x87_state empty = { { {0, NULL}, }, 0, NULL };
96 * Return values of the instruction simulator functions.
99 NO_NODE_ADDED = 0, /**< No node that needs simulation was added. */
100 NODE_ADDED = 1 /**< A node that must be simulated was added by the simulator
101 in the schedule AFTER the current node. */
105 * The type of an instruction simulator function.
107 * @param state the x87 state
108 * @param n the node to be simulated
110 * @return NODE_ADDED if a node was added AFTER n in schedule that MUST be
112 * NO_NODE_ADDED otherwise
114 typedef int (*sim_func)(x87_state *state, ir_node *n);
117 * A block state: Every block has a x87 state at the beginning and at the end.
119 typedef struct blk_state {
120 x87_state *begin; /**< state at the begin or NULL if not assigned */
121 x87_state *end; /**< state at the end or NULL if not assigned */
124 /** liveness bitset for vfp registers. */
125 typedef unsigned char vfp_liveness;
130 struct x87_simulator {
131 struct obstack obst; /**< An obstack for fast allocating. */
132 pmap *blk_states; /**< Map blocks to states. */
133 be_lv_t *lv; /**< intrablock liveness. */
134 vfp_liveness *live; /**< Liveness information. */
135 unsigned n_idx; /**< The cached get_irg_last_idx() result. */
136 waitq *worklist; /**< Worklist of blocks that must be processed. */
140 * Returns the current stack depth.
142 * @param state the x87 state
144 * @return the x87 stack depth
146 static int x87_get_depth(const x87_state *state)
151 static st_entry *x87_get_entry(x87_state *const state, int const pos)
153 assert(0 <= pos && pos < state->depth);
154 return &state->st[N_ia32_st_REGS - state->depth + pos];
158 * Return the virtual register index at st(pos).
160 * @param state the x87 state
161 * @param pos a stack position
163 * @return the vfp register index that produced the value at st(pos)
165 static int x87_get_st_reg(const x87_state *state, int pos)
167 return x87_get_entry((x87_state*)state, pos)->reg_idx;
172 * Dump the stack for debugging.
174 * @param state the x87 state
176 static void x87_dump_stack(const x87_state *state)
178 for (int i = state->depth; i-- != 0;) {
179 st_entry const *const entry = x87_get_entry((x87_state*)state, i);
180 DB((dbg, LEVEL_2, "vf%d(%+F) ", entry->reg_idx, entry->node));
182 DB((dbg, LEVEL_2, "<-- TOS\n"));
184 #endif /* DEBUG_libfirm */
187 * Set a virtual register to st(pos).
189 * @param state the x87 state
190 * @param reg_idx the vfp register index that should be set
191 * @param node the IR node that produces the value of the vfp register
192 * @param pos the stack position where the new value should be entered
194 static void x87_set_st(x87_state *state, int reg_idx, ir_node *node, int pos)
196 st_entry *const entry = x87_get_entry(state, pos);
197 entry->reg_idx = reg_idx;
200 DB((dbg, LEVEL_2, "After SET_REG: "));
201 DEBUG_ONLY(x87_dump_stack(state);)
205 * Set the tos virtual register.
207 * @param state the x87 state
208 * @param reg_idx the vfp register index that should be set
209 * @param node the IR node that produces the value of the vfp register
211 static void x87_set_tos(x87_state *state, int reg_idx, ir_node *node)
213 x87_set_st(state, reg_idx, node, 0);
217 * Swap st(0) with st(pos).
219 * @param state the x87 state
220 * @param pos the stack position to change the tos with
222 static void x87_fxch(x87_state *state, int pos)
224 st_entry *const a = x87_get_entry(state, pos);
225 st_entry *const b = x87_get_entry(state, 0);
226 st_entry const t = *a;
230 DB((dbg, LEVEL_2, "After FXCH: "));
231 DEBUG_ONLY(x87_dump_stack(state);)
235 * Convert a virtual register to the stack index.
237 * @param state the x87 state
238 * @param reg_idx the register vfp index
240 * @return the stack position where the register is stacked
241 * or -1 if the virtual register was not found
243 static int x87_on_stack(const x87_state *state, int reg_idx)
245 for (int i = 0; i < state->depth; ++i) {
246 if (x87_get_st_reg(state, i) == reg_idx)
253 * Push a virtual Register onto the stack, double pushed allowed.
255 * @param state the x87 state
256 * @param reg_idx the register vfp index
257 * @param node the node that produces the value of the vfp register
259 static void x87_push_dbl(x87_state *state, int reg_idx, ir_node *node)
261 assert(state->depth < N_ia32_st_REGS && "stack overrun");
264 st_entry *const entry = x87_get_entry(state, 0);
265 entry->reg_idx = reg_idx;
268 DB((dbg, LEVEL_2, "After PUSH: ")); DEBUG_ONLY(x87_dump_stack(state);)
272 * Push a virtual Register onto the stack, double pushes are NOT allowed.
274 * @param state the x87 state
275 * @param reg_idx the register vfp index
276 * @param node the node that produces the value of the vfp register
278 static void x87_push(x87_state *state, int reg_idx, ir_node *node)
280 assert(x87_on_stack(state, reg_idx) == -1 && "double push");
282 x87_push_dbl(state, reg_idx, node);
286 * Pop a virtual Register from the stack.
288 * @param state the x87 state
290 static void x87_pop(x87_state *state)
292 assert(state->depth > 0 && "stack underrun");
296 DB((dbg, LEVEL_2, "After POP: ")); DEBUG_ONLY(x87_dump_stack(state);)
300 * Empty the fpu stack
302 * @param state the x87 state
304 static void x87_emms(x87_state *state)
310 * Returns the block state of a block.
312 * @param sim the x87 simulator handle
313 * @param block the current block
315 * @return the block state
317 static blk_state *x87_get_bl_state(x87_simulator *sim, ir_node *block)
319 blk_state *res = pmap_get(blk_state, sim->blk_states, block);
322 res = OALLOC(&sim->obst, blk_state);
326 pmap_insert(sim->blk_states, block, res);
335 * @param sim the x87 simulator handle
336 * @param src the x87 state that will be cloned
338 * @return a cloned copy of the src state
340 static x87_state *x87_clone_state(x87_simulator *sim, const x87_state *src)
342 x87_state *const res = OALLOC(&sim->obst, x87_state);
348 * Patch a virtual instruction into a x87 one and return
349 * the node representing the result value.
351 * @param n the IR node to patch
352 * @param op the x87 opcode to patch in
354 static ir_node *x87_patch_insn(ir_node *n, ir_op *op)
356 ir_mode *mode = get_irn_mode(n);
361 if (mode == mode_T) {
362 /* patch all Proj's */
363 foreach_out_edge(n, edge) {
364 ir_node *proj = get_edge_src_irn(edge);
366 mode = get_irn_mode(proj);
367 if (mode_is_float(mode)) {
369 set_irn_mode(proj, ia32_reg_classes[CLASS_ia32_st].mode);
373 } else if (mode_is_float(mode))
374 set_irn_mode(n, ia32_reg_classes[CLASS_ia32_st].mode);
379 * Returns the first Proj of a mode_T node having a given mode.
381 * @param n the mode_T node
382 * @param m the desired mode of the Proj
383 * @return The first Proj of mode @p m found or NULL.
385 static ir_node *get_irn_Proj_for_mode(ir_node *n, ir_mode *m)
387 assert(get_irn_mode(n) == mode_T && "Need mode_T node");
389 foreach_out_edge(n, edge) {
390 ir_node *proj = get_edge_src_irn(edge);
391 if (get_irn_mode(proj) == m)
399 * Wrap the arch_* function here so we can check for errors.
401 static inline const arch_register_t *x87_get_irn_register(const ir_node *irn)
403 const arch_register_t *res = arch_get_irn_register(irn);
405 assert(res->reg_class == &ia32_reg_classes[CLASS_ia32_vfp]);
409 static inline const arch_register_t *x87_irn_get_register(const ir_node *irn,
412 const arch_register_t *res = arch_get_irn_register_out(irn, pos);
414 assert(res->reg_class == &ia32_reg_classes[CLASS_ia32_vfp]);
418 static inline const arch_register_t *get_st_reg(int index)
420 return &ia32_registers[REG_ST0 + index];
423 /* -------------- x87 perm --------------- */
426 * Creates a fxch for shuffle.
428 * @param state the x87 state
429 * @param pos parameter for fxch
430 * @param block the block were fxch is inserted
432 * Creates a new fxch node and reroute the user of the old node
435 * @return the fxch node
437 static ir_node *x87_fxch_shuffle(x87_state *state, int pos, ir_node *block)
440 ia32_x87_attr_t *attr;
442 fxch = new_bd_ia32_fxch(NULL, block);
443 attr = get_ia32_x87_attr(fxch);
444 attr->x87[0] = get_st_reg(pos);
445 attr->x87[2] = get_st_reg(0);
449 x87_fxch(state, pos);
454 * Calculate the necessary permutations to reach dst_state.
456 * These permutations are done with fxch instructions and placed
457 * at the end of the block.
459 * Note that critical edges are removed here, so we need only
460 * a shuffle if the current block has only one successor.
462 * @param block the current block
463 * @param state the current x87 stack state, might be modified
464 * @param dst_state destination state
468 static x87_state *x87_shuffle(ir_node *block, x87_state *state, const x87_state *dst_state)
470 int i, n_cycles, k, ri;
471 unsigned cycles[4], all_mask;
472 char cycle_idx[4][8];
473 ir_node *fxch, *before, *after;
475 assert(state->depth == dst_state->depth);
477 /* Some mathematics here:
478 * If we have a cycle of length n that includes the tos,
479 * we need n-1 exchange operations.
480 * We can always add the tos and restore it, so we need
481 * n+1 exchange operations for a cycle not containing the tos.
482 * So, the maximum of needed operations is for a cycle of 7
483 * not including the tos == 8.
484 * This is the same number of ops we would need for using stores,
485 * so exchange is cheaper (we save the loads).
486 * On the other hand, we might need an additional exchange
487 * in the next block to bring one operand on top, so the
488 * number of ops in the first case is identical.
489 * Further, no more than 4 cycles can exists (4 x 2). */
490 all_mask = (1 << (state->depth)) - 1;
492 for (n_cycles = 0; all_mask; ++n_cycles) {
493 int src_idx, dst_idx;
495 /* find the first free slot */
496 for (i = 0; i < state->depth; ++i) {
497 if (all_mask & (1 << i)) {
498 all_mask &= ~(1 << i);
500 /* check if there are differences here */
501 if (x87_get_st_reg(state, i) != x87_get_st_reg(dst_state, i))
507 /* no more cycles found */
512 cycles[n_cycles] = (1 << i);
513 cycle_idx[n_cycles][k++] = i;
514 for (src_idx = i; ; src_idx = dst_idx) {
515 dst_idx = x87_on_stack(dst_state, x87_get_st_reg(state, src_idx));
517 if ((all_mask & (1 << dst_idx)) == 0)
520 cycle_idx[n_cycles][k++] = dst_idx;
521 cycles[n_cycles] |= (1 << dst_idx);
522 all_mask &= ~(1 << dst_idx);
524 cycle_idx[n_cycles][k] = -1;
528 /* no permutation needed */
532 /* Hmm: permutation needed */
533 DB((dbg, LEVEL_2, "\n%+F needs permutation: from\n", block));
534 DEBUG_ONLY(x87_dump_stack(state);)
535 DB((dbg, LEVEL_2, " to\n"));
536 DEBUG_ONLY(x87_dump_stack(dst_state);)
540 DB((dbg, LEVEL_2, "Need %d cycles\n", n_cycles));
541 for (ri = 0; ri < n_cycles; ++ri) {
542 DB((dbg, LEVEL_2, " Ring %d:\n ", ri));
543 for (k = 0; cycle_idx[ri][k] != -1; ++k)
544 DB((dbg, LEVEL_2, " st%d ->", cycle_idx[ri][k]));
545 DB((dbg, LEVEL_2, "\n"));
552 * Find the place node must be insert.
553 * We have only one successor block, so the last instruction should
556 before = sched_last(block);
557 assert(is_cfop(before));
559 /* now do the permutations */
560 for (ri = 0; ri < n_cycles; ++ri) {
561 if ((cycles[ri] & 1) == 0) {
562 /* this cycle does not include the tos */
563 fxch = x87_fxch_shuffle(state, cycle_idx[ri][0], block);
565 sched_add_after(after, fxch);
567 sched_add_before(before, fxch);
570 for (k = 1; cycle_idx[ri][k] != -1; ++k) {
571 fxch = x87_fxch_shuffle(state, cycle_idx[ri][k], block);
573 sched_add_after(after, fxch);
575 sched_add_before(before, fxch);
578 if ((cycles[ri] & 1) == 0) {
579 /* this cycle does not include the tos */
580 fxch = x87_fxch_shuffle(state, cycle_idx[ri][0], block);
581 sched_add_after(after, fxch);
588 * Create a fxch node before another node.
590 * @param state the x87 state
591 * @param n the node after the fxch
592 * @param pos exchange st(pos) with st(0)
596 static ir_node *x87_create_fxch(x87_state *state, ir_node *n, int pos)
599 ia32_x87_attr_t *attr;
600 ir_node *block = get_nodes_block(n);
602 x87_fxch(state, pos);
604 fxch = new_bd_ia32_fxch(NULL, block);
605 attr = get_ia32_x87_attr(fxch);
606 attr->x87[0] = get_st_reg(pos);
607 attr->x87[2] = get_st_reg(0);
611 sched_add_before(n, fxch);
612 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fxch), attr->x87[0]->name, attr->x87[2]->name));
617 * Create a fpush before node n.
619 * @param state the x87 state
620 * @param n the node after the fpush
621 * @param pos push st(pos) on stack
622 * @param op_idx replace input op_idx of n with the fpush result
624 static void x87_create_fpush(x87_state *state, ir_node *n, int pos, int op_idx)
626 ir_node *const val = get_irn_n(n, op_idx);
627 arch_register_t const *const out = x87_get_irn_register(val);
628 x87_push_dbl(state, arch_register_get_index(out), val);
630 ir_node *const fpush = new_bd_ia32_fpush(NULL, get_nodes_block(n));
631 ia32_x87_attr_t *const attr = get_ia32_x87_attr(fpush);
632 attr->x87[0] = get_st_reg(pos);
633 attr->x87[2] = get_st_reg(0);
636 sched_add_before(n, fpush);
638 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fpush), attr->x87[0]->name, attr->x87[2]->name));
642 * Create a fpop before node n.
644 * @param state the x87 state
645 * @param n the node after the fpop
646 * @param num pop 1 or 2 values
648 * @return the fpop node
650 static ir_node *x87_create_fpop(x87_state *state, ir_node *n, int num)
652 ir_node *fpop = NULL;
653 ia32_x87_attr_t *attr;
658 if (ia32_cg_config.use_ffreep)
659 fpop = new_bd_ia32_ffreep(NULL, get_nodes_block(n));
661 fpop = new_bd_ia32_fpop(NULL, get_nodes_block(n));
662 attr = get_ia32_x87_attr(fpop);
663 attr->x87[0] = get_st_reg(0);
664 attr->x87[1] = get_st_reg(0);
665 attr->x87[2] = get_st_reg(0);
668 sched_add_before(n, fpop);
669 DB((dbg, LEVEL_1, "<<< %s %s\n", get_irn_opname(fpop), attr->x87[0]->name));
674 /* --------------------------------- liveness ------------------------------------------ */
677 * The liveness transfer function.
678 * Updates a live set over a single step from a given node to its predecessor.
679 * Everything defined at the node is removed from the set, the uses of the node get inserted.
681 * @param irn The node at which liveness should be computed.
682 * @param live The bitset of registers live before @p irn. This set gets modified by updating it to
683 * the registers live after irn.
685 * @return The live bitset.
687 static vfp_liveness vfp_liveness_transfer(ir_node *irn, vfp_liveness live)
690 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
692 if (get_irn_mode(irn) == mode_T) {
693 foreach_out_edge(irn, edge) {
694 ir_node *proj = get_edge_src_irn(edge);
696 if (arch_irn_consider_in_reg_alloc(cls, proj)) {
697 const arch_register_t *reg = x87_get_irn_register(proj);
698 live &= ~(1 << arch_register_get_index(reg));
701 } else if (arch_irn_consider_in_reg_alloc(cls, irn)) {
702 const arch_register_t *reg = x87_get_irn_register(irn);
703 live &= ~(1 << arch_register_get_index(reg));
706 for (i = 0, n = get_irn_arity(irn); i < n; ++i) {
707 ir_node *op = get_irn_n(irn, i);
709 if (mode_is_float(get_irn_mode(op)) &&
710 arch_irn_consider_in_reg_alloc(cls, op)) {
711 const arch_register_t *reg = x87_get_irn_register(op);
712 live |= 1 << arch_register_get_index(reg);
719 * Put all live virtual registers at the end of a block into a bitset.
721 * @param sim the simulator handle
722 * @param bl the block
724 * @return The live bitset at the end of this block
726 static vfp_liveness vfp_liveness_end_of_block(x87_simulator *sim, const ir_node *block)
728 vfp_liveness live = 0;
729 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
730 const be_lv_t *lv = sim->lv;
732 be_lv_foreach(lv, block, be_lv_state_end, node) {
733 const arch_register_t *reg;
734 if (!arch_irn_consider_in_reg_alloc(cls, node))
737 reg = x87_get_irn_register(node);
738 live |= 1 << arch_register_get_index(reg);
744 /** get the register mask from an arch_register */
745 #define REGMASK(reg) (1 << (arch_register_get_index(reg)))
748 * Return a bitset of argument registers which are live at the end of a node.
750 * @param sim the simulator handle
751 * @param pos the node
752 * @param kill kill mask for the output registers
754 * @return The live bitset.
756 static unsigned vfp_live_args_after(x87_simulator *sim, const ir_node *pos, unsigned kill)
758 unsigned idx = get_irn_idx(pos);
760 assert(idx < sim->n_idx);
761 return sim->live[idx] & ~kill;
765 * Calculate the liveness for a whole block and cache it.
767 * @param sim the simulator handle
768 * @param block the block
770 static void update_liveness(x87_simulator *sim, ir_node *block)
772 vfp_liveness live = vfp_liveness_end_of_block(sim, block);
775 /* now iterate through the block backward and cache the results */
776 sched_foreach_reverse(block, irn) {
777 /* stop at the first Phi: this produces the live-in */
781 idx = get_irn_idx(irn);
782 sim->live[idx] = live;
784 live = vfp_liveness_transfer(irn, live);
786 idx = get_irn_idx(block);
787 sim->live[idx] = live;
791 * Returns true if a register is live in a set.
793 * @param reg_idx the vfp register index
794 * @param live a live bitset
796 #define is_vfp_live(reg_idx, live) ((live) & (1 << (reg_idx)))
800 * Dump liveness info.
802 * @param live the live bitset
804 static void vfp_dump_live(vfp_liveness live)
808 DB((dbg, LEVEL_2, "Live after: "));
809 for (i = 0; i < 8; ++i) {
810 if (live & (1 << i)) {
811 DB((dbg, LEVEL_2, "vf%d ", i));
814 DB((dbg, LEVEL_2, "\n"));
816 #endif /* DEBUG_libfirm */
818 /* --------------------------------- simulators ---------------------------------------- */
821 * Simulate a virtual binop.
823 * @param state the x87 state
824 * @param n the node that should be simulated (and patched)
825 * @param tmpl the template containing the 4 possible x87 opcodes
827 * @return NO_NODE_ADDED
829 static int sim_binop(x87_state *state, ir_node *n, const exchange_tmpl *tmpl)
831 int op2_idx = 0, op1_idx;
832 int out_idx, do_pop = 0;
833 ia32_x87_attr_t *attr;
835 ir_node *patched_insn;
837 x87_simulator *sim = state->sim;
838 ir_node *op1 = get_irn_n(n, n_ia32_binary_left);
839 ir_node *op2 = get_irn_n(n, n_ia32_binary_right);
840 const arch_register_t *op1_reg = x87_get_irn_register(op1);
841 const arch_register_t *op2_reg = x87_get_irn_register(op2);
842 const arch_register_t *out = x87_irn_get_register(n, pn_ia32_res);
843 int reg_index_1 = arch_register_get_index(op1_reg);
844 int reg_index_2 = arch_register_get_index(op2_reg);
845 vfp_liveness live = vfp_live_args_after(sim, n, REGMASK(out));
849 DB((dbg, LEVEL_1, ">>> %+F %s, %s -> %s\n", n,
850 arch_register_get_name(op1_reg), arch_register_get_name(op2_reg),
851 arch_register_get_name(out)));
852 DEBUG_ONLY(vfp_dump_live(live);)
853 DB((dbg, LEVEL_1, "Stack before: "));
854 DEBUG_ONLY(x87_dump_stack(state);)
856 op1_idx = x87_on_stack(state, reg_index_1);
857 assert(op1_idx >= 0);
858 op1_live_after = is_vfp_live(reg_index_1, live);
860 attr = get_ia32_x87_attr(n);
861 permuted = attr->attr.data.ins_permuted;
863 if (reg_index_2 != REG_VFP_VFP_NOREG) {
866 /* second operand is a vfp register */
867 op2_idx = x87_on_stack(state, reg_index_2);
868 assert(op2_idx >= 0);
869 op2_live_after = is_vfp_live(reg_index_2, live);
871 if (op2_live_after) {
872 /* Second operand is live. */
874 if (op1_live_after) {
875 /* Both operands are live: push the first one.
876 This works even for op1 == op2. */
877 x87_create_fpush(state, n, op1_idx, n_ia32_binary_right);
878 /* now do fxxx (tos=tos X op) */
882 dst = tmpl->normal_op;
884 /* Second live, first operand is dead here, bring it to tos. */
886 x87_create_fxch(state, n, op1_idx);
891 /* now do fxxx (tos=tos X op) */
893 dst = tmpl->normal_op;
896 /* Second operand is dead. */
897 if (op1_live_after) {
898 /* First operand is live: bring second to tos. */
900 x87_create_fxch(state, n, op2_idx);
905 /* now do fxxxr (tos = op X tos) */
907 dst = tmpl->reverse_op;
909 /* Both operands are dead here, pop them from the stack. */
912 /* Both are identically and on tos, no pop needed. */
913 /* here fxxx (tos = tos X tos) */
914 dst = tmpl->normal_op;
917 /* now do fxxxp (op = op X tos, pop) */
918 dst = tmpl->normal_pop_op;
922 } else if (op1_idx == 0) {
923 assert(op1_idx != op2_idx);
924 /* now do fxxxrp (op = tos X op, pop) */
925 dst = tmpl->reverse_pop_op;
929 /* Bring the second on top. */
930 x87_create_fxch(state, n, op2_idx);
931 if (op1_idx == op2_idx) {
932 /* Both are identically and on tos now, no pop needed. */
935 /* use fxxx (tos = tos X tos) */
936 dst = tmpl->normal_op;
939 /* op2 is on tos now */
941 /* use fxxxp (op = op X tos, pop) */
942 dst = tmpl->normal_pop_op;
950 /* second operand is an address mode */
951 if (op1_live_after) {
952 /* first operand is live: push it here */
953 x87_create_fpush(state, n, op1_idx, n_ia32_binary_left);
956 /* first operand is dead: bring it to tos */
958 x87_create_fxch(state, n, op1_idx);
963 /* use fxxx (tos = tos X mem) */
964 dst = permuted ? tmpl->reverse_op : tmpl->normal_op;
968 patched_insn = x87_patch_insn(n, dst);
969 x87_set_st(state, arch_register_get_index(out), patched_insn, out_idx);
974 /* patch the operation */
975 attr->x87[0] = op1_reg = get_st_reg(op1_idx);
976 if (reg_index_2 != REG_VFP_VFP_NOREG) {
977 attr->x87[1] = op2_reg = get_st_reg(op2_idx);
979 attr->x87[2] = out = get_st_reg(out_idx);
981 if (reg_index_2 != REG_VFP_VFP_NOREG) {
982 DB((dbg, LEVEL_1, "<<< %s %s, %s -> %s\n", get_irn_opname(n),
983 arch_register_get_name(op1_reg), arch_register_get_name(op2_reg),
984 arch_register_get_name(out)));
986 DB((dbg, LEVEL_1, "<<< %s %s, [AM] -> %s\n", get_irn_opname(n),
987 arch_register_get_name(op1_reg),
988 arch_register_get_name(out)));
991 return NO_NODE_ADDED;
995 * Simulate a virtual Unop.
997 * @param state the x87 state
998 * @param n the node that should be simulated (and patched)
999 * @param op the x87 opcode that will replace n's opcode
1001 * @return NO_NODE_ADDED
1003 static int sim_unop(x87_state *state, ir_node *n, ir_op *op)
1005 arch_register_t const *const out = x87_get_irn_register(n);
1006 unsigned const live = vfp_live_args_after(state->sim, n, REGMASK(out));
1007 DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, out->name));
1008 DEBUG_ONLY(vfp_dump_live(live);)
1010 arch_register_t const *const op1_reg = x87_get_irn_register(get_irn_n(n, 0));
1011 int const op1_reg_idx = arch_register_get_index(op1_reg);
1012 int const op1_idx = x87_on_stack(state, op1_reg_idx);
1013 if (is_vfp_live(op1_reg_idx, live)) {
1014 /* push the operand here */
1015 x87_create_fpush(state, n, op1_idx, 0);
1017 /* operand is dead, bring it to tos */
1019 x87_create_fxch(state, n, op1_idx);
1023 x87_set_tos(state, arch_register_get_index(out), x87_patch_insn(n, op));
1024 ia32_x87_attr_t *const attr = get_ia32_x87_attr(n);
1025 attr->x87[2] = attr->x87[0] = get_st_reg(0);
1026 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), attr->x87[2]->name));
1028 return NO_NODE_ADDED;
1032 * Simulate a virtual Load instruction.
1034 * @param state the x87 state
1035 * @param n the node that should be simulated (and patched)
1036 * @param op the x87 opcode that will replace n's opcode
1038 * @return NO_NODE_ADDED
1040 static int sim_load(x87_state *state, ir_node *n, ir_op *op, int res_pos)
1042 const arch_register_t *out = x87_irn_get_register(n, res_pos);
1043 ia32_x87_attr_t *attr;
1045 DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, arch_register_get_name(out)));
1046 x87_push(state, arch_register_get_index(out), x87_patch_insn(n, op));
1047 assert(out == x87_irn_get_register(n, res_pos));
1048 attr = get_ia32_x87_attr(n);
1049 attr->x87[2] = out = get_st_reg(0);
1050 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), arch_register_get_name(out)));
1052 return NO_NODE_ADDED;
1056 * Rewire all users of @p old_val to @new_val iff they are scheduled after @p store.
1058 * @param store The store
1059 * @param old_val The former value
1060 * @param new_val The new value
1062 static void collect_and_rewire_users(ir_node *store, ir_node *old_val, ir_node *new_val)
1064 foreach_out_edge_safe(old_val, edge) {
1065 ir_node *user = get_edge_src_irn(edge);
1067 if (! user || user == store)
1070 /* if the user is scheduled after the store: rewire */
1071 if (sched_is_scheduled(user) && sched_comes_after(store, user)) {
1073 /* find the input of the user pointing to the old value */
1074 for (i = get_irn_arity(user) - 1; i >= 0; i--) {
1075 if (get_irn_n(user, i) == old_val)
1076 set_irn_n(user, i, new_val);
1083 * Simulate a virtual Store.
1085 * @param state the x87 state
1086 * @param n the node that should be simulated (and patched)
1087 * @param op the x87 store opcode
1088 * @param op_p the x87 store and pop opcode
1090 static int sim_store(x87_state *state, ir_node *n, ir_op *op, ir_op *op_p)
1092 ir_node *const val = get_irn_n(n, n_ia32_vfst_val);
1093 arch_register_t const *const op2 = x87_get_irn_register(val);
1094 DB((dbg, LEVEL_1, ">>> %+F %s ->\n", n, arch_register_get_name(op2)));
1096 int insn = NO_NODE_ADDED;
1097 int const op2_reg_idx = arch_register_get_index(op2);
1098 int const op2_idx = x87_on_stack(state, op2_reg_idx);
1099 unsigned const live = vfp_live_args_after(state->sim, n, 0);
1100 int const live_after_node = is_vfp_live(op2_reg_idx, live);
1101 assert(op2_idx >= 0);
1102 if (live_after_node) {
1103 /* Problem: fst doesn't support 80bit modes (spills), only fstp does
1104 * fist doesn't support 64bit mode, only fistp
1106 * - stack not full: push value and fstp
1107 * - stack full: fstp value and load again
1108 * Note that we cannot test on mode_E, because floats might be 80bit ... */
1109 ir_mode *const mode = get_ia32_ls_mode(n);
1110 if (get_mode_size_bits(mode) > (mode_is_int(mode) ? 32 : 64)) {
1111 if (x87_get_depth(state) < N_ia32_st_REGS) {
1112 /* ok, we have a free register: push + fstp */
1113 x87_create_fpush(state, n, op2_idx, n_ia32_vfst_val);
1115 x87_patch_insn(n, op_p);
1117 /* stack full here: need fstp + load */
1119 x87_patch_insn(n, op_p);
1121 ir_node *const block = get_nodes_block(n);
1122 ir_graph *const irg = get_irn_irg(n);
1123 ir_node *const nomem = get_irg_no_mem(irg);
1124 ir_node *const vfld = new_bd_ia32_vfld(NULL, block, get_irn_n(n, 0), get_irn_n(n, 1), nomem, mode);
1126 /* copy all attributes */
1127 set_ia32_frame_ent(vfld, get_ia32_frame_ent(n));
1128 if (is_ia32_use_frame(n))
1129 set_ia32_use_frame(vfld);
1130 set_ia32_op_type(vfld, ia32_AddrModeS);
1131 add_ia32_am_offs_int(vfld, get_ia32_am_offs_int(n));
1132 set_ia32_am_sc(vfld, get_ia32_am_sc(n));
1133 set_ia32_ls_mode(vfld, mode);
1135 ir_node *const rproj = new_r_Proj(vfld, mode, pn_ia32_vfld_res);
1136 ir_node *const mproj = new_r_Proj(vfld, mode_M, pn_ia32_vfld_M);
1137 ir_node *const mem = get_irn_Proj_for_mode(n, mode_M);
1139 assert(mem && "Store memory not found");
1141 arch_set_irn_register(rproj, op2);
1143 /* reroute all former users of the store memory to the load memory */
1144 edges_reroute(mem, mproj);
1145 /* set the memory input of the load to the store memory */
1146 set_irn_n(vfld, n_ia32_vfld_mem, mem);
1148 sched_add_after(n, vfld);
1149 sched_add_after(vfld, rproj);
1151 /* rewire all users, scheduled after the store, to the loaded value */
1152 collect_and_rewire_users(n, val, rproj);
1157 /* we can only store the tos to memory */
1159 x87_create_fxch(state, n, op2_idx);
1161 /* mode size 64 or smaller -> use normal fst */
1162 x87_patch_insn(n, op);
1165 /* we can only store the tos to memory */
1167 x87_create_fxch(state, n, op2_idx);
1170 x87_patch_insn(n, op_p);
1173 ia32_x87_attr_t *const attr = get_ia32_x87_attr(n);
1174 attr->x87[1] = get_st_reg(0);
1175 DB((dbg, LEVEL_1, "<<< %s %s ->\n", get_irn_opname(n), arch_register_get_name(attr->x87[1])));
1180 #define _GEN_BINOP(op, rev) \
1181 static int sim_##op(x87_state *state, ir_node *n) { \
1182 exchange_tmpl tmpl = { op_ia32_##op, op_ia32_##rev, op_ia32_##op##p, op_ia32_##rev##p }; \
1183 return sim_binop(state, n, &tmpl); \
1186 #define GEN_BINOP(op) _GEN_BINOP(op, op)
1187 #define GEN_BINOPR(op) _GEN_BINOP(op, op##r)
1189 #define GEN_LOAD(op) \
1190 static int sim_##op(x87_state *state, ir_node *n) { \
1191 return sim_load(state, n, op_ia32_##op, pn_ia32_v##op##_res); \
1194 #define GEN_UNOP(op) \
1195 static int sim_##op(x87_state *state, ir_node *n) { \
1196 return sim_unop(state, n, op_ia32_##op); \
1199 #define GEN_STORE(op) \
1200 static int sim_##op(x87_state *state, ir_node *n) { \
1201 return sim_store(state, n, op_ia32_##op, op_ia32_##op##p); \
1223 * Simulate a virtual fisttp.
1225 * @param state the x87 state
1226 * @param n the node that should be simulated (and patched)
1228 * @return NO_NODE_ADDED
1230 static int sim_fisttp(x87_state *state, ir_node *n)
1232 ir_node *val = get_irn_n(n, n_ia32_vfst_val);
1233 const arch_register_t *op2 = x87_get_irn_register(val);
1234 ia32_x87_attr_t *attr;
1235 int op2_reg_idx, op2_idx;
1237 op2_reg_idx = arch_register_get_index(op2);
1238 op2_idx = x87_on_stack(state, op2_reg_idx);
1239 DB((dbg, LEVEL_1, ">>> %+F %s ->\n", n, arch_register_get_name(op2)));
1240 assert(op2_idx >= 0);
1242 /* Note: although the value is still live here, it is destroyed because
1243 of the pop. The register allocator is aware of that and introduced a copy
1244 if the value must be alive. */
1246 /* we can only store the tos to memory */
1248 x87_create_fxch(state, n, op2_idx);
1251 x87_patch_insn(n, op_ia32_fisttp);
1253 attr = get_ia32_x87_attr(n);
1254 attr->x87[1] = op2 = get_st_reg(0);
1255 DB((dbg, LEVEL_1, "<<< %s %s ->\n", get_irn_opname(n), arch_register_get_name(op2)));
1257 return NO_NODE_ADDED;
1261 * Simulate a virtual FtstFnstsw.
1263 * @param state the x87 state
1264 * @param n the node that should be simulated (and patched)
1266 * @return NO_NODE_ADDED
1268 static int sim_FtstFnstsw(x87_state *state, ir_node *n)
1270 x87_simulator *sim = state->sim;
1271 ia32_x87_attr_t *attr = get_ia32_x87_attr(n);
1272 ir_node *op1_node = get_irn_n(n, n_ia32_vFtstFnstsw_left);
1273 const arch_register_t *reg1 = x87_get_irn_register(op1_node);
1274 int reg_index_1 = arch_register_get_index(reg1);
1275 int op1_idx = x87_on_stack(state, reg_index_1);
1276 unsigned live = vfp_live_args_after(sim, n, 0);
1278 DB((dbg, LEVEL_1, ">>> %+F %s\n", n, arch_register_get_name(reg1)));
1279 DEBUG_ONLY(vfp_dump_live(live);)
1280 DB((dbg, LEVEL_1, "Stack before: "));
1281 DEBUG_ONLY(x87_dump_stack(state);)
1282 assert(op1_idx >= 0);
1285 /* bring the value to tos */
1286 x87_create_fxch(state, n, op1_idx);
1290 /* patch the operation */
1291 x87_patch_insn(n, op_ia32_FtstFnstsw);
1292 reg1 = get_st_reg(op1_idx);
1293 attr->x87[0] = reg1;
1294 attr->x87[1] = NULL;
1295 attr->x87[2] = NULL;
1297 if (!is_vfp_live(reg_index_1, live))
1298 x87_create_fpop(state, sched_next(n), 1);
1300 return NO_NODE_ADDED;
1306 * @param state the x87 state
1307 * @param n the node that should be simulated (and patched)
1309 * @return NO_NODE_ADDED
1311 static int sim_Fucom(x87_state *state, ir_node *n)
1315 ia32_x87_attr_t *attr = get_ia32_x87_attr(n);
1317 x87_simulator *sim = state->sim;
1318 ir_node *op1_node = get_irn_n(n, n_ia32_vFucomFnstsw_left);
1319 ir_node *op2_node = get_irn_n(n, n_ia32_vFucomFnstsw_right);
1320 const arch_register_t *op1 = x87_get_irn_register(op1_node);
1321 const arch_register_t *op2 = x87_get_irn_register(op2_node);
1322 int reg_index_1 = arch_register_get_index(op1);
1323 int reg_index_2 = arch_register_get_index(op2);
1324 unsigned live = vfp_live_args_after(sim, n, 0);
1325 bool permuted = attr->attr.data.ins_permuted;
1329 DB((dbg, LEVEL_1, ">>> %+F %s, %s\n", n,
1330 arch_register_get_name(op1), arch_register_get_name(op2)));
1331 DEBUG_ONLY(vfp_dump_live(live);)
1332 DB((dbg, LEVEL_1, "Stack before: "));
1333 DEBUG_ONLY(x87_dump_stack(state);)
1335 op1_idx = x87_on_stack(state, reg_index_1);
1336 assert(op1_idx >= 0);
1338 /* BEWARE: check for comp a,a cases, they might happen */
1339 if (reg_index_2 != REG_VFP_VFP_NOREG) {
1340 /* second operand is a vfp register */
1341 op2_idx = x87_on_stack(state, reg_index_2);
1342 assert(op2_idx >= 0);
1344 if (is_vfp_live(reg_index_2, live)) {
1345 /* second operand is live */
1347 if (is_vfp_live(reg_index_1, live)) {
1348 /* both operands are live */
1351 /* res = tos X op */
1352 } else if (op2_idx == 0) {
1353 /* res = op X tos */
1354 permuted = !permuted;
1357 /* bring the first one to tos */
1358 x87_create_fxch(state, n, op1_idx);
1359 if (op1_idx == op2_idx) {
1361 } else if (op2_idx == 0) {
1365 /* res = tos X op */
1368 /* second live, first operand is dead here, bring it to tos.
1369 This means further, op1_idx != op2_idx. */
1370 assert(op1_idx != op2_idx);
1372 x87_create_fxch(state, n, op1_idx);
1377 /* res = tos X op, pop */
1381 /* second operand is dead */
1382 if (is_vfp_live(reg_index_1, live)) {
1383 /* first operand is live: bring second to tos.
1384 This means further, op1_idx != op2_idx. */
1385 assert(op1_idx != op2_idx);
1387 x87_create_fxch(state, n, op2_idx);
1392 /* res = op X tos, pop */
1394 permuted = !permuted;
1397 /* both operands are dead here, check first for identity. */
1398 if (op1_idx == op2_idx) {
1399 /* identically, one pop needed */
1401 x87_create_fxch(state, n, op1_idx);
1405 /* res = tos X op, pop */
1408 /* different, move them to st and st(1) and pop both.
1409 The tricky part is to get one into st(1).*/
1410 else if (op2_idx == 1) {
1411 /* good, second operand is already in the right place, move the first */
1413 /* bring the first on top */
1414 x87_create_fxch(state, n, op1_idx);
1415 assert(op2_idx != 0);
1418 /* res = tos X op, pop, pop */
1420 } else if (op1_idx == 1) {
1421 /* good, first operand is already in the right place, move the second */
1423 /* bring the first on top */
1424 x87_create_fxch(state, n, op2_idx);
1425 assert(op1_idx != 0);
1428 /* res = op X tos, pop, pop */
1429 permuted = !permuted;
1433 /* if one is already the TOS, we need two fxch */
1435 /* first one is TOS, move to st(1) */
1436 x87_create_fxch(state, n, 1);
1437 assert(op2_idx != 1);
1439 x87_create_fxch(state, n, op2_idx);
1441 /* res = op X tos, pop, pop */
1443 permuted = !permuted;
1445 } else if (op2_idx == 0) {
1446 /* second one is TOS, move to st(1) */
1447 x87_create_fxch(state, n, 1);
1448 assert(op1_idx != 1);
1450 x87_create_fxch(state, n, op1_idx);
1452 /* res = tos X op, pop, pop */
1455 /* none of them is either TOS or st(1), 3 fxch needed */
1456 x87_create_fxch(state, n, op2_idx);
1457 assert(op1_idx != 0);
1458 x87_create_fxch(state, n, 1);
1460 x87_create_fxch(state, n, op1_idx);
1462 /* res = tos X op, pop, pop */
1469 /* second operand is an address mode */
1470 if (is_vfp_live(reg_index_1, live)) {
1471 /* first operand is live: bring it to TOS */
1473 x87_create_fxch(state, n, op1_idx);
1477 /* first operand is dead: bring it to tos */
1479 x87_create_fxch(state, n, op1_idx);
1486 /* patch the operation */
1487 if (is_ia32_vFucomFnstsw(n)) {
1491 case 0: dst = op_ia32_FucomFnstsw; break;
1492 case 1: dst = op_ia32_FucompFnstsw; break;
1493 case 2: dst = op_ia32_FucomppFnstsw; break;
1494 default: panic("invalid popcount");
1497 for (i = 0; i < pops; ++i) {
1500 } else if (is_ia32_vFucomi(n)) {
1502 case 0: dst = op_ia32_Fucomi; break;
1503 case 1: dst = op_ia32_Fucompi; x87_pop(state); break;
1505 dst = op_ia32_Fucompi;
1507 x87_create_fpop(state, sched_next(n), 1);
1509 default: panic("invalid popcount");
1512 panic("invalid operation %+F", n);
1515 x87_patch_insn(n, dst);
1522 op1 = get_st_reg(op1_idx);
1525 op2 = get_st_reg(op2_idx);
1528 attr->x87[2] = NULL;
1529 attr->attr.data.ins_permuted = permuted;
1532 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(n),
1533 arch_register_get_name(op1), arch_register_get_name(op2)));
1535 DB((dbg, LEVEL_1, "<<< %s %s, [AM]\n", get_irn_opname(n),
1536 arch_register_get_name(op1)));
1539 return NO_NODE_ADDED;
1545 * @param state the x87 state
1546 * @param n the node that should be simulated (and patched)
1548 * @return NO_NODE_ADDED
1550 static int sim_Keep(x87_state *state, ir_node *node)
1553 const arch_register_t *op_reg;
1559 DB((dbg, LEVEL_1, ">>> %+F\n", node));
1561 arity = get_irn_arity(node);
1562 for (i = 0; i < arity; ++i) {
1563 op = get_irn_n(node, i);
1564 op_reg = arch_get_irn_register(op);
1565 if (arch_register_get_class(op_reg) != &ia32_reg_classes[CLASS_ia32_vfp])
1568 reg_id = arch_register_get_index(op_reg);
1569 live = vfp_live_args_after(state->sim, node, 0);
1571 op_stack_idx = x87_on_stack(state, reg_id);
1572 if (op_stack_idx >= 0 && !is_vfp_live(reg_id, live))
1573 x87_create_fpop(state, sched_next(node), 1);
1576 DB((dbg, LEVEL_1, "Stack after: "));
1577 DEBUG_ONLY(x87_dump_stack(state);)
1579 return NO_NODE_ADDED;
1583 * Keep the given node alive by adding a be_Keep.
1585 * @param node the node to kept alive
1587 static void keep_float_node_alive(ir_node *node)
1589 ir_node *block = get_nodes_block(node);
1590 ir_node *keep = be_new_Keep(block, 1, &node);
1592 assert(sched_is_scheduled(node));
1593 sched_add_after(node, keep);
1597 * Create a copy of a node. Recreate the node if it's a constant.
1599 * @param state the x87 state
1600 * @param n the node to be copied
1602 * @return the copy of n
1604 static ir_node *create_Copy(x87_state *state, ir_node *n)
1606 dbg_info *n_dbg = get_irn_dbg_info(n);
1607 ir_mode *mode = get_irn_mode(n);
1608 ir_node *block = get_nodes_block(n);
1609 ir_node *pred = get_irn_n(n, 0);
1610 ir_node *(*cnstr)(dbg_info *, ir_node *, ir_mode *) = NULL;
1612 const arch_register_t *out;
1613 const arch_register_t *op1;
1614 ia32_x87_attr_t *attr;
1616 /* Do not copy constants, recreate them. */
1617 switch (get_ia32_irn_opcode(pred)) {
1619 cnstr = new_bd_ia32_fldz;
1622 cnstr = new_bd_ia32_fld1;
1624 case iro_ia32_fldpi:
1625 cnstr = new_bd_ia32_fldpi;
1627 case iro_ia32_fldl2e:
1628 cnstr = new_bd_ia32_fldl2e;
1630 case iro_ia32_fldl2t:
1631 cnstr = new_bd_ia32_fldl2t;
1633 case iro_ia32_fldlg2:
1634 cnstr = new_bd_ia32_fldlg2;
1636 case iro_ia32_fldln2:
1637 cnstr = new_bd_ia32_fldln2;
1643 out = x87_get_irn_register(n);
1644 op1 = x87_get_irn_register(pred);
1646 if (cnstr != NULL) {
1647 /* copy a constant */
1648 res = (*cnstr)(n_dbg, block, mode);
1650 x87_push(state, arch_register_get_index(out), res);
1652 attr = get_ia32_x87_attr(res);
1653 attr->x87[2] = get_st_reg(0);
1655 int op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1657 res = new_bd_ia32_fpushCopy(n_dbg, block, pred, mode);
1659 x87_push(state, arch_register_get_index(out), res);
1661 attr = get_ia32_x87_attr(res);
1662 attr->x87[0] = get_st_reg(op1_idx);
1663 attr->x87[2] = get_st_reg(0);
1665 arch_set_irn_register(res, out);
1671 * Simulate a be_Copy.
1673 * @param state the x87 state
1674 * @param n the node that should be simulated (and patched)
1676 * @return NO_NODE_ADDED
1678 static int sim_Copy(x87_state *state, ir_node *n)
1680 arch_register_class_t const *const cls = arch_get_irn_reg_class(n);
1681 if (cls != &ia32_reg_classes[CLASS_ia32_vfp])
1682 return NO_NODE_ADDED;
1684 ir_node *const pred = be_get_Copy_op(n);
1685 arch_register_t const *const op1 = x87_get_irn_register(pred);
1686 arch_register_t const *const out = x87_get_irn_register(n);
1687 unsigned const live = vfp_live_args_after(state->sim, n, REGMASK(out));
1689 DB((dbg, LEVEL_1, ">>> %+F %s -> %s\n", n,
1690 arch_register_get_name(op1), arch_register_get_name(out)));
1691 DEBUG_ONLY(vfp_dump_live(live);)
1693 if (is_vfp_live(arch_register_get_index(op1), live)) {
1694 /* Operand is still live, a real copy. We need here an fpush that can
1695 hold a a register, so use the fpushCopy or recreate constants */
1696 ir_node *const node = create_Copy(state, n);
1698 /* We have to make sure the old value doesn't go dead (which can happen
1699 * when we recreate constants). As the simulator expected that value in
1700 * the pred blocks. This is unfortunate as removing it would save us 1
1701 * instruction, but we would have to rerun all the simulation to get
1704 ir_node *const next = sched_next(n);
1707 sched_add_before(next, node);
1709 if (get_irn_n_edges(pred) == 0) {
1710 keep_float_node_alive(pred);
1713 DB((dbg, LEVEL_1, "<<< %+F %s -> ?\n", node, op1->name));
1715 int const op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1716 int const out_idx = x87_on_stack(state, arch_register_get_index(out));
1717 if (out_idx >= 0 && out_idx != op1_idx) {
1718 /* Matze: out already on stack? how can this happen? */
1719 panic("invalid stack state");
1722 /* op1 must be killed and placed where out is */
1724 ia32_x87_attr_t *attr;
1725 /* best case, simple remove and rename */
1726 x87_patch_insn(n, op_ia32_Pop);
1727 attr = get_ia32_x87_attr(n);
1728 attr->x87[0] = op1 = get_st_reg(0);
1731 x87_set_st(state, arch_register_get_index(out), n, op1_idx - 1);
1733 ia32_x87_attr_t *attr;
1734 /* move op1 to tos, store and pop it */
1736 x87_create_fxch(state, n, op1_idx);
1739 x87_patch_insn(n, op_ia32_Pop);
1740 attr = get_ia32_x87_attr(n);
1741 attr->x87[0] = op1 = get_st_reg(out_idx);
1744 x87_set_st(state, arch_register_get_index(out), n, out_idx - 1);
1746 DB((dbg, LEVEL_1, "<<< %+F %s\n", n, op1->name));
1749 /* just a virtual copy */
1750 x87_set_st(state, arch_register_get_index(out), pred, op1_idx);
1751 /* don't remove the node to keep the verifier quiet :),
1752 the emitter won't emit any code for the node */
1755 DB((dbg, LEVEL_1, "<<< KILLED %s\n", get_irn_opname(n)));
1760 return NO_NODE_ADDED;
1764 * Returns the vf0 result Proj of a Call.
1766 * @para call the Call node
1768 static ir_node *get_call_result_proj(ir_node *call)
1770 /* search the result proj */
1771 foreach_out_edge(call, edge) {
1772 ir_node *proj = get_edge_src_irn(edge);
1773 long pn = get_Proj_proj(proj);
1775 if (pn == pn_ia32_Call_vf0)
1779 panic("result Proj missing");
1782 static int sim_Asm(x87_state *const state, ir_node *const n)
1786 for (size_t i = get_irn_arity(n); i-- != 0;) {
1787 arch_register_req_t const *const req = arch_get_irn_register_req_in(n, i);
1788 if (req->cls == &ia32_reg_classes[CLASS_ia32_vfp])
1789 panic("cannot handle %+F with x87 constraints", n);
1792 for (size_t i = arch_get_irn_n_outs(n); i-- != 0;) {
1793 arch_register_req_t const *const req = arch_get_irn_register_req_out(n, i);
1794 if (req->cls == &ia32_reg_classes[CLASS_ia32_vfp])
1795 panic("cannot handle %+F with x87 constraints", n);
1798 return NO_NODE_ADDED;
1802 * Simulate a ia32_Call.
1804 * @param state the x87 state
1805 * @param n the node that should be simulated (and patched)
1807 * @return NO_NODE_ADDED
1809 static int sim_Call(x87_state *state, ir_node *n)
1811 DB((dbg, LEVEL_1, ">>> %+F\n", n));
1813 /* at the begin of a call the x87 state should be empty */
1814 assert(state->depth == 0 && "stack not empty before call");
1816 ir_type *const call_tp = get_ia32_call_attr_const(n)->call_tp;
1817 if (get_method_n_ress(call_tp) != 0) {
1818 /* If the called function returns a float, it is returned in st(0).
1819 * This even happens if the return value is NOT used.
1820 * Moreover, only one return result is supported. */
1821 ir_type *const res_type = get_method_res_type(call_tp, 0);
1822 ir_mode *const mode = get_type_mode(res_type);
1823 if (mode && mode_is_float(mode)) {
1824 ir_node *const resproj = get_call_result_proj(n);
1825 arch_register_t const *const reg = x87_get_irn_register(resproj);
1826 x87_push(state, arch_register_get_index(reg), resproj);
1829 DB((dbg, LEVEL_1, "Stack after: "));
1830 DEBUG_ONLY(x87_dump_stack(state);)
1832 return NO_NODE_ADDED;
1836 * Simulate a be_Return.
1838 * @param state the x87 state
1839 * @param n the node that should be simulated (and patched)
1841 * @return NO_NODE_ADDED
1843 static int sim_Return(x87_state *state, ir_node *n)
1845 #ifdef DEBUG_libfirm
1846 /* only floating point return values must reside on stack */
1847 int n_float_res = 0;
1848 int const n_res = be_Return_get_n_rets(n);
1849 for (int i = 0; i < n_res; ++i) {
1850 ir_node *const res = get_irn_n(n, n_be_Return_val + i);
1851 if (mode_is_float(get_irn_mode(res)))
1854 assert(x87_get_depth(state) == n_float_res);
1857 /* pop them virtually */
1859 return NO_NODE_ADDED;
1863 * Simulate a be_Perm.
1865 * @param state the x87 state
1866 * @param irn the node that should be simulated (and patched)
1868 * @return NO_NODE_ADDED
1870 static int sim_Perm(x87_state *state, ir_node *irn)
1873 ir_node *pred = get_irn_n(irn, 0);
1876 /* handle only floating point Perms */
1877 if (! mode_is_float(get_irn_mode(pred)))
1878 return NO_NODE_ADDED;
1880 DB((dbg, LEVEL_1, ">>> %+F\n", irn));
1882 /* Perm is a pure virtual instruction on x87.
1883 All inputs must be on the FPU stack and are pairwise
1884 different from each other.
1885 So, all we need to do is to permutate the stack state. */
1886 n = get_irn_arity(irn);
1887 NEW_ARR_A(int, stack_pos, n);
1889 /* collect old stack positions */
1890 for (i = 0; i < n; ++i) {
1891 const arch_register_t *inreg = x87_get_irn_register(get_irn_n(irn, i));
1892 int idx = x87_on_stack(state, arch_register_get_index(inreg));
1894 assert(idx >= 0 && "Perm argument not on x87 stack");
1898 /* now do the permutation */
1899 foreach_out_edge(irn, edge) {
1900 ir_node *proj = get_edge_src_irn(edge);
1901 const arch_register_t *out = x87_get_irn_register(proj);
1902 long num = get_Proj_proj(proj);
1904 assert(0 <= num && num < n && "More Proj's than Perm inputs");
1905 x87_set_st(state, arch_register_get_index(out), proj, stack_pos[(unsigned)num]);
1907 DB((dbg, LEVEL_1, "<<< %+F\n", irn));
1909 return NO_NODE_ADDED;
1913 * Kill any dead registers at block start by popping them from the stack.
1915 * @param sim the simulator handle
1916 * @param block the current block
1917 * @param state the x87 state at the begin of the block
1919 static void x87_kill_deads(x87_simulator *const sim, ir_node *const block, x87_state *const state)
1921 ir_node *first_insn = sched_first(block);
1922 ir_node *keep = NULL;
1923 unsigned live = vfp_live_args_after(sim, block, 0);
1925 int i, depth, num_pop;
1928 depth = x87_get_depth(state);
1929 for (i = depth - 1; i >= 0; --i) {
1930 int reg = x87_get_st_reg(state, i);
1932 if (! is_vfp_live(reg, live))
1933 kill_mask |= (1 << i);
1937 DB((dbg, LEVEL_1, "Killing deads:\n"));
1938 DEBUG_ONLY(vfp_dump_live(live);)
1939 DEBUG_ONLY(x87_dump_stack(state);)
1941 if (kill_mask != 0 && live == 0) {
1942 /* special case: kill all registers */
1943 if (ia32_cg_config.use_femms || ia32_cg_config.use_emms) {
1944 if (ia32_cg_config.use_femms) {
1945 /* use FEMMS on AMD processors to clear all */
1946 keep = new_bd_ia32_femms(NULL, block);
1948 /* use EMMS to clear all */
1949 keep = new_bd_ia32_emms(NULL, block);
1951 sched_add_before(first_insn, keep);
1957 /* now kill registers */
1959 /* we can only kill from TOS, so bring them up */
1960 if (! (kill_mask & 1)) {
1961 /* search from behind, because we can to a double-pop */
1962 for (i = depth - 1; i >= 0; --i) {
1963 if (kill_mask & (1 << i)) {
1964 kill_mask &= ~(1 << i);
1971 x87_set_st(state, -1, keep, i);
1972 x87_create_fxch(state, first_insn, i);
1975 if ((kill_mask & 3) == 3) {
1976 /* we can do a double-pop */
1980 /* only a single pop */
1985 kill_mask >>= num_pop;
1986 keep = x87_create_fpop(state, first_insn, num_pop);
1993 * Run a simulation and fix all virtual instructions for a block.
1995 * @param sim the simulator handle
1996 * @param block the current block
1998 static void x87_simulate_block(x87_simulator *sim, ir_node *block)
2001 blk_state *bl_state = x87_get_bl_state(sim, block);
2002 x87_state *state = bl_state->begin;
2003 ir_node *start_block;
2005 assert(state != NULL);
2006 /* already processed? */
2007 if (bl_state->end != NULL)
2010 DB((dbg, LEVEL_1, "Simulate %+F\n", block));
2011 DB((dbg, LEVEL_2, "State at Block begin:\n "));
2012 DEBUG_ONLY(x87_dump_stack(state);)
2014 /* create a new state, will be changed */
2015 state = x87_clone_state(sim, state);
2016 /* at block begin, kill all dead registers */
2017 x87_kill_deads(sim, block, state);
2019 /* beware, n might change */
2020 for (n = sched_first(block); !sched_is_end(n); n = next) {
2023 ir_op *op = get_irn_op(n);
2026 * get the next node to be simulated here.
2027 * n might be completely removed from the schedule-
2029 next = sched_next(n);
2030 if (op->ops.generic != NULL) {
2031 func = (sim_func)op->ops.generic;
2034 node_inserted = (*func)(state, n);
2037 * sim_func might have added an additional node after n,
2038 * so update next node
2039 * beware: n must not be changed by sim_func
2040 * (i.e. removed from schedule) in this case
2042 if (node_inserted != NO_NODE_ADDED)
2043 next = sched_next(n);
2047 start_block = get_irg_start_block(get_irn_irg(block));
2049 DB((dbg, LEVEL_2, "State at Block end:\n ")); DEBUG_ONLY(x87_dump_stack(state);)
2051 /* check if the state must be shuffled */
2052 foreach_block_succ(block, edge) {
2053 ir_node *succ = get_edge_src_irn(edge);
2054 blk_state *succ_state;
2056 if (succ == start_block)
2059 succ_state = x87_get_bl_state(sim, succ);
2061 if (succ_state->begin == NULL) {
2062 DB((dbg, LEVEL_2, "Set begin state for succ %+F:\n", succ));
2063 DEBUG_ONLY(x87_dump_stack(state);)
2064 succ_state->begin = state;
2066 waitq_put(sim->worklist, succ);
2068 DB((dbg, LEVEL_2, "succ %+F already has a state, shuffling\n", succ));
2069 /* There is already a begin state for the successor, bad.
2070 Do the necessary permutations.
2071 Note that critical edges are removed, so this is always possible:
2072 If the successor has more than one possible input, then it must
2075 x87_shuffle(block, state, succ_state->begin);
2078 bl_state->end = state;
2082 * Register a simulator function.
2084 * @param op the opcode to simulate
2085 * @param func the simulator function for the opcode
2087 static void register_sim(ir_op *op, sim_func func)
2089 assert(op->ops.generic == NULL);
2090 op->ops.generic = (op_func) func;
2094 * Create a new x87 simulator.
2096 * @param sim a simulator handle, will be initialized
2097 * @param irg the current graph
2099 static void x87_init_simulator(x87_simulator *sim, ir_graph *irg)
2101 obstack_init(&sim->obst);
2102 sim->blk_states = pmap_create();
2103 sim->n_idx = get_irg_last_idx(irg);
2104 sim->live = OALLOCN(&sim->obst, vfp_liveness, sim->n_idx);
2106 DB((dbg, LEVEL_1, "--------------------------------\n"
2107 "x87 Simulator started for %+F\n", irg));
2109 /* set the generic function pointer of instruction we must simulate */
2110 ir_clear_opcodes_generic_func();
2112 register_sim(op_ia32_Asm, sim_Asm);
2113 register_sim(op_ia32_Call, sim_Call);
2114 register_sim(op_ia32_vfld, sim_fld);
2115 register_sim(op_ia32_vfild, sim_fild);
2116 register_sim(op_ia32_vfld1, sim_fld1);
2117 register_sim(op_ia32_vfldz, sim_fldz);
2118 register_sim(op_ia32_vfadd, sim_fadd);
2119 register_sim(op_ia32_vfsub, sim_fsub);
2120 register_sim(op_ia32_vfmul, sim_fmul);
2121 register_sim(op_ia32_vfdiv, sim_fdiv);
2122 register_sim(op_ia32_vfprem, sim_fprem);
2123 register_sim(op_ia32_vfabs, sim_fabs);
2124 register_sim(op_ia32_vfchs, sim_fchs);
2125 register_sim(op_ia32_vfist, sim_fist);
2126 register_sim(op_ia32_vfisttp, sim_fisttp);
2127 register_sim(op_ia32_vfst, sim_fst);
2128 register_sim(op_ia32_vFtstFnstsw, sim_FtstFnstsw);
2129 register_sim(op_ia32_vFucomFnstsw, sim_Fucom);
2130 register_sim(op_ia32_vFucomi, sim_Fucom);
2131 register_sim(op_be_Copy, sim_Copy);
2132 register_sim(op_be_Return, sim_Return);
2133 register_sim(op_be_Perm, sim_Perm);
2134 register_sim(op_be_Keep, sim_Keep);
2138 * Destroy a x87 simulator.
2140 * @param sim the simulator handle
2142 static void x87_destroy_simulator(x87_simulator *sim)
2144 pmap_destroy(sim->blk_states);
2145 obstack_free(&sim->obst, NULL);
2146 DB((dbg, LEVEL_1, "x87 Simulator stopped\n\n"));
2150 * Pre-block walker: calculate the liveness information for the block
2151 * and store it into the sim->live cache.
2153 static void update_liveness_walker(ir_node *block, void *data)
2155 x87_simulator *sim = (x87_simulator*)data;
2156 update_liveness(sim, block);
2160 * Run a simulation and fix all virtual instructions for a graph.
2161 * Replaces all virtual floating point instructions and registers
2164 void ia32_x87_simulate_graph(ir_graph *irg)
2166 /* TODO improve code quality (less executed fxch) by using execfreqs */
2168 ir_node *block, *start_block;
2169 blk_state *bl_state;
2172 /* create the simulator */
2173 x87_init_simulator(&sim, irg);
2175 start_block = get_irg_start_block(irg);
2176 bl_state = x87_get_bl_state(&sim, start_block);
2178 /* start with the empty state */
2180 bl_state->begin = ∅
2182 sim.worklist = new_waitq();
2183 waitq_put(sim.worklist, start_block);
2185 be_assure_live_sets(irg);
2186 sim.lv = be_get_irg_liveness(irg);
2188 /* Calculate the liveness for all nodes. We must precalculate this info,
2189 * because the simulator adds new nodes (possible before Phi nodes) which
2190 * would let a lazy calculation fail.
2191 * On the other hand we reduce the computation amount due to
2192 * precaching from O(n^2) to O(n) at the expense of O(n) cache memory.
2194 irg_block_walk_graph(irg, update_liveness_walker, NULL, &sim);
2198 block = (ir_node*)waitq_get(sim.worklist);
2199 x87_simulate_block(&sim, block);
2200 } while (! waitq_empty(sim.worklist));
2203 del_waitq(sim.worklist);
2204 x87_destroy_simulator(&sim);
2207 /* Initializes the x87 simulator. */
2208 void ia32_init_x87(void)
2210 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.x87");