2 * Copyright (C) 1995-2010 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the x87 support and virtual to stack
23 * register translation for the ia32 backend.
24 * @author Michael Beck
33 #include "iredges_t.h"
48 #include "bearch_ia32_t.h"
49 #include "ia32_new_nodes.h"
50 #include "gen_ia32_new_nodes.h"
51 #include "gen_ia32_regalloc_if.h"
53 #include "ia32_architecture.h"
55 /** the debug handle */
56 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
58 /* Forward declaration. */
59 typedef struct x87_simulator x87_simulator;
62 * An exchange template.
63 * Note that our virtual functions have the same inputs
64 * and attributes as the real ones, so we can simple exchange
66 * Further, x87 supports inverse instructions, so we can handle them.
68 typedef struct exchange_tmpl {
69 ir_op *normal_op; /**< the normal one */
70 ir_op *reverse_op; /**< the reverse one if exists */
71 ir_op *normal_pop_op; /**< the normal one with tos pop */
72 ir_op *reverse_pop_op; /**< the reverse one with tos pop */
76 * An entry on the simulated x87 stack.
78 typedef struct st_entry {
79 int reg_idx; /**< the virtual register index of this stack value */
80 ir_node *node; /**< the node that produced this value */
86 typedef struct x87_state {
87 st_entry st[N_ia32_st_REGS]; /**< the register stack */
88 int depth; /**< the current stack depth */
89 x87_simulator *sim; /**< The simulator. */
92 /** An empty state, used for blocks without fp instructions. */
93 static x87_state empty = { { {0, NULL}, }, 0, NULL };
96 * Return values of the instruction simulator functions.
99 NO_NODE_ADDED = 0, /**< No node that needs simulation was added. */
100 NODE_ADDED = 1 /**< A node that must be simulated was added by the simulator
101 in the schedule AFTER the current node. */
105 * The type of an instruction simulator function.
107 * @param state the x87 state
108 * @param n the node to be simulated
110 * @return NODE_ADDED if a node was added AFTER n in schedule that MUST be
112 * NO_NODE_ADDED otherwise
114 typedef int (*sim_func)(x87_state *state, ir_node *n);
117 * A block state: Every block has a x87 state at the beginning and at the end.
119 typedef struct blk_state {
120 x87_state *begin; /**< state at the begin or NULL if not assigned */
121 x87_state *end; /**< state at the end or NULL if not assigned */
124 /** liveness bitset for vfp registers. */
125 typedef unsigned char vfp_liveness;
130 struct x87_simulator {
131 struct obstack obst; /**< An obstack for fast allocating. */
132 pmap *blk_states; /**< Map blocks to states. */
133 be_lv_t *lv; /**< intrablock liveness. */
134 vfp_liveness *live; /**< Liveness information. */
135 unsigned n_idx; /**< The cached get_irg_last_idx() result. */
136 waitq *worklist; /**< Worklist of blocks that must be processed. */
140 * Returns the current stack depth.
142 * @param state the x87 state
144 * @return the x87 stack depth
146 static int x87_get_depth(const x87_state *state)
151 static st_entry *x87_get_entry(x87_state *const state, int const pos)
153 assert(0 <= pos && pos < state->depth);
154 return &state->st[N_ia32_st_REGS - state->depth + pos];
158 * Return the virtual register index at st(pos).
160 * @param state the x87 state
161 * @param pos a stack position
163 * @return the vfp register index that produced the value at st(pos)
165 static int x87_get_st_reg(const x87_state *state, int pos)
167 return x87_get_entry((x87_state*)state, pos)->reg_idx;
172 * Dump the stack for debugging.
174 * @param state the x87 state
176 static void x87_dump_stack(const x87_state *state)
178 for (int i = state->depth; i-- != 0;) {
179 st_entry const *const entry = x87_get_entry((x87_state*)state, i);
180 DB((dbg, LEVEL_2, "vf%d(%+F) ", entry->reg_idx, entry->node));
182 DB((dbg, LEVEL_2, "<-- TOS\n"));
184 #endif /* DEBUG_libfirm */
187 * Set a virtual register to st(pos).
189 * @param state the x87 state
190 * @param reg_idx the vfp register index that should be set
191 * @param node the IR node that produces the value of the vfp register
192 * @param pos the stack position where the new value should be entered
194 static void x87_set_st(x87_state *state, int reg_idx, ir_node *node, int pos)
196 st_entry *const entry = x87_get_entry(state, pos);
197 entry->reg_idx = reg_idx;
200 DB((dbg, LEVEL_2, "After SET_REG: "));
201 DEBUG_ONLY(x87_dump_stack(state);)
205 * Set the tos virtual register.
207 * @param state the x87 state
208 * @param reg_idx the vfp register index that should be set
209 * @param node the IR node that produces the value of the vfp register
211 static void x87_set_tos(x87_state *state, int reg_idx, ir_node *node)
213 x87_set_st(state, reg_idx, node, 0);
217 * Swap st(0) with st(pos).
219 * @param state the x87 state
220 * @param pos the stack position to change the tos with
222 static void x87_fxch(x87_state *state, int pos)
224 st_entry *const a = x87_get_entry(state, pos);
225 st_entry *const b = x87_get_entry(state, 0);
226 st_entry const t = *a;
230 DB((dbg, LEVEL_2, "After FXCH: "));
231 DEBUG_ONLY(x87_dump_stack(state);)
235 * Convert a virtual register to the stack index.
237 * @param state the x87 state
238 * @param reg_idx the register vfp index
240 * @return the stack position where the register is stacked
241 * or -1 if the virtual register was not found
243 static int x87_on_stack(const x87_state *state, int reg_idx)
245 for (int i = 0; i < state->depth; ++i) {
246 if (x87_get_st_reg(state, i) == reg_idx)
253 * Push a virtual Register onto the stack, double pushed allowed.
255 * @param state the x87 state
256 * @param reg_idx the register vfp index
257 * @param node the node that produces the value of the vfp register
259 static void x87_push_dbl(x87_state *state, int reg_idx, ir_node *node)
261 assert(state->depth < N_ia32_st_REGS && "stack overrun");
264 st_entry *const entry = x87_get_entry(state, 0);
265 entry->reg_idx = reg_idx;
268 DB((dbg, LEVEL_2, "After PUSH: ")); DEBUG_ONLY(x87_dump_stack(state);)
272 * Push a virtual Register onto the stack, double pushes are NOT allowed.
274 * @param state the x87 state
275 * @param reg_idx the register vfp index
276 * @param node the node that produces the value of the vfp register
278 static void x87_push(x87_state *state, int reg_idx, ir_node *node)
280 assert(x87_on_stack(state, reg_idx) == -1 && "double push");
282 x87_push_dbl(state, reg_idx, node);
286 * Pop a virtual Register from the stack.
288 * @param state the x87 state
290 static void x87_pop(x87_state *state)
292 assert(state->depth > 0 && "stack underrun");
296 DB((dbg, LEVEL_2, "After POP: ")); DEBUG_ONLY(x87_dump_stack(state);)
300 * Empty the fpu stack
302 * @param state the x87 state
304 static void x87_emms(x87_state *state)
310 * Returns the block state of a block.
312 * @param sim the x87 simulator handle
313 * @param block the current block
315 * @return the block state
317 static blk_state *x87_get_bl_state(x87_simulator *sim, ir_node *block)
319 blk_state *res = pmap_get(blk_state, sim->blk_states, block);
322 res = OALLOC(&sim->obst, blk_state);
326 pmap_insert(sim->blk_states, block, res);
335 * @param sim the x87 simulator handle
336 * @param src the x87 state that will be cloned
338 * @return a cloned copy of the src state
340 static x87_state *x87_clone_state(x87_simulator *sim, const x87_state *src)
342 x87_state *const res = OALLOC(&sim->obst, x87_state);
348 * Patch a virtual instruction into a x87 one and return
349 * the node representing the result value.
351 * @param n the IR node to patch
352 * @param op the x87 opcode to patch in
354 static ir_node *x87_patch_insn(ir_node *n, ir_op *op)
356 ir_mode *mode = get_irn_mode(n);
361 if (mode == mode_T) {
362 /* patch all Proj's */
363 foreach_out_edge(n, edge) {
364 ir_node *proj = get_edge_src_irn(edge);
366 mode = get_irn_mode(proj);
367 if (mode_is_float(mode)) {
369 set_irn_mode(proj, ia32_reg_classes[CLASS_ia32_st].mode);
373 } else if (mode_is_float(mode))
374 set_irn_mode(n, ia32_reg_classes[CLASS_ia32_st].mode);
379 * Returns the first Proj of a mode_T node having a given mode.
381 * @param n the mode_T node
382 * @param m the desired mode of the Proj
383 * @return The first Proj of mode @p m found or NULL.
385 static ir_node *get_irn_Proj_for_mode(ir_node *n, ir_mode *m)
387 assert(get_irn_mode(n) == mode_T && "Need mode_T node");
389 foreach_out_edge(n, edge) {
390 ir_node *proj = get_edge_src_irn(edge);
391 if (get_irn_mode(proj) == m)
399 * Wrap the arch_* function here so we can check for errors.
401 static inline const arch_register_t *x87_get_irn_register(const ir_node *irn)
403 const arch_register_t *res = arch_get_irn_register(irn);
405 assert(res->reg_class == &ia32_reg_classes[CLASS_ia32_vfp]);
409 static inline const arch_register_t *x87_irn_get_register(const ir_node *irn,
412 const arch_register_t *res = arch_get_irn_register_out(irn, pos);
414 assert(res->reg_class == &ia32_reg_classes[CLASS_ia32_vfp]);
418 static inline const arch_register_t *get_st_reg(int index)
420 return &ia32_registers[REG_ST0 + index];
423 /* -------------- x87 perm --------------- */
426 * Creates a fxch for shuffle.
428 * @param state the x87 state
429 * @param pos parameter for fxch
430 * @param block the block were fxch is inserted
432 * Creates a new fxch node and reroute the user of the old node
435 * @return the fxch node
437 static ir_node *x87_fxch_shuffle(x87_state *state, int pos, ir_node *block)
440 ia32_x87_attr_t *attr;
442 fxch = new_bd_ia32_fxch(NULL, block);
443 attr = get_ia32_x87_attr(fxch);
444 attr->x87[0] = get_st_reg(pos);
445 attr->x87[2] = get_st_reg(0);
449 x87_fxch(state, pos);
454 * Calculate the necessary permutations to reach dst_state.
456 * These permutations are done with fxch instructions and placed
457 * at the end of the block.
459 * Note that critical edges are removed here, so we need only
460 * a shuffle if the current block has only one successor.
462 * @param block the current block
463 * @param state the current x87 stack state, might be modified
464 * @param dst_state destination state
468 static x87_state *x87_shuffle(ir_node *block, x87_state *state, const x87_state *dst_state)
470 int i, n_cycles, k, ri;
471 unsigned cycles[4], all_mask;
472 char cycle_idx[4][8];
473 ir_node *fxch, *before, *after;
475 assert(state->depth == dst_state->depth);
477 /* Some mathematics here:
478 If we have a cycle of length n that includes the tos,
479 we need n-1 exchange operations.
480 We can always add the tos and restore it, so we need
481 n+1 exchange operations for a cycle not containing the tos.
482 So, the maximum of needed operations is for a cycle of 7
483 not including the tos == 8.
484 This is the same number of ops we would need for using stores,
485 so exchange is cheaper (we save the loads).
486 On the other hand, we might need an additional exchange
487 in the next block to bring one operand on top, so the
488 number of ops in the first case is identical.
489 Further, no more than 4 cycles can exists (4 x 2).
491 all_mask = (1 << (state->depth)) - 1;
493 for (n_cycles = 0; all_mask; ++n_cycles) {
494 int src_idx, dst_idx;
496 /* find the first free slot */
497 for (i = 0; i < state->depth; ++i) {
498 if (all_mask & (1 << i)) {
499 all_mask &= ~(1 << i);
501 /* check if there are differences here */
502 if (x87_get_st_reg(state, i) != x87_get_st_reg(dst_state, i))
508 /* no more cycles found */
513 cycles[n_cycles] = (1 << i);
514 cycle_idx[n_cycles][k++] = i;
515 for (src_idx = i; ; src_idx = dst_idx) {
516 dst_idx = x87_on_stack(dst_state, x87_get_st_reg(state, src_idx));
518 if ((all_mask & (1 << dst_idx)) == 0)
521 cycle_idx[n_cycles][k++] = dst_idx;
522 cycles[n_cycles] |= (1 << dst_idx);
523 all_mask &= ~(1 << dst_idx);
525 cycle_idx[n_cycles][k] = -1;
529 /* no permutation needed */
533 /* Hmm: permutation needed */
534 DB((dbg, LEVEL_2, "\n%+F needs permutation: from\n", block));
535 DEBUG_ONLY(x87_dump_stack(state);)
536 DB((dbg, LEVEL_2, " to\n"));
537 DEBUG_ONLY(x87_dump_stack(dst_state);)
541 DB((dbg, LEVEL_2, "Need %d cycles\n", n_cycles));
542 for (ri = 0; ri < n_cycles; ++ri) {
543 DB((dbg, LEVEL_2, " Ring %d:\n ", ri));
544 for (k = 0; cycle_idx[ri][k] != -1; ++k)
545 DB((dbg, LEVEL_2, " st%d ->", cycle_idx[ri][k]));
546 DB((dbg, LEVEL_2, "\n"));
553 * Find the place node must be insert.
554 * We have only one successor block, so the last instruction should
557 before = sched_last(block);
558 assert(is_cfop(before));
560 /* now do the permutations */
561 for (ri = 0; ri < n_cycles; ++ri) {
562 if ((cycles[ri] & 1) == 0) {
563 /* this cycle does not include the tos */
564 fxch = x87_fxch_shuffle(state, cycle_idx[ri][0], block);
566 sched_add_after(after, fxch);
568 sched_add_before(before, fxch);
571 for (k = 1; cycle_idx[ri][k] != -1; ++k) {
572 fxch = x87_fxch_shuffle(state, cycle_idx[ri][k], block);
574 sched_add_after(after, fxch);
576 sched_add_before(before, fxch);
579 if ((cycles[ri] & 1) == 0) {
580 /* this cycle does not include the tos */
581 fxch = x87_fxch_shuffle(state, cycle_idx[ri][0], block);
582 sched_add_after(after, fxch);
589 * Create a fxch node before another node.
591 * @param state the x87 state
592 * @param n the node after the fxch
593 * @param pos exchange st(pos) with st(0)
597 static ir_node *x87_create_fxch(x87_state *state, ir_node *n, int pos)
600 ia32_x87_attr_t *attr;
601 ir_node *block = get_nodes_block(n);
603 x87_fxch(state, pos);
605 fxch = new_bd_ia32_fxch(NULL, block);
606 attr = get_ia32_x87_attr(fxch);
607 attr->x87[0] = get_st_reg(pos);
608 attr->x87[2] = get_st_reg(0);
612 sched_add_before(n, fxch);
613 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fxch), attr->x87[0]->name, attr->x87[2]->name));
618 * Create a fpush before node n.
620 * @param state the x87 state
621 * @param n the node after the fpush
622 * @param pos push st(pos) on stack
623 * @param op_idx replace input op_idx of n with the fpush result
625 static void x87_create_fpush(x87_state *state, ir_node *n, int pos, int op_idx)
627 ir_node *fpush, *pred = get_irn_n(n, op_idx);
628 ia32_x87_attr_t *attr;
629 const arch_register_t *out = x87_get_irn_register(pred);
631 x87_push_dbl(state, arch_register_get_index(out), pred);
633 fpush = new_bd_ia32_fpush(NULL, get_nodes_block(n));
634 attr = get_ia32_x87_attr(fpush);
635 attr->x87[0] = get_st_reg(pos);
636 attr->x87[2] = get_st_reg(0);
639 sched_add_before(n, fpush);
641 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fpush), attr->x87[0]->name, attr->x87[2]->name));
645 * Create a fpop before node n.
647 * @param state the x87 state
648 * @param n the node after the fpop
649 * @param num pop 1 or 2 values
651 * @return the fpop node
653 static ir_node *x87_create_fpop(x87_state *state, ir_node *n, int num)
655 ir_node *fpop = NULL;
656 ia32_x87_attr_t *attr;
661 if (ia32_cg_config.use_ffreep)
662 fpop = new_bd_ia32_ffreep(NULL, get_nodes_block(n));
664 fpop = new_bd_ia32_fpop(NULL, get_nodes_block(n));
665 attr = get_ia32_x87_attr(fpop);
666 attr->x87[0] = get_st_reg(0);
667 attr->x87[1] = get_st_reg(0);
668 attr->x87[2] = get_st_reg(0);
671 sched_add_before(n, fpop);
672 DB((dbg, LEVEL_1, "<<< %s %s\n", get_irn_opname(fpop), attr->x87[0]->name));
677 /* --------------------------------- liveness ------------------------------------------ */
680 * The liveness transfer function.
681 * Updates a live set over a single step from a given node to its predecessor.
682 * Everything defined at the node is removed from the set, the uses of the node get inserted.
684 * @param irn The node at which liveness should be computed.
685 * @param live The bitset of registers live before @p irn. This set gets modified by updating it to
686 * the registers live after irn.
688 * @return The live bitset.
690 static vfp_liveness vfp_liveness_transfer(ir_node *irn, vfp_liveness live)
693 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
695 if (get_irn_mode(irn) == mode_T) {
696 foreach_out_edge(irn, edge) {
697 ir_node *proj = get_edge_src_irn(edge);
699 if (arch_irn_consider_in_reg_alloc(cls, proj)) {
700 const arch_register_t *reg = x87_get_irn_register(proj);
701 live &= ~(1 << arch_register_get_index(reg));
704 } else if (arch_irn_consider_in_reg_alloc(cls, irn)) {
705 const arch_register_t *reg = x87_get_irn_register(irn);
706 live &= ~(1 << arch_register_get_index(reg));
709 for (i = 0, n = get_irn_arity(irn); i < n; ++i) {
710 ir_node *op = get_irn_n(irn, i);
712 if (mode_is_float(get_irn_mode(op)) &&
713 arch_irn_consider_in_reg_alloc(cls, op)) {
714 const arch_register_t *reg = x87_get_irn_register(op);
715 live |= 1 << arch_register_get_index(reg);
722 * Put all live virtual registers at the end of a block into a bitset.
724 * @param sim the simulator handle
725 * @param bl the block
727 * @return The live bitset at the end of this block
729 static vfp_liveness vfp_liveness_end_of_block(x87_simulator *sim, const ir_node *block)
731 vfp_liveness live = 0;
732 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
733 const be_lv_t *lv = sim->lv;
735 be_lv_foreach(lv, block, be_lv_state_end, node) {
736 const arch_register_t *reg;
737 if (!arch_irn_consider_in_reg_alloc(cls, node))
740 reg = x87_get_irn_register(node);
741 live |= 1 << arch_register_get_index(reg);
747 /** get the register mask from an arch_register */
748 #define REGMASK(reg) (1 << (arch_register_get_index(reg)))
751 * Return a bitset of argument registers which are live at the end of a node.
753 * @param sim the simulator handle
754 * @param pos the node
755 * @param kill kill mask for the output registers
757 * @return The live bitset.
759 static unsigned vfp_live_args_after(x87_simulator *sim, const ir_node *pos, unsigned kill)
761 unsigned idx = get_irn_idx(pos);
763 assert(idx < sim->n_idx);
764 return sim->live[idx] & ~kill;
768 * Calculate the liveness for a whole block and cache it.
770 * @param sim the simulator handle
771 * @param block the block
773 static void update_liveness(x87_simulator *sim, ir_node *block)
775 vfp_liveness live = vfp_liveness_end_of_block(sim, block);
778 /* now iterate through the block backward and cache the results */
779 sched_foreach_reverse(block, irn) {
780 /* stop at the first Phi: this produces the live-in */
784 idx = get_irn_idx(irn);
785 sim->live[idx] = live;
787 live = vfp_liveness_transfer(irn, live);
789 idx = get_irn_idx(block);
790 sim->live[idx] = live;
794 * Returns true if a register is live in a set.
796 * @param reg_idx the vfp register index
797 * @param live a live bitset
799 #define is_vfp_live(reg_idx, live) ((live) & (1 << (reg_idx)))
803 * Dump liveness info.
805 * @param live the live bitset
807 static void vfp_dump_live(vfp_liveness live)
811 DB((dbg, LEVEL_2, "Live after: "));
812 for (i = 0; i < 8; ++i) {
813 if (live & (1 << i)) {
814 DB((dbg, LEVEL_2, "vf%d ", i));
817 DB((dbg, LEVEL_2, "\n"));
819 #endif /* DEBUG_libfirm */
821 /* --------------------------------- simulators ---------------------------------------- */
824 * Simulate a virtual binop.
826 * @param state the x87 state
827 * @param n the node that should be simulated (and patched)
828 * @param tmpl the template containing the 4 possible x87 opcodes
830 * @return NO_NODE_ADDED
832 static int sim_binop(x87_state *state, ir_node *n, const exchange_tmpl *tmpl)
834 int op2_idx = 0, op1_idx;
835 int out_idx, do_pop = 0;
836 ia32_x87_attr_t *attr;
838 ir_node *patched_insn;
840 x87_simulator *sim = state->sim;
841 ir_node *op1 = get_irn_n(n, n_ia32_binary_left);
842 ir_node *op2 = get_irn_n(n, n_ia32_binary_right);
843 const arch_register_t *op1_reg = x87_get_irn_register(op1);
844 const arch_register_t *op2_reg = x87_get_irn_register(op2);
845 const arch_register_t *out = x87_irn_get_register(n, pn_ia32_res);
846 int reg_index_1 = arch_register_get_index(op1_reg);
847 int reg_index_2 = arch_register_get_index(op2_reg);
848 vfp_liveness live = vfp_live_args_after(sim, n, REGMASK(out));
852 DB((dbg, LEVEL_1, ">>> %+F %s, %s -> %s\n", n,
853 arch_register_get_name(op1_reg), arch_register_get_name(op2_reg),
854 arch_register_get_name(out)));
855 DEBUG_ONLY(vfp_dump_live(live);)
856 DB((dbg, LEVEL_1, "Stack before: "));
857 DEBUG_ONLY(x87_dump_stack(state);)
859 op1_idx = x87_on_stack(state, reg_index_1);
860 assert(op1_idx >= 0);
861 op1_live_after = is_vfp_live(reg_index_1, live);
863 attr = get_ia32_x87_attr(n);
864 permuted = attr->attr.data.ins_permuted;
866 if (reg_index_2 != REG_VFP_VFP_NOREG) {
869 /* second operand is a vfp register */
870 op2_idx = x87_on_stack(state, reg_index_2);
871 assert(op2_idx >= 0);
872 op2_live_after = is_vfp_live(reg_index_2, live);
874 if (op2_live_after) {
875 /* Second operand is live. */
877 if (op1_live_after) {
878 /* Both operands are live: push the first one.
879 This works even for op1 == op2. */
880 x87_create_fpush(state, n, op1_idx, n_ia32_binary_right);
881 /* now do fxxx (tos=tos X op) */
885 dst = tmpl->normal_op;
887 /* Second live, first operand is dead here, bring it to tos. */
889 x87_create_fxch(state, n, op1_idx);
894 /* now do fxxx (tos=tos X op) */
896 dst = tmpl->normal_op;
899 /* Second operand is dead. */
900 if (op1_live_after) {
901 /* First operand is live: bring second to tos. */
903 x87_create_fxch(state, n, op2_idx);
908 /* now do fxxxr (tos = op X tos) */
910 dst = tmpl->reverse_op;
912 /* Both operands are dead here, pop them from the stack. */
915 /* Both are identically and on tos, no pop needed. */
916 /* here fxxx (tos = tos X tos) */
917 dst = tmpl->normal_op;
920 /* now do fxxxp (op = op X tos, pop) */
921 dst = tmpl->normal_pop_op;
925 } else if (op1_idx == 0) {
926 assert(op1_idx != op2_idx);
927 /* now do fxxxrp (op = tos X op, pop) */
928 dst = tmpl->reverse_pop_op;
932 /* Bring the second on top. */
933 x87_create_fxch(state, n, op2_idx);
934 if (op1_idx == op2_idx) {
935 /* Both are identically and on tos now, no pop needed. */
938 /* use fxxx (tos = tos X tos) */
939 dst = tmpl->normal_op;
942 /* op2 is on tos now */
944 /* use fxxxp (op = op X tos, pop) */
945 dst = tmpl->normal_pop_op;
953 /* second operand is an address mode */
954 if (op1_live_after) {
955 /* first operand is live: push it here */
956 x87_create_fpush(state, n, op1_idx, n_ia32_binary_left);
959 /* first operand is dead: bring it to tos */
961 x87_create_fxch(state, n, op1_idx);
966 /* use fxxx (tos = tos X mem) */
967 dst = permuted ? tmpl->reverse_op : tmpl->normal_op;
971 patched_insn = x87_patch_insn(n, dst);
972 x87_set_st(state, arch_register_get_index(out), patched_insn, out_idx);
977 /* patch the operation */
978 attr->x87[0] = op1_reg = get_st_reg(op1_idx);
979 if (reg_index_2 != REG_VFP_VFP_NOREG) {
980 attr->x87[1] = op2_reg = get_st_reg(op2_idx);
982 attr->x87[2] = out = get_st_reg(out_idx);
984 if (reg_index_2 != REG_VFP_VFP_NOREG) {
985 DB((dbg, LEVEL_1, "<<< %s %s, %s -> %s\n", get_irn_opname(n),
986 arch_register_get_name(op1_reg), arch_register_get_name(op2_reg),
987 arch_register_get_name(out)));
989 DB((dbg, LEVEL_1, "<<< %s %s, [AM] -> %s\n", get_irn_opname(n),
990 arch_register_get_name(op1_reg),
991 arch_register_get_name(out)));
994 return NO_NODE_ADDED;
998 * Simulate a virtual Unop.
1000 * @param state the x87 state
1001 * @param n the node that should be simulated (and patched)
1002 * @param op the x87 opcode that will replace n's opcode
1004 * @return NO_NODE_ADDED
1006 static int sim_unop(x87_state *state, ir_node *n, ir_op *op)
1008 x87_simulator *sim = state->sim;
1009 const arch_register_t *op1 = x87_get_irn_register(get_irn_n(n, 0));
1010 const arch_register_t *out = x87_get_irn_register(n);
1011 ia32_x87_attr_t *attr;
1012 unsigned live = vfp_live_args_after(sim, n, REGMASK(out));
1014 DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, out->name));
1015 DEBUG_ONLY(vfp_dump_live(live);)
1017 int op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1019 if (is_vfp_live(arch_register_get_index(op1), live)) {
1020 /* push the operand here */
1021 x87_create_fpush(state, n, op1_idx, 0);
1024 /* operand is dead, bring it to tos */
1026 x87_create_fxch(state, n, op1_idx);
1030 x87_set_tos(state, arch_register_get_index(out), x87_patch_insn(n, op));
1031 attr = get_ia32_x87_attr(n);
1032 attr->x87[0] = op1 = get_st_reg(0);
1033 attr->x87[2] = out = get_st_reg(0);
1034 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), out->name));
1036 return NO_NODE_ADDED;
1040 * Simulate a virtual Load instruction.
1042 * @param state the x87 state
1043 * @param n the node that should be simulated (and patched)
1044 * @param op the x87 opcode that will replace n's opcode
1046 * @return NO_NODE_ADDED
1048 static int sim_load(x87_state *state, ir_node *n, ir_op *op, int res_pos)
1050 const arch_register_t *out = x87_irn_get_register(n, res_pos);
1051 ia32_x87_attr_t *attr;
1053 DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, arch_register_get_name(out)));
1054 x87_push(state, arch_register_get_index(out), x87_patch_insn(n, op));
1055 assert(out == x87_irn_get_register(n, res_pos));
1056 attr = get_ia32_x87_attr(n);
1057 attr->x87[2] = out = get_st_reg(0);
1058 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), arch_register_get_name(out)));
1060 return NO_NODE_ADDED;
1064 * Rewire all users of @p old_val to @new_val iff they are scheduled after @p store.
1066 * @param store The store
1067 * @param old_val The former value
1068 * @param new_val The new value
1070 static void collect_and_rewire_users(ir_node *store, ir_node *old_val, ir_node *new_val)
1072 foreach_out_edge_safe(old_val, edge) {
1073 ir_node *user = get_edge_src_irn(edge);
1075 if (! user || user == store)
1078 /* if the user is scheduled after the store: rewire */
1079 if (sched_is_scheduled(user) && sched_comes_after(store, user)) {
1081 /* find the input of the user pointing to the old value */
1082 for (i = get_irn_arity(user) - 1; i >= 0; i--) {
1083 if (get_irn_n(user, i) == old_val)
1084 set_irn_n(user, i, new_val);
1091 * Simulate a virtual Store.
1093 * @param state the x87 state
1094 * @param n the node that should be simulated (and patched)
1095 * @param op the x87 store opcode
1096 * @param op_p the x87 store and pop opcode
1098 static int sim_store(x87_state *state, ir_node *n, ir_op *op, ir_op *op_p)
1100 ir_node *val = get_irn_n(n, n_ia32_vfst_val);
1101 const arch_register_t *op2 = x87_get_irn_register(val);
1102 unsigned live = vfp_live_args_after(state->sim, n, 0);
1103 int insn = NO_NODE_ADDED;
1104 ia32_x87_attr_t *attr;
1105 int op2_reg_idx, op2_idx, depth;
1106 int live_after_node;
1109 op2_reg_idx = arch_register_get_index(op2);
1110 op2_idx = x87_on_stack(state, op2_reg_idx);
1111 live_after_node = is_vfp_live(arch_register_get_index(op2), live);
1112 DB((dbg, LEVEL_1, ">>> %+F %s ->\n", n, arch_register_get_name(op2)));
1113 assert(op2_idx >= 0);
1115 mode = get_ia32_ls_mode(n);
1116 depth = x87_get_depth(state);
1118 if (live_after_node) {
1120 Problem: fst doesn't support 96bit modes (spills), only fstp does
1121 fist doesn't support 64bit mode, only fistp
1123 - stack not full: push value and fstp
1124 - stack full: fstp value and load again
1125 Note that we cannot test on mode_E, because floats might be 96bit ...
1127 if (get_mode_size_bits(mode) > 64 || (mode_is_int(mode) && get_mode_size_bits(mode) > 32)) {
1128 if (depth < N_ia32_st_REGS) {
1129 /* ok, we have a free register: push + fstp */
1130 x87_create_fpush(state, n, op2_idx, n_ia32_vfst_val);
1132 x87_patch_insn(n, op_p);
1134 ir_node *vfld, *mem, *block, *rproj, *mproj;
1135 ir_graph *irg = get_irn_irg(n);
1136 ir_node *nomem = get_irg_no_mem(irg);
1138 /* stack full here: need fstp + load */
1140 x87_patch_insn(n, op_p);
1142 block = get_nodes_block(n);
1143 vfld = new_bd_ia32_vfld(NULL, block, get_irn_n(n, 0), get_irn_n(n, 1), nomem, get_ia32_ls_mode(n));
1145 /* copy all attributes */
1146 set_ia32_frame_ent(vfld, get_ia32_frame_ent(n));
1147 if (is_ia32_use_frame(n))
1148 set_ia32_use_frame(vfld);
1149 set_ia32_op_type(vfld, ia32_AddrModeS);
1150 add_ia32_am_offs_int(vfld, get_ia32_am_offs_int(n));
1151 set_ia32_am_sc(vfld, get_ia32_am_sc(n));
1152 set_ia32_ls_mode(vfld, get_ia32_ls_mode(n));
1154 rproj = new_r_Proj(vfld, get_ia32_ls_mode(vfld), pn_ia32_vfld_res);
1155 mproj = new_r_Proj(vfld, mode_M, pn_ia32_vfld_M);
1156 mem = get_irn_Proj_for_mode(n, mode_M);
1158 assert(mem && "Store memory not found");
1160 arch_set_irn_register(rproj, op2);
1162 /* reroute all former users of the store memory to the load memory */
1163 edges_reroute(mem, mproj);
1164 /* set the memory input of the load to the store memory */
1165 set_irn_n(vfld, n_ia32_vfld_mem, mem);
1167 sched_add_after(n, vfld);
1168 sched_add_after(vfld, rproj);
1170 /* rewire all users, scheduled after the store, to the loaded value */
1171 collect_and_rewire_users(n, val, rproj);
1176 /* we can only store the tos to memory */
1178 x87_create_fxch(state, n, op2_idx);
1180 /* mode size 64 or smaller -> use normal fst */
1181 x87_patch_insn(n, op);
1184 /* we can only store the tos to memory */
1186 x87_create_fxch(state, n, op2_idx);
1189 x87_patch_insn(n, op_p);
1192 attr = get_ia32_x87_attr(n);
1193 attr->x87[1] = op2 = get_st_reg(0);
1194 DB((dbg, LEVEL_1, "<<< %s %s ->\n", get_irn_opname(n), arch_register_get_name(op2)));
1199 #define _GEN_BINOP(op, rev) \
1200 static int sim_##op(x87_state *state, ir_node *n) { \
1201 exchange_tmpl tmpl = { op_ia32_##op, op_ia32_##rev, op_ia32_##op##p, op_ia32_##rev##p }; \
1202 return sim_binop(state, n, &tmpl); \
1205 #define GEN_BINOP(op) _GEN_BINOP(op, op)
1206 #define GEN_BINOPR(op) _GEN_BINOP(op, op##r)
1208 #define GEN_LOAD(op) \
1209 static int sim_##op(x87_state *state, ir_node *n) { \
1210 return sim_load(state, n, op_ia32_##op, pn_ia32_v##op##_res); \
1213 #define GEN_UNOP(op) \
1214 static int sim_##op(x87_state *state, ir_node *n) { \
1215 return sim_unop(state, n, op_ia32_##op); \
1218 #define GEN_STORE(op) \
1219 static int sim_##op(x87_state *state, ir_node *n) { \
1220 return sim_store(state, n, op_ia32_##op, op_ia32_##op##p); \
1242 * Simulate a virtual fisttp.
1244 * @param state the x87 state
1245 * @param n the node that should be simulated (and patched)
1247 * @return NO_NODE_ADDED
1249 static int sim_fisttp(x87_state *state, ir_node *n)
1251 ir_node *val = get_irn_n(n, n_ia32_vfst_val);
1252 const arch_register_t *op2 = x87_get_irn_register(val);
1253 ia32_x87_attr_t *attr;
1254 int op2_reg_idx, op2_idx;
1256 op2_reg_idx = arch_register_get_index(op2);
1257 op2_idx = x87_on_stack(state, op2_reg_idx);
1258 DB((dbg, LEVEL_1, ">>> %+F %s ->\n", n, arch_register_get_name(op2)));
1259 assert(op2_idx >= 0);
1261 /* Note: although the value is still live here, it is destroyed because
1262 of the pop. The register allocator is aware of that and introduced a copy
1263 if the value must be alive. */
1265 /* we can only store the tos to memory */
1267 x87_create_fxch(state, n, op2_idx);
1270 x87_patch_insn(n, op_ia32_fisttp);
1272 attr = get_ia32_x87_attr(n);
1273 attr->x87[1] = op2 = get_st_reg(0);
1274 DB((dbg, LEVEL_1, "<<< %s %s ->\n", get_irn_opname(n), arch_register_get_name(op2)));
1276 return NO_NODE_ADDED;
1280 * Simulate a virtual FtstFnstsw.
1282 * @param state the x87 state
1283 * @param n the node that should be simulated (and patched)
1285 * @return NO_NODE_ADDED
1287 static int sim_FtstFnstsw(x87_state *state, ir_node *n)
1289 x87_simulator *sim = state->sim;
1290 ia32_x87_attr_t *attr = get_ia32_x87_attr(n);
1291 ir_node *op1_node = get_irn_n(n, n_ia32_vFtstFnstsw_left);
1292 const arch_register_t *reg1 = x87_get_irn_register(op1_node);
1293 int reg_index_1 = arch_register_get_index(reg1);
1294 int op1_idx = x87_on_stack(state, reg_index_1);
1295 unsigned live = vfp_live_args_after(sim, n, 0);
1297 DB((dbg, LEVEL_1, ">>> %+F %s\n", n, arch_register_get_name(reg1)));
1298 DEBUG_ONLY(vfp_dump_live(live);)
1299 DB((dbg, LEVEL_1, "Stack before: "));
1300 DEBUG_ONLY(x87_dump_stack(state);)
1301 assert(op1_idx >= 0);
1304 /* bring the value to tos */
1305 x87_create_fxch(state, n, op1_idx);
1309 /* patch the operation */
1310 x87_patch_insn(n, op_ia32_FtstFnstsw);
1311 reg1 = get_st_reg(op1_idx);
1312 attr->x87[0] = reg1;
1313 attr->x87[1] = NULL;
1314 attr->x87[2] = NULL;
1316 if (!is_vfp_live(reg_index_1, live))
1317 x87_create_fpop(state, sched_next(n), 1);
1319 return NO_NODE_ADDED;
1325 * @param state the x87 state
1326 * @param n the node that should be simulated (and patched)
1328 * @return NO_NODE_ADDED
1330 static int sim_Fucom(x87_state *state, ir_node *n)
1334 ia32_x87_attr_t *attr = get_ia32_x87_attr(n);
1336 x87_simulator *sim = state->sim;
1337 ir_node *op1_node = get_irn_n(n, n_ia32_vFucomFnstsw_left);
1338 ir_node *op2_node = get_irn_n(n, n_ia32_vFucomFnstsw_right);
1339 const arch_register_t *op1 = x87_get_irn_register(op1_node);
1340 const arch_register_t *op2 = x87_get_irn_register(op2_node);
1341 int reg_index_1 = arch_register_get_index(op1);
1342 int reg_index_2 = arch_register_get_index(op2);
1343 unsigned live = vfp_live_args_after(sim, n, 0);
1344 bool permuted = attr->attr.data.ins_permuted;
1348 DB((dbg, LEVEL_1, ">>> %+F %s, %s\n", n,
1349 arch_register_get_name(op1), arch_register_get_name(op2)));
1350 DEBUG_ONLY(vfp_dump_live(live);)
1351 DB((dbg, LEVEL_1, "Stack before: "));
1352 DEBUG_ONLY(x87_dump_stack(state);)
1354 op1_idx = x87_on_stack(state, reg_index_1);
1355 assert(op1_idx >= 0);
1357 /* BEWARE: check for comp a,a cases, they might happen */
1358 if (reg_index_2 != REG_VFP_VFP_NOREG) {
1359 /* second operand is a vfp register */
1360 op2_idx = x87_on_stack(state, reg_index_2);
1361 assert(op2_idx >= 0);
1363 if (is_vfp_live(reg_index_2, live)) {
1364 /* second operand is live */
1366 if (is_vfp_live(reg_index_1, live)) {
1367 /* both operands are live */
1370 /* res = tos X op */
1371 } else if (op2_idx == 0) {
1372 /* res = op X tos */
1373 permuted = !permuted;
1376 /* bring the first one to tos */
1377 x87_create_fxch(state, n, op1_idx);
1378 if (op1_idx == op2_idx) {
1380 } else if (op2_idx == 0) {
1384 /* res = tos X op */
1387 /* second live, first operand is dead here, bring it to tos.
1388 This means further, op1_idx != op2_idx. */
1389 assert(op1_idx != op2_idx);
1391 x87_create_fxch(state, n, op1_idx);
1396 /* res = tos X op, pop */
1400 /* second operand is dead */
1401 if (is_vfp_live(reg_index_1, live)) {
1402 /* first operand is live: bring second to tos.
1403 This means further, op1_idx != op2_idx. */
1404 assert(op1_idx != op2_idx);
1406 x87_create_fxch(state, n, op2_idx);
1411 /* res = op X tos, pop */
1413 permuted = !permuted;
1416 /* both operands are dead here, check first for identity. */
1417 if (op1_idx == op2_idx) {
1418 /* identically, one pop needed */
1420 x87_create_fxch(state, n, op1_idx);
1424 /* res = tos X op, pop */
1427 /* different, move them to st and st(1) and pop both.
1428 The tricky part is to get one into st(1).*/
1429 else if (op2_idx == 1) {
1430 /* good, second operand is already in the right place, move the first */
1432 /* bring the first on top */
1433 x87_create_fxch(state, n, op1_idx);
1434 assert(op2_idx != 0);
1437 /* res = tos X op, pop, pop */
1439 } else if (op1_idx == 1) {
1440 /* good, first operand is already in the right place, move the second */
1442 /* bring the first on top */
1443 x87_create_fxch(state, n, op2_idx);
1444 assert(op1_idx != 0);
1447 /* res = op X tos, pop, pop */
1448 permuted = !permuted;
1452 /* if one is already the TOS, we need two fxch */
1454 /* first one is TOS, move to st(1) */
1455 x87_create_fxch(state, n, 1);
1456 assert(op2_idx != 1);
1458 x87_create_fxch(state, n, op2_idx);
1460 /* res = op X tos, pop, pop */
1462 permuted = !permuted;
1464 } else if (op2_idx == 0) {
1465 /* second one is TOS, move to st(1) */
1466 x87_create_fxch(state, n, 1);
1467 assert(op1_idx != 1);
1469 x87_create_fxch(state, n, op1_idx);
1471 /* res = tos X op, pop, pop */
1474 /* none of them is either TOS or st(1), 3 fxch needed */
1475 x87_create_fxch(state, n, op2_idx);
1476 assert(op1_idx != 0);
1477 x87_create_fxch(state, n, 1);
1479 x87_create_fxch(state, n, op1_idx);
1481 /* res = tos X op, pop, pop */
1488 /* second operand is an address mode */
1489 if (is_vfp_live(reg_index_1, live)) {
1490 /* first operand is live: bring it to TOS */
1492 x87_create_fxch(state, n, op1_idx);
1496 /* first operand is dead: bring it to tos */
1498 x87_create_fxch(state, n, op1_idx);
1505 /* patch the operation */
1506 if (is_ia32_vFucomFnstsw(n)) {
1510 case 0: dst = op_ia32_FucomFnstsw; break;
1511 case 1: dst = op_ia32_FucompFnstsw; break;
1512 case 2: dst = op_ia32_FucomppFnstsw; break;
1513 default: panic("invalid popcount");
1516 for (i = 0; i < pops; ++i) {
1519 } else if (is_ia32_vFucomi(n)) {
1521 case 0: dst = op_ia32_Fucomi; break;
1522 case 1: dst = op_ia32_Fucompi; x87_pop(state); break;
1524 dst = op_ia32_Fucompi;
1526 x87_create_fpop(state, sched_next(n), 1);
1528 default: panic("invalid popcount");
1531 panic("invalid operation %+F", n);
1534 x87_patch_insn(n, dst);
1541 op1 = get_st_reg(op1_idx);
1544 op2 = get_st_reg(op2_idx);
1547 attr->x87[2] = NULL;
1548 attr->attr.data.ins_permuted = permuted;
1551 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(n),
1552 arch_register_get_name(op1), arch_register_get_name(op2)));
1554 DB((dbg, LEVEL_1, "<<< %s %s, [AM]\n", get_irn_opname(n),
1555 arch_register_get_name(op1)));
1558 return NO_NODE_ADDED;
1564 * @param state the x87 state
1565 * @param n the node that should be simulated (and patched)
1567 * @return NO_NODE_ADDED
1569 static int sim_Keep(x87_state *state, ir_node *node)
1572 const arch_register_t *op_reg;
1578 DB((dbg, LEVEL_1, ">>> %+F\n", node));
1580 arity = get_irn_arity(node);
1581 for (i = 0; i < arity; ++i) {
1582 op = get_irn_n(node, i);
1583 op_reg = arch_get_irn_register(op);
1584 if (arch_register_get_class(op_reg) != &ia32_reg_classes[CLASS_ia32_vfp])
1587 reg_id = arch_register_get_index(op_reg);
1588 live = vfp_live_args_after(state->sim, node, 0);
1590 op_stack_idx = x87_on_stack(state, reg_id);
1591 if (op_stack_idx >= 0 && !is_vfp_live(reg_id, live))
1592 x87_create_fpop(state, sched_next(node), 1);
1595 DB((dbg, LEVEL_1, "Stack after: "));
1596 DEBUG_ONLY(x87_dump_stack(state);)
1598 return NO_NODE_ADDED;
1602 * Keep the given node alive by adding a be_Keep.
1604 * @param node the node to kept alive
1606 static void keep_float_node_alive(ir_node *node)
1608 ir_node *block = get_nodes_block(node);
1609 ir_node *keep = be_new_Keep(block, 1, &node);
1611 assert(sched_is_scheduled(node));
1612 sched_add_after(node, keep);
1616 * Create a copy of a node. Recreate the node if it's a constant.
1618 * @param state the x87 state
1619 * @param n the node to be copied
1621 * @return the copy of n
1623 static ir_node *create_Copy(x87_state *state, ir_node *n)
1625 dbg_info *n_dbg = get_irn_dbg_info(n);
1626 ir_mode *mode = get_irn_mode(n);
1627 ir_node *block = get_nodes_block(n);
1628 ir_node *pred = get_irn_n(n, 0);
1629 ir_node *(*cnstr)(dbg_info *, ir_node *, ir_mode *) = NULL;
1631 const arch_register_t *out;
1632 const arch_register_t *op1;
1633 ia32_x87_attr_t *attr;
1635 /* Do not copy constants, recreate them. */
1636 switch (get_ia32_irn_opcode(pred)) {
1638 cnstr = new_bd_ia32_fldz;
1641 cnstr = new_bd_ia32_fld1;
1643 case iro_ia32_fldpi:
1644 cnstr = new_bd_ia32_fldpi;
1646 case iro_ia32_fldl2e:
1647 cnstr = new_bd_ia32_fldl2e;
1649 case iro_ia32_fldl2t:
1650 cnstr = new_bd_ia32_fldl2t;
1652 case iro_ia32_fldlg2:
1653 cnstr = new_bd_ia32_fldlg2;
1655 case iro_ia32_fldln2:
1656 cnstr = new_bd_ia32_fldln2;
1662 out = x87_get_irn_register(n);
1663 op1 = x87_get_irn_register(pred);
1665 if (cnstr != NULL) {
1666 /* copy a constant */
1667 res = (*cnstr)(n_dbg, block, mode);
1669 x87_push(state, arch_register_get_index(out), res);
1671 attr = get_ia32_x87_attr(res);
1672 attr->x87[2] = get_st_reg(0);
1674 int op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1676 res = new_bd_ia32_fpushCopy(n_dbg, block, pred, mode);
1678 x87_push(state, arch_register_get_index(out), res);
1680 attr = get_ia32_x87_attr(res);
1681 attr->x87[0] = get_st_reg(op1_idx);
1682 attr->x87[2] = get_st_reg(0);
1684 arch_set_irn_register(res, out);
1690 * Simulate a be_Copy.
1692 * @param state the x87 state
1693 * @param n the node that should be simulated (and patched)
1695 * @return NO_NODE_ADDED
1697 static int sim_Copy(x87_state *state, ir_node *n)
1700 const arch_register_t *out;
1701 const arch_register_t *op1;
1702 const arch_register_class_t *cls;
1703 ir_node *node, *next;
1704 int op1_idx, out_idx;
1707 cls = arch_get_irn_reg_class(n);
1708 if (cls != &ia32_reg_classes[CLASS_ia32_vfp])
1711 pred = be_get_Copy_op(n);
1712 out = x87_get_irn_register(n);
1713 op1 = x87_get_irn_register(pred);
1714 live = vfp_live_args_after(state->sim, n, REGMASK(out));
1716 DB((dbg, LEVEL_1, ">>> %+F %s -> %s\n", n,
1717 arch_register_get_name(op1), arch_register_get_name(out)));
1718 DEBUG_ONLY(vfp_dump_live(live);)
1720 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1722 if (is_vfp_live(arch_register_get_index(op1), live)) {
1723 /* Operand is still live, a real copy. We need here an fpush that can
1724 hold a a register, so use the fpushCopy or recreate constants */
1725 node = create_Copy(state, n);
1727 /* We have to make sure the old value doesn't go dead (which can happen
1728 * when we recreate constants). As the simulator expected that value in
1729 * the pred blocks. This is unfortunate as removing it would save us 1
1730 * instruction, but we would have to rerun all the simulation to get
1733 next = sched_next(n);
1736 sched_add_before(next, node);
1738 if (get_irn_n_edges(pred) == 0) {
1739 keep_float_node_alive(pred);
1742 DB((dbg, LEVEL_1, "<<< %+F %s -> ?\n", node, op1->name));
1744 out_idx = x87_on_stack(state, arch_register_get_index(out));
1746 if (out_idx >= 0 && out_idx != op1_idx) {
1747 /* Matze: out already on stack? how can this happen? */
1748 panic("invalid stack state");
1751 /* op1 must be killed and placed where out is */
1753 ia32_x87_attr_t *attr;
1754 /* best case, simple remove and rename */
1755 x87_patch_insn(n, op_ia32_Pop);
1756 attr = get_ia32_x87_attr(n);
1757 attr->x87[0] = op1 = get_st_reg(0);
1760 x87_set_st(state, arch_register_get_index(out), n, op1_idx - 1);
1762 ia32_x87_attr_t *attr;
1763 /* move op1 to tos, store and pop it */
1765 x87_create_fxch(state, n, op1_idx);
1768 x87_patch_insn(n, op_ia32_Pop);
1769 attr = get_ia32_x87_attr(n);
1770 attr->x87[0] = op1 = get_st_reg(out_idx);
1773 x87_set_st(state, arch_register_get_index(out), n, out_idx - 1);
1775 DB((dbg, LEVEL_1, "<<< %+F %s\n", n, op1->name));
1778 /* just a virtual copy */
1779 x87_set_st(state, arch_register_get_index(out), pred, op1_idx);
1780 /* don't remove the node to keep the verifier quiet :),
1781 the emitter won't emit any code for the node */
1784 DB((dbg, LEVEL_1, "<<< KILLED %s\n", get_irn_opname(n)));
1789 return NO_NODE_ADDED;
1793 * Returns the vf0 result Proj of a Call.
1795 * @para call the Call node
1797 static ir_node *get_call_result_proj(ir_node *call)
1799 /* search the result proj */
1800 foreach_out_edge(call, edge) {
1801 ir_node *proj = get_edge_src_irn(edge);
1802 long pn = get_Proj_proj(proj);
1804 if (pn == pn_ia32_Call_vf0)
1808 panic("result Proj missing");
1811 static int sim_Asm(x87_state *const state, ir_node *const n)
1815 for (size_t i = get_irn_arity(n); i-- != 0;) {
1816 arch_register_req_t const *const req = arch_get_irn_register_req_in(n, i);
1817 if (req->cls == &ia32_reg_classes[CLASS_ia32_vfp])
1818 panic("cannot handle %+F with x87 constraints", n);
1821 for (size_t i = arch_get_irn_n_outs(n); i-- != 0;) {
1822 arch_register_req_t const *const req = arch_get_irn_register_req_out(n, i);
1823 if (req->cls == &ia32_reg_classes[CLASS_ia32_vfp])
1824 panic("cannot handle %+F with x87 constraints", n);
1827 return NO_NODE_ADDED;
1831 * Simulate a ia32_Call.
1833 * @param state the x87 state
1834 * @param n the node that should be simulated (and patched)
1836 * @return NO_NODE_ADDED
1838 static int sim_Call(x87_state *state, ir_node *n)
1840 ir_type *call_tp = get_ia32_call_attr_const(n)->call_tp;
1844 const arch_register_t *reg;
1846 DB((dbg, LEVEL_1, ">>> %+F\n", n));
1848 /* at the begin of a call the x87 state should be empty */
1849 assert(state->depth == 0 && "stack not empty before call");
1851 if (get_method_n_ress(call_tp) <= 0)
1855 * If the called function returns a float, it is returned in st(0).
1856 * This even happens if the return value is NOT used.
1857 * Moreover, only one return result is supported.
1859 res_type = get_method_res_type(call_tp, 0);
1860 mode = get_type_mode(res_type);
1862 if (mode == NULL || !mode_is_float(mode))
1865 resproj = get_call_result_proj(n);
1867 reg = x87_get_irn_register(resproj);
1868 x87_push(state, arch_register_get_index(reg), resproj);
1871 DB((dbg, LEVEL_1, "Stack after: "));
1872 DEBUG_ONLY(x87_dump_stack(state);)
1874 return NO_NODE_ADDED;
1878 * Simulate a be_Return.
1880 * @param state the x87 state
1881 * @param n the node that should be simulated (and patched)
1883 * @return NO_NODE_ADDED
1885 static int sim_Return(x87_state *state, ir_node *n)
1887 #ifdef DEBUG_libfirm
1888 /* only floating point return values must reside on stack */
1889 int n_float_res = 0;
1890 int const n_res = be_Return_get_n_rets(n);
1891 for (int i = 0; i < n_res; ++i) {
1892 ir_node *const res = get_irn_n(n, n_be_Return_val + i);
1893 if (mode_is_float(get_irn_mode(res)))
1896 assert(x87_get_depth(state) == n_float_res);
1899 /* pop them virtually */
1901 return NO_NODE_ADDED;
1905 * Simulate a be_Perm.
1907 * @param state the x87 state
1908 * @param irn the node that should be simulated (and patched)
1910 * @return NO_NODE_ADDED
1912 static int sim_Perm(x87_state *state, ir_node *irn)
1915 ir_node *pred = get_irn_n(irn, 0);
1918 /* handle only floating point Perms */
1919 if (! mode_is_float(get_irn_mode(pred)))
1920 return NO_NODE_ADDED;
1922 DB((dbg, LEVEL_1, ">>> %+F\n", irn));
1924 /* Perm is a pure virtual instruction on x87.
1925 All inputs must be on the FPU stack and are pairwise
1926 different from each other.
1927 So, all we need to do is to permutate the stack state. */
1928 n = get_irn_arity(irn);
1929 NEW_ARR_A(int, stack_pos, n);
1931 /* collect old stack positions */
1932 for (i = 0; i < n; ++i) {
1933 const arch_register_t *inreg = x87_get_irn_register(get_irn_n(irn, i));
1934 int idx = x87_on_stack(state, arch_register_get_index(inreg));
1936 assert(idx >= 0 && "Perm argument not on x87 stack");
1940 /* now do the permutation */
1941 foreach_out_edge(irn, edge) {
1942 ir_node *proj = get_edge_src_irn(edge);
1943 const arch_register_t *out = x87_get_irn_register(proj);
1944 long num = get_Proj_proj(proj);
1946 assert(0 <= num && num < n && "More Proj's than Perm inputs");
1947 x87_set_st(state, arch_register_get_index(out), proj, stack_pos[(unsigned)num]);
1949 DB((dbg, LEVEL_1, "<<< %+F\n", irn));
1951 return NO_NODE_ADDED;
1955 * Kill any dead registers at block start by popping them from the stack.
1957 * @param sim the simulator handle
1958 * @param block the current block
1959 * @param state the x87 state at the begin of the block
1961 static void x87_kill_deads(x87_simulator *const sim, ir_node *const block, x87_state *const state)
1963 ir_node *first_insn = sched_first(block);
1964 ir_node *keep = NULL;
1965 unsigned live = vfp_live_args_after(sim, block, 0);
1967 int i, depth, num_pop;
1970 depth = x87_get_depth(state);
1971 for (i = depth - 1; i >= 0; --i) {
1972 int reg = x87_get_st_reg(state, i);
1974 if (! is_vfp_live(reg, live))
1975 kill_mask |= (1 << i);
1979 DB((dbg, LEVEL_1, "Killing deads:\n"));
1980 DEBUG_ONLY(vfp_dump_live(live);)
1981 DEBUG_ONLY(x87_dump_stack(state);)
1983 if (kill_mask != 0 && live == 0) {
1984 /* special case: kill all registers */
1985 if (ia32_cg_config.use_femms || ia32_cg_config.use_emms) {
1986 if (ia32_cg_config.use_femms) {
1987 /* use FEMMS on AMD processors to clear all */
1988 keep = new_bd_ia32_femms(NULL, block);
1990 /* use EMMS to clear all */
1991 keep = new_bd_ia32_emms(NULL, block);
1993 sched_add_before(first_insn, keep);
1999 /* now kill registers */
2001 /* we can only kill from TOS, so bring them up */
2002 if (! (kill_mask & 1)) {
2003 /* search from behind, because we can to a double-pop */
2004 for (i = depth - 1; i >= 0; --i) {
2005 if (kill_mask & (1 << i)) {
2006 kill_mask &= ~(1 << i);
2013 x87_set_st(state, -1, keep, i);
2014 x87_create_fxch(state, first_insn, i);
2017 if ((kill_mask & 3) == 3) {
2018 /* we can do a double-pop */
2022 /* only a single pop */
2027 kill_mask >>= num_pop;
2028 keep = x87_create_fpop(state, first_insn, num_pop);
2035 * Run a simulation and fix all virtual instructions for a block.
2037 * @param sim the simulator handle
2038 * @param block the current block
2040 static void x87_simulate_block(x87_simulator *sim, ir_node *block)
2043 blk_state *bl_state = x87_get_bl_state(sim, block);
2044 x87_state *state = bl_state->begin;
2045 ir_node *start_block;
2047 assert(state != NULL);
2048 /* already processed? */
2049 if (bl_state->end != NULL)
2052 DB((dbg, LEVEL_1, "Simulate %+F\n", block));
2053 DB((dbg, LEVEL_2, "State at Block begin:\n "));
2054 DEBUG_ONLY(x87_dump_stack(state);)
2056 /* create a new state, will be changed */
2057 state = x87_clone_state(sim, state);
2058 /* at block begin, kill all dead registers */
2059 x87_kill_deads(sim, block, state);
2061 /* beware, n might change */
2062 for (n = sched_first(block); !sched_is_end(n); n = next) {
2065 ir_op *op = get_irn_op(n);
2068 * get the next node to be simulated here.
2069 * n might be completely removed from the schedule-
2071 next = sched_next(n);
2072 if (op->ops.generic != NULL) {
2073 func = (sim_func)op->ops.generic;
2076 node_inserted = (*func)(state, n);
2079 * sim_func might have added an additional node after n,
2080 * so update next node
2081 * beware: n must not be changed by sim_func
2082 * (i.e. removed from schedule) in this case
2084 if (node_inserted != NO_NODE_ADDED)
2085 next = sched_next(n);
2089 start_block = get_irg_start_block(get_irn_irg(block));
2091 DB((dbg, LEVEL_2, "State at Block end:\n ")); DEBUG_ONLY(x87_dump_stack(state);)
2093 /* check if the state must be shuffled */
2094 foreach_block_succ(block, edge) {
2095 ir_node *succ = get_edge_src_irn(edge);
2096 blk_state *succ_state;
2098 if (succ == start_block)
2101 succ_state = x87_get_bl_state(sim, succ);
2103 if (succ_state->begin == NULL) {
2104 DB((dbg, LEVEL_2, "Set begin state for succ %+F:\n", succ));
2105 DEBUG_ONLY(x87_dump_stack(state);)
2106 succ_state->begin = state;
2108 waitq_put(sim->worklist, succ);
2110 DB((dbg, LEVEL_2, "succ %+F already has a state, shuffling\n", succ));
2111 /* There is already a begin state for the successor, bad.
2112 Do the necessary permutations.
2113 Note that critical edges are removed, so this is always possible:
2114 If the successor has more than one possible input, then it must
2117 x87_shuffle(block, state, succ_state->begin);
2120 bl_state->end = state;
2124 * Register a simulator function.
2126 * @param op the opcode to simulate
2127 * @param func the simulator function for the opcode
2129 static void register_sim(ir_op *op, sim_func func)
2131 assert(op->ops.generic == NULL);
2132 op->ops.generic = (op_func) func;
2136 * Create a new x87 simulator.
2138 * @param sim a simulator handle, will be initialized
2139 * @param irg the current graph
2141 static void x87_init_simulator(x87_simulator *sim, ir_graph *irg)
2143 obstack_init(&sim->obst);
2144 sim->blk_states = pmap_create();
2145 sim->n_idx = get_irg_last_idx(irg);
2146 sim->live = OALLOCN(&sim->obst, vfp_liveness, sim->n_idx);
2148 DB((dbg, LEVEL_1, "--------------------------------\n"
2149 "x87 Simulator started for %+F\n", irg));
2151 /* set the generic function pointer of instruction we must simulate */
2152 ir_clear_opcodes_generic_func();
2154 register_sim(op_ia32_Asm, sim_Asm);
2155 register_sim(op_ia32_Call, sim_Call);
2156 register_sim(op_ia32_vfld, sim_fld);
2157 register_sim(op_ia32_vfild, sim_fild);
2158 register_sim(op_ia32_vfld1, sim_fld1);
2159 register_sim(op_ia32_vfldz, sim_fldz);
2160 register_sim(op_ia32_vfadd, sim_fadd);
2161 register_sim(op_ia32_vfsub, sim_fsub);
2162 register_sim(op_ia32_vfmul, sim_fmul);
2163 register_sim(op_ia32_vfdiv, sim_fdiv);
2164 register_sim(op_ia32_vfprem, sim_fprem);
2165 register_sim(op_ia32_vfabs, sim_fabs);
2166 register_sim(op_ia32_vfchs, sim_fchs);
2167 register_sim(op_ia32_vfist, sim_fist);
2168 register_sim(op_ia32_vfisttp, sim_fisttp);
2169 register_sim(op_ia32_vfst, sim_fst);
2170 register_sim(op_ia32_vFtstFnstsw, sim_FtstFnstsw);
2171 register_sim(op_ia32_vFucomFnstsw, sim_Fucom);
2172 register_sim(op_ia32_vFucomi, sim_Fucom);
2173 register_sim(op_be_Copy, sim_Copy);
2174 register_sim(op_be_Return, sim_Return);
2175 register_sim(op_be_Perm, sim_Perm);
2176 register_sim(op_be_Keep, sim_Keep);
2180 * Destroy a x87 simulator.
2182 * @param sim the simulator handle
2184 static void x87_destroy_simulator(x87_simulator *sim)
2186 pmap_destroy(sim->blk_states);
2187 obstack_free(&sim->obst, NULL);
2188 DB((dbg, LEVEL_1, "x87 Simulator stopped\n\n"));
2192 * Pre-block walker: calculate the liveness information for the block
2193 * and store it into the sim->live cache.
2195 static void update_liveness_walker(ir_node *block, void *data)
2197 x87_simulator *sim = (x87_simulator*)data;
2198 update_liveness(sim, block);
2202 * Run a simulation and fix all virtual instructions for a graph.
2203 * Replaces all virtual floating point instructions and registers
2206 void ia32_x87_simulate_graph(ir_graph *irg)
2208 /* TODO improve code quality (less executed fxch) by using execfreqs */
2210 ir_node *block, *start_block;
2211 blk_state *bl_state;
2214 /* create the simulator */
2215 x87_init_simulator(&sim, irg);
2217 start_block = get_irg_start_block(irg);
2218 bl_state = x87_get_bl_state(&sim, start_block);
2220 /* start with the empty state */
2222 bl_state->begin = ∅
2224 sim.worklist = new_waitq();
2225 waitq_put(sim.worklist, start_block);
2227 be_assure_live_sets(irg);
2228 sim.lv = be_get_irg_liveness(irg);
2230 /* Calculate the liveness for all nodes. We must precalculate this info,
2231 * because the simulator adds new nodes (possible before Phi nodes) which
2232 * would let a lazy calculation fail.
2233 * On the other hand we reduce the computation amount due to
2234 * precaching from O(n^2) to O(n) at the expense of O(n) cache memory.
2236 irg_block_walk_graph(irg, update_liveness_walker, NULL, &sim);
2240 block = (ir_node*)waitq_get(sim.worklist);
2241 x87_simulate_block(&sim, block);
2242 } while (! waitq_empty(sim.worklist));
2245 del_waitq(sim.worklist);
2246 x87_destroy_simulator(&sim);
2249 /* Initializes the x87 simulator. */
2250 void ia32_init_x87(void)
2252 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.x87");