2 * Copyright (C) 1995-2010 University of Karlsruhe. All rights reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
22 * @brief This file implements the x87 support and virtual to stack
23 * register translation for the ia32 backend.
24 * @author Michael Beck
33 #include "iredges_t.h"
48 #include "bearch_ia32_t.h"
49 #include "ia32_new_nodes.h"
50 #include "gen_ia32_new_nodes.h"
51 #include "gen_ia32_regalloc_if.h"
53 #include "ia32_architecture.h"
/** Wrap a (possibly negative) stack offset into the valid x87 stack index
 *  range [0, N_ia32_st_REGS); only correct if N_ia32_st_REGS is a power of two. */
55 #define MASK_TOS(x) ((x) & (N_ia32_st_REGS - 1))
57 /** the debug handle */
58 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
60 /* Forward declaration. */
61 typedef struct x87_simulator x87_simulator;
64 * An exchange template.
65 * Note that our virtual functions have the same inputs
66 * and attributes as the real ones, so we can simple exchange
68 * Further, x87 supports inverse instructions, so we can handle them.
70 typedef struct exchange_tmpl {
71 ir_op *normal_op; /**< the normal one */
72 ir_op *reverse_op; /**< the reverse one if exists */
73 ir_op *normal_pop_op; /**< the normal one with tos pop */
74 ir_op *reverse_pop_op; /**< the reverse one with tos pop */
78 * An entry on the simulated x87 stack.
80 typedef struct st_entry {
81 int reg_idx; /**< the virtual register index of this stack value */
82 ir_node *node; /**< the node that produced this value */
88 typedef struct x87_state {
89 st_entry st[N_ia32_st_REGS]; /**< the register stack */
90 int depth; /**< the current stack depth */
91 int tos; /**< position of the tos */
92 x87_simulator *sim; /**< The simulator. */
95 /** An empty state, used for blocks without fp instructions. */
96 static x87_state _empty = { { {0, NULL}, }, 0, 0, NULL };
97 static x87_state *empty = (x87_state *)&_empty;
/**
 * Return values of the instruction simulator functions.
 */
enum {
	NO_NODE_ADDED = 0, /**< No node that needs simulation was added. */
	NODE_ADDED    = 1  /**< A node that must be simulated was added by the
	                        simulator in the schedule AFTER the current node. */
};
109 * The type of an instruction simulator function.
111 * @param state the x87 state
112 * @param n the node to be simulated
114 * @return NODE_ADDED if a node was added AFTER n in schedule that MUST be
116 * NO_NODE_ADDED otherwise
118 typedef int (*sim_func)(x87_state *state, ir_node *n);
121 * A block state: Every block has a x87 state at the beginning and at the end.
123 typedef struct blk_state {
124 x87_state *begin; /**< state at the begin or NULL if not assigned */
125 x87_state *end; /**< state at the end or NULL if not assigned */
128 /** liveness bitset for vfp registers. */
129 typedef unsigned char vfp_liveness;
134 struct x87_simulator {
135 struct obstack obst; /**< An obstack for fast allocating. */
136 pmap *blk_states; /**< Map blocks to states. */
137 be_lv_t *lv; /**< intrablock liveness. */
138 vfp_liveness *live; /**< Liveness information. */
139 unsigned n_idx; /**< The cached get_irg_last_idx() result. */
140 waitq *worklist; /**< Worklist of blocks that must be processed. */
144 * Returns the current stack depth.
146 * @param state the x87 state
148 * @return the x87 stack depth
150 static int x87_get_depth(const x87_state *state)
156 * Return the virtual register index at st(pos).
158 * @param state the x87 state
159 * @param pos a stack position
161 * @return the vfp register index that produced the value at st(pos)
163 static int x87_get_st_reg(const x87_state *state, int pos)
165 assert(pos < state->depth);
166 return state->st[MASK_TOS(state->tos + pos)].reg_idx;
171 * Return the node at st(pos).
173 * @param state the x87 state
174 * @param pos a stack position
176 * @return the IR node that produced the value at st(pos)
178 static ir_node *x87_get_st_node(const x87_state *state, int pos)
180 assert(pos < state->depth);
181 return state->st[MASK_TOS(state->tos + pos)].node;
185 * Dump the stack for debugging.
187 * @param state the x87 state
189 static void x87_dump_stack(const x87_state *state)
193 for (i = state->depth - 1; i >= 0; --i) {
194 DB((dbg, LEVEL_2, "vf%d(%+F) ", x87_get_st_reg(state, i),
195 x87_get_st_node(state, i)));
197 DB((dbg, LEVEL_2, "<-- TOS\n"));
199 #endif /* DEBUG_libfirm */
202 * Set a virtual register to st(pos).
204 * @param state the x87 state
205 * @param reg_idx the vfp register index that should be set
206 * @param node the IR node that produces the value of the vfp register
207 * @param pos the stack position where the new value should be entered
209 static void x87_set_st(x87_state *state, int reg_idx, ir_node *node, int pos)
211 assert(0 < state->depth);
212 state->st[MASK_TOS(state->tos + pos)].reg_idx = reg_idx;
213 state->st[MASK_TOS(state->tos + pos)].node = node;
215 DB((dbg, LEVEL_2, "After SET_REG: "));
216 DEBUG_ONLY(x87_dump_stack(state);)
220 * Set the tos virtual register.
222 * @param state the x87 state
223 * @param reg_idx the vfp register index that should be set
224 * @param node the IR node that produces the value of the vfp register
226 static void x87_set_tos(x87_state *state, int reg_idx, ir_node *node)
228 x87_set_st(state, reg_idx, node, 0);
232 * Swap st(0) with st(pos).
234 * @param state the x87 state
235 * @param pos the stack position to change the tos with
237 static void x87_fxch(x87_state *state, int pos)
240 assert(pos < state->depth);
242 entry = state->st[MASK_TOS(state->tos + pos)];
243 state->st[MASK_TOS(state->tos + pos)] = state->st[MASK_TOS(state->tos)];
244 state->st[MASK_TOS(state->tos)] = entry;
246 DB((dbg, LEVEL_2, "After FXCH: "));
247 DEBUG_ONLY(x87_dump_stack(state);)
251 * Convert a virtual register to the stack index.
253 * @param state the x87 state
254 * @param reg_idx the register vfp index
256 * @return the stack position where the register is stacked
257 * or -1 if the virtual register was not found
259 static int x87_on_stack(const x87_state *state, int reg_idx)
261 int i, tos = state->tos;
263 for (i = 0; i < state->depth; ++i)
264 if (state->st[MASK_TOS(tos + i)].reg_idx == reg_idx)
270 * Push a virtual Register onto the stack, double pushed allowed.
272 * @param state the x87 state
273 * @param reg_idx the register vfp index
274 * @param node the node that produces the value of the vfp register
276 static void x87_push_dbl(x87_state *state, int reg_idx, ir_node *node)
278 assert(state->depth < N_ia32_st_REGS && "stack overrun");
281 state->tos = MASK_TOS(state->tos - 1);
282 state->st[state->tos].reg_idx = reg_idx;
283 state->st[state->tos].node = node;
285 DB((dbg, LEVEL_2, "After PUSH: ")); DEBUG_ONLY(x87_dump_stack(state);)
289 * Push a virtual Register onto the stack, double pushes are NOT allowed.
291 * @param state the x87 state
292 * @param reg_idx the register vfp index
293 * @param node the node that produces the value of the vfp register
295 static void x87_push(x87_state *state, int reg_idx, ir_node *node)
297 assert(x87_on_stack(state, reg_idx) == -1 && "double push");
299 x87_push_dbl(state, reg_idx, node);
303 * Pop a virtual Register from the stack.
305 * @param state the x87 state
307 static void x87_pop(x87_state *state)
309 assert(state->depth > 0 && "stack underrun");
312 state->tos = MASK_TOS(state->tos + 1);
314 DB((dbg, LEVEL_2, "After POP: ")); DEBUG_ONLY(x87_dump_stack(state);)
318 * Empty the fpu stack
320 * @param state the x87 state
322 static void x87_emms(x87_state *state)
329 * Returns the block state of a block.
331 * @param sim the x87 simulator handle
332 * @param block the current block
334 * @return the block state
336 static blk_state *x87_get_bl_state(x87_simulator *sim, ir_node *block)
338 blk_state *res = pmap_get(blk_state, sim->blk_states, block);
341 res = OALLOC(&sim->obst, blk_state);
345 pmap_insert(sim->blk_states, block, res);
352 * Creates a new x87 state.
354 * @param sim the x87 simulator handle
356 * @return a new x87 state
358 static x87_state *x87_alloc_state(x87_simulator *sim)
360 x87_state *res = OALLOC(&sim->obst, x87_state);
369 * @param sim the x87 simulator handle
370 * @param src the x87 state that will be cloned
372 * @return a cloned copy of the src state
374 static x87_state *x87_clone_state(x87_simulator *sim, const x87_state *src)
376 x87_state *res = x87_alloc_state(sim);
383 * Patch a virtual instruction into a x87 one and return
384 * the node representing the result value.
386 * @param n the IR node to patch
387 * @param op the x87 opcode to patch in
389 static ir_node *x87_patch_insn(ir_node *n, ir_op *op)
391 ir_mode *mode = get_irn_mode(n);
396 if (mode == mode_T) {
397 /* patch all Proj's */
398 foreach_out_edge(n, edge) {
399 ir_node *proj = get_edge_src_irn(edge);
401 mode = get_irn_mode(proj);
402 if (mode_is_float(mode)) {
404 set_irn_mode(proj, ia32_reg_classes[CLASS_ia32_st].mode);
408 } else if (mode_is_float(mode))
409 set_irn_mode(n, ia32_reg_classes[CLASS_ia32_st].mode);
414 * Returns the first Proj of a mode_T node having a given mode.
416 * @param n the mode_T node
417 * @param m the desired mode of the Proj
418 * @return The first Proj of mode @p m found or NULL.
420 static ir_node *get_irn_Proj_for_mode(ir_node *n, ir_mode *m)
422 assert(get_irn_mode(n) == mode_T && "Need mode_T node");
424 foreach_out_edge(n, edge) {
425 ir_node *proj = get_edge_src_irn(edge);
426 if (get_irn_mode(proj) == m)
434 * Wrap the arch_* function here so we can check for errors.
436 static inline const arch_register_t *x87_get_irn_register(const ir_node *irn)
438 const arch_register_t *res = arch_get_irn_register(irn);
440 assert(res->reg_class == &ia32_reg_classes[CLASS_ia32_vfp]);
444 static inline const arch_register_t *x87_irn_get_register(const ir_node *irn,
447 const arch_register_t *res = arch_get_irn_register_out(irn, pos);
449 assert(res->reg_class == &ia32_reg_classes[CLASS_ia32_vfp]);
453 static inline const arch_register_t *get_st_reg(int index)
455 return &ia32_registers[REG_ST0 + index];
458 /* -------------- x87 perm --------------- */
461 * Creates a fxch for shuffle.
463 * @param state the x87 state
464 * @param pos parameter for fxch
465 * @param block the block were fxch is inserted
467 * Creates a new fxch node and reroute the user of the old node
470 * @return the fxch node
472 static ir_node *x87_fxch_shuffle(x87_state *state, int pos, ir_node *block)
475 ia32_x87_attr_t *attr;
477 fxch = new_bd_ia32_fxch(NULL, block);
478 attr = get_ia32_x87_attr(fxch);
479 attr->x87[0] = get_st_reg(pos);
480 attr->x87[2] = get_st_reg(0);
484 x87_fxch(state, pos);
489 * Calculate the necessary permutations to reach dst_state.
491 * These permutations are done with fxch instructions and placed
492 * at the end of the block.
494 * Note that critical edges are removed here, so we need only
495 * a shuffle if the current block has only one successor.
497 * @param block the current block
498 * @param state the current x87 stack state, might be modified
499 * @param dst_state destination state
/*
 * NOTE(review): this listing of x87_shuffle appears to be missing interior
 * lines (the `int k = 0;`/`break;` statements, closing braces, the `after`
 * bookkeeping and the trailing `return state;` are absent and the embedded
 * line numbers jump). Reconcile with the upstream source before compiling.
 * Purpose (from visible code): emit fxch instructions at the end of `block`
 * to permute the current stack `state` into `dst_state`, decomposing the
 * permutation into at most 4 cycles; cycles not containing the tos need one
 * extra leading+trailing fxch of their first element.
 */
503 static x87_state *x87_shuffle(ir_node *block, x87_state *state, const x87_state *dst_state)
505 	int i, n_cycles, k, ri;
506 	unsigned cycles[4], all_mask;
507 	char cycle_idx[4][8];
508 	ir_node *fxch, *before, *after;
510 	assert(state->depth == dst_state->depth);
512 	/* Some mathematics here:
513 	   If we have a cycle of length n that includes the tos,
514 	   we need n-1 exchange operations.
515 	   We can always add the tos and restore it, so we need
516 	   n+1 exchange operations for a cycle not containing the tos.
517 	   So, the maximum of needed operations is for a cycle of 7
518 	   not including the tos == 8.
519 	   This is the same number of ops we would need for using stores,
520 	   so exchange is cheaper (we save the loads).
521 	   On the other hand, we might need an additional exchange
522 	   in the next block to bring one operand on top, so the
523 	   number of ops in the first case is identical.
524 	   Further, no more than 4 cycles can exists (4 x 2).
526 	all_mask = (1 << (state->depth)) - 1;
528 	for (n_cycles = 0; all_mask; ++n_cycles) {
529 		int src_idx, dst_idx;
531 		/* find the first free slot */
532 		for (i = 0; i < state->depth; ++i) {
533 			if (all_mask & (1 << i)) {
534 				all_mask &= ~(1 << i);
536 				/* check if there are differences here */
537 				if (x87_get_st_reg(state, i) != x87_get_st_reg(dst_state, i))
543 			/* no more cycles found */
548 		cycles[n_cycles] = (1 << i);
549 		cycle_idx[n_cycles][k++] = i;
550 		for (src_idx = i; ; src_idx = dst_idx) {
551 			dst_idx = x87_on_stack(dst_state, x87_get_st_reg(state, src_idx));
553 			if ((all_mask & (1 << dst_idx)) == 0)
556 			cycle_idx[n_cycles][k++] = dst_idx;
557 			cycles[n_cycles] |=  (1 << dst_idx);
558 			all_mask       &= ~(1 << dst_idx);
560 		cycle_idx[n_cycles][k] = -1;
564 		/* no permutation needed */
568 	/* Hmm: permutation needed */
569 	DB((dbg, LEVEL_2, "\n%+F needs permutation: from\n", block));
570 	DEBUG_ONLY(x87_dump_stack(state);)
571 	DB((dbg, LEVEL_2, "                  to\n"));
572 	DEBUG_ONLY(x87_dump_stack(dst_state);)
576 	DB((dbg, LEVEL_2, "Need %d cycles\n", n_cycles));
577 	for (ri = 0; ri < n_cycles; ++ri) {
578 		DB((dbg, LEVEL_2, " Ring %d:\n ", ri));
579 		for (k = 0; cycle_idx[ri][k] != -1; ++k)
580 			DB((dbg, LEVEL_2, " st%d ->", cycle_idx[ri][k]));
581 		DB((dbg, LEVEL_2, "\n"));
588 	 * Find the place node must be insert.
589 	 * We have only one successor block, so the last instruction should
592 	before = sched_last(block);
593 	assert(is_cfop(before));
595 	/* now do the permutations */
596 	for (ri = 0; ri < n_cycles; ++ri) {
597 		if ((cycles[ri] & 1) == 0) {
598 			/* this cycle does not include the tos */
599 			fxch = x87_fxch_shuffle(state, cycle_idx[ri][0], block);
601 				sched_add_after(after, fxch);
603 				sched_add_before(before, fxch);
606 		for (k = 1; cycle_idx[ri][k] != -1; ++k) {
607 			fxch = x87_fxch_shuffle(state, cycle_idx[ri][k], block);
609 				sched_add_after(after, fxch);
611 				sched_add_before(before, fxch);
614 		if ((cycles[ri] & 1) == 0) {
615 			/* this cycle does not include the tos */
616 			fxch = x87_fxch_shuffle(state, cycle_idx[ri][0], block);
617 			sched_add_after(after, fxch);
624 * Create a fxch node before another node.
626 * @param state the x87 state
627 * @param n the node after the fxch
628 * @param pos exchange st(pos) with st(0)
632 static ir_node *x87_create_fxch(x87_state *state, ir_node *n, int pos)
635 ia32_x87_attr_t *attr;
636 ir_node *block = get_nodes_block(n);
638 x87_fxch(state, pos);
640 fxch = new_bd_ia32_fxch(NULL, block);
641 attr = get_ia32_x87_attr(fxch);
642 attr->x87[0] = get_st_reg(pos);
643 attr->x87[2] = get_st_reg(0);
647 sched_add_before(n, fxch);
648 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fxch), attr->x87[0]->name, attr->x87[2]->name));
653 * Create a fpush before node n.
655 * @param state the x87 state
656 * @param n the node after the fpush
657 * @param pos push st(pos) on stack
658 * @param op_idx replace input op_idx of n with the fpush result
660 static void x87_create_fpush(x87_state *state, ir_node *n, int pos, int op_idx)
662 ir_node *fpush, *pred = get_irn_n(n, op_idx);
663 ia32_x87_attr_t *attr;
664 const arch_register_t *out = x87_get_irn_register(pred);
666 x87_push_dbl(state, arch_register_get_index(out), pred);
668 fpush = new_bd_ia32_fpush(NULL, get_nodes_block(n));
669 attr = get_ia32_x87_attr(fpush);
670 attr->x87[0] = get_st_reg(pos);
671 attr->x87[2] = get_st_reg(0);
674 sched_add_before(n, fpush);
676 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fpush), attr->x87[0]->name, attr->x87[2]->name));
680 * Create a fpop before node n.
682 * @param state the x87 state
683 * @param n the node after the fpop
684 * @param num pop 1 or 2 values
686 * @return the fpop node
688 static ir_node *x87_create_fpop(x87_state *state, ir_node *n, int num)
690 ir_node *fpop = NULL;
691 ia32_x87_attr_t *attr;
696 if (ia32_cg_config.use_ffreep)
697 fpop = new_bd_ia32_ffreep(NULL, get_nodes_block(n));
699 fpop = new_bd_ia32_fpop(NULL, get_nodes_block(n));
700 attr = get_ia32_x87_attr(fpop);
701 attr->x87[0] = get_st_reg(0);
702 attr->x87[1] = get_st_reg(0);
703 attr->x87[2] = get_st_reg(0);
706 sched_add_before(n, fpop);
707 DB((dbg, LEVEL_1, "<<< %s %s\n", get_irn_opname(fpop), attr->x87[0]->name));
712 /* --------------------------------- liveness ------------------------------------------ */
715 * The liveness transfer function.
716 * Updates a live set over a single step from a given node to its predecessor.
717 * Everything defined at the node is removed from the set, the uses of the node get inserted.
719 * @param irn The node at which liveness should be computed.
720 * @param live The bitset of registers live before @p irn. This set gets modified by updating it to
721 * the registers live after irn.
723 * @return The live bitset.
725 static vfp_liveness vfp_liveness_transfer(ir_node *irn, vfp_liveness live)
728 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
730 if (get_irn_mode(irn) == mode_T) {
731 foreach_out_edge(irn, edge) {
732 ir_node *proj = get_edge_src_irn(edge);
734 if (arch_irn_consider_in_reg_alloc(cls, proj)) {
735 const arch_register_t *reg = x87_get_irn_register(proj);
736 live &= ~(1 << arch_register_get_index(reg));
739 } else if (arch_irn_consider_in_reg_alloc(cls, irn)) {
740 const arch_register_t *reg = x87_get_irn_register(irn);
741 live &= ~(1 << arch_register_get_index(reg));
744 for (i = 0, n = get_irn_arity(irn); i < n; ++i) {
745 ir_node *op = get_irn_n(irn, i);
747 if (mode_is_float(get_irn_mode(op)) &&
748 arch_irn_consider_in_reg_alloc(cls, op)) {
749 const arch_register_t *reg = x87_get_irn_register(op);
750 live |= 1 << arch_register_get_index(reg);
757 * Put all live virtual registers at the end of a block into a bitset.
759 * @param sim the simulator handle
760 * @param bl the block
762 * @return The live bitset at the end of this block
764 static vfp_liveness vfp_liveness_end_of_block(x87_simulator *sim, const ir_node *block)
766 vfp_liveness live = 0;
767 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
768 const be_lv_t *lv = sim->lv;
770 be_lv_foreach(lv, block, be_lv_state_end, node) {
771 const arch_register_t *reg;
772 if (!arch_irn_consider_in_reg_alloc(cls, node))
775 reg = x87_get_irn_register(node);
776 live |= 1 << arch_register_get_index(reg);
782 /** get the register mask from an arch_register */
783 #define REGMASK(reg) (1 << (arch_register_get_index(reg)))
786 * Return a bitset of argument registers which are live at the end of a node.
788 * @param sim the simulator handle
789 * @param pos the node
790 * @param kill kill mask for the output registers
792 * @return The live bitset.
794 static unsigned vfp_live_args_after(x87_simulator *sim, const ir_node *pos, unsigned kill)
796 unsigned idx = get_irn_idx(pos);
798 assert(idx < sim->n_idx);
799 return sim->live[idx] & ~kill;
803 * Calculate the liveness for a whole block and cache it.
805 * @param sim the simulator handle
806 * @param block the block
808 static void update_liveness(x87_simulator *sim, ir_node *block)
810 vfp_liveness live = vfp_liveness_end_of_block(sim, block);
813 /* now iterate through the block backward and cache the results */
814 sched_foreach_reverse(block, irn) {
815 /* stop at the first Phi: this produces the live-in */
819 idx = get_irn_idx(irn);
820 sim->live[idx] = live;
822 live = vfp_liveness_transfer(irn, live);
824 idx = get_irn_idx(block);
825 sim->live[idx] = live;
829 * Returns true if a register is live in a set.
831 * @param reg_idx the vfp register index
832 * @param live a live bitset
834 #define is_vfp_live(reg_idx, live) ((live) & (1 << (reg_idx)))
838 * Dump liveness info.
840 * @param live the live bitset
842 static void vfp_dump_live(vfp_liveness live)
846 DB((dbg, LEVEL_2, "Live after: "));
847 for (i = 0; i < 8; ++i) {
848 if (live & (1 << i)) {
849 DB((dbg, LEVEL_2, "vf%d ", i));
852 DB((dbg, LEVEL_2, "\n"));
854 #endif /* DEBUG_libfirm */
856 /* --------------------------------- simulators ---------------------------------------- */
859 * Simulate a virtual binop.
861 * @param state the x87 state
862 * @param n the node that should be simulated (and patched)
863 * @param tmpl the template containing the 4 possible x87 opcodes
865 * @return NO_NODE_ADDED
/*
 * NOTE(review): this listing of sim_binop is missing interior lines (several
 * declarations such as `ir_op *dst`, the `out_idx`/`op1_idx`/`op2_idx`
 * assignments in each branch, `do_pop` handling, the x87_pop call and closing
 * braces are absent; embedded line numbers jump). Reconcile with upstream
 * before compiling.
 * Purpose (from visible code): simulate a virtual fp binop, choosing among
 * the four opcode variants in `tmpl` depending on which operands are still
 * live after the node and where they sit on the stack, inserting fxch/fpush
 * as needed, then patch the node and its x87 register attributes.
 */
867 static int sim_binop(x87_state *state, ir_node *n, const exchange_tmpl *tmpl)
869 	int op2_idx = 0, op1_idx;
870 	int out_idx, do_pop = 0;
871 	ia32_x87_attr_t *attr;
873 	ir_node *patched_insn;
875 	x87_simulator         *sim     = state->sim;
876 	ir_node               *op1     = get_irn_n(n, n_ia32_binary_left);
877 	ir_node               *op2     = get_irn_n(n, n_ia32_binary_right);
878 	const arch_register_t *op1_reg = x87_get_irn_register(op1);
879 	const arch_register_t *op2_reg = x87_get_irn_register(op2);
880 	const arch_register_t *out     = x87_irn_get_register(n, pn_ia32_res);
881 	int reg_index_1                = arch_register_get_index(op1_reg);
882 	int reg_index_2                = arch_register_get_index(op2_reg);
883 	vfp_liveness           live    = vfp_live_args_after(sim, n, REGMASK(out));
887 	DB((dbg, LEVEL_1, ">>> %+F %s, %s -> %s\n", n,
888 		arch_register_get_name(op1_reg), arch_register_get_name(op2_reg),
889 		arch_register_get_name(out)));
890 	DEBUG_ONLY(vfp_dump_live(live);)
891 	DB((dbg, LEVEL_1, "Stack before: "));
892 	DEBUG_ONLY(x87_dump_stack(state);)
894 	op1_idx = x87_on_stack(state, reg_index_1);
895 	assert(op1_idx >= 0);
896 	op1_live_after = is_vfp_live(reg_index_1, live);
898 	attr     = get_ia32_x87_attr(n);
899 	permuted = attr->attr.data.ins_permuted;
901 	if (reg_index_2 != REG_VFP_VFP_NOREG) {
904 		/* second operand is a vfp register */
905 		op2_idx = x87_on_stack(state, reg_index_2);
906 		assert(op2_idx >= 0);
907 		op2_live_after = is_vfp_live(reg_index_2, live);
909 		if (op2_live_after) {
910 			/* Second operand is live. */
912 			if (op1_live_after) {
913 				/* Both operands are live: push the first one.
914 				   This works even for op1 == op2. */
915 				x87_create_fpush(state, n, op1_idx, n_ia32_binary_right);
916 				/* now do fxxx (tos=tos X op) */
920 				dst = tmpl->normal_op;
922 				/* Second live, first operand is dead here, bring it to tos. */
924 					x87_create_fxch(state, n, op1_idx);
929 				/* now do fxxx (tos=tos X op) */
931 				dst = tmpl->normal_op;
934 			/* Second operand is dead. */
935 			if (op1_live_after) {
936 				/* First operand is live: bring second to tos. */
938 					x87_create_fxch(state, n, op2_idx);
943 				/* now do fxxxr (tos = op X tos) */
945 				dst = tmpl->reverse_op;
947 				/* Both operands are dead here, pop them from the stack. */
950 						/* Both are identically and on tos, no pop needed. */
951 						/* here fxxx (tos = tos X tos) */
952 						dst = tmpl->normal_op;
955 						/* now do fxxxp (op = op X tos, pop) */
956 						dst = tmpl->normal_pop_op;
960 				} else if (op1_idx == 0) {
961 					assert(op1_idx != op2_idx);
962 					/* now do fxxxrp (op = tos X op, pop) */
963 					dst = tmpl->reverse_pop_op;
967 					/* Bring the second on top. */
968 					x87_create_fxch(state, n, op2_idx);
969 					if (op1_idx == op2_idx) {
970 						/* Both are identically and on tos now, no pop needed. */
973 						/* use fxxx (tos = tos X tos) */
974 						dst = tmpl->normal_op;
977 						/* op2 is on tos now */
979 						/* use fxxxp (op = op X tos, pop) */
980 						dst = tmpl->normal_pop_op;
988 		/* second operand is an address mode */
989 		if (op1_live_after) {
990 			/* first operand is live: push it here */
991 			x87_create_fpush(state, n, op1_idx, n_ia32_binary_left);
994 			/* first operand is dead: bring it to tos */
996 				x87_create_fxch(state, n, op1_idx);
1001 		/* use fxxx (tos = tos X mem) */
1002 		dst = permuted ? tmpl->reverse_op : tmpl->normal_op;
1006 	patched_insn = x87_patch_insn(n, dst);
1007 	x87_set_st(state, arch_register_get_index(out), patched_insn, out_idx);
1012 	/* patch the operation */
1013 	attr->x87[0] = op1_reg = get_st_reg(op1_idx);
1014 	if (reg_index_2 != REG_VFP_VFP_NOREG) {
1015 		attr->x87[1] = op2_reg = get_st_reg(op2_idx);
1017 	attr->x87[2] = out = get_st_reg(out_idx);
1019 	if (reg_index_2 != REG_VFP_VFP_NOREG) {
1020 		DB((dbg, LEVEL_1, "<<< %s %s, %s -> %s\n", get_irn_opname(n),
1021 			arch_register_get_name(op1_reg), arch_register_get_name(op2_reg),
1022 			arch_register_get_name(out)));
1024 		DB((dbg, LEVEL_1, "<<< %s %s, [AM] -> %s\n", get_irn_opname(n),
1025 			arch_register_get_name(op1_reg),
1026 			arch_register_get_name(out)));
1029 	return NO_NODE_ADDED;
1033 * Simulate a virtual Unop.
1035 * @param state the x87 state
1036 * @param n the node that should be simulated (and patched)
1037 * @param op the x87 opcode that will replace n's opcode
1039 * @return NO_NODE_ADDED
1041 static int sim_unop(x87_state *state, ir_node *n, ir_op *op)
1043 x87_simulator *sim = state->sim;
1044 const arch_register_t *op1 = x87_get_irn_register(get_irn_n(n, 0));
1045 const arch_register_t *out = x87_get_irn_register(n);
1046 ia32_x87_attr_t *attr;
1047 unsigned live = vfp_live_args_after(sim, n, REGMASK(out));
1049 DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, out->name));
1050 DEBUG_ONLY(vfp_dump_live(live);)
1052 int op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1054 if (is_vfp_live(arch_register_get_index(op1), live)) {
1055 /* push the operand here */
1056 x87_create_fpush(state, n, op1_idx, 0);
1059 /* operand is dead, bring it to tos */
1061 x87_create_fxch(state, n, op1_idx);
1065 x87_set_tos(state, arch_register_get_index(out), x87_patch_insn(n, op));
1066 attr = get_ia32_x87_attr(n);
1067 attr->x87[0] = op1 = get_st_reg(0);
1068 attr->x87[2] = out = get_st_reg(0);
1069 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), out->name));
1071 return NO_NODE_ADDED;
1075 * Simulate a virtual Load instruction.
1077 * @param state the x87 state
1078 * @param n the node that should be simulated (and patched)
1079 * @param op the x87 opcode that will replace n's opcode
1081 * @return NO_NODE_ADDED
1083 static int sim_load(x87_state *state, ir_node *n, ir_op *op, int res_pos)
1085 const arch_register_t *out = x87_irn_get_register(n, res_pos);
1086 ia32_x87_attr_t *attr;
1088 DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, arch_register_get_name(out)));
1089 x87_push(state, arch_register_get_index(out), x87_patch_insn(n, op));
1090 assert(out == x87_irn_get_register(n, res_pos));
1091 attr = get_ia32_x87_attr(n);
1092 attr->x87[2] = out = get_st_reg(0);
1093 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), arch_register_get_name(out)));
1095 return NO_NODE_ADDED;
1099 * Rewire all users of @p old_val to @new_val iff they are scheduled after @p store.
1101 * @param store The store
1102 * @param old_val The former value
1103 * @param new_val The new value
1105 static void collect_and_rewire_users(ir_node *store, ir_node *old_val, ir_node *new_val)
1107 foreach_out_edge_safe(old_val, edge) {
1108 ir_node *user = get_edge_src_irn(edge);
1110 if (! user || user == store)
1113 /* if the user is scheduled after the store: rewire */
1114 if (sched_is_scheduled(user) && sched_comes_after(store, user)) {
1116 /* find the input of the user pointing to the old value */
1117 for (i = get_irn_arity(user) - 1; i >= 0; i--) {
1118 if (get_irn_n(user, i) == old_val)
1119 set_irn_n(user, i, new_val);
1126 * Simulate a virtual Store.
1128 * @param state the x87 state
1129 * @param n the node that should be simulated (and patched)
1130 * @param op the x87 store opcode
1131 * @param op_p the x87 store and pop opcode
/*
 * NOTE(review): this listing of sim_store is missing interior lines (the
 * `ir_mode *mode;` declaration, several fxch/pop bookkeeping statements, the
 * else/closing braces of both branches and the final `return insn;` are
 * absent; embedded line numbers jump). Reconcile with upstream before
 * compiling.
 * Purpose (from visible code): simulate a virtual fp store. If the stored
 * value stays live and the mode is wider than fst/fist support, either
 * duplicate it (fpush + fstp) when a stack slot is free, or fstp and re-load
 * it with a synthesized vfld whose memory Proj replaces the store's; users
 * scheduled after the store are rewired to the reloaded value.
 */
1133 static int sim_store(x87_state *state, ir_node *n, ir_op *op, ir_op *op_p)
1135 	ir_node               *val = get_irn_n(n, n_ia32_vfst_val);
1136 	const arch_register_t *op2 = x87_get_irn_register(val);
1137 	unsigned              live = vfp_live_args_after(state->sim, n, 0);
1138 	int                   insn = NO_NODE_ADDED;
1139 	ia32_x87_attr_t *attr;
1140 	int op2_reg_idx, op2_idx, depth;
1141 	int live_after_node;
1144 	op2_reg_idx = arch_register_get_index(op2);
1145 	op2_idx = x87_on_stack(state, op2_reg_idx);
1146 	live_after_node = is_vfp_live(arch_register_get_index(op2), live);
1147 	DB((dbg, LEVEL_1, ">>> %+F %s ->\n", n, arch_register_get_name(op2)));
1148 	assert(op2_idx >= 0);
1150 	mode  = get_ia32_ls_mode(n);
1151 	depth = x87_get_depth(state);
1153 	if (live_after_node) {
1155 		   Problem: fst doesn't support 96bit modes (spills), only fstp does
1156 		             fist doesn't support 64bit mode, only fistp
1158 		   - stack not full: push value and fstp
1159 		   - stack full: fstp value and load again
1160 		   Note that we cannot test on mode_E, because floats might be 96bit ...
1162 		if (get_mode_size_bits(mode) > 64 || (mode_is_int(mode) && get_mode_size_bits(mode) > 32)) {
1163 			if (depth < N_ia32_st_REGS) {
1164 				/* ok, we have a free register: push + fstp */
1165 				x87_create_fpush(state, n, op2_idx, n_ia32_vfst_val);
1167 				x87_patch_insn(n, op_p);
1169 				ir_node  *vfld, *mem, *block, *rproj, *mproj;
1170 				ir_graph *irg   = get_irn_irg(n);
1171 				ir_node  *nomem = get_irg_no_mem(irg);
1173 				/* stack full here: need fstp + load */
1175 				x87_patch_insn(n, op_p);
1177 				block = get_nodes_block(n);
1178 				vfld  = new_bd_ia32_vfld(NULL, block, get_irn_n(n, 0), get_irn_n(n, 1), nomem, get_ia32_ls_mode(n));
1180 				/* copy all attributes */
1181 				set_ia32_frame_ent(vfld, get_ia32_frame_ent(n));
1182 				if (is_ia32_use_frame(n))
1183 					set_ia32_use_frame(vfld);
1184 				set_ia32_op_type(vfld, ia32_AddrModeS);
1185 				add_ia32_am_offs_int(vfld, get_ia32_am_offs_int(n));
1186 				set_ia32_am_sc(vfld, get_ia32_am_sc(n));
1187 				set_ia32_ls_mode(vfld, get_ia32_ls_mode(n));
1189 				rproj = new_r_Proj(vfld, get_ia32_ls_mode(vfld), pn_ia32_vfld_res);
1190 				mproj = new_r_Proj(vfld, mode_M, pn_ia32_vfld_M);
1191 				mem   = get_irn_Proj_for_mode(n, mode_M);
1193 				assert(mem && "Store memory not found");
1195 				arch_set_irn_register(rproj, op2);
1197 				/* reroute all former users of the store memory to the load memory */
1198 				edges_reroute(mem, mproj);
1199 				/* set the memory input of the load to the store memory */
1200 				set_irn_n(vfld, n_ia32_vfld_mem, mem);
1202 				sched_add_after(n, vfld);
1203 				sched_add_after(vfld, rproj);
1205 				/* rewire all users, scheduled after the store, to the loaded value */
1206 				collect_and_rewire_users(n, val, rproj);
1211 			/* we can only store the tos to memory */
1213 				x87_create_fxch(state, n, op2_idx);
1215 			/* mode size 64 or smaller -> use normal fst */
1216 			x87_patch_insn(n, op);
1219 		/* we can only store the tos to memory */
1221 			x87_create_fxch(state, n, op2_idx);
1224 		x87_patch_insn(n, op_p);
1227 	attr = get_ia32_x87_attr(n);
1228 	attr->x87[1] = op2 = get_st_reg(0);
1229 	DB((dbg, LEVEL_1, "<<< %s %s ->\n", get_irn_opname(n), arch_register_get_name(op2)));
/*
 * Generator macros for the per-opcode simulator wrappers (sim_fadd, sim_fld,
 * ...). Each expands to a sim_func that forwards to the generic simulator.
 * NOTE(review): this listing appears to be missing the closing braces of the
 * generated function bodies — TODO confirm against the upstream source.
 */
1234 #define _GEN_BINOP(op, rev) \
1235 static int sim_##op(x87_state *state, ir_node *n) { \
1236 	exchange_tmpl tmpl = { op_ia32_##op, op_ia32_##rev, op_ia32_##op##p, op_ia32_##rev##p }; \
1237 	return sim_binop(state, n, &tmpl); \
1240 #define GEN_BINOP(op)   _GEN_BINOP(op, op)
1241 #define GEN_BINOPR(op)  _GEN_BINOP(op, op##r)
1243 #define GEN_LOAD(op) \
1244 static int sim_##op(x87_state *state, ir_node *n) { \
1245 	return sim_load(state, n, op_ia32_##op, pn_ia32_v##op##_res); \
1248 #define GEN_UNOP(op) \
1249 static int sim_##op(x87_state *state, ir_node *n) { \
1250 	return sim_unop(state, n, op_ia32_##op); \
1253 #define GEN_STORE(op) \
1254 static int sim_##op(x87_state *state, ir_node *n) { \
1255 	return sim_store(state, n, op_ia32_##op, op_ia32_##op##p); \
1277 * Simulate a virtual fisttp.
1279 * @param state the x87 state
1280 * @param n the node that should be simulated (and patched)
1282 * @return NO_NODE_ADDED
1284 static int sim_fisttp(x87_state *state, ir_node *n)
/* Simulates a virtual fisttp: truncating integer store that pops st(0). */
1286 ir_node *val = get_irn_n(n, n_ia32_vfst_val);
1287 const arch_register_t *op2 = x87_get_irn_register(val);
1288 ia32_x87_attr_t *attr;
1289 int op2_reg_idx, op2_idx;
/* Locate the stored value's virtual register on the simulated stack. */
1291 op2_reg_idx = arch_register_get_index(op2);
1292 op2_idx = x87_on_stack(state, op2_reg_idx);
1293 DB((dbg, LEVEL_1, ">>> %+F %s ->\n", n, arch_register_get_name(op2)));
1294 assert(op2_idx >= 0);
1296 /* Note: although the value is still live here, it is destroyed because
1297 of the pop. The register allocator is aware of that and introduced a copy
1298 if the value must be alive. */
1300 /* we can only store the tos to memory */
/* NOTE(review): the guard around this fxch (likely `if (op2_idx != 0)`)
 * is elided in this listing — fisttp only needs an fxch when the value is
 * not already at TOS; confirm against the full source. */
1302 x87_create_fxch(state, n, op2_idx);
/* Replace the virtual node with the real fisttp opcode; the pop itself is
 * presumably recorded on the state in an elided line — verify. */
1305 x87_patch_insn(n, op_ia32_fisttp);
1307 attr = get_ia32_x87_attr(n);
1308 attr->x87[1] = op2 = get_st_reg(0);
1309 DB((dbg, LEVEL_1, "<<< %s %s ->\n", get_irn_opname(n), arch_register_get_name(op2)));
1311 return NO_NODE_ADDED;
1315 * Simulate a virtual FtstFnstsw.
1317 * @param state the x87 state
1318 * @param n the node that should be simulated (and patched)
1320 * @return NO_NODE_ADDED
1322 static int sim_FtstFnstsw(x87_state *state, ir_node *n)
/* Simulates a virtual FtstFnstsw (test st(0) against 0.0, store FPU
 * status word): brings the operand to TOS, patches the opcode, and pops
 * the operand afterwards if it is dead. */
1324 x87_simulator *sim = state->sim;
1325 ia32_x87_attr_t *attr = get_ia32_x87_attr(n);
1326 ir_node *op1_node = get_irn_n(n, n_ia32_vFtstFnstsw_left);
1327 const arch_register_t *reg1 = x87_get_irn_register(op1_node);
1328 int reg_index_1 = arch_register_get_index(reg1);
1329 int op1_idx = x87_on_stack(state, reg_index_1);
1330 unsigned live = vfp_live_args_after(sim, n, 0);
1332 DB((dbg, LEVEL_1, ">>> %+F %s\n", n, arch_register_get_name(reg1)));
1333 DEBUG_ONLY(vfp_dump_live(live);)
1334 DB((dbg, LEVEL_1, "Stack before: "));
1335 DEBUG_ONLY(x87_dump_stack(state);)
1336 assert(op1_idx >= 0);
1339 /* bring the value to tos */
/* NOTE(review): guard (likely `if (op1_idx != 0)`) and the update of
 * op1_idx to 0 appear elided here — confirm against the full source,
 * since line 1346 below re-reads op1_idx. */
1340 x87_create_fxch(state, n, op1_idx);
1344 /* patch the operation */
1345 x87_patch_insn(n, op_ia32_FtstFnstsw);
1346 reg1 = get_st_reg(op1_idx);
1347 attr->x87[0] = reg1;
1348 attr->x87[1] = NULL;
1349 attr->x87[2] = NULL;
/* Operand dead after the test: schedule an fpop right after this node. */
1351 if (!is_vfp_live(reg_index_1, live))
1352 x87_create_fpop(state, sched_next(n), 1);
1354 return NO_NODE_ADDED;
1360 * @param state the x87 state
1361 * @param n the node that should be simulated (and patched)
1363 * @return NO_NODE_ADDED
1365 static int sim_Fucom(x87_state *state, ir_node *n)
/* Simulates virtual Fucom-family compares (vFucomFnstsw / vFucomi).
 * The x87 compare only works against st(0), so this routine performs a
 * case analysis over: which operands are live after the compare, which
 * (if either) is already TOS, and whether both name the same stack slot.
 * It inserts the minimal fxch sequence, selects the plain / pop / double-
 * pop variant of the real opcode, and tracks whether the operand order
 * ended up swapped (ins_permuted).
 * NOTE(review): many lines are elided in this listing (the xchg/index
 * bookkeeping inside most branches, the switch headers before the `case`
 * labels, and several closing braces); the comments below describe only
 * what the visible lines establish. */
1369 ia32_x87_attr_t *attr = get_ia32_x87_attr(n);
1371 x87_simulator *sim = state->sim;
1372 ir_node *op1_node = get_irn_n(n, n_ia32_vFucomFnstsw_left);
1373 ir_node *op2_node = get_irn_n(n, n_ia32_vFucomFnstsw_right);
1374 const arch_register_t *op1 = x87_get_irn_register(op1_node);
1375 const arch_register_t *op2 = x87_get_irn_register(op2_node);
1376 int reg_index_1 = arch_register_get_index(op1);
1377 int reg_index_2 = arch_register_get_index(op2);
1378 unsigned live = vfp_live_args_after(sim, n, 0);
1379 bool permuted = attr->attr.data.ins_permuted;
1383 DB((dbg, LEVEL_1, ">>> %+F %s, %s\n", n,
1384 arch_register_get_name(op1), arch_register_get_name(op2)));
1385 DEBUG_ONLY(vfp_dump_live(live);)
1386 DB((dbg, LEVEL_1, "Stack before: "));
1387 DEBUG_ONLY(x87_dump_stack(state);)
1389 op1_idx = x87_on_stack(state, reg_index_1);
1390 assert(op1_idx >= 0);
1392 /* BEWARE: check for comp a,a cases, they might happen */
1393 if (reg_index_2 != REG_VFP_VFP_NOREG) {
1394 /* second operand is a vfp register */
1395 op2_idx = x87_on_stack(state, reg_index_2);
1396 assert(op2_idx >= 0);
1398 if (is_vfp_live(reg_index_2, live)) {
1399 /* second operand is live */
1401 if (is_vfp_live(reg_index_1, live)) {
1402 /* both operands are live */
1405 /* res = tos X op */
1406 } else if (op2_idx == 0) {
1407 /* res = op X tos */
/* Operands effectively swapped: invert the permuted flag. */
1408 permuted = !permuted;
1411 /* bring the first one to tos */
1412 x87_create_fxch(state, n, op1_idx);
1413 if (op1_idx == op2_idx) {
1415 } else if (op2_idx == 0) {
1419 /* res = tos X op */
1422 /* second live, first operand is dead here, bring it to tos.
1423 This means further, op1_idx != op2_idx. */
1424 assert(op1_idx != op2_idx);
1426 x87_create_fxch(state, n, op1_idx);
1431 /* res = tos X op, pop */
1435 /* second operand is dead */
1436 if (is_vfp_live(reg_index_1, live)) {
1437 /* first operand is live: bring second to tos.
1438 This means further, op1_idx != op2_idx. */
1439 assert(op1_idx != op2_idx);
1441 x87_create_fxch(state, n, op2_idx);
1446 /* res = op X tos, pop */
1448 permuted = !permuted;
1451 /* both operands are dead here, check first for identity. */
1452 if (op1_idx == op2_idx) {
1453 /* identically, one pop needed */
1455 x87_create_fxch(state, n, op1_idx);
1459 /* res = tos X op, pop */
1462 /* different, move them to st and st(1) and pop both.
1463 The tricky part is to get one into st(1).*/
1464 else if (op2_idx == 1) {
1465 /* good, second operand is already in the right place, move the first */
1467 /* bring the first on top */
1468 x87_create_fxch(state, n, op1_idx);
1469 assert(op2_idx != 0);
1472 /* res = tos X op, pop, pop */
1474 } else if (op1_idx == 1) {
1475 /* good, first operand is already in the right place, move the second */
1477 /* bring the first on top */
1478 x87_create_fxch(state, n, op2_idx);
1479 assert(op1_idx != 0);
1482 /* res = op X tos, pop, pop */
1483 permuted = !permuted;
1487 /* if one is already the TOS, we need two fxch */
1489 /* first one is TOS, move to st(1) */
1490 x87_create_fxch(state, n, 1);
1491 assert(op2_idx != 1);
1493 x87_create_fxch(state, n, op2_idx);
1495 /* res = op X tos, pop, pop */
1497 permuted = !permuted;
1499 } else if (op2_idx == 0) {
1500 /* second one is TOS, move to st(1) */
1501 x87_create_fxch(state, n, 1);
1502 assert(op1_idx != 1);
1504 x87_create_fxch(state, n, op1_idx);
1506 /* res = tos X op, pop, pop */
1509 /* none of them is either TOS or st(1), 3 fxch needed */
1510 x87_create_fxch(state, n, op2_idx);
1511 assert(op1_idx != 0);
1512 x87_create_fxch(state, n, 1);
1514 x87_create_fxch(state, n, op1_idx);
1516 /* res = tos X op, pop, pop */
1523 /* second operand is an address mode */
1524 if (is_vfp_live(reg_index_1, live)) {
1525 /* first operand is live: bring it to TOS */
1527 x87_create_fxch(state, n, op1_idx);
1531 /* first operand is dead: bring it to tos */
1533 x87_create_fxch(state, n, op1_idx);
1540 /* patch the operation */
/* Select the real opcode by how many pops the chosen schedule requires
 * (0, 1, or 2); the `switch (pops)` headers are elided in this listing. */
1541 if (is_ia32_vFucomFnstsw(n)) {
1545 case 0: dst = op_ia32_FucomFnstsw; break;
1546 case 1: dst = op_ia32_FucompFnstsw; break;
1547 case 2: dst = op_ia32_FucomppFnstsw; break;
1548 default: panic("invalid popcount");
1551 for (i = 0; i < pops; ++i) {
1554 } else if (is_ia32_vFucomi(n)) {
1556 case 0: dst = op_ia32_Fucomi; break;
1557 case 1: dst = op_ia32_Fucompi; x87_pop(state); break;
/* Fucomi has no double-pop form: use Fucompi plus an explicit fpop. */
1559 dst = op_ia32_Fucompi;
1561 x87_create_fpop(state, sched_next(n), 1);
1563 default: panic("invalid popcount");
1566 panic("invalid operation %+F", n);
1569 x87_patch_insn(n, dst);
/* Record the final physical st registers in the x87 attribute. */
1576 op1 = get_st_reg(op1_idx);
1579 op2 = get_st_reg(op2_idx);
1582 attr->x87[2] = NULL;
1583 attr->attr.data.ins_permuted = permuted;
1586 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(n),
1587 arch_register_get_name(op1), arch_register_get_name(op2)));
1589 DB((dbg, LEVEL_1, "<<< %s %s, [AM]\n", get_irn_opname(n),
1590 arch_register_get_name(op1)));
1593 return NO_NODE_ADDED;
1599 * @param state the x87 state
1600 * @param n the node that should be simulated (and patched)
1602 * @return NO_NODE_ADDED
1604 static int sim_Keep(x87_state *state, ir_node *node)
/* Simulates a be_Keep: for every vfp input that sits on the simulated
 * stack but is not live after the Keep, schedule an fpop so the value
 * does not linger on the real x87 stack. */
1607 const arch_register_t *op_reg;
1613 DB((dbg, LEVEL_1, ">>> %+F\n", node));
1615 arity = get_irn_arity(node);
1616 for (i = 0; i < arity; ++i) {
1617 op = get_irn_n(node, i);
1618 op_reg = arch_get_irn_register(op);
/* Skip non-x87 (non-vfp) inputs; the `continue` is elided in this listing. */
1619 if (arch_register_get_class(op_reg) != &ia32_reg_classes[CLASS_ia32_vfp])
1622 reg_id = arch_register_get_index(op_reg);
1623 live = vfp_live_args_after(state->sim, node, 0);
/* Pop dead values that are still on the stack. */
1625 op_stack_idx = x87_on_stack(state, reg_id);
1626 if (op_stack_idx >= 0 && !is_vfp_live(reg_id, live))
1627 x87_create_fpop(state, sched_next(node), 1);
1630 DB((dbg, LEVEL_1, "Stack after: "));
1631 DEBUG_ONLY(x87_dump_stack(state);)
1633 return NO_NODE_ADDED;
1637 * Keep the given node alive by adding a be_Keep.
1639  * @param node  the node to be kept alive
1641 static void keep_float_node_alive(ir_node *node)
/* Attach a be_Keep right after the (scheduled) node so it has at least
 * one user and cannot be removed as dead. */
1643 ir_node *block = get_nodes_block(node);
1644 ir_node *keep = be_new_Keep(block, 1, &node);
1646 assert(sched_is_scheduled(node));
1647 sched_add_after(node, keep);
1651 * Create a copy of a node. Recreate the node if it's a constant.
1653 * @param state the x87 state
1654 * @param n the node to be copied
1656 * @return the copy of n
1658 static ir_node *create_Copy(x87_state *state, ir_node *n)
/* Create an x87 copy of n's operand. If the operand is one of the x87
 * load-constant instructions (fldz, fld1, fldpi, ...), re-emit the
 * constant instead of copying it; otherwise emit an fpushCopy that
 * duplicates the operand's stack slot. Pushes the result onto the
 * simulated stack and returns the new node. */
1660 dbg_info *n_dbg = get_irn_dbg_info(n);
1661 ir_mode *mode = get_irn_mode(n);
1662 ir_node *block = get_nodes_block(n);
1663 ir_node *pred = get_irn_n(n, 0);
1664 ir_node *(*cnstr)(dbg_info *, ir_node *, ir_mode *) = NULL;
1666 const arch_register_t *out;
1667 const arch_register_t *op1;
1668 ia32_x87_attr_t *attr;
1670 /* Do not copy constants, recreate them. */
/* NOTE(review): the `case iro_ia32_fldz:`/`fld1:` labels and the `break`
 * after each assignment are elided in this listing; each visible line
 * pairs a constant opcode with its constructor. */
1671 switch (get_ia32_irn_opcode(pred)) {
1673 cnstr = new_bd_ia32_fldz;
1676 cnstr = new_bd_ia32_fld1;
1678 case iro_ia32_fldpi:
1679 cnstr = new_bd_ia32_fldpi;
1681 case iro_ia32_fldl2e:
1682 cnstr = new_bd_ia32_fldl2e;
1684 case iro_ia32_fldl2t:
1685 cnstr = new_bd_ia32_fldl2t;
1687 case iro_ia32_fldlg2:
1688 cnstr = new_bd_ia32_fldlg2;
1690 case iro_ia32_fldln2:
1691 cnstr = new_bd_ia32_fldln2;
1697 out = x87_get_irn_register(n);
1698 op1 = x87_get_irn_register(pred);
1700 if (cnstr != NULL) {
1701 /* copy a constant */
1702 res = (*cnstr)(n_dbg, block, mode);
1704 x87_push(state, arch_register_get_index(out), res);
1706 attr = get_ia32_x87_attr(res);
1707 attr->x87[2] = get_st_reg(0);
/* Non-constant operand: duplicate its stack slot with fpushCopy. */
1709 int op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1711 res = new_bd_ia32_fpushCopy(n_dbg, block, pred, mode);
1713 x87_push(state, arch_register_get_index(out), res);
1715 attr = get_ia32_x87_attr(res);
1716 attr->x87[0] = get_st_reg(op1_idx);
1717 attr->x87[2] = get_st_reg(0);
1719 arch_set_irn_register(res, out);
1725 * Simulate a be_Copy.
1727 * @param state the x87 state
1728 * @param n the node that should be simulated (and patched)
1730 * @return NO_NODE_ADDED
1732 static int sim_Copy(x87_state *state, ir_node *n)
/* Simulates a be_Copy of a vfp value. Three outcomes:
 *  - source still live: emit a real duplicate via create_Copy();
 *  - source dead and destination expected elsewhere on the stack:
 *    turn the Copy into a Pop (with an fxch first if needed);
 *  - otherwise: a purely virtual rename of the stack slot. */
1735 const arch_register_t *out;
1736 const arch_register_t *op1;
1737 const arch_register_class_t *cls;
1738 ir_node *node, *next;
1739 int op1_idx, out_idx;
/* Non-vfp copies are not our business. */
1742 cls = arch_get_irn_reg_class(n);
1743 if (cls != &ia32_reg_classes[CLASS_ia32_vfp])
1746 pred = be_get_Copy_op(n);
1747 out = x87_get_irn_register(n);
1748 op1 = x87_get_irn_register(pred);
1749 live = vfp_live_args_after(state->sim, n, REGMASK(out));
1751 DB((dbg, LEVEL_1, ">>> %+F %s -> %s\n", n,
1752 arch_register_get_name(op1), arch_register_get_name(out)));
1753 DEBUG_ONLY(vfp_dump_live(live);)
1755 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1757 if (is_vfp_live(arch_register_get_index(op1), live)) {
1758 /* Operand is still live, a real copy. We need here an fpush that can
1759 hold a register, so use the fpushCopy or recreate constants */
1760 node = create_Copy(state, n);
1762 /* We have to make sure the old value doesn't go dead (which can happen
1763 * when we recreate constants). As the simulator expected that value in
1764 * the pred blocks. This is unfortunate as removing it would save us 1
1765 * instruction, but we would have to rerun all the simulation to get
1768 next = sched_next(n);
/* Replace n in the schedule with the freshly created copy node; the
 * removal of n itself is presumably in an elided line — verify. */
1771 sched_add_before(next, node);
/* A recreated constant may leave pred without users: keep it alive. */
1773 if (get_irn_n_edges(pred) == 0) {
1774 keep_float_node_alive(pred);
1777 DB((dbg, LEVEL_1, "<<< %+F %s -> ?\n", node, op1->name));
1779 out_idx = x87_on_stack(state, arch_register_get_index(out));
1781 if (out_idx >= 0 && out_idx != op1_idx) {
1782 /* Matze: out already on stack? how can this happen? */
1783 panic("invalid stack state");
1786 /* op1 must be killed and placed where out is */
1788 ia32_x87_attr_t *attr;
1789 /* best case, simple remove and rename */
1790 x87_patch_insn(n, op_ia32_Pop);
1791 attr = get_ia32_x87_attr(n);
1792 attr->x87[0] = op1 = get_st_reg(0);
1795 x87_set_st(state, arch_register_get_index(out), n, op1_idx - 1);
1797 ia32_x87_attr_t *attr;
1798 /* move op1 to tos, store and pop it */
1800 x87_create_fxch(state, n, op1_idx);
1803 x87_patch_insn(n, op_ia32_Pop);
1804 attr = get_ia32_x87_attr(n);
1805 attr->x87[0] = op1 = get_st_reg(out_idx);
1808 x87_set_st(state, arch_register_get_index(out), n, out_idx - 1);
1810 DB((dbg, LEVEL_1, "<<< %+F %s\n", n, op1->name));
1813 /* just a virtual copy */
1814 x87_set_st(state, arch_register_get_index(out), pred, op1_idx);
1815 /* don't remove the node to keep the verifier quiet :),
1816 the emitter won't emit any code for the node */
1819 DB((dbg, LEVEL_1, "<<< KILLED %s\n", get_irn_opname(n)));
1824 return NO_NODE_ADDED;
1828 * Returns the vf0 result Proj of a Call.
1830  * @param call  the Call node
1832 static ir_node *get_call_result_proj(ir_node *call)
/* Returns the Proj of the call that carries the vf0 (x87 st(0)) result.
 * NOTE(review): the `return proj;`/`return NULL;` lines are elided in
 * this listing — confirm against the full source. */
1834 /* search the result proj */
1835 foreach_out_edge(call, edge) {
1836 ir_node *proj = get_edge_src_irn(edge);
1837 long pn = get_Proj_proj(proj);
1839 if (pn == pn_ia32_Call_vf0)
1846 static int sim_Asm(x87_state *const state, ir_node *const n)
/* Inline assembly with x87 register constraints is unsupported: panic if
 * any input or output requirement names the vfp class. */
1850 for (size_t i = get_irn_arity(n); i-- != 0;) {
1851 arch_register_req_t const *const req = arch_get_irn_register_req_in(n, i);
1852 if (req->cls == &ia32_reg_classes[CLASS_ia32_vfp])
1853 panic("cannot handle %+F with x87 constraints", n);
/* Same check for the outputs. */
1856 for (size_t i = arch_get_irn_n_outs(n); i-- != 0;) {
1857 arch_register_req_t const *const req = arch_get_irn_register_req_out(n, i);
1858 if (req->cls == &ia32_reg_classes[CLASS_ia32_vfp])
1859 panic("cannot handle %+F with x87 constraints", n);
1862 return NO_NODE_ADDED;
1866 * Simulate a ia32_Call.
1868 * @param state the x87 state
1869 * @param n the node that should be simulated (and patched)
1871 * @return NO_NODE_ADDED
1873 static int sim_Call(x87_state *state, ir_node *n)
/* Simulates an ia32 Call: the stack must be empty at the call, and if the
 * callee returns a float, that value appears in st(0) afterwards, so it
 * is pushed onto the simulated stack. */
1875 ir_type *call_tp = get_ia32_call_attr_const(n)->call_tp;
1879 const arch_register_t *reg;
1881 DB((dbg, LEVEL_1, ">>> %+F\n", n));
1883 /* at the begin of a call the x87 state should be empty */
1884 assert(state->depth == 0 && "stack not empty before call");
/* No results: nothing to model (the early-exit jump target is elided). */
1886 if (get_method_n_ress(call_tp) <= 0)
1890 * If the called function returns a float, it is returned in st(0).
1891 * This even happens if the return value is NOT used.
1892 * Moreover, only one return result is supported.
1894 res_type = get_method_res_type(call_tp, 0);
1895 mode = get_type_mode(res_type);
1897 if (mode == NULL || !mode_is_float(mode))
/* Push the result Proj: after the call it lives in st(0). */
1900 resproj = get_call_result_proj(n);
1901 assert(resproj != NULL);
1903 reg = x87_get_irn_register(resproj);
1904 x87_push(state, arch_register_get_index(reg), resproj);
1907 DB((dbg, LEVEL_1, "Stack after: "));
1908 DEBUG_ONLY(x87_dump_stack(state);)
1910 return NO_NODE_ADDED;
1914 * Simulate a be_Return.
1916 * @param state the x87 state
1917 * @param n the node that should be simulated (and patched)
1919 * @return NO_NODE_ADDED
1921 static int sim_Return(x87_state *state, ir_node *n)
/* Simulates a be_Return: in debug builds, check that the simulated stack
 * depth equals the number of float return values, then empty the stack
 * virtually (the emptying loop is elided in this listing). */
1923 #ifdef DEBUG_libfirm
1924 /* only floating point return values must reside on stack */
1925 int n_float_res = 0;
1926 int const n_res = be_Return_get_n_rets(n);
1927 for (int i = 0; i < n_res; ++i) {
1928 ir_node *const res = get_irn_n(n, n_be_Return_val + i);
1929 if (mode_is_float(get_irn_mode(res)))
1932 assert(x87_get_depth(state) == n_float_res);
1935 /* pop them virtually */
1937 return NO_NODE_ADDED;
1941 * Simulate a be_Perm.
1943 * @param state the x87 state
1944 * @param irn the node that should be simulated (and patched)
1946 * @return NO_NODE_ADDED
1948 static int sim_Perm(x87_state *state, ir_node *irn)
/* Simulates a be_Perm on vfp values: no real instruction is needed on
 * x87 — only the simulated stack state is permuted to match the Projs. */
1951 ir_node *pred = get_irn_n(irn, 0);
1954 /* handle only floating point Perms */
1955 if (! mode_is_float(get_irn_mode(pred)))
1956 return NO_NODE_ADDED;
1958 DB((dbg, LEVEL_1, ">>> %+F\n", irn));
1960 /* Perm is a pure virtual instruction on x87.
1961 All inputs must be on the FPU stack and are pairwise
1962 different from each other.
1963 So, all we need to do is to permute the stack state. */
1964 n = get_irn_arity(irn);
1965 NEW_ARR_A(int, stack_pos, n);
1967 /* collect old stack positions */
1968 for (i = 0; i < n; ++i) {
1969 const arch_register_t *inreg = x87_get_irn_register(get_irn_n(irn, i));
1970 int idx = x87_on_stack(state, arch_register_get_index(inreg));
1972 assert(idx >= 0 && "Perm argument not on x87 stack");
/* (The stack_pos[i] = idx store is elided in this listing.) */
1976 /* now do the permutation */
1977 foreach_out_edge(irn, edge) {
1978 ir_node *proj = get_edge_src_irn(edge);
1979 const arch_register_t *out = x87_get_irn_register(proj);
1980 long num = get_Proj_proj(proj);
1982 assert(0 <= num && num < n && "More Proj's than Perm inputs");
1983 x87_set_st(state, arch_register_get_index(out), proj, stack_pos[(unsigned)num]);
1985 DB((dbg, LEVEL_1, "<<< %+F\n", irn));
1987 return NO_NODE_ADDED;
1991 * Kill any dead registers at block start by popping them from the stack.
1993 * @param sim the simulator handle
1994 * @param block the current block
1995 * @param start_state the x87 state at the begin of the block
1997 * @return the x87 state after dead register killed
1999 static x87_state *x87_kill_deads(x87_simulator *sim, ir_node *block, x87_state *start_state)
/* At block entry, pop every stack slot whose virtual register is not
 * live into the block. Builds a bitmask of dead slots, then pops them
 * from TOS down, using fxch to bring dead values up and double-pops
 * where two adjacent dead slots allow it. When everything is dead and
 * FEMMS/EMMS is available, the whole stack is cleared at once.
 * Returns the (possibly cloned) state after the kills. */
2001 x87_state *state = start_state;
2002 ir_node *first_insn = sched_first(block);
2003 ir_node *keep = NULL;
2004 unsigned live = vfp_live_args_after(sim, block, 0);
2006 int i, depth, num_pop;
/* Build kill_mask: bit i set <=> stack slot i holds a dead value. */
2009 depth = x87_get_depth(state);
2010 for (i = depth - 1; i >= 0; --i) {
2011 int reg = x87_get_st_reg(state, i);
2013 if (! is_vfp_live(reg, live))
2014 kill_mask |= (1 << i);
2018 /* create a new state, will be changed */
2019 state = x87_clone_state(sim, state);
2021 DB((dbg, LEVEL_1, "Killing deads:\n"));
2022 DEBUG_ONLY(vfp_dump_live(live);)
2023 DEBUG_ONLY(x87_dump_stack(state);)
2025 if (kill_mask != 0 && live == 0) {
2026 /* special case: kill all registers */
2027 if (ia32_cg_config.use_femms || ia32_cg_config.use_emms) {
2028 if (ia32_cg_config.use_femms) {
2029 /* use FEMMS on AMD processors to clear all */
2030 keep = new_bd_ia32_femms(NULL, block);
2032 /* use EMMS to clear all */
2033 keep = new_bd_ia32_emms(NULL, block);
2035 sched_add_before(first_insn, keep);
2041 /* now kill registers */
/* NOTE(review): the surrounding loop header (likely `while (kill_mask)`)
 * is elided in this listing — confirm against the full source. */
2043 /* we can only kill from TOS, so bring them up */
2044 if (! (kill_mask & 1)) {
2045 /* search from behind, because we can do a double-pop */
2046 for (i = depth - 1; i >= 0; --i) {
2047 if (kill_mask & (1 << i)) {
2048 kill_mask &= ~(1 << i);
/* Swap a dead value up to TOS; -1 marks the slot as holding a dead value. */
2055 x87_set_st(state, -1, keep, i);
2056 x87_create_fxch(state, first_insn, i);
2059 if ((kill_mask & 3) == 3) {
2060 /* we can do a double-pop */
2064 /* only a single pop */
2069 kill_mask >>= num_pop;
2070 keep = x87_create_fpop(state, first_insn, num_pop);
2078 * Run a simulation and fix all virtual instructions for a block.
2080 * @param sim the simulator handle
2081 * @param block the current block
2083 static void x87_simulate_block(x87_simulator *sim, ir_node *block)
/* Simulates one block: kills dead registers at entry, walks the schedule
 * invoking the per-opcode sim_func registered on each op, then propagates
 * the resulting state to each successor — either installing it as the
 * successor's begin state (and queueing the successor) or emitting a
 * shuffle sequence to match an already-fixed begin state. */
2086 blk_state *bl_state = x87_get_bl_state(sim, block);
2087 x87_state *state = bl_state->begin;
2088 ir_node *start_block;
2090 assert(state != NULL);
2091 /* already processed? */
2092 if (bl_state->end != NULL)
2095 DB((dbg, LEVEL_1, "Simulate %+F\n", block));
2096 DB((dbg, LEVEL_2, "State at Block begin:\n "));
2097 DEBUG_ONLY(x87_dump_stack(state);)
2099 /* at block begin, kill all dead registers */
2100 state = x87_kill_deads(sim, block, state);
2101 /* create a new state, will be changed */
2102 state = x87_clone_state(sim, state);
2104 /* beware, n might change */
2105 for (n = sched_first(block); !sched_is_end(n); n = next) {
2108 ir_op *op = get_irn_op(n);
2111 * get the next node to be simulated here.
2112 * n might be completely removed from the schedule.
2114 next = sched_next(n);
/* Only ops with a registered simulator function need patching. */
2115 if (op->ops.generic != NULL) {
2116 func = (sim_func)op->ops.generic;
2119 node_inserted = (*func)(state, n);
2122 * sim_func might have added an additional node after n,
2123 * so update next node
2124 * beware: n must not be changed by sim_func
2125 * (i.e. removed from schedule) in this case
2127 if (node_inserted != NO_NODE_ADDED)
2128 next = sched_next(n);
2132 start_block = get_irg_start_block(get_irn_irg(block));
2134 DB((dbg, LEVEL_2, "State at Block end:\n ")); DEBUG_ONLY(x87_dump_stack(state);)
2136 /* check if the state must be shuffled */
2137 foreach_block_succ(block, edge) {
2138 ir_node *succ = get_edge_src_irn(edge);
2139 blk_state *succ_state;
/* The start block never takes an x87 state (the `continue` is elided). */
2141 if (succ == start_block)
2144 succ_state = x87_get_bl_state(sim, succ);
2146 if (succ_state->begin == NULL) {
2147 DB((dbg, LEVEL_2, "Set begin state for succ %+F:\n", succ));
2148 DEBUG_ONLY(x87_dump_stack(state);)
2149 succ_state->begin = state;
2151 waitq_put(sim->worklist, succ);
2153 DB((dbg, LEVEL_2, "succ %+F already has a state, shuffling\n", succ));
2154 /* There is already a begin state for the successor, bad.
2155 Do the necessary permutations.
2156 Note that critical edges are removed, so this is always possible:
2157 If the successor has more than one possible input, then it must
2160 x87_shuffle(block, state, succ_state->begin);
2163 bl_state->end = state;
2167 * Register a simulator function.
2169 * @param op the opcode to simulate
2170 * @param func the simulator function for the opcode
2172 static void register_sim(ir_op *op, sim_func func)
/* Store the simulator callback in the op's generic function slot; each
 * op may be registered at most once per run. */
2174 assert(op->ops.generic == NULL);
2175 op->ops.generic = (op_func) func;
2179 * Create a new x87 simulator.
2181 * @param sim a simulator handle, will be initialized
2182 * @param irg the current graph
2184 static void x87_init_simulator(x87_simulator *sim, ir_graph *irg)
/* Initialize the simulator: allocate its obstack, block-state map and
 * liveness cache, then register a sim_func for every virtual x87 op and
 * the relevant backend (be_*) ops. */
2186 obstack_init(&sim->obst);
2187 sim->blk_states = pmap_create();
2188 sim->n_idx = get_irg_last_idx(irg);
2189 sim->live = OALLOCN(&sim->obst, vfp_liveness, sim->n_idx);
2191 DB((dbg, LEVEL_1, "--------------------------------\n"
2192 "x87 Simulator started for %+F\n", irg));
2194 /* set the generic function pointer of instruction we must simulate */
2195 ir_clear_opcodes_generic_func();
/* Virtual x87 instructions. */
2197 register_sim(op_ia32_Asm, sim_Asm);
2198 register_sim(op_ia32_Call, sim_Call);
2199 register_sim(op_ia32_vfld, sim_fld);
2200 register_sim(op_ia32_vfild, sim_fild);
2201 register_sim(op_ia32_vfld1, sim_fld1);
2202 register_sim(op_ia32_vfldz, sim_fldz);
2203 register_sim(op_ia32_vfadd, sim_fadd);
2204 register_sim(op_ia32_vfsub, sim_fsub);
2205 register_sim(op_ia32_vfmul, sim_fmul);
2206 register_sim(op_ia32_vfdiv, sim_fdiv);
2207 register_sim(op_ia32_vfprem, sim_fprem);
2208 register_sim(op_ia32_vfabs, sim_fabs);
2209 register_sim(op_ia32_vfchs, sim_fchs);
2210 register_sim(op_ia32_vfist, sim_fist);
2211 register_sim(op_ia32_vfisttp, sim_fisttp);
2212 register_sim(op_ia32_vfst, sim_fst);
2213 register_sim(op_ia32_vFtstFnstsw, sim_FtstFnstsw);
2214 register_sim(op_ia32_vFucomFnstsw, sim_Fucom);
2215 register_sim(op_ia32_vFucomi, sim_Fucom);
/* Backend nodes that interact with the x87 stack. */
2216 register_sim(op_be_Copy, sim_Copy);
2217 register_sim(op_be_Return, sim_Return);
2218 register_sim(op_be_Perm, sim_Perm);
2219 register_sim(op_be_Keep, sim_Keep);
2223 * Destroy a x87 simulator.
2225 * @param sim the simulator handle
2227 static void x87_destroy_simulator(x87_simulator *sim)
2229 pmap_destroy(sim->blk_states);
2230 obstack_free(&sim->obst, NULL);
2231 DB((dbg, LEVEL_1, "x87 Simulator stopped\n\n"));
2235 * Pre-block walker: calculate the liveness information for the block
2236 * and store it into the sim->live cache.
2238 static void update_liveness_walker(ir_node *block, void *data)
/* Block-walker callback: precompute and cache the liveness info for this
 * block in the simulator (data is the x87_simulator handle). */
2240 x87_simulator *sim = (x87_simulator*)data;
2241 update_liveness(sim, block);
2245 * Run a simulation and fix all virtual instructions for a graph.
2246 * Replaces all virtual floating point instructions and registers
2249 void ia32_x87_simulate_graph(ir_graph *irg)
/* Entry point: run the x87 simulation over the whole graph. Initializes
 * the simulator, seeds the start block with an empty stack state,
 * precomputes liveness for every block, then processes the worklist of
 * blocks until no begin state remains unvisited. */
2251 /* TODO improve code quality (less executed fxch) by using execfreqs */
2253 ir_node *block, *start_block;
2254 blk_state *bl_state;
2257 /* create the simulator */
2258 x87_init_simulator(&sim, irg);
2260 start_block = get_irg_start_block(irg);
2261 bl_state = x87_get_bl_state(&sim, start_block);
2263 /* start with the empty state */
2264 bl_state->begin = empty;
/* Seed the worklist with the start block. */
2267 sim.worklist = new_waitq();
2268 waitq_put(sim.worklist, start_block);
2270 be_assure_live_sets(irg);
2271 sim.lv = be_get_irg_liveness(irg);
2273 /* Calculate the liveness for all nodes. We must precalculate this info,
2274 * because the simulator adds new nodes (possible before Phi nodes) which
2275 * would let a lazy calculation fail.
2276 * On the other hand we reduce the computation amount due to
2277 * precaching from O(n^2) to O(n) at the expense of O(n) cache memory.
2279 irg_block_walk_graph(irg, update_liveness_walker, NULL, &sim);
/* Drain the worklist (the `do {` opener is elided in this listing). */
2283 block = (ir_node*)waitq_get(sim.worklist);
2284 x87_simulate_block(&sim, block);
2285 } while (! waitq_empty(sim.worklist));
/* cleanup */
2288 del_waitq(sim.worklist);
2289 x87_destroy_simulator(&sim);
2292 /* Initializes the x87 simulator. */
2293 void ia32_init_x87(void)
2295 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.x87");