/*
 * Copyright (C) 1995-2010 University of Karlsruhe.  All right reserved.
 *
 * This file is part of libFirm.
 *
 * This file may be distributed and/or modified under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation and appearing in the file LICENSE.GPL included in the
 * packaging of this file.
 *
 * Licensees holding valid libFirm Professional Edition licenses may use
 * this file in accordance with the libFirm Commercial License.
 * Agreement provided with the Software.
 *
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */

/**
 * @file
 * @brief       This file implements the x87 support and virtual to stack
 *              register translation for the ia32 backend.
 * @author      Michael Beck
 */
#include "iredges_t.h"

#include "bearch_ia32_t.h"
#include "ia32_new_nodes.h"
#include "gen_ia32_new_nodes.h"
#include "gen_ia32_regalloc_if.h"
#include "ia32_architecture.h"
55 #define MASK_TOS(x) ((x) & (N_ia32_st_REGS - 1))
57 /** the debug handle */
58 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
60 /* Forward declaration. */
61 typedef struct x87_simulator x87_simulator;
64 * An exchange template.
65 * Note that our virtual functions have the same inputs
66 * and attributes as the real ones, so we can simple exchange
68 * Further, x87 supports inverse instructions, so we can handle them.
70 typedef struct exchange_tmpl {
71 ir_op *normal_op; /**< the normal one */
72 ir_op *reverse_op; /**< the reverse one if exists */
73 ir_op *normal_pop_op; /**< the normal one with tos pop */
74 ir_op *reverse_pop_op; /**< the reverse one with tos pop */
78 * An entry on the simulated x87 stack.
80 typedef struct st_entry {
81 int reg_idx; /**< the virtual register index of this stack value */
82 ir_node *node; /**< the node that produced this value */
88 typedef struct x87_state {
89 st_entry st[N_ia32_st_REGS]; /**< the register stack */
90 int depth; /**< the current stack depth */
91 int tos; /**< position of the tos */
92 x87_simulator *sim; /**< The simulator. */
95 /** An empty state, used for blocks without fp instructions. */
96 static x87_state _empty = { { {0, NULL}, }, 0, 0, NULL };
97 static x87_state *empty = (x87_state *)&_empty;
/**
 * Return values of the instruction simulator functions.
 */
enum {
	NO_NODE_ADDED = 0, /**< No node that needs simulation was added. */
	NODE_ADDED    = 1  /**< A node that must be simulated was added by the simulator
	                        in the schedule AFTER the current node. */
};
109 * The type of an instruction simulator function.
111 * @param state the x87 state
112 * @param n the node to be simulated
114 * @return NODE_ADDED if a node was added AFTER n in schedule that MUST be
116 * NO_NODE_ADDED otherwise
118 typedef int (*sim_func)(x87_state *state, ir_node *n);
121 * A block state: Every block has a x87 state at the beginning and at the end.
123 typedef struct blk_state {
124 x87_state *begin; /**< state at the begin or NULL if not assigned */
125 x87_state *end; /**< state at the end or NULL if not assigned */
128 #define PTR_TO_BLKSTATE(p) ((blk_state *)(p))
130 /** liveness bitset for vfp registers. */
131 typedef unsigned char vfp_liveness;
136 struct x87_simulator {
137 struct obstack obst; /**< An obstack for fast allocating. */
138 pmap *blk_states; /**< Map blocks to states. */
139 be_lv_t *lv; /**< intrablock liveness. */
140 vfp_liveness *live; /**< Liveness information. */
141 unsigned n_idx; /**< The cached get_irg_last_idx() result. */
142 waitq *worklist; /**< Worklist of blocks that must be processed. */
143 ia32_isa_t *isa; /**< the ISA object */
147 * Returns the current stack depth.
149 * @param state the x87 state
151 * @return the x87 stack depth
153 static int x87_get_depth(const x87_state *state)
159 * Return the virtual register index at st(pos).
161 * @param state the x87 state
162 * @param pos a stack position
164 * @return the vfp register index that produced the value at st(pos)
166 static int x87_get_st_reg(const x87_state *state, int pos)
168 assert(pos < state->depth);
169 return state->st[MASK_TOS(state->tos + pos)].reg_idx;
174 * Return the node at st(pos).
176 * @param state the x87 state
177 * @param pos a stack position
179 * @return the IR node that produced the value at st(pos)
181 static ir_node *x87_get_st_node(const x87_state *state, int pos)
183 assert(pos < state->depth);
184 return state->st[MASK_TOS(state->tos + pos)].node;
#ifdef DEBUG_libfirm
/**
 * Dump the stack for debugging.
 *
 * @param state  the x87 state
 */
static void x87_dump_stack(const x87_state *state)
{
	int i;

	/* print from the deepest slot up to the top of stack */
	for (i = state->depth - 1; i >= 0; --i) {
		DB((dbg, LEVEL_2, "vf%d(%+F) ", x87_get_st_reg(state, i),
		    x87_get_st_node(state, i)));
	}
	DB((dbg, LEVEL_2, "<-- TOS\n"));
}
#endif /* DEBUG_libfirm */
205 * Set a virtual register to st(pos).
207 * @param state the x87 state
208 * @param reg_idx the vfp register index that should be set
209 * @param node the IR node that produces the value of the vfp register
210 * @param pos the stack position where the new value should be entered
212 static void x87_set_st(x87_state *state, int reg_idx, ir_node *node, int pos)
214 assert(0 < state->depth);
215 state->st[MASK_TOS(state->tos + pos)].reg_idx = reg_idx;
216 state->st[MASK_TOS(state->tos + pos)].node = node;
218 DB((dbg, LEVEL_2, "After SET_REG: "));
219 DEBUG_ONLY(x87_dump_stack(state);)
223 * Set the tos virtual register.
225 * @param state the x87 state
226 * @param reg_idx the vfp register index that should be set
227 * @param node the IR node that produces the value of the vfp register
229 static void x87_set_tos(x87_state *state, int reg_idx, ir_node *node)
231 x87_set_st(state, reg_idx, node, 0);
235 * Swap st(0) with st(pos).
237 * @param state the x87 state
238 * @param pos the stack position to change the tos with
240 static void x87_fxch(x87_state *state, int pos)
243 assert(pos < state->depth);
245 entry = state->st[MASK_TOS(state->tos + pos)];
246 state->st[MASK_TOS(state->tos + pos)] = state->st[MASK_TOS(state->tos)];
247 state->st[MASK_TOS(state->tos)] = entry;
249 DB((dbg, LEVEL_2, "After FXCH: "));
250 DEBUG_ONLY(x87_dump_stack(state);)
254 * Convert a virtual register to the stack index.
256 * @param state the x87 state
257 * @param reg_idx the register vfp index
259 * @return the stack position where the register is stacked
260 * or -1 if the virtual register was not found
262 static int x87_on_stack(const x87_state *state, int reg_idx)
264 int i, tos = state->tos;
266 for (i = 0; i < state->depth; ++i)
267 if (state->st[MASK_TOS(tos + i)].reg_idx == reg_idx)
273 * Push a virtual Register onto the stack, double pushed allowed.
275 * @param state the x87 state
276 * @param reg_idx the register vfp index
277 * @param node the node that produces the value of the vfp register
279 static void x87_push_dbl(x87_state *state, int reg_idx, ir_node *node)
281 assert(state->depth < N_ia32_st_REGS && "stack overrun");
284 state->tos = MASK_TOS(state->tos - 1);
285 state->st[state->tos].reg_idx = reg_idx;
286 state->st[state->tos].node = node;
288 DB((dbg, LEVEL_2, "After PUSH: ")); DEBUG_ONLY(x87_dump_stack(state);)
292 * Push a virtual Register onto the stack, double pushes are NOT allowed.
294 * @param state the x87 state
295 * @param reg_idx the register vfp index
296 * @param node the node that produces the value of the vfp register
297 * @param dbl_push if != 0 double pushes are allowed
299 static void x87_push(x87_state *state, int reg_idx, ir_node *node)
301 assert(x87_on_stack(state, reg_idx) == -1 && "double push");
303 x87_push_dbl(state, reg_idx, node);
307 * Pop a virtual Register from the stack.
309 * @param state the x87 state
311 static void x87_pop(x87_state *state)
313 assert(state->depth > 0 && "stack underrun");
316 state->tos = MASK_TOS(state->tos + 1);
318 DB((dbg, LEVEL_2, "After POP: ")); DEBUG_ONLY(x87_dump_stack(state);)
322 * Empty the fpu stack
324 * @param state the x87 state
326 static void x87_emms(x87_state *state)
333 * Returns the block state of a block.
335 * @param sim the x87 simulator handle
336 * @param block the current block
338 * @return the block state
340 static blk_state *x87_get_bl_state(x87_simulator *sim, ir_node *block)
342 pmap_entry *entry = pmap_find(sim->blk_states, block);
345 blk_state *bl_state = OALLOC(&sim->obst, blk_state);
346 bl_state->begin = NULL;
347 bl_state->end = NULL;
349 pmap_insert(sim->blk_states, block, bl_state);
353 return PTR_TO_BLKSTATE(entry->value);
357 * Creates a new x87 state.
359 * @param sim the x87 simulator handle
361 * @return a new x87 state
363 static x87_state *x87_alloc_state(x87_simulator *sim)
365 x87_state *res = OALLOC(&sim->obst, x87_state);
374 * @param sim the x87 simulator handle
375 * @param src the x87 state that will be cloned
377 * @return a cloned copy of the src state
379 static x87_state *x87_clone_state(x87_simulator *sim, const x87_state *src)
381 x87_state *res = x87_alloc_state(sim);
388 * Patch a virtual instruction into a x87 one and return
389 * the node representing the result value.
391 * @param n the IR node to patch
392 * @param op the x87 opcode to patch in
394 static ir_node *x87_patch_insn(ir_node *n, ir_op *op)
396 ir_mode *mode = get_irn_mode(n);
401 if (mode == mode_T) {
402 /* patch all Proj's */
403 const ir_edge_t *edge;
405 foreach_out_edge(n, edge) {
406 ir_node *proj = get_edge_src_irn(edge);
408 mode = get_irn_mode(proj);
409 if (mode_is_float(mode)) {
411 set_irn_mode(proj, ia32_reg_classes[CLASS_ia32_st].mode);
415 } else if (mode_is_float(mode))
416 set_irn_mode(n, ia32_reg_classes[CLASS_ia32_st].mode);
421 * Returns the first Proj of a mode_T node having a given mode.
423 * @param n the mode_T node
424 * @param m the desired mode of the Proj
425 * @return The first Proj of mode @p m found or NULL.
427 static ir_node *get_irn_Proj_for_mode(ir_node *n, ir_mode *m)
429 const ir_edge_t *edge;
431 assert(get_irn_mode(n) == mode_T && "Need mode_T node");
433 foreach_out_edge(n, edge) {
434 ir_node *proj = get_edge_src_irn(edge);
435 if (get_irn_mode(proj) == m)
443 * Wrap the arch_* function here so we can check for errors.
445 static inline const arch_register_t *x87_get_irn_register(const ir_node *irn)
447 const arch_register_t *res = arch_get_irn_register(irn);
449 assert(res->reg_class == &ia32_reg_classes[CLASS_ia32_vfp]);
453 static inline const arch_register_t *x87_irn_get_register(const ir_node *irn,
456 const arch_register_t *res = arch_get_irn_register_out(irn, pos);
458 assert(res->reg_class == &ia32_reg_classes[CLASS_ia32_vfp]);
462 static inline const arch_register_t *get_st_reg(int index)
464 return &ia32_registers[REG_ST0 + index];
467 /* -------------- x87 perm --------------- */
470 * Creates a fxch for shuffle.
472 * @param state the x87 state
473 * @param pos parameter for fxch
474 * @param block the block were fxch is inserted
476 * Creates a new fxch node and reroute the user of the old node
479 * @return the fxch node
481 static ir_node *x87_fxch_shuffle(x87_state *state, int pos, ir_node *block)
484 ia32_x87_attr_t *attr;
486 fxch = new_bd_ia32_fxch(NULL, block);
487 attr = get_ia32_x87_attr(fxch);
488 attr->x87[0] = get_st_reg(pos);
489 attr->x87[2] = get_st_reg(0);
493 x87_fxch(state, pos);
498 * Calculate the necessary permutations to reach dst_state.
500 * These permutations are done with fxch instructions and placed
501 * at the end of the block.
503 * Note that critical edges are removed here, so we need only
504 * a shuffle if the current block has only one successor.
506 * @param sim the simulator handle
507 * @param block the current block
508 * @param state the current x87 stack state, might be modified
509 * @param dst_block the destination block
510 * @param dst_state destination state
514 static x87_state *x87_shuffle(x87_simulator *sim, ir_node *block,
515 x87_state *state, ir_node *dst_block,
516 const x87_state *dst_state)
518 int i, n_cycles, k, ri;
519 unsigned cycles[4], all_mask;
520 char cycle_idx[4][8];
521 ir_node *fxch, *before, *after;
525 assert(state->depth == dst_state->depth);
527 /* Some mathematics here:
528 If we have a cycle of length n that includes the tos,
529 we need n-1 exchange operations.
530 We can always add the tos and restore it, so we need
531 n+1 exchange operations for a cycle not containing the tos.
532 So, the maximum of needed operations is for a cycle of 7
533 not including the tos == 8.
534 This is the same number of ops we would need for using stores,
535 so exchange is cheaper (we save the loads).
536 On the other hand, we might need an additional exchange
537 in the next block to bring one operand on top, so the
538 number of ops in the first case is identical.
539 Further, no more than 4 cycles can exists (4 x 2).
541 all_mask = (1 << (state->depth)) - 1;
543 for (n_cycles = 0; all_mask; ++n_cycles) {
544 int src_idx, dst_idx;
546 /* find the first free slot */
547 for (i = 0; i < state->depth; ++i) {
548 if (all_mask & (1 << i)) {
549 all_mask &= ~(1 << i);
551 /* check if there are differences here */
552 if (x87_get_st_reg(state, i) != x87_get_st_reg(dst_state, i))
558 /* no more cycles found */
563 cycles[n_cycles] = (1 << i);
564 cycle_idx[n_cycles][k++] = i;
565 for (src_idx = i; ; src_idx = dst_idx) {
566 dst_idx = x87_on_stack(dst_state, x87_get_st_reg(state, src_idx));
568 if ((all_mask & (1 << dst_idx)) == 0)
571 cycle_idx[n_cycles][k++] = dst_idx;
572 cycles[n_cycles] |= (1 << dst_idx);
573 all_mask &= ~(1 << dst_idx);
575 cycle_idx[n_cycles][k] = -1;
579 /* no permutation needed */
583 /* Hmm: permutation needed */
584 DB((dbg, LEVEL_2, "\n%+F needs permutation: from\n", block));
585 DEBUG_ONLY(x87_dump_stack(state);)
586 DB((dbg, LEVEL_2, " to\n"));
587 DEBUG_ONLY(x87_dump_stack(dst_state);)
591 DB((dbg, LEVEL_2, "Need %d cycles\n", n_cycles));
592 for (ri = 0; ri < n_cycles; ++ri) {
593 DB((dbg, LEVEL_2, " Ring %d:\n ", ri));
594 for (k = 0; cycle_idx[ri][k] != -1; ++k)
595 DB((dbg, LEVEL_2, " st%d ->", cycle_idx[ri][k]));
596 DB((dbg, LEVEL_2, "\n"));
603 * Find the place node must be insert.
604 * We have only one successor block, so the last instruction should
607 before = sched_last(block);
608 assert(is_cfop(before));
610 /* now do the permutations */
611 for (ri = 0; ri < n_cycles; ++ri) {
612 if ((cycles[ri] & 1) == 0) {
613 /* this cycle does not include the tos */
614 fxch = x87_fxch_shuffle(state, cycle_idx[ri][0], block);
616 sched_add_after(after, fxch);
618 sched_add_before(before, fxch);
621 for (k = 1; cycle_idx[ri][k] != -1; ++k) {
622 fxch = x87_fxch_shuffle(state, cycle_idx[ri][k], block);
624 sched_add_after(after, fxch);
626 sched_add_before(before, fxch);
629 if ((cycles[ri] & 1) == 0) {
630 /* this cycle does not include the tos */
631 fxch = x87_fxch_shuffle(state, cycle_idx[ri][0], block);
632 sched_add_after(after, fxch);
639 * Create a fxch node before another node.
641 * @param state the x87 state
642 * @param n the node after the fxch
643 * @param pos exchange st(pos) with st(0)
647 static ir_node *x87_create_fxch(x87_state *state, ir_node *n, int pos)
650 ia32_x87_attr_t *attr;
651 ir_node *block = get_nodes_block(n);
653 x87_fxch(state, pos);
655 fxch = new_bd_ia32_fxch(NULL, block);
656 attr = get_ia32_x87_attr(fxch);
657 attr->x87[0] = get_st_reg(pos);
658 attr->x87[2] = get_st_reg(0);
662 sched_add_before(n, fxch);
663 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fxch), attr->x87[0]->name, attr->x87[2]->name));
668 * Create a fpush before node n.
670 * @param state the x87 state
671 * @param n the node after the fpush
672 * @param pos push st(pos) on stack
673 * @param op_idx replace input op_idx of n with the fpush result
675 static void x87_create_fpush(x87_state *state, ir_node *n, int pos, int op_idx)
677 ir_node *fpush, *pred = get_irn_n(n, op_idx);
678 ia32_x87_attr_t *attr;
679 const arch_register_t *out = x87_get_irn_register(pred);
681 x87_push_dbl(state, arch_register_get_index(out), pred);
683 fpush = new_bd_ia32_fpush(NULL, get_nodes_block(n));
684 attr = get_ia32_x87_attr(fpush);
685 attr->x87[0] = get_st_reg(pos);
686 attr->x87[2] = get_st_reg(0);
689 sched_add_before(n, fpush);
691 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fpush), attr->x87[0]->name, attr->x87[2]->name));
695 * Create a fpop before node n.
697 * @param state the x87 state
698 * @param n the node after the fpop
699 * @param num pop 1 or 2 values
701 * @return the fpop node
703 static ir_node *x87_create_fpop(x87_state *state, ir_node *n, int num)
705 ir_node *fpop = NULL;
706 ia32_x87_attr_t *attr;
711 if (ia32_cg_config.use_ffreep)
712 fpop = new_bd_ia32_ffreep(NULL, get_nodes_block(n));
714 fpop = new_bd_ia32_fpop(NULL, get_nodes_block(n));
715 attr = get_ia32_x87_attr(fpop);
716 attr->x87[0] = get_st_reg(0);
717 attr->x87[1] = get_st_reg(0);
718 attr->x87[2] = get_st_reg(0);
721 sched_add_before(n, fpop);
722 DB((dbg, LEVEL_1, "<<< %s %s\n", get_irn_opname(fpop), attr->x87[0]->name));
727 /* --------------------------------- liveness ------------------------------------------ */
730 * The liveness transfer function.
731 * Updates a live set over a single step from a given node to its predecessor.
732 * Everything defined at the node is removed from the set, the uses of the node get inserted.
734 * @param irn The node at which liveness should be computed.
735 * @param live The bitset of registers live before @p irn. This set gets modified by updating it to
736 * the registers live after irn.
738 * @return The live bitset.
740 static vfp_liveness vfp_liveness_transfer(ir_node *irn, vfp_liveness live)
743 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
745 if (get_irn_mode(irn) == mode_T) {
746 const ir_edge_t *edge;
748 foreach_out_edge(irn, edge) {
749 ir_node *proj = get_edge_src_irn(edge);
751 if (arch_irn_consider_in_reg_alloc(cls, proj)) {
752 const arch_register_t *reg = x87_get_irn_register(proj);
753 live &= ~(1 << arch_register_get_index(reg));
756 } else if (arch_irn_consider_in_reg_alloc(cls, irn)) {
757 const arch_register_t *reg = x87_get_irn_register(irn);
758 live &= ~(1 << arch_register_get_index(reg));
761 for (i = 0, n = get_irn_arity(irn); i < n; ++i) {
762 ir_node *op = get_irn_n(irn, i);
764 if (mode_is_float(get_irn_mode(op)) &&
765 arch_irn_consider_in_reg_alloc(cls, op)) {
766 const arch_register_t *reg = x87_get_irn_register(op);
767 live |= 1 << arch_register_get_index(reg);
774 * Put all live virtual registers at the end of a block into a bitset.
776 * @param sim the simulator handle
777 * @param lv the liveness information
778 * @param bl the block
780 * @return The live bitset at the end of this block
782 static vfp_liveness vfp_liveness_end_of_block(x87_simulator *sim, const ir_node *block)
785 vfp_liveness live = 0;
786 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
787 const be_lv_t *lv = sim->lv;
789 be_lv_foreach(lv, block, be_lv_state_end, i) {
790 const arch_register_t *reg;
791 const ir_node *node = be_lv_get_irn(lv, block, i);
792 if (!arch_irn_consider_in_reg_alloc(cls, node))
795 reg = x87_get_irn_register(node);
796 live |= 1 << arch_register_get_index(reg);
802 /** get the register mask from an arch_register */
803 #define REGMASK(reg) (1 << (arch_register_get_index(reg)))
806 * Return a bitset of argument registers which are live at the end of a node.
808 * @param sim the simulator handle
809 * @param pos the node
810 * @param kill kill mask for the output registers
812 * @return The live bitset.
814 static unsigned vfp_live_args_after(x87_simulator *sim, const ir_node *pos, unsigned kill)
816 unsigned idx = get_irn_idx(pos);
818 assert(idx < sim->n_idx);
819 return sim->live[idx] & ~kill;
823 * Calculate the liveness for a whole block and cache it.
825 * @param sim the simulator handle
826 * @param lv the liveness handle
827 * @param block the block
829 static void update_liveness(x87_simulator *sim, ir_node *block)
831 vfp_liveness live = vfp_liveness_end_of_block(sim, block);
835 /* now iterate through the block backward and cache the results */
836 sched_foreach_reverse(block, irn) {
837 /* stop at the first Phi: this produces the live-in */
841 idx = get_irn_idx(irn);
842 sim->live[idx] = live;
844 live = vfp_liveness_transfer(irn, live);
846 idx = get_irn_idx(block);
847 sim->live[idx] = live;
/**
 * Returns true if a register is live in a set.
 *
 * @param reg_idx  the vfp register index
 * @param live     a live bitset
 */
#define is_vfp_live(reg_idx, live) ((live) & (1 << (reg_idx)))

#ifdef DEBUG_libfirm
/**
 * Dump liveness info.
 *
 * @param live  the live bitset
 */
static void vfp_dump_live(vfp_liveness live)
{
	int i;

	DB((dbg, LEVEL_2, "Live after: "));
	for (i = 0; i < 8; ++i) {
		if (live & (1 << i)) {
			DB((dbg, LEVEL_2, "vf%d ", i));
		}
	}
	DB((dbg, LEVEL_2, "\n"));
}
#endif /* DEBUG_libfirm */
878 /* --------------------------------- simulators ---------------------------------------- */
881 * Simulate a virtual binop.
883 * @param state the x87 state
884 * @param n the node that should be simulated (and patched)
885 * @param tmpl the template containing the 4 possible x87 opcodes
887 * @return NO_NODE_ADDED
889 static int sim_binop(x87_state *state, ir_node *n, const exchange_tmpl *tmpl)
891 int op2_idx = 0, op1_idx;
892 int out_idx, do_pop = 0;
893 ia32_x87_attr_t *attr;
895 ir_node *patched_insn;
897 x87_simulator *sim = state->sim;
898 ir_node *op1 = get_irn_n(n, n_ia32_binary_left);
899 ir_node *op2 = get_irn_n(n, n_ia32_binary_right);
900 const arch_register_t *op1_reg = x87_get_irn_register(op1);
901 const arch_register_t *op2_reg = x87_get_irn_register(op2);
902 const arch_register_t *out = x87_irn_get_register(n, pn_ia32_res);
903 int reg_index_1 = arch_register_get_index(op1_reg);
904 int reg_index_2 = arch_register_get_index(op2_reg);
905 vfp_liveness live = vfp_live_args_after(sim, n, REGMASK(out));
909 DB((dbg, LEVEL_1, ">>> %+F %s, %s -> %s\n", n,
910 arch_register_get_name(op1_reg), arch_register_get_name(op2_reg),
911 arch_register_get_name(out)));
912 DEBUG_ONLY(vfp_dump_live(live);)
913 DB((dbg, LEVEL_1, "Stack before: "));
914 DEBUG_ONLY(x87_dump_stack(state);)
916 op1_idx = x87_on_stack(state, reg_index_1);
917 assert(op1_idx >= 0);
918 op1_live_after = is_vfp_live(reg_index_1, live);
920 attr = get_ia32_x87_attr(n);
921 permuted = attr->attr.data.ins_permuted;
923 if (reg_index_2 != REG_VFP_VFP_NOREG) {
926 /* second operand is a vfp register */
927 op2_idx = x87_on_stack(state, reg_index_2);
928 assert(op2_idx >= 0);
929 op2_live_after = is_vfp_live(reg_index_2, live);
931 if (op2_live_after) {
932 /* Second operand is live. */
934 if (op1_live_after) {
935 /* Both operands are live: push the first one.
936 This works even for op1 == op2. */
937 x87_create_fpush(state, n, op1_idx, n_ia32_binary_right);
938 /* now do fxxx (tos=tos X op) */
942 dst = tmpl->normal_op;
944 /* Second live, first operand is dead here, bring it to tos. */
946 x87_create_fxch(state, n, op1_idx);
951 /* now do fxxx (tos=tos X op) */
953 dst = tmpl->normal_op;
956 /* Second operand is dead. */
957 if (op1_live_after) {
958 /* First operand is live: bring second to tos. */
960 x87_create_fxch(state, n, op2_idx);
965 /* now do fxxxr (tos = op X tos) */
967 dst = tmpl->reverse_op;
969 /* Both operands are dead here, pop them from the stack. */
972 /* Both are identically and on tos, no pop needed. */
973 /* here fxxx (tos = tos X tos) */
974 dst = tmpl->normal_op;
977 /* now do fxxxp (op = op X tos, pop) */
978 dst = tmpl->normal_pop_op;
982 } else if (op1_idx == 0) {
983 assert(op1_idx != op2_idx);
984 /* now do fxxxrp (op = tos X op, pop) */
985 dst = tmpl->reverse_pop_op;
989 /* Bring the second on top. */
990 x87_create_fxch(state, n, op2_idx);
991 if (op1_idx == op2_idx) {
992 /* Both are identically and on tos now, no pop needed. */
995 /* use fxxx (tos = tos X tos) */
996 dst = tmpl->normal_op;
999 /* op2 is on tos now */
1001 /* use fxxxp (op = op X tos, pop) */
1002 dst = tmpl->normal_pop_op;
1010 /* second operand is an address mode */
1011 if (op1_live_after) {
1012 /* first operand is live: push it here */
1013 x87_create_fpush(state, n, op1_idx, n_ia32_binary_left);
1016 /* first operand is dead: bring it to tos */
1018 x87_create_fxch(state, n, op1_idx);
1023 /* use fxxx (tos = tos X mem) */
1024 dst = permuted ? tmpl->reverse_op : tmpl->normal_op;
1028 patched_insn = x87_patch_insn(n, dst);
1029 x87_set_st(state, arch_register_get_index(out), patched_insn, out_idx);
1034 /* patch the operation */
1035 attr->x87[0] = op1_reg = get_st_reg(op1_idx);
1036 if (reg_index_2 != REG_VFP_VFP_NOREG) {
1037 attr->x87[1] = op2_reg = get_st_reg(op2_idx);
1039 attr->x87[2] = out = get_st_reg(out_idx);
1041 if (reg_index_2 != REG_VFP_VFP_NOREG) {
1042 DB((dbg, LEVEL_1, "<<< %s %s, %s -> %s\n", get_irn_opname(n),
1043 arch_register_get_name(op1_reg), arch_register_get_name(op2_reg),
1044 arch_register_get_name(out)));
1046 DB((dbg, LEVEL_1, "<<< %s %s, [AM] -> %s\n", get_irn_opname(n),
1047 arch_register_get_name(op1_reg),
1048 arch_register_get_name(out)));
1051 return NO_NODE_ADDED;
1055 * Simulate a virtual Unop.
1057 * @param state the x87 state
1058 * @param n the node that should be simulated (and patched)
1059 * @param op the x87 opcode that will replace n's opcode
1061 * @return NO_NODE_ADDED
1063 static int sim_unop(x87_state *state, ir_node *n, ir_op *op)
1066 x87_simulator *sim = state->sim;
1067 const arch_register_t *op1 = x87_get_irn_register(get_irn_n(n, 0));
1068 const arch_register_t *out = x87_get_irn_register(n);
1069 ia32_x87_attr_t *attr;
1070 unsigned live = vfp_live_args_after(sim, n, REGMASK(out));
1072 DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, out->name));
1073 DEBUG_ONLY(vfp_dump_live(live);)
1075 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1077 if (is_vfp_live(arch_register_get_index(op1), live)) {
1078 /* push the operand here */
1079 x87_create_fpush(state, n, op1_idx, 0);
1083 /* operand is dead, bring it to tos */
1085 x87_create_fxch(state, n, op1_idx);
1090 x87_set_tos(state, arch_register_get_index(out), x87_patch_insn(n, op));
1091 attr = get_ia32_x87_attr(n);
1092 attr->x87[0] = op1 = get_st_reg(0);
1093 attr->x87[2] = out = get_st_reg(0);
1094 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), out->name));
1096 return NO_NODE_ADDED;
1100 * Simulate a virtual Load instruction.
1102 * @param state the x87 state
1103 * @param n the node that should be simulated (and patched)
1104 * @param op the x87 opcode that will replace n's opcode
1106 * @return NO_NODE_ADDED
1108 static int sim_load(x87_state *state, ir_node *n, ir_op *op, int res_pos)
1110 const arch_register_t *out = x87_irn_get_register(n, res_pos);
1111 ia32_x87_attr_t *attr;
1113 DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, arch_register_get_name(out)));
1114 x87_push(state, arch_register_get_index(out), x87_patch_insn(n, op));
1115 assert(out == x87_irn_get_register(n, res_pos));
1116 attr = get_ia32_x87_attr(n);
1117 attr->x87[2] = out = get_st_reg(0);
1118 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), arch_register_get_name(out)));
1120 return NO_NODE_ADDED;
1124 * Rewire all users of @p old_val to @new_val iff they are scheduled after @p store.
1126 * @param store The store
1127 * @param old_val The former value
1128 * @param new_val The new value
1130 static void collect_and_rewire_users(ir_node *store, ir_node *old_val, ir_node *new_val)
1132 const ir_edge_t *edge, *ne;
1134 foreach_out_edge_safe(old_val, edge, ne) {
1135 ir_node *user = get_edge_src_irn(edge);
1137 if (! user || user == store)
1140 /* if the user is scheduled after the store: rewire */
1141 if (sched_is_scheduled(user) && sched_comes_after(store, user)) {
1143 /* find the input of the user pointing to the old value */
1144 for (i = get_irn_arity(user) - 1; i >= 0; i--) {
1145 if (get_irn_n(user, i) == old_val)
1146 set_irn_n(user, i, new_val);
1153 * Simulate a virtual Store.
1155 * @param state the x87 state
1156 * @param n the node that should be simulated (and patched)
1157 * @param op the x87 store opcode
1158 * @param op_p the x87 store and pop opcode
1160 static int sim_store(x87_state *state, ir_node *n, ir_op *op, ir_op *op_p)
1162 ir_node *val = get_irn_n(n, n_ia32_vfst_val);
1163 const arch_register_t *op2 = x87_get_irn_register(val);
1164 unsigned live = vfp_live_args_after(state->sim, n, 0);
1165 int insn = NO_NODE_ADDED;
1166 ia32_x87_attr_t *attr;
1167 int op2_reg_idx, op2_idx, depth;
1168 int live_after_node;
1171 op2_reg_idx = arch_register_get_index(op2);
1172 op2_idx = x87_on_stack(state, op2_reg_idx);
1173 live_after_node = is_vfp_live(arch_register_get_index(op2), live);
1174 DB((dbg, LEVEL_1, ">>> %+F %s ->\n", n, arch_register_get_name(op2)));
1175 assert(op2_idx >= 0);
1177 mode = get_ia32_ls_mode(n);
1178 depth = x87_get_depth(state);
1180 if (live_after_node) {
1182 Problem: fst doesn't support 96bit modes (spills), only fstp does
1183 fist doesn't support 64bit mode, only fistp
1185 - stack not full: push value and fstp
1186 - stack full: fstp value and load again
1187 Note that we cannot test on mode_E, because floats might be 96bit ...
1189 if (get_mode_size_bits(mode) > 64 || (mode_is_int(mode) && get_mode_size_bits(mode) > 32)) {
1190 if (depth < N_ia32_st_REGS) {
1191 /* ok, we have a free register: push + fstp */
1192 x87_create_fpush(state, n, op2_idx, n_ia32_vfst_val);
1194 x87_patch_insn(n, op_p);
1196 ir_node *vfld, *mem, *block, *rproj, *mproj;
1197 ir_graph *irg = get_irn_irg(n);
1198 ir_node *nomem = get_irg_no_mem(irg);
1200 /* stack full here: need fstp + load */
1202 x87_patch_insn(n, op_p);
1204 block = get_nodes_block(n);
1205 vfld = new_bd_ia32_vfld(NULL, block, get_irn_n(n, 0), get_irn_n(n, 1), nomem, get_ia32_ls_mode(n));
1207 /* copy all attributes */
1208 set_ia32_frame_ent(vfld, get_ia32_frame_ent(n));
1209 if (is_ia32_use_frame(n))
1210 set_ia32_use_frame(vfld);
1211 set_ia32_op_type(vfld, ia32_AddrModeS);
1212 add_ia32_am_offs_int(vfld, get_ia32_am_offs_int(n));
1213 set_ia32_am_sc(vfld, get_ia32_am_sc(n));
1214 set_ia32_ls_mode(vfld, get_ia32_ls_mode(n));
1216 rproj = new_r_Proj(vfld, get_ia32_ls_mode(vfld), pn_ia32_vfld_res);
1217 mproj = new_r_Proj(vfld, mode_M, pn_ia32_vfld_M);
1218 mem = get_irn_Proj_for_mode(n, mode_M);
1220 assert(mem && "Store memory not found");
1222 arch_set_irn_register(rproj, op2);
1224 /* reroute all former users of the store memory to the load memory */
1225 edges_reroute(mem, mproj);
1226 /* set the memory input of the load to the store memory */
1227 set_irn_n(vfld, n_ia32_vfld_mem, mem);
1229 sched_add_after(n, vfld);
1230 sched_add_after(vfld, rproj);
1232 /* rewire all users, scheduled after the store, to the loaded value */
1233 collect_and_rewire_users(n, val, rproj);
1238 /* we can only store the tos to memory */
1240 x87_create_fxch(state, n, op2_idx);
1242 /* mode size 64 or smaller -> use normal fst */
1243 x87_patch_insn(n, op);
1246 /* we can only store the tos to memory */
1248 x87_create_fxch(state, n, op2_idx);
1251 x87_patch_insn(n, op_p);
1254 attr = get_ia32_x87_attr(n);
1255 attr->x87[1] = op2 = get_st_reg(0);
1256 DB((dbg, LEVEL_1, "<<< %s %s ->\n", get_irn_opname(n), arch_register_get_name(op2)));
/* Generators for the trivial simulator functions: each one expands to a
 * sim_func that forwards to the generic sim_* implementation with the
 * matching x87 opcode(s). */
#define _GEN_BINOP(op, rev) \
static int sim_##op(x87_state *state, ir_node *n) { \
	exchange_tmpl tmpl = { op_ia32_##op, op_ia32_##rev, op_ia32_##op##p, op_ia32_##rev##p }; \
	return sim_binop(state, n, &tmpl); \
}

#define GEN_BINOP(op)   _GEN_BINOP(op, op)
#define GEN_BINOPR(op)  _GEN_BINOP(op, op##r)

#define GEN_LOAD(op) \
static int sim_##op(x87_state *state, ir_node *n) { \
	return sim_load(state, n, op_ia32_##op, pn_ia32_v##op##_res); \
}

#define GEN_UNOP(op) \
static int sim_##op(x87_state *state, ir_node *n) { \
	return sim_unop(state, n, op_ia32_##op); \
}

#define GEN_STORE(op) \
static int sim_##op(x87_state *state, ir_node *n) { \
	return sim_store(state, n, op_ia32_##op, op_ia32_##op##p); \
}
/* NOTE(review): several body lines (braces, the guard before the fxch) are
 * elided from this excerpt; comments below describe only what is visible. */
1304 * Simulate a virtual fisttp.
1306 * @param state the x87 state
1307 * @param n the node that should be simulated (and patched)
1309 * @return NO_NODE_ADDED
1311 static int sim_fisttp(x87_state *state, ir_node *n)
1313 ir_node *val = get_irn_n(n, n_ia32_vfst_val);
1314 const arch_register_t *op2 = x87_get_irn_register(val);
1315 ia32_x87_attr_t *attr;
1316 int op2_reg_idx, op2_idx;
/* locate the stored value on the simulated fp stack */
1318 op2_reg_idx = arch_register_get_index(op2);
1319 op2_idx = x87_on_stack(state, op2_reg_idx);
1320 DB((dbg, LEVEL_1, ">>> %+F %s ->\n", n, arch_register_get_name(op2)));
1321 assert(op2_idx >= 0);
1323 /* Note: although the value is still live here, it is destroyed because
1324 of the pop. The register allocator is aware of that and introduced a copy
1325 if the value must be alive. */
1327 /* we can only store the tos to memory */
/* bring the value to tos (presumably guarded by op2_idx != 0 on an
 * elided line — TODO confirm) */
1329 x87_create_fxch(state, n, op2_idx);
/* rewrite the virtual node into the real fisttp opcode */
1332 x87_patch_insn(n, op_ia32_fisttp);
/* record st(0) as the emitted x87 operand */
1334 attr = get_ia32_x87_attr(n);
1335 attr->x87[1] = op2 = get_st_reg(0);
1336 DB((dbg, LEVEL_1, "<<< %s %s ->\n", get_irn_opname(n), arch_register_get_name(op2)));
1338 return NO_NODE_ADDED;
/* NOTE(review): braces and the guard before the fxch are elided from this
 * excerpt; comments describe only the visible statements. */
1342 * Simulate a virtual FtstFnstsw.
1344 * @param state the x87 state
1345 * @param n the node that should be simulated (and patched)
1347 * @return NO_NODE_ADDED
1349 static int sim_FtstFnstsw(x87_state *state, ir_node *n)
1351 x87_simulator *sim = state->sim;
1352 ia32_x87_attr_t *attr = get_ia32_x87_attr(n);
1353 ir_node *op1_node = get_irn_n(n, n_ia32_vFtstFnstsw_left);
1354 const arch_register_t *reg1 = x87_get_irn_register(op1_node);
1355 int reg_index_1 = arch_register_get_index(reg1);
1356 int op1_idx = x87_on_stack(state, reg_index_1);
/* liveness of vfp registers after this node, used to decide on a pop */
1357 unsigned live = vfp_live_args_after(sim, n, 0);
1359 DB((dbg, LEVEL_1, ">>> %+F %s\n", n, arch_register_get_name(reg1)));
1360 DEBUG_ONLY(vfp_dump_live(live);)
1361 DB((dbg, LEVEL_1, "Stack before: "));
1362 DEBUG_ONLY(x87_dump_stack(state);)
1363 assert(op1_idx >= 0);
1366 /* bring the value to tos */
1367 x87_create_fxch(state, n, op1_idx);
1371 /* patch the operation */
1372 x87_patch_insn(n, op_ia32_FtstFnstsw);
1373 reg1 = get_st_reg(op1_idx);
1374 attr->x87[0] = reg1;
1375 attr->x87[1] = NULL;
1376 attr->x87[2] = NULL;
/* if the tested value is dead afterwards, discard it from the stack */
1378 if (!is_vfp_live(reg_index_1, live))
1379 x87_create_fpop(state, sched_next(n), 1);
1381 return NO_NODE_ADDED;
/* Simulate a virtual Fucom/Fucomi compare.
 * The x87 compare can only operate with st(0) as one operand, so this
 * routine inserts fxch nodes to move operands into place, decides how many
 * implicit/explicit pops are needed depending on operand liveness, and
 * finally patches the virtual node into the matching real opcode.
 * NOTE(review): this function is heavily elided in this excerpt — many
 * braces, else-branches and the pops/dst declarations are on missing
 * lines; the comments below annotate only the visible skeleton. */
1387 * @param state the x87 state
1388 * @param n the node that should be simulated (and patched)
1390 * @return NO_NODE_ADDED
1392 static int sim_Fucom(x87_state *state, ir_node *n)
1396 ia32_x87_attr_t *attr = get_ia32_x87_attr(n);
1398 x87_simulator *sim = state->sim;
1399 ir_node *op1_node = get_irn_n(n, n_ia32_vFucomFnstsw_left);
1400 ir_node *op2_node = get_irn_n(n, n_ia32_vFucomFnstsw_right);
1401 const arch_register_t *op1 = x87_get_irn_register(op1_node);
1402 const arch_register_t *op2 = x87_get_irn_register(op2_node);
1403 int reg_index_1 = arch_register_get_index(op1);
1404 int reg_index_2 = arch_register_get_index(op2);
1405 unsigned live = vfp_live_args_after(sim, n, 0);
/* tracks whether operand order ends up swapped relative to the source */
1406 bool permuted = attr->attr.data.ins_permuted;
1410 DB((dbg, LEVEL_1, ">>> %+F %s, %s\n", n,
1411 arch_register_get_name(op1), arch_register_get_name(op2)));
1412 DEBUG_ONLY(vfp_dump_live(live);)
1413 DB((dbg, LEVEL_1, "Stack before: "));
1414 DEBUG_ONLY(x87_dump_stack(state);)
1416 op1_idx = x87_on_stack(state, reg_index_1);
1417 assert(op1_idx >= 0);
1419 /* BEWARE: check for comp a,a cases, they might happen */
1420 if (reg_index_2 != REG_VFP_VFP_NOREG) {
1421 /* second operand is a vfp register */
1422 op2_idx = x87_on_stack(state, reg_index_2);
1423 assert(op2_idx >= 0);
1425 if (is_vfp_live(reg_index_2, live)) {
1426 /* second operand is live */
1428 if (is_vfp_live(reg_index_1, live)) {
1429 /* both operands are live */
1432 /* res = tos X op */
1433 } else if (op2_idx == 0) {
1434 /* res = op X tos */
/* compare reads tos as the right operand -> flip the predicate */
1435 permuted = !permuted;
1438 /* bring the first one to tos */
1439 x87_create_fxch(state, n, op1_idx);
1440 if (op1_idx == op2_idx) {
1442 } else if (op2_idx == 0) {
1446 /* res = tos X op */
1449 /* second live, first operand is dead here, bring it to tos.
1450 This means further, op1_idx != op2_idx. */
1451 assert(op1_idx != op2_idx);
1453 x87_create_fxch(state, n, op1_idx);
1458 /* res = tos X op, pop */
1462 /* second operand is dead */
1463 if (is_vfp_live(reg_index_1, live)) {
1464 /* first operand is live: bring second to tos.
1465 This means further, op1_idx != op2_idx. */
1466 assert(op1_idx != op2_idx);
1468 x87_create_fxch(state, n, op2_idx);
1473 /* res = op X tos, pop */
1475 permuted = !permuted;
1478 /* both operands are dead here, check first for identity. */
1479 if (op1_idx == op2_idx) {
1480 /* identically, one pop needed */
1482 x87_create_fxch(state, n, op1_idx);
1486 /* res = tos X op, pop */
1489 /* different, move them to st and st(1) and pop both.
1490 The tricky part is to get one into st(1).*/
1491 else if (op2_idx == 1) {
1492 /* good, second operand is already in the right place, move the first */
1494 /* bring the first on top */
1495 x87_create_fxch(state, n, op1_idx);
1496 assert(op2_idx != 0);
1499 /* res = tos X op, pop, pop */
1501 } else if (op1_idx == 1) {
1502 /* good, first operand is already in the right place, move the second */
1504 /* bring the first on top */
1505 x87_create_fxch(state, n, op2_idx);
1506 assert(op1_idx != 0);
1509 /* res = op X tos, pop, pop */
1510 permuted = !permuted;
1514 /* if one is already the TOS, we need two fxch */
1516 /* first one is TOS, move to st(1) */
1517 x87_create_fxch(state, n, 1);
1518 assert(op2_idx != 1);
1520 x87_create_fxch(state, n, op2_idx);
1522 /* res = op X tos, pop, pop */
1524 permuted = !permuted;
1526 } else if (op2_idx == 0) {
1527 /* second one is TOS, move to st(1) */
1528 x87_create_fxch(state, n, 1);
1529 assert(op1_idx != 1);
1531 x87_create_fxch(state, n, op1_idx);
1533 /* res = tos X op, pop, pop */
1536 /* none of them is either TOS or st(1), 3 fxch needed */
1537 x87_create_fxch(state, n, op2_idx);
1538 assert(op1_idx != 0);
1539 x87_create_fxch(state, n, 1);
1541 x87_create_fxch(state, n, op1_idx);
1543 /* res = tos X op, pop, pop */
1550 /* second operand is an address mode */
1551 if (is_vfp_live(reg_index_1, live)) {
1552 /* first operand is live: bring it to TOS */
1554 x87_create_fxch(state, n, op1_idx);
1558 /* first operand is dead: bring it to tos */
1560 x87_create_fxch(state, n, op1_idx);
/* select the concrete opcode by node kind and required pop count
 * (pops is presumably computed on elided lines — TODO confirm) */
1567 /* patch the operation */
1568 if (is_ia32_vFucomFnstsw(n)) {
1572 case 0: dst = op_ia32_FucomFnstsw; break;
1573 case 1: dst = op_ia32_FucompFnstsw; break;
1574 case 2: dst = op_ia32_FucomppFnstsw; break;
1575 default: panic("invalid popcount in sim_Fucom");
1578 for (i = 0; i < pops; ++i) {
1581 } else if (is_ia32_vFucomi(n)) {
1583 case 0: dst = op_ia32_Fucomi; break;
1584 case 1: dst = op_ia32_Fucompi; x87_pop(state); break;
/* two pops: Fucompi pops once implicitly, add an explicit fpop */
1586 dst = op_ia32_Fucompi;
1588 x87_create_fpop(state, sched_next(n), 1);
1590 default: panic("invalid popcount in sim_Fucom");
1593 panic("invalid operation %+F in sim_FucomFnstsw", n);
1596 x87_patch_insn(n, dst);
/* fill in the final st() operand names for the emitter */
1603 op1 = get_st_reg(op1_idx);
1606 op2 = get_st_reg(op2_idx);
1609 attr->x87[2] = NULL;
1610 attr->attr.data.ins_permuted = permuted;
1613 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(n),
1614 arch_register_get_name(op1), arch_register_get_name(op2)));
1616 DB((dbg, LEVEL_1, "<<< %s %s, [AM]\n", get_irn_opname(n),
1617 arch_register_get_name(op1)));
1620 return NO_NODE_ADDED;
/* Simulate a be_Keep: for every kept vfp operand that is still on the
 * simulated stack but no longer live, schedule an fpop to discard it.
 * NOTE(review): braces and some declarations are elided in this excerpt. */
1626 * @param state the x87 state
1627 * @param n the node that should be simulated (and patched)
1629 * @return NO_NODE_ADDED
1631 static int sim_Keep(x87_state *state, ir_node *node)
1634 const arch_register_t *op_reg;
1640 DB((dbg, LEVEL_1, ">>> %+F\n", node));
1642 arity = get_irn_arity(node);
1643 for (i = 0; i < arity; ++i) {
1644 op = get_irn_n(node, i);
1645 op_reg = arch_get_irn_register(op);
/* only x87 (vfp) operands are of interest here */
1646 if (arch_register_get_class(op_reg) != &ia32_reg_classes[CLASS_ia32_vfp])
1649 reg_id = arch_register_get_index(op_reg);
1650 live = vfp_live_args_after(state->sim, node, 0);
/* dead but still on the fp stack -> pop it right after the Keep */
1652 op_stack_idx = x87_on_stack(state, reg_id);
1653 if (op_stack_idx >= 0 && !is_vfp_live(reg_id, live))
1654 x87_create_fpop(state, sched_next(node), 1);
1657 DB((dbg, LEVEL_1, "Stack after: "));
1658 DEBUG_ONLY(x87_dump_stack(state);)
1660 return NO_NODE_ADDED;
/* Attach a be_Keep directly after the node so it cannot become dead code. */
1664 * Keep the given node alive by adding a be_Keep.
1666 * @param node the node to kept alive
1668 static void keep_float_node_alive(ir_node *node)
1670 ir_node *block = get_nodes_block(node);
1671 ir_node *keep = be_new_Keep(block, 1, &node);
/* the node must already be scheduled so the Keep can follow it */
1673 assert(sched_is_scheduled(node));
1674 sched_add_after(node, keep);
/* NOTE(review): case labels/breaks of the switch and some braces are on
 * lines elided from this excerpt. */
1678 * Create a copy of a node. Recreate the node if it's a constant.
1680 * @param state the x87 state
1681 * @param n the node to be copied
1683 * @return the copy of n
1685 static ir_node *create_Copy(x87_state *state, ir_node *n)
1687 dbg_info *n_dbg = get_irn_dbg_info(n);
1688 ir_mode *mode = get_irn_mode(n);
1689 ir_node *block = get_nodes_block(n);
1690 ir_node *pred = get_irn_n(n, 0);
/* constructor to rebuild a constant-load; stays NULL for non-constants */
1691 ir_node *(*cnstr)(dbg_info *, ir_node *, ir_mode *) = NULL;
1693 const arch_register_t *out;
1694 const arch_register_t *op1;
1695 ia32_x87_attr_t *attr;
1697 /* Do not copy constants, recreate them. */
1698 switch (get_ia32_irn_opcode(pred)) {
1700 cnstr = new_bd_ia32_fldz;
1703 cnstr = new_bd_ia32_fld1;
1705 case iro_ia32_fldpi:
1706 cnstr = new_bd_ia32_fldpi;
1708 case iro_ia32_fldl2e:
1709 cnstr = new_bd_ia32_fldl2e;
1711 case iro_ia32_fldl2t:
1712 cnstr = new_bd_ia32_fldl2t;
1714 case iro_ia32_fldlg2:
1715 cnstr = new_bd_ia32_fldlg2;
1717 case iro_ia32_fldln2:
1718 cnstr = new_bd_ia32_fldln2;
1724 out = x87_get_irn_register(n);
1725 op1 = x87_get_irn_register(pred);
1727 if (cnstr != NULL) {
1728 /* copy a constant */
1729 res = (*cnstr)(n_dbg, block, mode);
/* the recreated constant is pushed as the new tos */
1731 x87_push(state, arch_register_get_index(out), res);
1733 attr = get_ia32_x87_attr(res);
1734 attr->x87[2] = get_st_reg(0);
/* non-constant: duplicate the stack slot with an fpushCopy */
1736 int op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1738 res = new_bd_ia32_fpushCopy(n_dbg, block, pred, mode);
1740 x87_push(state, arch_register_get_index(out), res);
1742 attr = get_ia32_x87_attr(res);
1743 attr->x87[0] = get_st_reg(op1_idx);
1744 attr->x87[2] = get_st_reg(0);
1746 arch_set_irn_register(res, out);
/* NOTE(review): several braces, else-lines and declarations of this
 * function are elided from this excerpt. */
1752 * Simulate a be_Copy.
1754 * @param state the x87 state
1755 * @param n the node that should be simulated (and patched)
1757 * @return NO_NODE_ADDED
1759 static int sim_Copy(x87_state *state, ir_node *n)
1762 const arch_register_t *out;
1763 const arch_register_t *op1;
1764 const arch_register_class_t *cls;
1765 ir_node *node, *next;
1766 int op1_idx, out_idx;
/* copies of non-x87 registers are not our business */
1769 cls = arch_get_irn_reg_class(n);
1770 if (cls != &ia32_reg_classes[CLASS_ia32_vfp])
1773 pred = get_irn_n(n, 0);
1774 out = x87_get_irn_register(n);
1775 op1 = x87_get_irn_register(pred);
1776 live = vfp_live_args_after(state->sim, n, REGMASK(out));
1778 DB((dbg, LEVEL_1, ">>> %+F %s -> %s\n", n,
1779 arch_register_get_name(op1), arch_register_get_name(out)));
1780 DEBUG_ONLY(vfp_dump_live(live);)
1782 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1784 if (is_vfp_live(arch_register_get_index(op1), live)) {
1785 /* Operand is still live, a real copy. We need here an fpush that can
1786 hold a a register, so use the fpushCopy or recreate constants */
1787 node = create_Copy(state, n);
1789 /* We have to make sure the old value doesn't go dead (which can happen
1790 * when we recreate constants). As the simulator expected that value in
1791 * the pred blocks. This is unfortunate as removing it would save us 1
1792 * instruction, but we would have to rerun all the simulation to get
1795 next = sched_next(n);
1798 sched_add_before(next, node);
/* recreated constant may have left pred without users -> keep it alive */
1800 if (get_irn_n_edges(pred) == 0) {
1801 keep_float_node_alive(pred);
1804 DB((dbg, LEVEL_1, "<<< %+F %s -> ?\n", node, op1->name));
1806 out_idx = x87_on_stack(state, arch_register_get_index(out));
1808 if (out_idx >= 0 && out_idx != op1_idx) {
1809 /* Matze: out already on stack? how can this happen? */
1810 panic("invalid stack state in x87 simulator");
1813 /* op1 must be killed and placed where out is */
1815 ia32_x87_attr_t *attr;
1816 /* best case, simple remove and rename */
1817 x87_patch_insn(n, op_ia32_Pop);
1818 attr = get_ia32_x87_attr(n);
1819 attr->x87[0] = op1 = get_st_reg(0);
/* the pop shifts everything up by one slot */
1822 x87_set_st(state, arch_register_get_index(out), n, op1_idx - 1);
1824 ia32_x87_attr_t *attr;
1825 /* move op1 to tos, store and pop it */
1827 x87_create_fxch(state, n, op1_idx);
1830 x87_patch_insn(n, op_ia32_Pop);
1831 attr = get_ia32_x87_attr(n);
1832 attr->x87[0] = op1 = get_st_reg(out_idx);
1835 x87_set_st(state, arch_register_get_index(out), n, out_idx - 1);
1837 DB((dbg, LEVEL_1, "<<< %+F %s\n", n, op1->name));
1840 /* just a virtual copy */
1841 x87_set_st(state, arch_register_get_index(out), get_unop_op(n), op1_idx);
1842 /* don't remove the node to keep the verifier quiet :),
1843 the emitter won't emit any code for the node */
1846 DB((dbg, LEVEL_1, "<<< KILLED %s\n", get_irn_opname(n)));
1847 exchange(n, get_unop_op(n));
1851 return NO_NODE_ADDED;
/* Scan the Call's out-edges for the Proj with number pn_ia32_Call_vf0.
 * NOTE(review): the return statements are on lines elided here. */
1855 * Returns the vf0 result Proj of a Call.
1857 * @para call the Call node
1859 static ir_node *get_call_result_proj(ir_node *call)
1861 const ir_edge_t *edge;
1863 /* search the result proj */
1864 foreach_out_edge(call, edge) {
1865 ir_node *proj = get_edge_src_irn(edge);
1866 long pn = get_Proj_proj(proj);
1868 if (pn == pn_ia32_Call_vf0)
/* NOTE(review): braces, early-return lines and some declarations are
 * elided from this excerpt. */
1876 * Simulate a ia32_Call.
1878 * @param state the x87 state
1879 * @param n the node that should be simulated (and patched)
1881 * @return NO_NODE_ADDED
1883 static int sim_Call(x87_state *state, ir_node *n)
1885 ir_type *call_tp = get_ia32_call_attr_const(n)->call_tp;
1889 const arch_register_t *reg;
1891 DB((dbg, LEVEL_1, ">>> %+F\n", n));
1893 /* at the begin of a call the x87 state should be empty */
1894 assert(state->depth == 0 && "stack not empty before call");
/* no result -> nothing to model */
1896 if (get_method_n_ress(call_tp) <= 0)
1900 * If the called function returns a float, it is returned in st(0).
1901 * This even happens if the return value is NOT used.
1902 * Moreover, only one return result is supported.
1904 res_type = get_method_res_type(call_tp, 0);
1905 mode = get_type_mode(res_type);
1907 if (mode == NULL || !mode_is_float(mode))
1910 resproj = get_call_result_proj(n);
1911 assert(resproj != NULL);
/* model the ABI: the float result appears as the new st(0) */
1913 reg = x87_get_irn_register(resproj);
1914 x87_push(state, arch_register_get_index(reg), resproj);
1917 DB((dbg, LEVEL_1, "Stack after: "));
1918 DEBUG_ONLY(x87_dump_stack(state);)
1920 return NO_NODE_ADDED;
/* NOTE(review): braces and the x87_pop call in the final loop are on
 * elided lines. */
1924 * Simulate a be_Return.
1926 * @param state the x87 state
1927 * @param n the node that should be simulated (and patched)
1929 * @return NO_NODE_ADDED
1931 static int sim_Return(x87_state *state, ir_node *n)
1933 int n_res = be_Return_get_n_rets(n);
1934 int i, n_float_res = 0;
1936 /* only floating point return values must reside on stack */
1937 for (i = 0; i < n_res; ++i) {
1938 ir_node *res = get_irn_n(n, n_be_Return_val + i);
1940 if (mode_is_float(get_irn_mode(res)))
/* the ABI guarantees exactly the float results remain on the stack */
1943 assert(x87_get_depth(state) == n_float_res);
1945 /* pop them virtually */
1946 for (i = n_float_res - 1; i >= 0; --i)
1949 return NO_NODE_ADDED;
/* in/out register pair describing one lane of a Perm.
 * NOTE(review): the closing "} perm_data_t;" line is elided here. */
1952 typedef struct perm_data_t {
1953 const arch_register_t *in;
1954 const arch_register_t *out;
/* NOTE(review): braces, early return and the stack_pos[] store are on
 * elided lines. */
1958 * Simulate a be_Perm.
1960 * @param state the x87 state
1961 * @param irn the node that should be simulated (and patched)
1963 * @return NO_NODE_ADDED
1965 static int sim_Perm(x87_state *state, ir_node *irn)
1968 ir_node *pred = get_irn_n(irn, 0);
1970 const ir_edge_t *edge;
1972 /* handle only floating point Perms */
1973 if (! mode_is_float(get_irn_mode(pred)))
1974 return NO_NODE_ADDED;
1976 DB((dbg, LEVEL_1, ">>> %+F\n", irn));
1978 /* Perm is a pure virtual instruction on x87.
1979 All inputs must be on the FPU stack and are pairwise
1980 different from each other.
1981 So, all we need to do is to permutate the stack state. */
1982 n = get_irn_arity(irn);
/* alloca-style array on the stack for the old stack positions */
1983 NEW_ARR_A(int, stack_pos, n);
1985 /* collect old stack positions */
1986 for (i = 0; i < n; ++i) {
1987 const arch_register_t *inreg = x87_get_irn_register(get_irn_n(irn, i));
1988 int idx = x87_on_stack(state, arch_register_get_index(inreg));
1990 assert(idx >= 0 && "Perm argument not on x87 stack");
1994 /* now do the permutation */
1995 foreach_out_edge(irn, edge) {
1996 ir_node *proj = get_edge_src_irn(edge);
1997 const arch_register_t *out = x87_get_irn_register(proj);
1998 long num = get_Proj_proj(proj);
2000 assert(0 <= num && num < n && "More Proj's than Perm inputs");
/* rebind the old slot of input num to the output register */
2001 x87_set_st(state, arch_register_get_index(out), proj, stack_pos[(unsigned)num]);
2003 DB((dbg, LEVEL_1, "<<< %+F\n", irn));
2005 return NO_NODE_ADDED;
/* NOTE(review): many braces, the kill_mask declaration and some loop
 * bodies of this function are on elided lines. */
2009 * Kill any dead registers at block start by popping them from the stack.
2011 * @param sim the simulator handle
2012 * @param block the current block
2013 * @param start_state the x87 state at the begin of the block
2015 * @return the x87 state after dead register killed
2017 static x87_state *x87_kill_deads(x87_simulator *sim, ir_node *block, x87_state *start_state)
2019 x87_state *state = start_state;
2020 ir_node *first_insn = sched_first(block);
2021 ir_node *keep = NULL;
2022 unsigned live = vfp_live_args_after(sim, block, 0);
2024 int i, depth, num_pop;
/* build a bitmask of dead stack slots (bit i == slot i is dead) */
2027 depth = x87_get_depth(state);
2028 for (i = depth - 1; i >= 0; --i) {
2029 int reg = x87_get_st_reg(state, i);
2031 if (! is_vfp_live(reg, live))
2032 kill_mask |= (1 << i);
2036 /* create a new state, will be changed */
2037 state = x87_clone_state(sim, state);
2039 DB((dbg, LEVEL_1, "Killing deads:\n"));
2040 DEBUG_ONLY(vfp_dump_live(live);)
2041 DEBUG_ONLY(x87_dump_stack(state);)
2043 if (kill_mask != 0 && live == 0) {
2044 /* special case: kill all registers */
2045 if (ia32_cg_config.use_femms || ia32_cg_config.use_emms) {
2046 if (ia32_cg_config.use_femms) {
2047 /* use FEMMS on AMD processors to clear all */
2048 keep = new_bd_ia32_femms(NULL, block);
2050 /* use EMMS to clear all */
2051 keep = new_bd_ia32_emms(NULL, block);
2053 sched_add_before(first_insn, keep);
2059 /* now kill registers */
2061 /* we can only kill from TOS, so bring them up */
2062 if (! (kill_mask & 1)) {
2063 /* search from behind, because we can to a double-pop */
2064 for (i = depth - 1; i >= 0; --i) {
2065 if (kill_mask & (1 << i)) {
2066 kill_mask &= ~(1 << i);
/* exchange a dead slot to tos so it can be popped */
2073 x87_set_st(state, -1, keep, i);
2074 x87_create_fxch(state, first_insn, i);
2077 if ((kill_mask & 3) == 3) {
2078 /* we can do a double-pop */
2082 /* only a single pop */
2087 kill_mask >>= num_pop;
2088 keep = x87_create_fpop(state, first_insn, num_pop);
/* NOTE(review): several braces, declarations (next, node_inserted, func)
 * and blank lines are elided from this excerpt. */
2096 * Run a simulation and fix all virtual instructions for a block.
2098 * @param sim the simulator handle
2099 * @param block the current block
2101 static void x87_simulate_block(x87_simulator *sim, ir_node *block)
2104 blk_state *bl_state = x87_get_bl_state(sim, block);
2105 x87_state *state = bl_state->begin;
2106 const ir_edge_t *edge;
2107 ir_node *start_block;
2109 assert(state != NULL);
2110 /* already processed? */
2111 if (bl_state->end != NULL)
2114 DB((dbg, LEVEL_1, "Simulate %+F\n", block));
2115 DB((dbg, LEVEL_2, "State at Block begin:\n "));
2116 DEBUG_ONLY(x87_dump_stack(state);)
2118 /* at block begin, kill all dead registers */
2119 state = x87_kill_deads(sim, block, state);
2120 /* create a new state, will be changed */
2121 state = x87_clone_state(sim, state);
2123 /* beware, n might change */
2124 for (n = sched_first(block); !sched_is_end(n); n = next) {
2127 ir_op *op = get_irn_op(n);
2130 * get the next node to be simulated here.
2131 * n might be completely removed from the schedule-
2133 next = sched_next(n);
/* op->ops.generic holds the sim_* callback registered in
 * x87_init_simulator; absent means nothing to simulate */
2134 if (op->ops.generic != NULL) {
2135 func = (sim_func)op->ops.generic;
2138 node_inserted = (*func)(state, n);
2141 * sim_func might have added an additional node after n,
2142 * so update next node
2143 * beware: n must not be changed by sim_func
2144 * (i.e. removed from schedule) in this case
2146 if (node_inserted != NO_NODE_ADDED)
2147 next = sched_next(n);
2151 start_block = get_irg_start_block(get_irn_irg(block));
2153 DB((dbg, LEVEL_2, "State at Block end:\n ")); DEBUG_ONLY(x87_dump_stack(state);)
2155 /* check if the state must be shuffled */
2156 foreach_block_succ(block, edge) {
2157 ir_node *succ = get_edge_src_irn(edge);
2158 blk_state *succ_state;
2160 if (succ == start_block)
2163 succ_state = x87_get_bl_state(sim, succ);
2165 if (succ_state->begin == NULL) {
/* first predecessor reaching succ fixes its entry state */
2166 DB((dbg, LEVEL_2, "Set begin state for succ %+F:\n", succ));
2167 DEBUG_ONLY(x87_dump_stack(state);)
2168 succ_state->begin = state;
2170 waitq_put(sim->worklist, succ);
2172 DB((dbg, LEVEL_2, "succ %+F already has a state, shuffling\n", succ));
2173 /* There is already a begin state for the successor, bad.
2174 Do the necessary permutations.
2175 Note that critical edges are removed, so this is always possible:
2176 If the successor has more than one possible input, then it must
2179 x87_shuffle(sim, block, state, succ, succ_state->begin);
2182 bl_state->end = state;
/* Install func as the opcode's generic-function hook; asserting it was
 * unset guards against double registration. */
2186 * Register a simulator function.
2188 * @param op the opcode to simulate
2189 * @param func the simulator function for the opcode
2191 static void register_sim(ir_op *op, sim_func func)
2193 assert(op->ops.generic == NULL);
2194 op->ops.generic = (op_func) func;
/* NOTE(review): braces are on elided lines. */
2198 * Create a new x87 simulator.
2200 * @param sim a simulator handle, will be initialized
2201 * @param irg the current graph
2203 static void x87_init_simulator(x87_simulator *sim, ir_graph *irg)
2205 obstack_init(&sim->obst);
2206 sim->blk_states = pmap_create();
/* liveness cache is indexed by node idx, hence sized by the last idx */
2207 sim->n_idx = get_irg_last_idx(irg);
2208 sim->live = OALLOCN(&sim->obst, vfp_liveness, sim->n_idx);
2210 DB((dbg, LEVEL_1, "--------------------------------\n"
2211 "x87 Simulator started for %+F\n", irg));
2213 /* set the generic function pointer of instruction we must simulate */
2214 clear_irp_opcodes_generic_func();
2216 register_sim(op_ia32_Call, sim_Call);
2217 register_sim(op_ia32_vfld, sim_fld);
2218 register_sim(op_ia32_vfild, sim_fild);
2219 register_sim(op_ia32_vfld1, sim_fld1);
2220 register_sim(op_ia32_vfldz, sim_fldz);
2221 register_sim(op_ia32_vfadd, sim_fadd);
2222 register_sim(op_ia32_vfsub, sim_fsub);
2223 register_sim(op_ia32_vfmul, sim_fmul);
2224 register_sim(op_ia32_vfdiv, sim_fdiv);
2225 register_sim(op_ia32_vfprem, sim_fprem);
2226 register_sim(op_ia32_vfabs, sim_fabs);
2227 register_sim(op_ia32_vfchs, sim_fchs);
2228 register_sim(op_ia32_vfist, sim_fist);
2229 register_sim(op_ia32_vfisttp, sim_fisttp);
2230 register_sim(op_ia32_vfst, sim_fst);
2231 register_sim(op_ia32_vFtstFnstsw, sim_FtstFnstsw);
2232 register_sim(op_ia32_vFucomFnstsw, sim_Fucom);
2233 register_sim(op_ia32_vFucomi, sim_Fucom);
2234 register_sim(op_be_Copy, sim_Copy);
2235 register_sim(op_be_Return, sim_Return);
2236 register_sim(op_be_Perm, sim_Perm);
2237 register_sim(op_be_Keep, sim_Keep);
/* Release the block-state map and the obstack holding all per-run data. */
2241 * Destroy a x87 simulator.
2243 * @param sim the simulator handle
2245 static void x87_destroy_simulator(x87_simulator *sim)
2247 pmap_destroy(sim->blk_states);
2248 obstack_free(&sim->obst, NULL);
2249 DB((dbg, LEVEL_1, "x87 Simulator stopped\n\n"));
/* irg_block_walk callback; data is the x87_simulator handle. */
2253 * Pre-block walker: calculate the liveness information for the block
2254 * and store it into the sim->live cache.
2256 static void update_liveness_walker(ir_node *block, void *data)
2258 x87_simulator *sim = (x87_simulator*)data;
2259 update_liveness(sim, block);
/* NOTE(review): braces, the x87_state empty-initialization and the
 * do-loop opener are on elided lines. */
2263 * Run a simulation and fix all virtual instructions for a graph.
2264 * Replaces all virtual floating point instructions and registers
2267 void ia32_x87_simulate_graph(ir_graph *irg)
2269 /* TODO improve code quality (less executed fxch) by using execfreqs */
2271 ir_node *block, *start_block;
2272 blk_state *bl_state;
2275 /* create the simulator */
2276 x87_init_simulator(&sim, irg);
2278 start_block = get_irg_start_block(irg);
2279 bl_state = x87_get_bl_state(&sim, start_block);
2281 /* start with the empty state */
2282 bl_state->begin = empty;
2285 sim.worklist = new_waitq();
2286 waitq_put(sim.worklist, start_block);
2288 be_assure_liveness(irg);
2289 sim.lv = be_get_irg_liveness(irg);
2290 be_liveness_assure_sets(sim.lv);
2292 /* Calculate the liveness for all nodes. We must precalculate this info,
2293 * because the simulator adds new nodes (possible before Phi nodes) which
2294 * would let a lazy calculation fail.
2295 * On the other hand we reduce the computation amount due to
2296 * precaching from O(n^2) to O(n) at the expense of O(n) cache memory.
2298 irg_block_walk_graph(irg, update_liveness_walker, NULL, &sim);
/* worklist-driven traversal: blocks are enqueued as their entry states
 * become known in x87_simulate_block */
2302 block = (ir_node*)waitq_get(sim.worklist);
2303 x87_simulate_block(&sim, block);
2304 } while (! waitq_empty(sim.worklist));
2307 del_waitq(sim.worklist);
2308 x87_destroy_simulator(&sim);
2311 /* Initializes the x87 simulator. */
2312 void ia32_init_x87(void)
2314 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.x87");