2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the x87 support and virtual to stack
23 * register translation for the ia32 backend.
24 * @author Michael Beck
36 #include "iredges_t.h"
48 #include "../belive_t.h"
49 #include "../besched_t.h"
50 #include "../benode_t.h"
51 #include "bearch_ia32_t.h"
52 #include "ia32_new_nodes.h"
53 #include "gen_ia32_new_nodes.h"
54 #include "gen_ia32_regalloc_if.h"
56 #include "ia32_architecture.h"
63 #define MASK_TOS(x) ((x) & (N_x87_REGS - 1))
65 /** the debug handle */
66 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
68 /* Forward declaration. */
69 typedef struct _x87_simulator x87_simulator;
72 * An exchange template.
73 * Note that our virtual functions have the same inputs
74 * and attributes as the real ones, so we can simple exchange
76 * Further, x87 supports inverse instructions, so we can handle them.
78 typedef struct _exchange_tmpl {
79 ir_op *normal_op; /**< the normal one */
80 ir_op *reverse_op; /**< the reverse one if exists */
81 ir_op *normal_pop_op; /**< the normal one with tos pop */
82 ir_op *reverse_pop_op; /**< the reverse one with tos pop */
86 * An entry on the simulated x87 stack.
88 typedef struct _st_entry {
89 int reg_idx; /**< the virtual register index of this stack value */
90 ir_node *node; /**< the node that produced this value */
96 typedef struct _x87_state {
97 st_entry st[N_x87_REGS]; /**< the register stack */
98 int depth; /**< the current stack depth */
99 int tos; /**< position of the tos */
100 x87_simulator *sim; /**< The simulator. */
103 /** An empty state, used for blocks without fp instructions. */
104 static x87_state _empty = { { {0, NULL}, }, 0, 0, NULL };
105 static x87_state *empty = (x87_state *)&_empty;
108 NO_NODE_ADDED = 0, /**< No node was added. */
109 NODE_ADDED = 1 /**< A node was added by the simulator in the schedule. */
113 * The type of an instruction simulator function.
115 * @param state the x87 state
116 * @param n the node to be simulated
118 * @return NODE_ADDED if a node was added AFTER n in schedule,
121 typedef int (*sim_func)(x87_state *state, ir_node *n);
124 * A block state: Every block has a x87 state at the beginning and at the end.
126 typedef struct _blk_state {
127 x87_state *begin; /**< state at the begin or NULL if not assigned */
128 x87_state *end; /**< state at the end or NULL if not assigned */
131 #define PTR_TO_BLKSTATE(p) ((blk_state *)(p))
133 /** liveness bitset for vfp registers. */
134 typedef unsigned char vfp_liveness;
139 struct _x87_simulator {
140 struct obstack obst; /**< An obstack for fast allocating. */
141 pmap *blk_states; /**< Map blocks to states. */
142 const arch_env_t *arch_env; /**< The architecture environment. */
143 be_lv_t *lv; /**< intrablock liveness. */
144 vfp_liveness *live; /**< Liveness information. */
145 unsigned n_idx; /**< The cached get_irg_last_idx() result. */
146 waitq *worklist; /**< Worklist of blocks that must be processed. */
147 ia32_isa_t *isa; /**< the ISA object */
151 * Returns the current stack depth.
153 * @param state the x87 state
155 * @return the x87 stack depth
157 static int x87_get_depth(const x87_state *state)
160 } /* x87_get_depth */
163 * Return the virtual register index at st(pos).
165 * @param state the x87 state
166 * @param pos a stack position
168 * @return the vfp register index that produced the value at st(pos)
170 static int x87_get_st_reg(const x87_state *state, int pos)
172 assert(pos < state->depth);
173 return state->st[MASK_TOS(state->tos + pos)].reg_idx;
174 } /* x87_get_st_reg */
178 * Return the node at st(pos).
180 * @param state the x87 state
181 * @param pos a stack position
183 * @return the IR node that produced the value at st(pos)
185 static ir_node *x87_get_st_node(const x87_state *state, int pos)
187 assert(pos < state->depth);
188 return state->st[MASK_TOS(state->tos + pos)].node;
189 } /* x87_get_st_node */
192 * Dump the stack for debugging.
194 * @param state the x87 state
196 static void x87_dump_stack(const x87_state *state)
200 for (i = state->depth - 1; i >= 0; --i) {
201 DB((dbg, LEVEL_2, "vf%d(%+F) ", x87_get_st_reg(state, i),
202 x87_get_st_node(state, i)));
204 DB((dbg, LEVEL_2, "<-- TOS\n"));
205 } /* x87_dump_stack */
206 #endif /* DEBUG_libfirm */
209 * Set a virtual register to st(pos).
211 * @param state the x87 state
212 * @param reg_idx the vfp register index that should be set
213 * @param node the IR node that produces the value of the vfp register
214 * @param pos the stack position where the new value should be entered
216 static void x87_set_st(x87_state *state, int reg_idx, ir_node *node, int pos)
218 assert(0 < state->depth);
219 state->st[MASK_TOS(state->tos + pos)].reg_idx = reg_idx;
220 state->st[MASK_TOS(state->tos + pos)].node = node;
222 DB((dbg, LEVEL_2, "After SET_REG: "));
223 DEBUG_ONLY(x87_dump_stack(state));
227 * Set the tos virtual register.
229 * @param state the x87 state
230 * @param reg_idx the vfp register index that should be set
231 * @param node the IR node that produces the value of the vfp register
233 static void x87_set_tos(x87_state *state, int reg_idx, ir_node *node)
235 x87_set_st(state, reg_idx, node, 0);
239 * Swap st(0) with st(pos).
241 * @param state the x87 state
242 * @param pos the stack position to change the tos with
244 static void x87_fxch(x87_state *state, int pos)
247 assert(pos < state->depth);
249 entry = state->st[MASK_TOS(state->tos + pos)];
250 state->st[MASK_TOS(state->tos + pos)] = state->st[MASK_TOS(state->tos)];
251 state->st[MASK_TOS(state->tos)] = entry;
253 DB((dbg, LEVEL_2, "After FXCH: ")); DEBUG_ONLY(x87_dump_stack(state));
257 * Convert a virtual register to the stack index.
259 * @param state the x87 state
260 * @param reg_idx the register vfp index
262 * @return the stack position where the register is stacked
263 * or -1 if the virtual register was not found
265 static int x87_on_stack(const x87_state *state, int reg_idx)
267 int i, tos = state->tos;
269 for (i = 0; i < state->depth; ++i)
270 if (state->st[MASK_TOS(tos + i)].reg_idx == reg_idx)
276 * Push a virtual Register onto the stack, double pushed allowed.
278 * @param state the x87 state
279 * @param reg_idx the register vfp index
280 * @param node the node that produces the value of the vfp register
282 static void x87_push_dbl(x87_state *state, int reg_idx, ir_node *node)
284 assert(state->depth < N_x87_REGS && "stack overrun");
287 state->tos = MASK_TOS(state->tos - 1);
288 state->st[state->tos].reg_idx = reg_idx;
289 state->st[state->tos].node = node;
291 DB((dbg, LEVEL_2, "After PUSH: ")); DEBUG_ONLY(x87_dump_stack(state));
295 * Push a virtual Register onto the stack, double pushes are NOT allowed.
297 * @param state the x87 state
298 * @param reg_idx the register vfp index
299 * @param node the node that produces the value of the vfp register
300 * @param dbl_push if != 0 double pushes are allowed
302 static void x87_push(x87_state *state, int reg_idx, ir_node *node)
304 assert(x87_on_stack(state, reg_idx) == -1 && "double push");
306 x87_push_dbl(state, reg_idx, node);
310 * Pop a virtual Register from the stack.
312 * @param state the x87 state
314 static void x87_pop(x87_state *state)
316 assert(state->depth > 0 && "stack underrun");
319 state->tos = MASK_TOS(state->tos + 1);
321 DB((dbg, LEVEL_2, "After POP: ")); DEBUG_ONLY(x87_dump_stack(state));
325 * Empty the fpu stack
327 * @param state the x87 state
329 static void x87_emms(x87_state *state)
336 * Returns the block state of a block.
338 * @param sim the x87 simulator handle
339 * @param block the current block
341 * @return the block state
343 static blk_state *x87_get_bl_state(x87_simulator *sim, ir_node *block)
345 pmap_entry *entry = pmap_find(sim->blk_states, block);
348 blk_state *bl_state = obstack_alloc(&sim->obst, sizeof(*bl_state));
349 bl_state->begin = NULL;
350 bl_state->end = NULL;
352 pmap_insert(sim->blk_states, block, bl_state);
356 return PTR_TO_BLKSTATE(entry->value);
357 } /* x87_get_bl_state */
360 * Creates a new x87 state.
362 * @param sim the x87 simulator handle
364 * @return a new x87 state
366 static x87_state *x87_alloc_state(x87_simulator *sim)
368 x87_state *res = obstack_alloc(&sim->obst, sizeof(*res));
372 } /* x87_alloc_state */
377 * @param sim the x87 simulator handle
378 * @param src the x87 state that will be cloned
380 * @return a cloned copy of the src state
382 static x87_state *x87_clone_state(x87_simulator *sim, const x87_state *src)
384 x87_state *res = x87_alloc_state(sim);
386 memcpy(res, src, sizeof(*res));
388 } /* x87_clone_state */
391 * Patch a virtual instruction into a x87 one and return
392 * the node representing the result value.
394 * @param n the IR node to patch
395 * @param op the x87 opcode to patch in
397 static ir_node *x87_patch_insn(ir_node *n, ir_op *op)
399 ir_mode *mode = get_irn_mode(n);
404 if (mode == mode_T) {
405 /* patch all Proj's */
406 const ir_edge_t *edge;
408 foreach_out_edge(n, edge) {
409 ir_node *proj = get_edge_src_irn(edge);
411 mode = get_irn_mode(proj);
412 if (mode_is_float(mode)) {
414 set_irn_mode(proj, mode_E);
418 } else if (mode_is_float(mode))
419 set_irn_mode(n, mode_E);
421 } /* x87_patch_insn */
424 * Returns the first Proj of a mode_T node having a given mode.
426 * @param n the mode_T node
427 * @param m the desired mode of the Proj
428 * @return The first Proj of mode @p m found or NULL.
430 static ir_node *get_irn_Proj_for_mode(ir_node *n, ir_mode *m)
432 const ir_edge_t *edge;
434 assert(get_irn_mode(n) == mode_T && "Need mode_T node");
436 foreach_out_edge(n, edge) {
437 ir_node *proj = get_edge_src_irn(edge);
438 if (get_irn_mode(proj) == m)
443 } /* get_irn_Proj_for_mode */
446 * Wrap the arch_* function here so we can check for errors.
448 static INLINE const arch_register_t *x87_get_irn_register(x87_simulator *sim, const ir_node *irn)
450 const arch_register_t *res;
452 res = arch_get_irn_register(sim->arch_env, irn);
453 assert(res->reg_class->regs == ia32_vfp_regs);
455 } /* x87_get_irn_register */
457 /* -------------- x87 perm --------------- */
460 * Creates a fxch for shuffle.
462 * @param state the x87 state
463 * @param pos parameter for fxch
464 * @param block the block were fxch is inserted
466 * Creates a new fxch node and reroute the user of the old node
469 * @return the fxch node
471 static ir_node *x87_fxch_shuffle(x87_state *state, int pos, ir_node *block)
474 ia32_x87_attr_t *attr;
476 fxch = new_rd_ia32_fxch(NULL, get_irn_irg(block), block);
477 attr = get_ia32_x87_attr(fxch);
478 attr->x87[0] = &ia32_st_regs[pos];
479 attr->x87[2] = &ia32_st_regs[0];
483 x87_fxch(state, pos);
485 } /* x87_fxch_shuffle */
488 * Calculate the necessary permutations to reach dst_state.
490 * These permutations are done with fxch instructions and placed
491 * at the end of the block.
493 * Note that critical edges are removed here, so we need only
494 * a shuffle if the current block has only one successor.
496 * @param sim the simulator handle
497 * @param block the current block
498 * @param state the current x87 stack state, might be modified
499 * @param dst_block the destination block
500 * @param dst_state destination state
504 static x87_state *x87_shuffle(x87_simulator *sim, ir_node *block,
505 x87_state *state, ir_node *dst_block,
506 const x87_state *dst_state)
508 int i, n_cycles, k, ri;
509 unsigned cycles[4], all_mask;
510 char cycle_idx[4][8];
511 ir_node *fxch, *before, *after;
515 assert(state->depth == dst_state->depth);
517 /* Some mathematics here:
518 If we have a cycle of length n that includes the tos,
519 we need n-1 exchange operations.
520 We can always add the tos and restore it, so we need
521 n+1 exchange operations for a cycle not containing the tos.
522 So, the maximum of needed operations is for a cycle of 7
523 not including the tos == 8.
524 This is the same number of ops we would need for using stores,
525 so exchange is cheaper (we save the loads).
526 On the other hand, we might need an additional exchange
527 in the next block to bring one operand on top, so the
528 number of ops in the first case is identical.
529 Further, no more than 4 cycles can exists (4 x 2).
531 all_mask = (1 << (state->depth)) - 1;
533 for (n_cycles = 0; all_mask; ++n_cycles) {
534 int src_idx, dst_idx;
536 /* find the first free slot */
537 for (i = 0; i < state->depth; ++i) {
538 if (all_mask & (1 << i)) {
539 all_mask &= ~(1 << i);
541 /* check if there are differences here */
542 if (x87_get_st_reg(state, i) != x87_get_st_reg(dst_state, i))
548 /* no more cycles found */
553 cycles[n_cycles] = (1 << i);
554 cycle_idx[n_cycles][k++] = i;
555 for (src_idx = i; ; src_idx = dst_idx) {
556 dst_idx = x87_on_stack(dst_state, x87_get_st_reg(state, src_idx));
558 if ((all_mask & (1 << dst_idx)) == 0)
561 cycle_idx[n_cycles][k++] = dst_idx;
562 cycles[n_cycles] |= (1 << dst_idx);
563 all_mask &= ~(1 << dst_idx);
565 cycle_idx[n_cycles][k] = -1;
569 /* no permutation needed */
573 /* Hmm: permutation needed */
574 DB((dbg, LEVEL_2, "\n%+F needs permutation: from\n", block));
575 DEBUG_ONLY(x87_dump_stack(state));
576 DB((dbg, LEVEL_2, " to\n"));
577 DEBUG_ONLY(x87_dump_stack(dst_state));
581 DB((dbg, LEVEL_2, "Need %d cycles\n", n_cycles));
582 for (ri = 0; ri < n_cycles; ++ri) {
583 DB((dbg, LEVEL_2, " Ring %d:\n ", ri));
584 for (k = 0; cycle_idx[ri][k] != -1; ++k)
585 DB((dbg, LEVEL_2, " st%d ->", cycle_idx[ri][k]));
586 DB((dbg, LEVEL_2, "\n"));
593 * Find the place node must be insert.
594 * We have only one successor block, so the last instruction should
597 before = sched_last(block);
598 assert(is_cfop(before));
600 /* now do the permutations */
601 for (ri = 0; ri < n_cycles; ++ri) {
602 if ((cycles[ri] & 1) == 0) {
603 /* this cycle does not include the tos */
604 fxch = x87_fxch_shuffle(state, cycle_idx[ri][0], block);
606 sched_add_after(after, fxch);
608 sched_add_before(before, fxch);
611 for (k = 1; cycle_idx[ri][k] != -1; ++k) {
612 fxch = x87_fxch_shuffle(state, cycle_idx[ri][k], block);
614 sched_add_after(after, fxch);
616 sched_add_before(before, fxch);
619 if ((cycles[ri] & 1) == 0) {
620 /* this cycle does not include the tos */
621 fxch = x87_fxch_shuffle(state, cycle_idx[ri][0], block);
622 sched_add_after(after, fxch);
629 * Create a fxch node before another node.
631 * @param state the x87 state
632 * @param n the node after the fxch
633 * @param pos exchange st(pos) with st(0)
637 static ir_node *x87_create_fxch(x87_state *state, ir_node *n, int pos)
640 ia32_x87_attr_t *attr;
641 ir_graph *irg = get_irn_irg(n);
642 ir_node *block = get_nodes_block(n);
644 x87_fxch(state, pos);
646 fxch = new_rd_ia32_fxch(NULL, irg, block);
647 attr = get_ia32_x87_attr(fxch);
648 attr->x87[0] = &ia32_st_regs[pos];
649 attr->x87[2] = &ia32_st_regs[0];
653 sched_add_before(n, fxch);
654 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fxch), attr->x87[0]->name, attr->x87[2]->name));
656 } /* x87_create_fxch */
659 * Create a fpush before node n.
661 * @param state the x87 state
662 * @param n the node after the fpush
663 * @param pos push st(pos) on stack
664 * @param op_idx replace input op_idx of n with the fpush result
666 static void x87_create_fpush(x87_state *state, ir_node *n, int pos, int op_idx)
668 ir_node *fpush, *pred = get_irn_n(n, op_idx);
669 ia32_x87_attr_t *attr;
670 const arch_register_t *out = x87_get_irn_register(state->sim, pred);
672 x87_push_dbl(state, arch_register_get_index(out), pred);
674 fpush = new_rd_ia32_fpush(NULL, get_irn_irg(n), get_nodes_block(n));
675 attr = get_ia32_x87_attr(fpush);
676 attr->x87[0] = &ia32_st_regs[pos];
677 attr->x87[2] = &ia32_st_regs[0];
680 sched_add_before(n, fpush);
682 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fpush), attr->x87[0]->name, attr->x87[2]->name));
683 } /* x87_create_fpush */
686 * Create a fpop before node n.
688 * @param state the x87 state
689 * @param n the node after the fpop
690 * @param num pop 1 or 2 values
692 * @return the fpop node
694 static ir_node *x87_create_fpop(x87_state *state, ir_node *n, int num)
696 ir_node *fpop = NULL;
697 ia32_x87_attr_t *attr;
702 if (ia32_cg_config.use_ffreep)
703 fpop = new_rd_ia32_ffreep(NULL, get_irn_irg(n), get_nodes_block(n));
705 fpop = new_rd_ia32_fpop(NULL, get_irn_irg(n), get_nodes_block(n));
706 attr = get_ia32_x87_attr(fpop);
707 attr->x87[0] = &ia32_st_regs[0];
708 attr->x87[1] = &ia32_st_regs[0];
709 attr->x87[2] = &ia32_st_regs[0];
712 sched_add_before(n, fpop);
713 DB((dbg, LEVEL_1, "<<< %s %s\n", get_irn_opname(fpop), attr->x87[0]->name));
718 } /* x87_create_fpop */
721 * Creates an fldz before node n
723 * @param state the x87 state
724 * @param n the node after the fldz
726 * @return the fldz node
728 static ir_node *x87_create_fldz(x87_state *state, ir_node *n, int regidx)
730 ir_graph *irg = get_irn_irg(n);
731 ir_node *block = get_nodes_block(n);
734 fldz = new_rd_ia32_fldz(NULL, irg, block, mode_E);
736 sched_add_before(n, fldz);
737 DB((dbg, LEVEL_1, "<<< %s\n", get_irn_opname(fldz)));
740 x87_push(state, regidx, fldz);
745 /* --------------------------------- liveness ------------------------------------------ */
748 * The liveness transfer function.
749 * Updates a live set over a single step from a given node to its predecessor.
750 * Everything defined at the node is removed from the set, the uses of the node get inserted.
752 * @param sim The simulator handle.
753 * @param irn The node at which liveness should be computed.
754 * @param live The bitset of registers live before @p irn. This set gets modified by updating it to
755 * the registers live after irn.
757 * @return The live bitset.
759 static vfp_liveness vfp_liveness_transfer(x87_simulator *sim, ir_node *irn, vfp_liveness live)
762 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
763 const arch_env_t *arch_env = sim->arch_env;
765 if (get_irn_mode(irn) == mode_T) {
766 const ir_edge_t *edge;
768 foreach_out_edge(irn, edge) {
769 ir_node *proj = get_edge_src_irn(edge);
771 if (arch_irn_consider_in_reg_alloc(arch_env, cls, proj)) {
772 const arch_register_t *reg = x87_get_irn_register(sim, proj);
773 live &= ~(1 << arch_register_get_index(reg));
778 if (arch_irn_consider_in_reg_alloc(arch_env, cls, irn)) {
779 const arch_register_t *reg = x87_get_irn_register(sim, irn);
780 live &= ~(1 << arch_register_get_index(reg));
783 for (i = 0, n = get_irn_arity(irn); i < n; ++i) {
784 ir_node *op = get_irn_n(irn, i);
786 if (mode_is_float(get_irn_mode(op)) && arch_irn_consider_in_reg_alloc(arch_env, cls, op)) {
787 const arch_register_t *reg = x87_get_irn_register(sim, op);
788 live |= 1 << arch_register_get_index(reg);
792 } /* vfp_liveness_transfer */
795 * Put all live virtual registers at the end of a block into a bitset.
797 * @param sim the simulator handle
798 * @param lv the liveness information
799 * @param bl the block
801 * @return The live bitset at the end of this block
803 static vfp_liveness vfp_liveness_end_of_block(x87_simulator *sim, const ir_node *block)
806 vfp_liveness live = 0;
807 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
808 const arch_env_t *arch_env = sim->arch_env;
809 const be_lv_t *lv = sim->lv;
811 be_lv_foreach(lv, block, be_lv_state_end, i) {
812 const arch_register_t *reg;
813 const ir_node *node = be_lv_get_irn(lv, block, i);
814 if (!arch_irn_consider_in_reg_alloc(arch_env, cls, node))
817 reg = x87_get_irn_register(sim, node);
818 live |= 1 << arch_register_get_index(reg);
822 } /* vfp_liveness_end_of_block */
824 /** get the register mask from an arch_register */
825 #define REGMASK(reg) (1 << (arch_register_get_index(reg)))
828 * Return a bitset of argument registers which are live at the end of a node.
830 * @param sim the simulator handle
831 * @param pos the node
832 * @param kill kill mask for the output registers
834 * @return The live bitset.
836 static unsigned vfp_live_args_after(x87_simulator *sim, const ir_node *pos, unsigned kill)
838 unsigned idx = get_irn_idx(pos);
840 assert(idx < sim->n_idx);
841 return sim->live[idx] & ~kill;
842 } /* vfp_live_args_after */
845 * Calculate the liveness for a whole block and cache it.
847 * @param sim the simulator handle
848 * @param lv the liveness handle
849 * @param block the block
851 static void update_liveness(x87_simulator *sim, ir_node *block)
853 vfp_liveness live = vfp_liveness_end_of_block(sim, block);
857 /* now iterate through the block backward and cache the results */
858 sched_foreach_reverse(block, irn) {
859 /* stop at the first Phi: this produces the live-in */
863 idx = get_irn_idx(irn);
864 sim->live[idx] = live;
866 live = vfp_liveness_transfer(sim, irn, live);
868 idx = get_irn_idx(block);
869 sim->live[idx] = live;
870 } /* update_liveness */
873 * Returns true if a register is live in a set.
875 * @param reg_idx the vfp register index
876 * @param live a live bitset
878 #define is_vfp_live(reg_idx, live) ((live) & (1 << (reg_idx)))
882 * Dump liveness info.
884 * @param live the live bitset
886 static void vfp_dump_live(vfp_liveness live)
890 DB((dbg, LEVEL_2, "Live after: "));
891 for (i = 0; i < 8; ++i) {
892 if (live & (1 << i)) {
893 DB((dbg, LEVEL_2, "vf%d ", i));
896 DB((dbg, LEVEL_2, "\n"));
897 } /* vfp_dump_live */
898 #endif /* DEBUG_libfirm */
900 /* --------------------------------- simulators ---------------------------------------- */
902 #define XCHG(a, b) do { int t = (a); (a) = (b); (b) = t; } while (0)
914 * Simulate a virtual binop.
916 * @param state the x87 state
917 * @param n the node that should be simulated (and patched)
918 * @param tmpl the template containing the 4 possible x87 opcodes
920 * @return NO_NODE_ADDED
922 static int sim_binop(x87_state *state, ir_node *n, const exchange_tmpl *tmpl)
924 int op2_idx = 0, op1_idx;
925 int out_idx, do_pop = 0;
926 ia32_x87_attr_t *attr;
928 ir_node *patched_insn;
930 x87_simulator *sim = state->sim;
931 ir_node *op1 = get_irn_n(n, n_ia32_binary_left);
932 ir_node *op2 = get_irn_n(n, n_ia32_binary_right);
933 const arch_register_t *op1_reg = x87_get_irn_register(sim, op1);
934 const arch_register_t *op2_reg = x87_get_irn_register(sim, op2);
935 const arch_register_t *out = x87_get_irn_register(sim, n);
936 int reg_index_1 = arch_register_get_index(op1_reg);
937 int reg_index_2 = arch_register_get_index(op2_reg);
938 vfp_liveness live = vfp_live_args_after(sim, n, REGMASK(out));
942 DB((dbg, LEVEL_1, ">>> %+F %s, %s -> %s\n", n,
943 arch_register_get_name(op1_reg), arch_register_get_name(op2_reg),
944 arch_register_get_name(out)));
945 DEBUG_ONLY(vfp_dump_live(live));
946 DB((dbg, LEVEL_1, "Stack before: "));
947 DEBUG_ONLY(x87_dump_stack(state));
949 if (reg_index_1 == REG_VFP_UKNWN) {
953 op1_idx = x87_on_stack(state, reg_index_1);
954 assert(op1_idx >= 0);
955 op1_live_after = is_vfp_live(arch_register_get_index(op1_reg), live);
958 attr = get_ia32_x87_attr(n);
959 permuted = attr->attr.data.ins_permuted;
961 if (reg_index_2 != REG_VFP_NOREG) {
964 if (reg_index_2 == REG_VFP_UKNWN) {
968 /* second operand is a vfp register */
969 op2_idx = x87_on_stack(state, reg_index_2);
970 assert(op2_idx >= 0);
972 = is_vfp_live(arch_register_get_index(op2_reg), live);
975 if (op2_live_after) {
976 /* Second operand is live. */
978 if (op1_live_after) {
979 /* Both operands are live: push the first one.
980 This works even for op1 == op2. */
981 x87_create_fpush(state, n, op1_idx, n_ia32_binary_right);
982 /* now do fxxx (tos=tos X op) */
986 dst = tmpl->normal_op;
988 /* Second live, first operand is dead here, bring it to tos. */
990 x87_create_fxch(state, n, op1_idx);
995 /* now do fxxx (tos=tos X op) */
997 dst = tmpl->normal_op;
1000 /* Second operand is dead. */
1001 if (op1_live_after) {
1002 /* First operand is live: bring second to tos. */
1004 x87_create_fxch(state, n, op2_idx);
1009 /* now do fxxxr (tos = op X tos) */
1011 dst = tmpl->reverse_op;
1013 /* Both operands are dead here, pop them from the stack. */
1016 /* Both are identically and on tos, no pop needed. */
1017 /* here fxxx (tos = tos X tos) */
1018 dst = tmpl->normal_op;
1021 /* now do fxxxp (op = op X tos, pop) */
1022 dst = tmpl->normal_pop_op;
1026 } else if (op1_idx == 0) {
1027 assert(op1_idx != op2_idx);
1028 /* now do fxxxrp (op = tos X op, pop) */
1029 dst = tmpl->reverse_pop_op;
1033 /* Bring the second on top. */
1034 x87_create_fxch(state, n, op2_idx);
1035 if (op1_idx == op2_idx) {
1036 /* Both are identically and on tos now, no pop needed. */
1039 /* use fxxx (tos = tos X tos) */
1040 dst = tmpl->normal_op;
1043 /* op2 is on tos now */
1045 /* use fxxxp (op = op X tos, pop) */
1046 dst = tmpl->normal_pop_op;
1054 /* second operand is an address mode */
1055 if (op1_live_after) {
1056 /* first operand is live: push it here */
1057 x87_create_fpush(state, n, op1_idx, n_ia32_binary_left);
1060 /* first operand is dead: bring it to tos */
1062 x87_create_fxch(state, n, op1_idx);
1067 /* use fxxx (tos = tos X mem) */
1068 dst = permuted ? tmpl->reverse_op : tmpl->normal_op;
1072 patched_insn = x87_patch_insn(n, dst);
1073 x87_set_st(state, arch_register_get_index(out), patched_insn, out_idx);
1078 /* patch the operation */
1079 attr->x87[0] = op1_reg = &ia32_st_regs[op1_idx];
1080 if (reg_index_2 != REG_VFP_NOREG) {
1081 attr->x87[1] = op2_reg = &ia32_st_regs[op2_idx];
1083 attr->x87[2] = out = &ia32_st_regs[out_idx];
1085 if (reg_index_2 != REG_VFP_NOREG) {
1086 DB((dbg, LEVEL_1, "<<< %s %s, %s -> %s\n", get_irn_opname(n),
1087 arch_register_get_name(op1_reg), arch_register_get_name(op2_reg),
1088 arch_register_get_name(out)));
1090 DB((dbg, LEVEL_1, "<<< %s %s, [AM] -> %s\n", get_irn_opname(n),
1091 arch_register_get_name(op1_reg),
1092 arch_register_get_name(out)));
1095 return NO_NODE_ADDED;
1099 * Simulate a virtual Unop.
1101 * @param state the x87 state
1102 * @param n the node that should be simulated (and patched)
1103 * @param op the x87 opcode that will replace n's opcode
1105 * @return NO_NODE_ADDED
1107 static int sim_unop(x87_state *state, ir_node *n, ir_op *op)
1109 int op1_idx, out_idx;
1110 x87_simulator *sim = state->sim;
1111 const arch_register_t *op1 = x87_get_irn_register(sim, get_irn_n(n, UNOP_IDX));
1112 const arch_register_t *out = x87_get_irn_register(sim, n);
1113 ia32_x87_attr_t *attr;
1114 unsigned live = vfp_live_args_after(sim, n, REGMASK(out));
1116 DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, out->name));
1117 DEBUG_ONLY(vfp_dump_live(live));
1119 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1121 if (is_vfp_live(arch_register_get_index(op1), live)) {
1122 /* push the operand here */
1123 x87_create_fpush(state, n, op1_idx, UNOP_IDX);
1127 /* operand is dead, bring it to tos */
1129 x87_create_fxch(state, n, op1_idx);
1134 x87_set_tos(state, arch_register_get_index(out), x87_patch_insn(n, op));
1136 attr = get_ia32_x87_attr(n);
1137 attr->x87[0] = op1 = &ia32_st_regs[0];
1138 attr->x87[2] = out = &ia32_st_regs[0];
1139 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), out->name));
1141 return NO_NODE_ADDED;
1145 * Simulate a virtual Load instruction.
1147 * @param state the x87 state
1148 * @param n the node that should be simulated (and patched)
1149 * @param op the x87 opcode that will replace n's opcode
1151 * @return NO_NODE_ADDED
1153 static int sim_load(x87_state *state, ir_node *n, ir_op *op)
1155 const arch_register_t *out = x87_get_irn_register(state->sim, n);
1156 ia32_x87_attr_t *attr;
1158 DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, arch_register_get_name(out)));
1159 x87_push(state, arch_register_get_index(out), x87_patch_insn(n, op));
1160 assert(out == x87_get_irn_register(state->sim, n));
1161 attr = get_ia32_x87_attr(n);
1162 attr->x87[2] = out = &ia32_st_regs[0];
1163 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), arch_register_get_name(out)));
1165 return NO_NODE_ADDED;
1169 * Rewire all users of @p old_val to @new_val iff they are scheduled after @p store.
1171 * @param store The store
1172 * @param old_val The former value
1173 * @param new_val The new value
1175 static void collect_and_rewire_users(ir_node *store, ir_node *old_val, ir_node *new_val)
1177 const ir_edge_t *edge, *ne;
1179 foreach_out_edge_safe(old_val, edge, ne) {
1180 ir_node *user = get_edge_src_irn(edge);
1182 if (! user || user == store)
1185 /* if the user is scheduled after the store: rewire */
1186 if (sched_is_scheduled(user) && sched_comes_after(store, user)) {
1188 /* find the input of the user pointing to the old value */
1189 for (i = get_irn_arity(user) - 1; i >= 0; i--) {
1190 if (get_irn_n(user, i) == old_val)
1191 set_irn_n(user, i, new_val);
1195 } /* collect_and_rewire_users */
1198 * Simulate a virtual Store.
1200 * @param state the x87 state
1201 * @param n the node that should be simulated (and patched)
1202 * @param op the x87 store opcode
1203 * @param op_p the x87 store and pop opcode
1205 static int sim_store(x87_state *state, ir_node *n, ir_op *op, ir_op *op_p)
1207 x87_simulator *sim = state->sim;
1208 ir_node *val = get_irn_n(n, n_ia32_vfst_val);
1209 const arch_register_t *op2 = x87_get_irn_register(sim, val);
1210 unsigned live = vfp_live_args_after(sim, n, 0);
1211 int insn = NO_NODE_ADDED;
1212 ia32_x87_attr_t *attr;
1213 int op2_reg_idx, op2_idx, depth;
1214 int live_after_node;
1217 op2_reg_idx = arch_register_get_index(op2);
1218 if (op2_reg_idx == REG_VFP_UKNWN) {
1219 /* just take any value from stack */
1220 if (state->depth > 0) {
1222 DEBUG_ONLY(op2 = NULL);
1223 live_after_node = 1;
1225 /* produce a new value which we will consume immediately */
1226 x87_create_fldz(state, n, op2_reg_idx);
1227 live_after_node = 0;
1228 op2_idx = x87_on_stack(state, op2_reg_idx);
1229 assert(op2_idx >= 0);
1232 op2_idx = x87_on_stack(state, op2_reg_idx);
1233 live_after_node = is_vfp_live(arch_register_get_index(op2), live);
1234 DB((dbg, LEVEL_1, ">>> %+F %s ->\n", n, arch_register_get_name(op2)));
1235 assert(op2_idx >= 0);
1238 mode = get_ia32_ls_mode(n);
1239 depth = x87_get_depth(state);
1241 if (live_after_node) {
1243 Problem: fst doesn't support mode_E (spills), only fstp does
1245 - stack not full: push value and fstp
1246 - stack full: fstp value and load again
1247 Note that we cannot test on mode_E, because floats might be 96bit ...
1249 if (get_mode_size_bits(mode) > 64 || mode == mode_Ls) {
1250 if (depth < N_x87_REGS) {
1251 /* ok, we have a free register: push + fstp */
1252 x87_create_fpush(state, n, op2_idx, n_ia32_vfst_val);
1254 x87_patch_insn(n, op_p);
1256 ir_node *vfld, *mem, *block, *rproj, *mproj;
1259 /* stack full here: need fstp + load */
1261 x87_patch_insn(n, op_p);
1263 block = get_nodes_block(n);
1264 irg = get_irn_irg(n);
1265 vfld = new_rd_ia32_vfld(NULL, irg, block, get_irn_n(n, 0), get_irn_n(n, 1), new_rd_NoMem(irg), get_ia32_ls_mode(n));
1267 /* copy all attributes */
1268 set_ia32_frame_ent(vfld, get_ia32_frame_ent(n));
1269 if (is_ia32_use_frame(n))
1270 set_ia32_use_frame(vfld);
1271 set_ia32_op_type(vfld, ia32_AddrModeS);
1272 add_ia32_am_offs_int(vfld, get_ia32_am_offs_int(n));
1273 set_ia32_am_sc(vfld, get_ia32_am_sc(n));
1274 set_ia32_ls_mode(vfld, get_ia32_ls_mode(n));
1276 rproj = new_r_Proj(irg, block, vfld, get_ia32_ls_mode(vfld), pn_ia32_vfld_res);
1277 mproj = new_r_Proj(irg, block, vfld, mode_M, pn_ia32_vfld_M);
1278 mem = get_irn_Proj_for_mode(n, mode_M);
1280 assert(mem && "Store memory not found");
1282 arch_set_irn_register(sim->arch_env, rproj, op2);
1284 /* reroute all former users of the store memory to the load memory */
1285 edges_reroute(mem, mproj, irg);
1286 /* set the memory input of the load to the store memory */
1287 set_irn_n(vfld, n_ia32_vfld_mem, mem);
1289 sched_add_after(n, vfld);
1290 sched_add_after(vfld, rproj);
1292 /* rewire all users, scheduled after the store, to the loaded value */
1293 collect_and_rewire_users(n, val, rproj);
1298 /* we can only store the tos to memory */
1300 x87_create_fxch(state, n, op2_idx);
1302 /* mode != mode_E -> use normal fst */
1303 x87_patch_insn(n, op);
1306 /* we can only store the tos to memory */
1308 x87_create_fxch(state, n, op2_idx);
1311 x87_patch_insn(n, op_p);
1314 attr = get_ia32_x87_attr(n);
1315 attr->x87[1] = op2 = &ia32_st_regs[0];
1316 DB((dbg, LEVEL_1, "<<< %s %s ->\n", get_irn_opname(n), arch_register_get_name(op2)));
1321 #define _GEN_BINOP(op, rev) \
1322 static int sim_##op(x87_state *state, ir_node *n) { \
1323 exchange_tmpl tmpl = { op_ia32_##op, op_ia32_##rev, op_ia32_##op##p, op_ia32_##rev##p }; \
1324 return sim_binop(state, n, &tmpl); \
1327 #define GEN_BINOP(op) _GEN_BINOP(op, op)
1328 #define GEN_BINOPR(op) _GEN_BINOP(op, op##r)
1330 #define GEN_LOAD2(op, nop) \
1331 static int sim_##op(x87_state *state, ir_node *n) { \
1332 return sim_load(state, n, op_ia32_##nop); \
1335 #define GEN_LOAD(op) GEN_LOAD2(op, op)
1337 #define GEN_UNOP(op) \
1338 static int sim_##op(x87_state *state, ir_node *n) { \
1339 return sim_unop(state, n, op_ia32_##op); \
1342 #define GEN_STORE(op) \
1343 static int sim_##op(x87_state *state, ir_node *n) { \
1344 return sim_store(state, n, op_ia32_##op, op_ia32_##op##p); \
1366 * Simulate a virtual fisttp.
1368 * @param state the x87 state
1369 * @param n the node that should be simulated (and patched)
1371 static int sim_fisttp(x87_state *state, ir_node *n)
1373 x87_simulator *sim = state->sim;
1374 ir_node *val = get_irn_n(n, n_ia32_vfst_val);
1375 const arch_register_t *op2 = x87_get_irn_register(sim, val);
1376 int insn = NO_NODE_ADDED;
1377 ia32_x87_attr_t *attr;
1378 int op2_reg_idx, op2_idx, depth;
1380 op2_reg_idx = arch_register_get_index(op2);
1381 if (op2_reg_idx == REG_VFP_UKNWN) {
1382 /* just take any value from stack */
1383 if (state->depth > 0) {
1385 DEBUG_ONLY(op2 = NULL);
1387 /* produce a new value which we will consume immediately */
1388 x87_create_fldz(state, n, op2_reg_idx);
1389 op2_idx = x87_on_stack(state, op2_reg_idx);
1390 assert(op2_idx >= 0);
1393 op2_idx = x87_on_stack(state, op2_reg_idx);
1394 DB((dbg, LEVEL_1, ">>> %+F %s ->\n", n, arch_register_get_name(op2)));
1395 assert(op2_idx >= 0);
1398 depth = x87_get_depth(state);
1400 /* Note: although the value is still live here, it is destroyed because
1401 of the pop. The register allocator is aware of that and introduced a copy
1402 if the value must be alive. */
1404 /* we can only store the tos to memory */
1406 x87_create_fxch(state, n, op2_idx);
1409 x87_patch_insn(n, op_ia32_fisttp);
1411 attr = get_ia32_x87_attr(n);
1412 attr->x87[1] = op2 = &ia32_st_regs[0];
1413 DB((dbg, LEVEL_1, "<<< %s %s ->\n", get_irn_opname(n), arch_register_get_name(op2)));
1418 static int sim_FtstFnstsw(x87_state *state, ir_node *n)
1420 x87_simulator *sim = state->sim;
1421 ia32_x87_attr_t *attr = get_ia32_x87_attr(n);
1422 ir_node *op1_node = get_irn_n(n, n_ia32_vFtstFnstsw_left);
1423 const arch_register_t *reg1 = x87_get_irn_register(sim, op1_node);
1424 int reg_index_1 = arch_register_get_index(reg1);
1425 int op1_idx = x87_on_stack(state, reg_index_1);
1426 unsigned live = vfp_live_args_after(sim, n, 0);
1428 DB((dbg, LEVEL_1, ">>> %+F %s\n", n, arch_register_get_name(reg1)));
1429 DEBUG_ONLY(vfp_dump_live(live));
1430 DB((dbg, LEVEL_1, "Stack before: "));
1431 DEBUG_ONLY(x87_dump_stack(state));
1432 assert(op1_idx >= 0);
1435 /* bring the value to tos */
1436 x87_create_fxch(state, n, op1_idx);
1440 /* patch the operation */
1441 x87_patch_insn(n, op_ia32_FtstFnstsw);
1442 reg1 = &ia32_st_regs[op1_idx];
1443 attr->x87[0] = reg1;
1444 attr->x87[1] = NULL;
1445 attr->x87[2] = NULL;
1447 if (!is_vfp_live(reg_index_1, live)) {
1448 x87_create_fpop(state, sched_next(n), 1);
1452 return NO_NODE_ADDED;
1456 * @param state the x87 state
1457 * @param n the node that should be simulated (and patched)
1459 static int sim_Fucom(x87_state *state, ir_node *n)
1463 ia32_x87_attr_t *attr = get_ia32_x87_attr(n);
1465 x87_simulator *sim = state->sim;
1466 ir_node *op1_node = get_irn_n(n, n_ia32_vFucomFnstsw_left);
1467 ir_node *op2_node = get_irn_n(n, n_ia32_vFucomFnstsw_right);
1468 const arch_register_t *op1 = x87_get_irn_register(sim, op1_node);
1469 const arch_register_t *op2 = x87_get_irn_register(sim, op2_node);
1470 int reg_index_1 = arch_register_get_index(op1);
1471 int reg_index_2 = arch_register_get_index(op2);
1472 unsigned live = vfp_live_args_after(sim, n, 0);
1473 int permuted = attr->attr.data.ins_permuted;
1476 int node_added = NO_NODE_ADDED;
1478 DB((dbg, LEVEL_1, ">>> %+F %s, %s\n", n,
1479 arch_register_get_name(op1), arch_register_get_name(op2)));
1480 DEBUG_ONLY(vfp_dump_live(live));
1481 DB((dbg, LEVEL_1, "Stack before: "));
1482 DEBUG_ONLY(x87_dump_stack(state));
1484 op1_idx = x87_on_stack(state, reg_index_1);
1485 assert(op1_idx >= 0);
1487 /* BEWARE: check for comp a,a cases, they might happen */
1488 if (reg_index_2 != REG_VFP_NOREG) {
1489 /* second operand is a vfp register */
1490 op2_idx = x87_on_stack(state, reg_index_2);
1491 assert(op2_idx >= 0);
1493 if (is_vfp_live(reg_index_2, live)) {
1494 /* second operand is live */
1496 if (is_vfp_live(reg_index_1, live)) {
1497 /* both operands are live */
1500 /* res = tos X op */
1501 } else if (op2_idx == 0) {
1502 /* res = op X tos */
1503 permuted = !permuted;
1506 /* bring the first one to tos */
1507 x87_create_fxch(state, n, op1_idx);
1511 /* res = tos X op */
1514 /* second live, first operand is dead here, bring it to tos.
1515 This means further, op1_idx != op2_idx. */
1516 assert(op1_idx != op2_idx);
1518 x87_create_fxch(state, n, op1_idx);
1523 /* res = tos X op, pop */
1527 /* second operand is dead */
1528 if (is_vfp_live(reg_index_1, live)) {
1529 /* first operand is live: bring second to tos.
1530 This means further, op1_idx != op2_idx. */
1531 assert(op1_idx != op2_idx);
1533 x87_create_fxch(state, n, op2_idx);
1538 /* res = op X tos, pop */
1540 permuted = !permuted;
1543 /* both operands are dead here, check first for identity. */
1544 if (op1_idx == op2_idx) {
1545 /* identically, one pop needed */
1547 x87_create_fxch(state, n, op1_idx);
1551 /* res = tos X op, pop */
1554 /* different, move them to st and st(1) and pop both.
1555 The tricky part is to get one into st(1).*/
1556 else if (op2_idx == 1) {
1557 /* good, second operand is already in the right place, move the first */
1559 /* bring the first on top */
1560 x87_create_fxch(state, n, op1_idx);
1561 assert(op2_idx != 0);
1564 /* res = tos X op, pop, pop */
1566 } else if (op1_idx == 1) {
1567 /* good, first operand is already in the right place, move the second */
1569 /* bring the first on top */
1570 x87_create_fxch(state, n, op2_idx);
1571 assert(op1_idx != 0);
1574 /* res = op X tos, pop, pop */
1575 permuted = !permuted;
1579 /* if one is already the TOS, we need two fxch */
1581 /* first one is TOS, move to st(1) */
1582 x87_create_fxch(state, n, 1);
1583 assert(op2_idx != 1);
1585 x87_create_fxch(state, n, op2_idx);
1587 /* res = op X tos, pop, pop */
1589 permuted = !permuted;
1591 } else if (op2_idx == 0) {
1592 /* second one is TOS, move to st(1) */
1593 x87_create_fxch(state, n, 1);
1594 assert(op1_idx != 1);
1596 x87_create_fxch(state, n, op1_idx);
1598 /* res = tos X op, pop, pop */
1601 /* none of them is either TOS or st(1), 3 fxch needed */
1602 x87_create_fxch(state, n, op2_idx);
1603 assert(op1_idx != 0);
1604 x87_create_fxch(state, n, 1);
1606 x87_create_fxch(state, n, op1_idx);
1608 /* res = tos X op, pop, pop */
1615 /* second operand is an address mode */
1616 if (is_vfp_live(reg_index_1, live)) {
1617 /* first operand is live: bring it to TOS */
1619 x87_create_fxch(state, n, op1_idx);
1623 /* first operand is dead: bring it to tos */
1625 x87_create_fxch(state, n, op1_idx);
1632 /* patch the operation */
1633 if (is_ia32_vFucomFnstsw(n)) {
1637 case 0: dst = op_ia32_FucomFnstsw; break;
1638 case 1: dst = op_ia32_FucompFnstsw; break;
1639 case 2: dst = op_ia32_FucomppFnstsw; break;
1640 default: panic("invalid popcount in sim_Fucom");
1643 for (i = 0; i < pops; ++i) {
1646 } else if (is_ia32_vFucomi(n)) {
1648 case 0: dst = op_ia32_Fucomi; break;
1649 case 1: dst = op_ia32_Fucompi; x87_pop(state); break;
1651 dst = op_ia32_Fucompi;
1653 x87_create_fpop(state, sched_next(n), 1);
1654 node_added = NODE_ADDED;
1656 default: panic("invalid popcount in sim_Fucom");
1659 panic("invalid operation %+F in sim_FucomFnstsw", n);
1662 x87_patch_insn(n, dst);
1669 op1 = &ia32_st_regs[op1_idx];
1672 op2 = &ia32_st_regs[op2_idx];
1675 attr->x87[2] = NULL;
1676 attr->attr.data.ins_permuted = permuted;
1679 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(n),
1680 arch_register_get_name(op1), arch_register_get_name(op2)));
1682 DB((dbg, LEVEL_1, "<<< %s %s, [AM]\n", get_irn_opname(n),
1683 arch_register_get_name(op1)));
1689 static int sim_Keep(x87_state *state, ir_node *node)
1692 const arch_register_t *op_reg;
1697 int node_added = NO_NODE_ADDED;
1699 DB((dbg, LEVEL_1, ">>> %+F\n", node));
1701 arity = get_irn_arity(node);
1702 for (i = 0; i < arity; ++i) {
1703 op = get_irn_n(node, i);
1704 op_reg = arch_get_irn_register(state->sim->arch_env, op);
1705 if (arch_register_get_class(op_reg) != &ia32_reg_classes[CLASS_ia32_vfp])
1708 reg_id = arch_register_get_index(op_reg);
1709 live = vfp_live_args_after(state->sim, node, 0);
1711 op_stack_idx = x87_on_stack(state, reg_id);
1712 if (op_stack_idx >= 0 && !is_vfp_live(reg_id, live)) {
1713 x87_create_fpop(state, sched_next(node), 1);
1714 node_added = NODE_ADDED;
1718 DB((dbg, LEVEL_1, "Stack after: "));
1719 DEBUG_ONLY(x87_dump_stack(state));
1725 void keep_float_node_alive(x87_state *state, ir_node *node)
1731 const arch_register_class_t *cls;
1733 irg = get_irn_irg(node);
1734 block = get_nodes_block(node);
1735 cls = arch_get_irn_reg_class(state->sim->arch_env, node, -1);
1737 keep = be_new_Keep(cls, irg, block, 1, in);
1739 assert(sched_is_scheduled(node));
1740 sched_add_after(node, keep);
1744 * Create a copy of a node. Recreate the node if it's a constant.
1746 * @param state the x87 state
1747 * @param n the node to be copied
1749 * @return the copy of n
1751 static ir_node *create_Copy(x87_state *state, ir_node *n)
1753 x87_simulator *sim = state->sim;
1754 ir_graph *irg = get_irn_irg(n);
1755 dbg_info *n_dbg = get_irn_dbg_info(n);
1756 ir_mode *mode = get_irn_mode(n);
1757 ir_node *block = get_nodes_block(n);
1758 ir_node *pred = get_irn_n(n, 0);
1759 ir_node *(*cnstr)(dbg_info *, ir_graph *, ir_node *, ir_mode *) = NULL;
1761 const arch_register_t *out;
1762 const arch_register_t *op1;
1763 ia32_x87_attr_t *attr;
1765 /* Do not copy constants, recreate them. */
1766 switch (get_ia32_irn_opcode(pred)) {
1767 case iro_ia32_Unknown_VFP:
1769 cnstr = new_rd_ia32_fldz;
1772 cnstr = new_rd_ia32_fld1;
1774 case iro_ia32_fldpi:
1775 cnstr = new_rd_ia32_fldpi;
1777 case iro_ia32_fldl2e:
1778 cnstr = new_rd_ia32_fldl2e;
1780 case iro_ia32_fldl2t:
1781 cnstr = new_rd_ia32_fldl2t;
1783 case iro_ia32_fldlg2:
1784 cnstr = new_rd_ia32_fldlg2;
1786 case iro_ia32_fldln2:
1787 cnstr = new_rd_ia32_fldln2;
1793 out = x87_get_irn_register(sim, n);
1794 op1 = x87_get_irn_register(sim, pred);
1796 if (cnstr != NULL) {
1797 /* copy a constant */
1798 res = (*cnstr)(n_dbg, irg, block, mode);
1800 x87_push(state, arch_register_get_index(out), res);
1802 attr = get_ia32_x87_attr(res);
1803 attr->x87[2] = &ia32_st_regs[0];
1805 int op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1807 res = new_rd_ia32_fpushCopy(n_dbg, irg, block, pred, mode);
1809 x87_push(state, arch_register_get_index(out), res);
1811 attr = get_ia32_x87_attr(res);
1812 attr->x87[0] = &ia32_st_regs[op1_idx];
1813 attr->x87[2] = &ia32_st_regs[0];
1815 arch_set_irn_register(sim->arch_env, res, out);
1821 * Simulate a be_Copy.
1823 * @param state the x87 state
1824 * @param n the node that should be simulated (and patched)
1826 * @return NO_NODE_ADDED
1828 static int sim_Copy(x87_state *state, ir_node *n)
1830 x87_simulator *sim = state->sim;
1832 const arch_register_t *out;
1833 const arch_register_t *op1;
1834 const arch_register_class_t *cls;
1835 ir_node *node, *next;
1836 ia32_x87_attr_t *attr;
1837 int op1_idx, out_idx;
1840 cls = arch_get_irn_reg_class(sim->arch_env, n, -1);
1841 if (cls->regs != ia32_vfp_regs)
1844 pred = get_irn_n(n, 0);
1845 out = x87_get_irn_register(sim, n);
1846 op1 = x87_get_irn_register(sim, pred);
1847 live = vfp_live_args_after(sim, n, REGMASK(out));
1849 DB((dbg, LEVEL_1, ">>> %+F %s -> %s\n", n,
1850 arch_register_get_name(op1), arch_register_get_name(out)));
1851 DEBUG_ONLY(vfp_dump_live(live));
1853 /* handle the infamous unknown value */
1854 if (arch_register_get_index(op1) == REG_VFP_UKNWN) {
1855 /* Operand is still live, a real copy. We need here an fpush that can
1856 hold a a register, so use the fpushCopy or recreate constants */
1857 node = create_Copy(state, n);
1859 assert(is_ia32_fldz(node));
1860 next = sched_next(n);
1863 sched_add_before(next, node);
1865 DB((dbg, LEVEL_1, "<<< %+F %s -> %s\n", node, op1->name,
1866 arch_get_irn_register(sim->arch_env, node)->name));
1867 return NO_NODE_ADDED;
1870 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1872 if (is_vfp_live(arch_register_get_index(op1), live)) {
1873 ir_node *pred = get_irn_n(n, 0);
1875 /* Operand is still live, a real copy. We need here an fpush that can
1876 hold a a register, so use the fpushCopy or recreate constants */
1877 node = create_Copy(state, n);
1879 /* We have to make sure the old value doesn't go dead (which can happen
1880 * when we recreate constants). As the simulator expected that value in
1881 * the pred blocks. This is unfortunate as removing it would save us 1
1882 * instruction, but we would have to rerun all the simulation to get
1885 next = sched_next(n);
1888 sched_add_before(next, node);
1890 if (get_irn_n_edges(pred) == 0) {
1891 keep_float_node_alive(state, pred);
1894 DB((dbg, LEVEL_1, "<<< %+F %s -> ?\n", node, op1->name));
1896 out_idx = x87_on_stack(state, arch_register_get_index(out));
1898 if (out_idx >= 0 && out_idx != op1_idx) {
1899 /* Matze: out already on stack? how can this happen? */
1902 /* op1 must be killed and placed where out is */
1904 /* best case, simple remove and rename */
1905 x87_patch_insn(n, op_ia32_Pop);
1906 attr = get_ia32_x87_attr(n);
1907 attr->x87[0] = op1 = &ia32_st_regs[0];
1910 x87_set_st(state, arch_register_get_index(out), n, op1_idx - 1);
1912 /* move op1 to tos, store and pop it */
1914 x87_create_fxch(state, n, op1_idx);
1917 x87_patch_insn(n, op_ia32_Pop);
1918 attr = get_ia32_x87_attr(n);
1919 attr->x87[0] = op1 = &ia32_st_regs[out_idx];
1922 x87_set_st(state, arch_register_get_index(out), n, out_idx - 1);
1924 DB((dbg, LEVEL_1, "<<< %+F %s\n", n, op1->name));
1926 /* just a virtual copy */
1927 x87_set_st(state, arch_register_get_index(out), get_unop_op(n), op1_idx);
1928 /* don't remove the node to keep the verifier quiet :),
1929 the emitter won't emit any code for the node */
1932 DB((dbg, LEVEL_1, "<<< KILLED %s\n", get_irn_opname(n)));
1933 exchange(n, get_unop_op(n));
1937 return NO_NODE_ADDED;
1941 * Returns the result proj of the call
1943 static ir_node *get_call_result_proj(ir_node *call)
1945 const ir_edge_t *edge;
1947 /* search the result proj */
1948 foreach_out_edge(call, edge) {
1949 ir_node *proj = get_edge_src_irn(edge);
1950 long pn = get_Proj_proj(proj);
1952 if (pn == pn_ia32_Call_vf0) {
1958 } /* get_call_result_proj */
1961 * Simulate a ia32_Call.
1963 * @param state the x87 state
1964 * @param n the node that should be simulated
1966 * @return NO_NODE_ADDED
1968 static int sim_Call(x87_state *state, ir_node *n)
1970 ir_type *call_tp = get_ia32_call_attr_const(n)->call_tp;
1974 const arch_register_t *reg;
1976 DB((dbg, LEVEL_1, ">>> %+F\n", n));
1978 /* at the begin of a call the x87 state should be empty */
1979 assert(state->depth == 0 && "stack not empty before call");
1981 if (get_method_n_ress(call_tp) <= 0)
1985 * If the called function returns a float, it is returned in st(0).
1986 * This even happens if the return value is NOT used.
1987 * Moreover, only one return result is supported.
1989 res_type = get_method_res_type(call_tp, 0);
1990 mode = get_type_mode(res_type);
1992 if (mode == NULL || !mode_is_float(mode))
1995 resproj = get_call_result_proj(n);
1996 assert(resproj != NULL);
1998 reg = x87_get_irn_register(state->sim, resproj);
1999 x87_push(state, arch_register_get_index(reg), resproj);
2002 DB((dbg, LEVEL_1, "Stack after: "));
2003 DEBUG_ONLY(x87_dump_stack(state));
2005 return NO_NODE_ADDED;
2009 * Simulate a be_Spill.
2011 * @param state the x87 state
2012 * @param n the node that should be simulated (and patched)
2014 * Should not happen, spills are lowered before x87 simulator see them.
2016 static int sim_Spill(x87_state *state, ir_node *n)
2018 assert(0 && "Spill not lowered");
2019 return sim_fst(state, n);
2023 * Simulate a be_Reload.
2025 * @param state the x87 state
2026 * @param n the node that should be simulated (and patched)
2028 * Should not happen, reloads are lowered before x87 simulator see them.
2030 static int sim_Reload(x87_state *state, ir_node *n)
2032 assert(0 && "Reload not lowered");
2033 return sim_fld(state, n);
2037 * Simulate a be_Return.
2039 * @param state the x87 state
2040 * @param n the node that should be simulated (and patched)
2042 * @return NO_NODE_ADDED
2044 static int sim_Return(x87_state *state, ir_node *n)
2046 int n_res = be_Return_get_n_rets(n);
2047 int i, n_float_res = 0;
2049 /* only floating point return values must reside on stack */
2050 for (i = 0; i < n_res; ++i) {
2051 ir_node *res = get_irn_n(n, be_pos_Return_val + i);
2053 if (mode_is_float(get_irn_mode(res)))
2056 assert(x87_get_depth(state) == n_float_res);
2058 /* pop them virtually */
2059 for (i = n_float_res - 1; i >= 0; --i)
2062 return NO_NODE_ADDED;
2065 typedef struct _perm_data_t {
2066 const arch_register_t *in;
2067 const arch_register_t *out;
2071 * Simulate a be_Perm.
2073 * @param state the x87 state
2074 * @param irn the node that should be simulated (and patched)
2076 * @return NO_NODE_ADDED
2078 static int sim_Perm(x87_state *state, ir_node *irn)
2081 x87_simulator *sim = state->sim;
2082 ir_node *pred = get_irn_n(irn, 0);
2084 const ir_edge_t *edge;
2086 /* handle only floating point Perms */
2087 if (! mode_is_float(get_irn_mode(pred)))
2088 return NO_NODE_ADDED;
2090 DB((dbg, LEVEL_1, ">>> %+F\n", irn));
2092 /* Perm is a pure virtual instruction on x87.
2093 All inputs must be on the FPU stack and are pairwise
2094 different from each other.
2095 So, all we need to do is to permutate the stack state. */
2096 n = get_irn_arity(irn);
2097 NEW_ARR_A(int, stack_pos, n);
2099 /* collect old stack positions */
2100 for (i = 0; i < n; ++i) {
2101 const arch_register_t *inreg = x87_get_irn_register(sim, get_irn_n(irn, i));
2102 int idx = x87_on_stack(state, arch_register_get_index(inreg));
2104 assert(idx >= 0 && "Perm argument not on x87 stack");
2108 /* now do the permutation */
2109 foreach_out_edge(irn, edge) {
2110 ir_node *proj = get_edge_src_irn(edge);
2111 const arch_register_t *out = x87_get_irn_register(sim, proj);
2112 long num = get_Proj_proj(proj);
2114 assert(0 <= num && num < n && "More Proj's than Perm inputs");
2115 x87_set_st(state, arch_register_get_index(out), proj, stack_pos[(unsigned)num]);
2117 DB((dbg, LEVEL_1, "<<< %+F\n", irn));
2119 return NO_NODE_ADDED;
2122 static int sim_Barrier(x87_state *state, ir_node *node)
2124 //const arch_env_t *arch_env = state->sim->arch_env;
2127 /* materialize unknown if needed */
2128 arity = get_irn_arity(node);
2129 for (i = 0; i < arity; ++i) {
2130 const arch_register_t *reg;
2133 ia32_x87_attr_t *attr;
2134 ir_node *in = get_irn_n(node, i);
2136 if (!is_ia32_Unknown_VFP(in))
2139 /* TODO: not completely correct... */
2140 reg = &ia32_vfp_regs[REG_VFP_UKNWN];
2143 block = get_nodes_block(node);
2144 zero = new_rd_ia32_fldz(NULL, current_ir_graph, block, mode_E);
2145 x87_push(state, arch_register_get_index(reg), zero);
2147 attr = get_ia32_x87_attr(zero);
2148 attr->x87[2] = &ia32_st_regs[0];
2150 sched_add_before(node, zero);
2152 set_irn_n(node, i, zero);
2155 return NO_NODE_ADDED;
2160 * Kill any dead registers at block start by popping them from the stack.
2162 * @param sim the simulator handle
2163 * @param block the current block
2164 * @param start_state the x87 state at the begin of the block
2166 * @return the x87 state after dead register killed
2168 static x87_state *x87_kill_deads(x87_simulator *sim, ir_node *block, x87_state *start_state)
2170 x87_state *state = start_state;
2171 ir_node *first_insn = sched_first(block);
2172 ir_node *keep = NULL;
2173 unsigned live = vfp_live_args_after(sim, block, 0);
2175 int i, depth, num_pop;
2178 depth = x87_get_depth(state);
2179 for (i = depth - 1; i >= 0; --i) {
2180 int reg = x87_get_st_reg(state, i);
2182 if (! is_vfp_live(reg, live))
2183 kill_mask |= (1 << i);
2187 /* create a new state, will be changed */
2188 state = x87_clone_state(sim, state);
2190 DB((dbg, LEVEL_1, "Killing deads:\n"));
2191 DEBUG_ONLY(vfp_dump_live(live));
2192 DEBUG_ONLY(x87_dump_stack(state));
2194 if (kill_mask != 0 && live == 0) {
2195 /* special case: kill all registers */
2196 if (ia32_cg_config.use_femms || ia32_cg_config.use_emms) {
2197 if (ia32_cg_config.use_femms) {
2198 /* use FEMMS on AMD processors to clear all */
2199 keep = new_rd_ia32_femms(NULL, get_irn_irg(block), block);
2201 /* use EMMS to clear all */
2202 keep = new_rd_ia32_emms(NULL, get_irn_irg(block), block);
2204 sched_add_before(first_insn, keep);
2210 /* now kill registers */
2212 /* we can only kill from TOS, so bring them up */
2213 if (! (kill_mask & 1)) {
2214 /* search from behind, because we can to a double-pop */
2215 for (i = depth - 1; i >= 0; --i) {
2216 if (kill_mask & (1 << i)) {
2217 kill_mask &= ~(1 << i);
2224 x87_set_st(state, -1, keep, i);
2225 x87_create_fxch(state, first_insn, i);
2228 if ((kill_mask & 3) == 3) {
2229 /* we can do a double-pop */
2233 /* only a single pop */
2238 kill_mask >>= num_pop;
2239 keep = x87_create_fpop(state, first_insn, num_pop);
2244 } /* x87_kill_deads */
2247 * If we have PhiEs with unknown operands then we have to make sure that some
2248 * value is actually put onto the stack.
2250 static void fix_unknown_phis(x87_state *state, ir_node *block,
2251 ir_node *pred_block, int pos)
2255 sched_foreach(block, node) {
2257 const arch_register_t *reg;
2258 ia32_x87_attr_t *attr;
2263 op = get_Phi_pred(node, pos);
2264 if (!is_ia32_Unknown_VFP(op))
2267 reg = arch_get_irn_register(state->sim->arch_env, node);
2269 /* create a zero at end of pred block */
2270 zero = new_rd_ia32_fldz(NULL, current_ir_graph, pred_block, mode_E);
2271 x87_push(state, arch_register_get_index(reg), zero);
2273 attr = get_ia32_x87_attr(zero);
2274 attr->x87[2] = &ia32_st_regs[0];
2276 assert(is_ia32_fldz(zero));
2277 sched_add_before(sched_last(pred_block), zero);
2279 set_Phi_pred(node, pos, zero);
2284 * Run a simulation and fix all virtual instructions for a block.
2286 * @param sim the simulator handle
2287 * @param block the current block
2289 static void x87_simulate_block(x87_simulator *sim, ir_node *block)
2292 blk_state *bl_state = x87_get_bl_state(sim, block);
2293 x87_state *state = bl_state->begin;
2294 const ir_edge_t *edge;
2295 ir_node *start_block;
2297 assert(state != NULL);
2298 /* already processed? */
2299 if (bl_state->end != NULL)
2302 DB((dbg, LEVEL_1, "Simulate %+F\n", block));
2303 DB((dbg, LEVEL_2, "State at Block begin:\n "));
2304 DEBUG_ONLY(x87_dump_stack(state));
2306 /* at block begin, kill all dead registers */
2307 state = x87_kill_deads(sim, block, state);
2308 /* create a new state, will be changed */
2309 state = x87_clone_state(sim, state);
2311 /* beware, n might change */
2312 for (n = sched_first(block); !sched_is_end(n); n = next) {
2315 ir_op *op = get_irn_op(n);
2317 next = sched_next(n);
2318 if (op->ops.generic == NULL)
2321 func = (sim_func)op->ops.generic;
2324 node_inserted = (*func)(state, n);
2327 sim_func might have added an additional node after n,
2329 beware: n must not be changed by sim_func
2330 (i.e. removed from schedule) in this case
2332 if (node_inserted != NO_NODE_ADDED)
2333 next = sched_next(n);
2336 start_block = get_irg_start_block(get_irn_irg(block));
2338 DB((dbg, LEVEL_2, "State at Block end:\n ")); DEBUG_ONLY(x87_dump_stack(state));
2340 /* check if the state must be shuffled */
2341 foreach_block_succ(block, edge) {
2342 ir_node *succ = get_edge_src_irn(edge);
2343 blk_state *succ_state;
2345 if (succ == start_block)
2348 succ_state = x87_get_bl_state(sim, succ);
2350 fix_unknown_phis(state, succ, block, get_edge_src_pos(edge));
2352 if (succ_state->begin == NULL) {
2353 DB((dbg, LEVEL_2, "Set begin state for succ %+F:\n", succ));
2354 DEBUG_ONLY(x87_dump_stack(state));
2355 succ_state->begin = state;
2357 waitq_put(sim->worklist, succ);
2359 DB((dbg, LEVEL_2, "succ %+F already has a state, shuffling\n", succ));
2360 /* There is already a begin state for the successor, bad.
2361 Do the necessary permutations.
2362 Note that critical edges are removed, so this is always possible:
2363 If the successor has more than one possible input, then it must
2366 x87_shuffle(sim, block, state, succ, succ_state->begin);
2369 bl_state->end = state;
2370 } /* x87_simulate_block */
2372 static void register_sim(ir_op *op, sim_func func)
2374 assert(op->ops.generic == NULL);
2375 op->ops.generic = (op_func) func;
2379 * Create a new x87 simulator.
2381 * @param sim a simulator handle, will be initialized
2382 * @param irg the current graph
2383 * @param arch_env the architecture environment
2385 static void x87_init_simulator(x87_simulator *sim, ir_graph *irg,
2386 const arch_env_t *arch_env)
2388 obstack_init(&sim->obst);
2389 sim->blk_states = pmap_create();
2390 sim->arch_env = arch_env;
2391 sim->n_idx = get_irg_last_idx(irg);
2392 sim->live = obstack_alloc(&sim->obst, sizeof(*sim->live) * sim->n_idx);
2394 DB((dbg, LEVEL_1, "--------------------------------\n"
2395 "x87 Simulator started for %+F\n", irg));
2397 /* set the generic function pointer of instruction we must simulate */
2398 clear_irp_opcodes_generic_func();
2400 register_sim(op_ia32_Call, sim_Call);
2401 register_sim(op_ia32_vfld, sim_fld);
2402 register_sim(op_ia32_vfild, sim_fild);
2403 register_sim(op_ia32_vfld1, sim_fld1);
2404 register_sim(op_ia32_vfldz, sim_fldz);
2405 register_sim(op_ia32_vfadd, sim_fadd);
2406 register_sim(op_ia32_vfsub, sim_fsub);
2407 register_sim(op_ia32_vfmul, sim_fmul);
2408 register_sim(op_ia32_vfdiv, sim_fdiv);
2409 register_sim(op_ia32_vfprem, sim_fprem);
2410 register_sim(op_ia32_vfabs, sim_fabs);
2411 register_sim(op_ia32_vfchs, sim_fchs);
2412 register_sim(op_ia32_vfist, sim_fist);
2413 register_sim(op_ia32_vfisttp, sim_fisttp);
2414 register_sim(op_ia32_vfst, sim_fst);
2415 register_sim(op_ia32_vFtstFnstsw, sim_FtstFnstsw);
2416 register_sim(op_ia32_vFucomFnstsw, sim_Fucom);
2417 register_sim(op_ia32_vFucomi, sim_Fucom);
2418 register_sim(op_be_Copy, sim_Copy);
2419 register_sim(op_be_Spill, sim_Spill);
2420 register_sim(op_be_Reload, sim_Reload);
2421 register_sim(op_be_Return, sim_Return);
2422 register_sim(op_be_Perm, sim_Perm);
2423 register_sim(op_be_Keep, sim_Keep);
2424 register_sim(op_be_Barrier, sim_Barrier);
2425 } /* x87_init_simulator */
2428 * Destroy a x87 simulator.
2430 * @param sim the simulator handle
2432 static void x87_destroy_simulator(x87_simulator *sim)
2434 pmap_destroy(sim->blk_states);
2435 obstack_free(&sim->obst, NULL);
2436 DB((dbg, LEVEL_1, "x87 Simulator stopped\n\n"));
2437 } /* x87_destroy_simulator */
2440 * Pre-block walker: calculate the liveness information for the block
2441 * and store it into the sim->live cache.
2443 static void update_liveness_walker(ir_node *block, void *data)
2445 x87_simulator *sim = data;
2446 update_liveness(sim, block);
2447 } /* update_liveness_walker */
2450 * Run a simulation and fix all virtual instructions for a graph.
2452 * @param env the architecture environment
2453 * @param irg the current graph
2455 * Needs a block-schedule.
2457 void x87_simulate_graph(const arch_env_t *arch_env, be_irg_t *birg)
2459 ir_node *block, *start_block;
2460 blk_state *bl_state;
2462 ir_graph *irg = be_get_birg_irg(birg);
2464 /* create the simulator */
2465 x87_init_simulator(&sim, irg, arch_env);
2467 start_block = get_irg_start_block(irg);
2468 bl_state = x87_get_bl_state(&sim, start_block);
2470 /* start with the empty state */
2471 bl_state->begin = empty;
2474 sim.worklist = new_waitq();
2475 waitq_put(sim.worklist, start_block);
2477 be_assure_liveness(birg);
2478 sim.lv = be_get_birg_liveness(birg);
2479 // sim.lv = be_liveness(be_get_birg_irg(birg));
2480 be_liveness_assure_sets(sim.lv);
2482 /* Calculate the liveness for all nodes. We must precalculate this info,
2483 * because the simulator adds new nodes (possible before Phi nodes) which
2484 * would let a lazy calculation fail.
2485 * On the other hand we reduce the computation amount due to
2486 * precaching from O(n^2) to O(n) at the expense of O(n) cache memory.
2488 irg_block_walk_graph(irg, update_liveness_walker, NULL, &sim);
2492 block = waitq_get(sim.worklist);
2493 x87_simulate_block(&sim, block);
2494 } while (! waitq_empty(sim.worklist));
2497 del_waitq(sim.worklist);
2498 x87_destroy_simulator(&sim);
2499 } /* x87_simulate_graph */
2501 void ia32_init_x87(void)
2503 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.x87");
2504 } /* ia32_init_x87 */