2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the x87 support and virtual to stack
23 * register translation for the ia32 backend.
24 * @author Michael Beck
36 #include "iredges_t.h"
48 #include "../belive_t.h"
49 #include "../besched_t.h"
50 #include "../benode_t.h"
51 #include "bearch_ia32_t.h"
52 #include "ia32_new_nodes.h"
53 #include "gen_ia32_new_nodes.h"
54 #include "gen_ia32_regalloc_if.h"
56 #include "ia32_architecture.h"
63 #define MASK_TOS(x) ((x) & (N_x87_REGS - 1))
65 /** the debug handle */
66 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
68 /* Forward declaration. */
69 typedef struct _x87_simulator x87_simulator;
72 * An exchange template.
73 * Note that our virtual functions have the same inputs
74 * and attributes as the real ones, so we can simple exchange
76 * Further, x87 supports inverse instructions, so we can handle them.
78 typedef struct _exchange_tmpl {
79 ir_op *normal_op; /**< the normal one */
80 ir_op *reverse_op; /**< the reverse one if exists */
81 ir_op *normal_pop_op; /**< the normal one with tos pop */
82 ir_op *reverse_pop_op; /**< the reverse one with tos pop */
86 * An entry on the simulated x87 stack.
88 typedef struct _st_entry {
89 int reg_idx; /**< the virtual register index of this stack value */
90 ir_node *node; /**< the node that produced this value */
96 typedef struct _x87_state {
97 st_entry st[N_x87_REGS]; /**< the register stack */
98 int depth; /**< the current stack depth */
99 int tos; /**< position of the tos */
100 x87_simulator *sim; /**< The simulator. */
103 /** An empty state, used for blocks without fp instructions. */
104 static x87_state _empty = { { {0, NULL}, }, 0, 0, NULL };
105 static x87_state *empty = (x87_state *)&_empty;
108 NO_NODE_ADDED = 0, /**< No node was added. */
109 NODE_ADDED = 1 /**< A node was added by the simulator in the schedule. */
113 * The type of an instruction simulator function.
115 * @param state the x87 state
116 * @param n the node to be simulated
118 * @return NODE_ADDED if a node was added AFTER n in schedule,
121 typedef int (*sim_func)(x87_state *state, ir_node *n);
124 * A block state: Every block has a x87 state at the beginning and at the end.
126 typedef struct _blk_state {
127 x87_state *begin; /**< state at the begin or NULL if not assigned */
128 x87_state *end; /**< state at the end or NULL if not assigned */
131 #define PTR_TO_BLKSTATE(p) ((blk_state *)(p))
133 /** liveness bitset for vfp registers. */
134 typedef unsigned char vfp_liveness;
139 struct _x87_simulator {
140 struct obstack obst; /**< An obstack for fast allocating. */
141 pmap *blk_states; /**< Map blocks to states. */
142 const arch_env_t *arch_env; /**< The architecture environment. */
143 be_lv_t *lv; /**< intrablock liveness. */
144 vfp_liveness *live; /**< Liveness information. */
145 unsigned n_idx; /**< The cached get_irg_last_idx() result. */
146 waitq *worklist; /**< Worklist of blocks that must be processed. */
147 ia32_isa_t *isa; /**< the ISA object */
151 * Returns the current stack depth.
153 * @param state the x87 state
155 * @return the x87 stack depth
157 static int x87_get_depth(const x87_state *state) {
159 } /* x87_get_depth */
162 * Return the virtual register index at st(pos).
164 * @param state the x87 state
165 * @param pos a stack position
167 * @return the vfp register index that produced the value at st(pos)
169 static int x87_get_st_reg(const x87_state *state, int pos) {
170 assert(pos < state->depth);
171 return state->st[MASK_TOS(state->tos + pos)].reg_idx;
172 } /* x87_get_st_reg */
176 * Return the node at st(pos).
178 * @param state the x87 state
179 * @param pos a stack position
181 * @return the IR node that produced the value at st(pos)
183 static ir_node *x87_get_st_node(const x87_state *state, int pos) {
184 assert(pos < state->depth);
185 return state->st[MASK_TOS(state->tos + pos)].node;
186 } /* x87_get_st_node */
189 * Dump the stack for debugging.
191 * @param state the x87 state
193 static void x87_dump_stack(const x87_state *state) {
196 for (i = state->depth - 1; i >= 0; --i) {
197 DB((dbg, LEVEL_2, "vf%d(%+F) ", x87_get_st_reg(state, i),
198 x87_get_st_node(state, i)));
200 DB((dbg, LEVEL_2, "<-- TOS\n"));
201 } /* x87_dump_stack */
202 #endif /* DEBUG_libfirm */
205 * Set a virtual register to st(pos).
207 * @param state the x87 state
208 * @param reg_idx the vfp register index that should be set
209 * @param node the IR node that produces the value of the vfp register
210 * @param pos the stack position where the new value should be entered
212 static void x87_set_st(x87_state *state, int reg_idx, ir_node *node, int pos) {
213 assert(0 < state->depth);
214 state->st[MASK_TOS(state->tos + pos)].reg_idx = reg_idx;
215 state->st[MASK_TOS(state->tos + pos)].node = node;
217 DB((dbg, LEVEL_2, "After SET_REG: "));
218 DEBUG_ONLY(x87_dump_stack(state));
222 * Set the tos virtual register.
224 * @param state the x87 state
225 * @param reg_idx the vfp register index that should be set
226 * @param node the IR node that produces the value of the vfp register
228 static void x87_set_tos(x87_state *state, int reg_idx, ir_node *node) {
229 x87_set_st(state, reg_idx, node, 0);
233 * Swap st(0) with st(pos).
235 * @param state the x87 state
236 * @param pos the stack position to change the tos with
238 static void x87_fxch(x87_state *state, int pos) {
240 assert(pos < state->depth);
242 entry = state->st[MASK_TOS(state->tos + pos)];
243 state->st[MASK_TOS(state->tos + pos)] = state->st[MASK_TOS(state->tos)];
244 state->st[MASK_TOS(state->tos)] = entry;
246 DB((dbg, LEVEL_2, "After FXCH: ")); DEBUG_ONLY(x87_dump_stack(state));
250 * Convert a virtual register to the stack index.
252 * @param state the x87 state
253 * @param reg_idx the register vfp index
255 * @return the stack position where the register is stacked
256 * or -1 if the virtual register was not found
258 static int x87_on_stack(const x87_state *state, int reg_idx) {
259 int i, tos = state->tos;
261 for (i = 0; i < state->depth; ++i)
262 if (state->st[MASK_TOS(tos + i)].reg_idx == reg_idx)
268 * Push a virtual Register onto the stack, double pushed allowed.
270 * @param state the x87 state
271 * @param reg_idx the register vfp index
272 * @param node the node that produces the value of the vfp register
274 static void x87_push_dbl(x87_state *state, int reg_idx, ir_node *node) {
275 assert(state->depth < N_x87_REGS && "stack overrun");
278 state->tos = MASK_TOS(state->tos - 1);
279 state->st[state->tos].reg_idx = reg_idx;
280 state->st[state->tos].node = node;
282 DB((dbg, LEVEL_2, "After PUSH: ")); DEBUG_ONLY(x87_dump_stack(state));
286 * Push a virtual Register onto the stack, double pushes are NOT allowed.
288 * @param state the x87 state
289 * @param reg_idx the register vfp index
290 * @param node the node that produces the value of the vfp register
291 * @param dbl_push if != 0 double pushes are allowed
293 static void x87_push(x87_state *state, int reg_idx, ir_node *node) {
294 assert(x87_on_stack(state, reg_idx) == -1 && "double push");
296 x87_push_dbl(state, reg_idx, node);
300 * Pop a virtual Register from the stack.
302 * @param state the x87 state
304 static void x87_pop(x87_state *state) {
305 assert(state->depth > 0 && "stack underrun");
308 state->tos = MASK_TOS(state->tos + 1);
310 DB((dbg, LEVEL_2, "After POP: ")); DEBUG_ONLY(x87_dump_stack(state));
314 * Empty the fpu stack
316 * @param state the x87 state
318 static void x87_emms(x87_state *state) {
324 * Returns the block state of a block.
326 * @param sim the x87 simulator handle
327 * @param block the current block
329 * @return the block state
331 static blk_state *x87_get_bl_state(x87_simulator *sim, ir_node *block) {
332 pmap_entry *entry = pmap_find(sim->blk_states, block);
335 blk_state *bl_state = obstack_alloc(&sim->obst, sizeof(*bl_state));
336 bl_state->begin = NULL;
337 bl_state->end = NULL;
339 pmap_insert(sim->blk_states, block, bl_state);
343 return PTR_TO_BLKSTATE(entry->value);
344 } /* x87_get_bl_state */
347 * Creates a new x87 state.
349 * @param sim the x87 simulator handle
351 * @return a new x87 state
353 static x87_state *x87_alloc_state(x87_simulator *sim) {
354 x87_state *res = obstack_alloc(&sim->obst, sizeof(*res));
358 } /* x87_alloc_state */
363 * @param sim the x87 simulator handle
364 * @param src the x87 state that will be cloned
366 * @return a cloned copy of the src state
368 static x87_state *x87_clone_state(x87_simulator *sim, const x87_state *src) {
369 x87_state *res = x87_alloc_state(sim);
371 memcpy(res, src, sizeof(*res));
373 } /* x87_clone_state */
376 * Patch a virtual instruction into a x87 one and return
377 * the node representing the result value.
379 * @param n the IR node to patch
380 * @param op the x87 opcode to patch in
382 static ir_node *x87_patch_insn(ir_node *n, ir_op *op) {
383 ir_mode *mode = get_irn_mode(n);
388 if (mode == mode_T) {
389 /* patch all Proj's */
390 const ir_edge_t *edge;
392 foreach_out_edge(n, edge) {
393 ir_node *proj = get_edge_src_irn(edge);
395 mode = get_irn_mode(proj);
396 if (mode_is_float(mode)) {
398 set_irn_mode(proj, mode_E);
402 } else if (mode_is_float(mode))
403 set_irn_mode(n, mode_E);
405 } /* x87_patch_insn */
408 * Returns the first Proj of a mode_T node having a given mode.
410 * @param n the mode_T node
411 * @param m the desired mode of the Proj
412 * @return The first Proj of mode @p m found or NULL.
414 static ir_node *get_irn_Proj_for_mode(ir_node *n, ir_mode *m) {
415 const ir_edge_t *edge;
417 assert(get_irn_mode(n) == mode_T && "Need mode_T node");
419 foreach_out_edge(n, edge) {
420 ir_node *proj = get_edge_src_irn(edge);
421 if (get_irn_mode(proj) == m)
426 } /* get_irn_Proj_for_mode */
429 * Wrap the arch_* function here so we can check for errors.
431 static INLINE const arch_register_t *x87_get_irn_register(x87_simulator *sim, const ir_node *irn) {
432 const arch_register_t *res;
434 res = arch_get_irn_register(sim->arch_env, irn);
435 assert(res->reg_class->regs == ia32_vfp_regs);
437 } /* x87_get_irn_register */
439 /* -------------- x87 perm --------------- */
442 * Creates a fxch for shuffle.
444 * @param state the x87 state
445 * @param pos parameter for fxch
446 * @param block the block were fxch is inserted
448 * Creates a new fxch node and reroute the user of the old node
451 * @return the fxch node
453 static ir_node *x87_fxch_shuffle(x87_state *state, int pos, ir_node *block) {
455 ia32_x87_attr_t *attr;
457 fxch = new_rd_ia32_fxch(NULL, get_irn_irg(block), block);
458 attr = get_ia32_x87_attr(fxch);
459 attr->x87[0] = &ia32_st_regs[pos];
460 attr->x87[2] = &ia32_st_regs[0];
464 x87_fxch(state, pos);
466 } /* x87_fxch_shuffle */
469 * Calculate the necessary permutations to reach dst_state.
471 * These permutations are done with fxch instructions and placed
472 * at the end of the block.
474 * Note that critical edges are removed here, so we need only
475 * a shuffle if the current block has only one successor.
477 * @param sim the simulator handle
478 * @param block the current block
479 * @param state the current x87 stack state, might be modified
480 * @param dst_block the destination block
481 * @param dst_state destination state
485 static x87_state *x87_shuffle(x87_simulator *sim, ir_node *block,
486 x87_state *state, ir_node *dst_block,
487 const x87_state *dst_state)
489 int i, n_cycles, k, ri;
490 unsigned cycles[4], all_mask;
491 char cycle_idx[4][8];
492 ir_node *fxch, *before, *after;
496 assert(state->depth == dst_state->depth);
498 /* Some mathematics here:
499 If we have a cycle of length n that includes the tos,
500 we need n-1 exchange operations.
501 We can always add the tos and restore it, so we need
502 n+1 exchange operations for a cycle not containing the tos.
503 So, the maximum of needed operations is for a cycle of 7
504 not including the tos == 8.
505 This is the same number of ops we would need for using stores,
506 so exchange is cheaper (we save the loads).
507 On the other hand, we might need an additional exchange
508 in the next block to bring one operand on top, so the
509 number of ops in the first case is identical.
510 Further, no more than 4 cycles can exists (4 x 2).
512 all_mask = (1 << (state->depth)) - 1;
514 for (n_cycles = 0; all_mask; ++n_cycles) {
515 int src_idx, dst_idx;
517 /* find the first free slot */
518 for (i = 0; i < state->depth; ++i) {
519 if (all_mask & (1 << i)) {
520 all_mask &= ~(1 << i);
522 /* check if there are differences here */
523 if (x87_get_st_reg(state, i) != x87_get_st_reg(dst_state, i))
529 /* no more cycles found */
534 cycles[n_cycles] = (1 << i);
535 cycle_idx[n_cycles][k++] = i;
536 for (src_idx = i; ; src_idx = dst_idx) {
537 dst_idx = x87_on_stack(dst_state, x87_get_st_reg(state, src_idx));
539 if ((all_mask & (1 << dst_idx)) == 0)
542 cycle_idx[n_cycles][k++] = dst_idx;
543 cycles[n_cycles] |= (1 << dst_idx);
544 all_mask &= ~(1 << dst_idx);
546 cycle_idx[n_cycles][k] = -1;
550 /* no permutation needed */
554 /* Hmm: permutation needed */
555 DB((dbg, LEVEL_2, "\n%+F needs permutation: from\n", block));
556 DEBUG_ONLY(x87_dump_stack(state));
557 DB((dbg, LEVEL_2, " to\n"));
558 DEBUG_ONLY(x87_dump_stack(dst_state));
562 DB((dbg, LEVEL_2, "Need %d cycles\n", n_cycles));
563 for (ri = 0; ri < n_cycles; ++ri) {
564 DB((dbg, LEVEL_2, " Ring %d:\n ", ri));
565 for (k = 0; cycle_idx[ri][k] != -1; ++k)
566 DB((dbg, LEVEL_2, " st%d ->", cycle_idx[ri][k]));
567 DB((dbg, LEVEL_2, "\n"));
574 * Find the place node must be insert.
575 * We have only one successor block, so the last instruction should
578 before = sched_last(block);
579 assert(is_cfop(before));
581 /* now do the permutations */
582 for (ri = 0; ri < n_cycles; ++ri) {
583 if ((cycles[ri] & 1) == 0) {
584 /* this cycle does not include the tos */
585 fxch = x87_fxch_shuffle(state, cycle_idx[ri][0], block);
587 sched_add_after(after, fxch);
589 sched_add_before(before, fxch);
592 for (k = 1; cycle_idx[ri][k] != -1; ++k) {
593 fxch = x87_fxch_shuffle(state, cycle_idx[ri][k], block);
595 sched_add_after(after, fxch);
597 sched_add_before(before, fxch);
600 if ((cycles[ri] & 1) == 0) {
601 /* this cycle does not include the tos */
602 fxch = x87_fxch_shuffle(state, cycle_idx[ri][0], block);
603 sched_add_after(after, fxch);
610 * Create a fxch node before another node.
612 * @param state the x87 state
613 * @param n the node after the fxch
614 * @param pos exchange st(pos) with st(0)
618 static ir_node *x87_create_fxch(x87_state *state, ir_node *n, int pos)
621 ia32_x87_attr_t *attr;
622 ir_graph *irg = get_irn_irg(n);
623 ir_node *block = get_nodes_block(n);
625 x87_fxch(state, pos);
627 fxch = new_rd_ia32_fxch(NULL, irg, block);
628 attr = get_ia32_x87_attr(fxch);
629 attr->x87[0] = &ia32_st_regs[pos];
630 attr->x87[2] = &ia32_st_regs[0];
634 sched_add_before(n, fxch);
635 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fxch), attr->x87[0]->name, attr->x87[2]->name));
637 } /* x87_create_fxch */
640 * Create a fpush before node n.
642 * @param state the x87 state
643 * @param n the node after the fpush
644 * @param pos push st(pos) on stack
645 * @param op_idx replace input op_idx of n with the fpush result
647 static void x87_create_fpush(x87_state *state, ir_node *n, int pos, int op_idx) {
648 ir_node *fpush, *pred = get_irn_n(n, op_idx);
649 ia32_x87_attr_t *attr;
650 const arch_register_t *out = x87_get_irn_register(state->sim, pred);
652 x87_push_dbl(state, arch_register_get_index(out), pred);
654 fpush = new_rd_ia32_fpush(NULL, get_irn_irg(n), get_nodes_block(n));
655 attr = get_ia32_x87_attr(fpush);
656 attr->x87[0] = &ia32_st_regs[pos];
657 attr->x87[2] = &ia32_st_regs[0];
660 sched_add_before(n, fpush);
662 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fpush), attr->x87[0]->name, attr->x87[2]->name));
663 } /* x87_create_fpush */
666 * Create a fpop before node n.
668 * @param state the x87 state
669 * @param n the node after the fpop
670 * @param num pop 1 or 2 values
672 * @return the fpop node
674 static ir_node *x87_create_fpop(x87_state *state, ir_node *n, int num)
676 ir_node *fpop = NULL;
677 ia32_x87_attr_t *attr;
682 if (ia32_cg_config.use_ffreep)
683 fpop = new_rd_ia32_ffreep(NULL, get_irn_irg(n), get_nodes_block(n));
685 fpop = new_rd_ia32_fpop(NULL, get_irn_irg(n), get_nodes_block(n));
686 attr = get_ia32_x87_attr(fpop);
687 attr->x87[0] = &ia32_st_regs[0];
688 attr->x87[1] = &ia32_st_regs[0];
689 attr->x87[2] = &ia32_st_regs[0];
692 sched_add_before(n, fpop);
693 DB((dbg, LEVEL_1, "<<< %s %s\n", get_irn_opname(fpop), attr->x87[0]->name));
698 } /* x87_create_fpop */
701 * Creates an fldz before node n
703 * @param state the x87 state
704 * @param n the node after the fldz
706 * @return the fldz node
708 static ir_node *x87_create_fldz(x87_state *state, ir_node *n, int regidx) {
709 ir_graph *irg = get_irn_irg(n);
710 ir_node *block = get_nodes_block(n);
713 fldz = new_rd_ia32_fldz(NULL, irg, block, mode_E);
715 sched_add_before(n, fldz);
716 DB((dbg, LEVEL_1, "<<< %s\n", get_irn_opname(fldz)));
719 x87_push(state, regidx, fldz);
724 /* --------------------------------- liveness ------------------------------------------ */
727 * The liveness transfer function.
728 * Updates a live set over a single step from a given node to its predecessor.
729 * Everything defined at the node is removed from the set, the uses of the node get inserted.
731 * @param sim The simulator handle.
732 * @param irn The node at which liveness should be computed.
733 * @param live The bitset of registers live before @p irn. This set gets modified by updating it to
734 * the registers live after irn.
736 * @return The live bitset.
738 static vfp_liveness vfp_liveness_transfer(x87_simulator *sim, ir_node *irn, vfp_liveness live)
741 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
742 const arch_env_t *arch_env = sim->arch_env;
744 if (get_irn_mode(irn) == mode_T) {
745 const ir_edge_t *edge;
747 foreach_out_edge(irn, edge) {
748 ir_node *proj = get_edge_src_irn(edge);
750 if (arch_irn_consider_in_reg_alloc(arch_env, cls, proj)) {
751 const arch_register_t *reg = x87_get_irn_register(sim, proj);
752 live &= ~(1 << arch_register_get_index(reg));
757 if (arch_irn_consider_in_reg_alloc(arch_env, cls, irn)) {
758 const arch_register_t *reg = x87_get_irn_register(sim, irn);
759 live &= ~(1 << arch_register_get_index(reg));
762 for (i = 0, n = get_irn_arity(irn); i < n; ++i) {
763 ir_node *op = get_irn_n(irn, i);
765 if (mode_is_float(get_irn_mode(op)) && arch_irn_consider_in_reg_alloc(arch_env, cls, op)) {
766 const arch_register_t *reg = x87_get_irn_register(sim, op);
767 live |= 1 << arch_register_get_index(reg);
771 } /* vfp_liveness_transfer */
774 * Put all live virtual registers at the end of a block into a bitset.
776 * @param sim the simulator handle
777 * @param lv the liveness information
778 * @param bl the block
780 * @return The live bitset at the end of this block
782 static vfp_liveness vfp_liveness_end_of_block(x87_simulator *sim, const ir_node *block)
785 vfp_liveness live = 0;
786 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
787 const arch_env_t *arch_env = sim->arch_env;
788 const be_lv_t *lv = sim->lv;
790 be_lv_foreach(lv, block, be_lv_state_end, i) {
791 const arch_register_t *reg;
792 const ir_node *node = be_lv_get_irn(lv, block, i);
793 if (!arch_irn_consider_in_reg_alloc(arch_env, cls, node))
796 reg = x87_get_irn_register(sim, node);
797 live |= 1 << arch_register_get_index(reg);
801 } /* vfp_liveness_end_of_block */
803 /** get the register mask from an arch_register */
804 #define REGMASK(reg) (1 << (arch_register_get_index(reg)))
807 * Return a bitset of argument registers which are live at the end of a node.
809 * @param sim the simulator handle
810 * @param pos the node
811 * @param kill kill mask for the output registers
813 * @return The live bitset.
815 static unsigned vfp_live_args_after(x87_simulator *sim, const ir_node *pos, unsigned kill)
817 unsigned idx = get_irn_idx(pos);
819 assert(idx < sim->n_idx);
820 return sim->live[idx] & ~kill;
821 } /* vfp_live_args_after */
824 * Calculate the liveness for a whole block and cache it.
826 * @param sim the simulator handle
827 * @param lv the liveness handle
828 * @param block the block
830 static void update_liveness(x87_simulator *sim, ir_node *block) {
831 vfp_liveness live = vfp_liveness_end_of_block(sim, block);
835 /* now iterate through the block backward and cache the results */
836 sched_foreach_reverse(block, irn) {
837 /* stop at the first Phi: this produces the live-in */
841 idx = get_irn_idx(irn);
842 sim->live[idx] = live;
844 live = vfp_liveness_transfer(sim, irn, live);
846 idx = get_irn_idx(block);
847 sim->live[idx] = live;
848 } /* update_liveness */
851 * Returns true if a register is live in a set.
853 * @param reg_idx the vfp register index
854 * @param live a live bitset
856 #define is_vfp_live(reg_idx, live) ((live) & (1 << (reg_idx)))
860 * Dump liveness info.
862 * @param live the live bitset
864 static void vfp_dump_live(vfp_liveness live) {
867 DB((dbg, LEVEL_2, "Live after: "));
868 for (i = 0; i < 8; ++i) {
869 if (live & (1 << i)) {
870 DB((dbg, LEVEL_2, "vf%d ", i));
873 DB((dbg, LEVEL_2, "\n"));
874 } /* vfp_dump_live */
875 #endif /* DEBUG_libfirm */
877 /* --------------------------------- simulators ---------------------------------------- */
879 #define XCHG(a, b) do { int t = (a); (a) = (b); (b) = t; } while (0)
891 * Simulate a virtual binop.
893 * @param state the x87 state
894 * @param n the node that should be simulated (and patched)
895 * @param tmpl the template containing the 4 possible x87 opcodes
897 * @return NO_NODE_ADDED
899 static int sim_binop(x87_state *state, ir_node *n, const exchange_tmpl *tmpl) {
900 int op2_idx = 0, op1_idx;
901 int out_idx, do_pop = 0;
902 ia32_x87_attr_t *attr;
904 ir_node *patched_insn;
906 x87_simulator *sim = state->sim;
907 ir_node *op1 = get_irn_n(n, n_ia32_binary_left);
908 ir_node *op2 = get_irn_n(n, n_ia32_binary_right);
909 const arch_register_t *op1_reg = x87_get_irn_register(sim, op1);
910 const arch_register_t *op2_reg = x87_get_irn_register(sim, op2);
911 const arch_register_t *out = x87_get_irn_register(sim, n);
912 int reg_index_1 = arch_register_get_index(op1_reg);
913 int reg_index_2 = arch_register_get_index(op2_reg);
914 vfp_liveness live = vfp_live_args_after(sim, n, REGMASK(out));
918 DB((dbg, LEVEL_1, ">>> %+F %s, %s -> %s\n", n,
919 arch_register_get_name(op1_reg), arch_register_get_name(op2_reg),
920 arch_register_get_name(out)));
921 DEBUG_ONLY(vfp_dump_live(live));
922 DB((dbg, LEVEL_1, "Stack before: "));
923 DEBUG_ONLY(x87_dump_stack(state));
925 if(reg_index_1 == REG_VFP_UKNWN) {
929 op1_idx = x87_on_stack(state, reg_index_1);
930 assert(op1_idx >= 0);
931 op1_live_after = is_vfp_live(arch_register_get_index(op1_reg), live);
934 attr = get_ia32_x87_attr(n);
935 permuted = attr->attr.data.ins_permuted;
937 if (reg_index_2 != REG_VFP_NOREG) {
940 if(reg_index_2 == REG_VFP_UKNWN) {
944 /* second operand is a vfp register */
945 op2_idx = x87_on_stack(state, reg_index_2);
946 assert(op2_idx >= 0);
948 = is_vfp_live(arch_register_get_index(op2_reg), live);
951 if (op2_live_after) {
952 /* Second operand is live. */
954 if (op1_live_after) {
955 /* Both operands are live: push the first one.
956 This works even for op1 == op2. */
957 x87_create_fpush(state, n, op1_idx, n_ia32_binary_right);
958 /* now do fxxx (tos=tos X op) */
962 dst = tmpl->normal_op;
964 /* Second live, first operand is dead here, bring it to tos. */
966 x87_create_fxch(state, n, op1_idx);
971 /* now do fxxx (tos=tos X op) */
973 dst = tmpl->normal_op;
976 /* Second operand is dead. */
977 if (op1_live_after) {
978 /* First operand is live: bring second to tos. */
980 x87_create_fxch(state, n, op2_idx);
985 /* now do fxxxr (tos = op X tos) */
987 dst = tmpl->reverse_op;
989 /* Both operands are dead here, pop them from the stack. */
992 /* Both are identically and on tos, no pop needed. */
993 /* here fxxx (tos = tos X tos) */
994 dst = tmpl->normal_op;
997 /* now do fxxxp (op = op X tos, pop) */
998 dst = tmpl->normal_pop_op;
1002 } else if (op1_idx == 0) {
1003 assert(op1_idx != op2_idx);
1004 /* now do fxxxrp (op = tos X op, pop) */
1005 dst = tmpl->reverse_pop_op;
1009 /* Bring the second on top. */
1010 x87_create_fxch(state, n, op2_idx);
1011 if (op1_idx == op2_idx) {
1012 /* Both are identically and on tos now, no pop needed. */
1015 /* use fxxx (tos = tos X tos) */
1016 dst = tmpl->normal_op;
1019 /* op2 is on tos now */
1021 /* use fxxxp (op = op X tos, pop) */
1022 dst = tmpl->normal_pop_op;
1030 /* second operand is an address mode */
1031 if (op1_live_after) {
1032 /* first operand is live: push it here */
1033 x87_create_fpush(state, n, op1_idx, n_ia32_binary_left);
1036 /* first operand is dead: bring it to tos */
1038 x87_create_fxch(state, n, op1_idx);
1043 /* use fxxx (tos = tos X mem) */
1044 dst = permuted ? tmpl->reverse_op : tmpl->normal_op;
1048 patched_insn = x87_patch_insn(n, dst);
1049 x87_set_st(state, arch_register_get_index(out), patched_insn, out_idx);
1054 /* patch the operation */
1055 attr->x87[0] = op1_reg = &ia32_st_regs[op1_idx];
1056 if (reg_index_2 != REG_VFP_NOREG) {
1057 attr->x87[1] = op2_reg = &ia32_st_regs[op2_idx];
1059 attr->x87[2] = out = &ia32_st_regs[out_idx];
1061 if (reg_index_2 != REG_VFP_NOREG) {
1062 DB((dbg, LEVEL_1, "<<< %s %s, %s -> %s\n", get_irn_opname(n),
1063 arch_register_get_name(op1_reg), arch_register_get_name(op2_reg),
1064 arch_register_get_name(out)));
1066 DB((dbg, LEVEL_1, "<<< %s %s, [AM] -> %s\n", get_irn_opname(n),
1067 arch_register_get_name(op1_reg),
1068 arch_register_get_name(out)));
1071 return NO_NODE_ADDED;
1075 * Simulate a virtual Unop.
1077 * @param state the x87 state
1078 * @param n the node that should be simulated (and patched)
1079 * @param op the x87 opcode that will replace n's opcode
1081 * @return NO_NODE_ADDED
1083 static int sim_unop(x87_state *state, ir_node *n, ir_op *op) {
1084 int op1_idx, out_idx;
1085 x87_simulator *sim = state->sim;
1086 const arch_register_t *op1 = x87_get_irn_register(sim, get_irn_n(n, UNOP_IDX));
1087 const arch_register_t *out = x87_get_irn_register(sim, n);
1088 ia32_x87_attr_t *attr;
1089 unsigned live = vfp_live_args_after(sim, n, REGMASK(out));
1091 DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, out->name));
1092 DEBUG_ONLY(vfp_dump_live(live));
1094 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1096 if (is_vfp_live(arch_register_get_index(op1), live)) {
1097 /* push the operand here */
1098 x87_create_fpush(state, n, op1_idx, UNOP_IDX);
1102 /* operand is dead, bring it to tos */
1104 x87_create_fxch(state, n, op1_idx);
1109 x87_set_tos(state, arch_register_get_index(out), x87_patch_insn(n, op));
1111 attr = get_ia32_x87_attr(n);
1112 attr->x87[0] = op1 = &ia32_st_regs[0];
1113 attr->x87[2] = out = &ia32_st_regs[0];
1114 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), out->name));
1116 return NO_NODE_ADDED;
1120 * Simulate a virtual Load instruction.
1122 * @param state the x87 state
1123 * @param n the node that should be simulated (and patched)
1124 * @param op the x87 opcode that will replace n's opcode
1126 * @return NO_NODE_ADDED
1128 static int sim_load(x87_state *state, ir_node *n, ir_op *op) {
1129 const arch_register_t *out = x87_get_irn_register(state->sim, n);
1130 ia32_x87_attr_t *attr;
1132 DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, arch_register_get_name(out)));
1133 x87_push(state, arch_register_get_index(out), x87_patch_insn(n, op));
1134 assert(out == x87_get_irn_register(state->sim, n));
1135 attr = get_ia32_x87_attr(n);
1136 attr->x87[2] = out = &ia32_st_regs[0];
1137 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), arch_register_get_name(out)));
1139 return NO_NODE_ADDED;
1143 * Rewire all users of @p old_val to @new_val iff they are scheduled after @p store.
1145 * @param store The store
1146 * @param old_val The former value
1147 * @param new_val The new value
1149 static void collect_and_rewire_users(ir_node *store, ir_node *old_val, ir_node *new_val) {
1150 const ir_edge_t *edge, *ne;
1152 foreach_out_edge_safe(old_val, edge, ne) {
1153 ir_node *user = get_edge_src_irn(edge);
1155 if (! user || user == store)
1158 /* if the user is scheduled after the store: rewire */
1159 if (sched_is_scheduled(user) && sched_comes_after(store, user)) {
1161 /* find the input of the user pointing to the old value */
1162 for (i = get_irn_arity(user) - 1; i >= 0; i--) {
1163 if (get_irn_n(user, i) == old_val)
1164 set_irn_n(user, i, new_val);
1168 } /* collect_and_rewire_users */
1171 * Simulate a virtual Store.
1173 * @param state the x87 state
1174 * @param n the node that should be simulated (and patched)
1175 * @param op the x87 store opcode
1176 * @param op_p the x87 store and pop opcode
1178 static int sim_store(x87_state *state, ir_node *n, ir_op *op, ir_op *op_p) {
1179 x87_simulator *sim = state->sim;
1180 ir_node *val = get_irn_n(n, n_ia32_vfst_val);
1181 const arch_register_t *op2 = x87_get_irn_register(sim, val);
1182 unsigned live = vfp_live_args_after(sim, n, 0);
1183 int insn = NO_NODE_ADDED;
1184 ia32_x87_attr_t *attr;
1185 int op2_reg_idx, op2_idx, depth;
1186 int live_after_node;
1189 op2_reg_idx = arch_register_get_index(op2);
1190 if (op2_reg_idx == REG_VFP_UKNWN) {
1191 /* just take any value from stack */
1192 if(state->depth > 0) {
1194 DEBUG_ONLY(op2 = NULL);
1195 live_after_node = 1;
1197 /* produce a new value which we will consume immediately */
1198 x87_create_fldz(state, n, op2_reg_idx);
1199 live_after_node = 0;
1200 op2_idx = x87_on_stack(state, op2_reg_idx);
1201 assert(op2_idx >= 0);
1204 op2_idx = x87_on_stack(state, op2_reg_idx);
1205 live_after_node = is_vfp_live(arch_register_get_index(op2), live);
1206 DB((dbg, LEVEL_1, ">>> %+F %s ->\n", n, arch_register_get_name(op2)));
1207 assert(op2_idx >= 0);
1210 mode = get_ia32_ls_mode(n);
1211 depth = x87_get_depth(state);
1213 if (live_after_node) {
1215 Problem: fst doesn't support mode_E (spills), only fstp does
1217 - stack not full: push value and fstp
1218 - stack full: fstp value and load again
1219 Note that we cannot test on mode_E, because floats might be 96bit ...
1221 if (get_mode_size_bits(mode) > 64 || mode == mode_Ls) {
1222 if (depth < N_x87_REGS) {
1223 /* ok, we have a free register: push + fstp */
1224 x87_create_fpush(state, n, op2_idx, n_ia32_vfst_val);
1226 x87_patch_insn(n, op_p);
1228 ir_node *vfld, *mem, *block, *rproj, *mproj;
1231 /* stack full here: need fstp + load */
1233 x87_patch_insn(n, op_p);
1235 block = get_nodes_block(n);
1236 irg = get_irn_irg(n);
1237 vfld = new_rd_ia32_vfld(NULL, irg, block, get_irn_n(n, 0), get_irn_n(n, 1), new_rd_NoMem(irg), get_ia32_ls_mode(n));
1239 /* copy all attributes */
1240 set_ia32_frame_ent(vfld, get_ia32_frame_ent(n));
1241 if (is_ia32_use_frame(n))
1242 set_ia32_use_frame(vfld);
1243 set_ia32_op_type(vfld, ia32_AddrModeS);
1244 add_ia32_am_offs_int(vfld, get_ia32_am_offs_int(n));
1245 set_ia32_am_sc(vfld, get_ia32_am_sc(n));
1246 set_ia32_ls_mode(vfld, get_ia32_ls_mode(n));
1248 rproj = new_r_Proj(irg, block, vfld, get_ia32_ls_mode(vfld), pn_ia32_vfld_res);
1249 mproj = new_r_Proj(irg, block, vfld, mode_M, pn_ia32_vfld_M);
1250 mem = get_irn_Proj_for_mode(n, mode_M);
1252 assert(mem && "Store memory not found");
1254 arch_set_irn_register(sim->arch_env, rproj, op2);
1256 /* reroute all former users of the store memory to the load memory */
1257 edges_reroute(mem, mproj, irg);
1258 /* set the memory input of the load to the store memory */
1259 set_irn_n(vfld, n_ia32_vfld_mem, mem);
1261 sched_add_after(n, vfld);
1262 sched_add_after(vfld, rproj);
1264 /* rewire all users, scheduled after the store, to the loaded value */
1265 collect_and_rewire_users(n, val, rproj);
1270 /* we can only store the tos to memory */
1272 x87_create_fxch(state, n, op2_idx);
1274 /* mode != mode_E -> use normal fst */
1275 x87_patch_insn(n, op);
1278 /* we can only store the tos to memory */
1280 x87_create_fxch(state, n, op2_idx);
1283 x87_patch_insn(n, op_p);
1286 attr = get_ia32_x87_attr(n);
1287 attr->x87[1] = op2 = &ia32_st_regs[0];
1288 DB((dbg, LEVEL_1, "<<< %s %s ->\n", get_irn_opname(n), arch_register_get_name(op2)));
1293 #define _GEN_BINOP(op, rev) \
1294 static int sim_##op(x87_state *state, ir_node *n) { \
1295 exchange_tmpl tmpl = { op_ia32_##op, op_ia32_##rev, op_ia32_##op##p, op_ia32_##rev##p }; \
1296 return sim_binop(state, n, &tmpl); \
1299 #define GEN_BINOP(op) _GEN_BINOP(op, op)
1300 #define GEN_BINOPR(op) _GEN_BINOP(op, op##r)
1302 #define GEN_LOAD2(op, nop) \
1303 static int sim_##op(x87_state *state, ir_node *n) { \
1304 return sim_load(state, n, op_ia32_##nop); \
1307 #define GEN_LOAD(op) GEN_LOAD2(op, op)
1309 #define GEN_UNOP(op) \
1310 static int sim_##op(x87_state *state, ir_node *n) { \
1311 return sim_unop(state, n, op_ia32_##op); \
1314 #define GEN_STORE(op) \
1315 static int sim_##op(x87_state *state, ir_node *n) { \
1316 return sim_store(state, n, op_ia32_##op, op_ia32_##op##p); \
1338 * Simulate a virtual fisttp.
1340 * @param state the x87 state
1341 * @param n the node that should be simulated (and patched)
1343 static int sim_fisttp(x87_state *state, ir_node *n) {
1344 x87_simulator *sim = state->sim;
1345 ir_node *val = get_irn_n(n, n_ia32_vfst_val);
1346 const arch_register_t *op2 = x87_get_irn_register(sim, val);
1347 int insn = NO_NODE_ADDED;
1348 ia32_x87_attr_t *attr;
1349 int op2_reg_idx, op2_idx, depth;
1351 op2_reg_idx = arch_register_get_index(op2);
1352 if (op2_reg_idx == REG_VFP_UKNWN) {
1353 /* just take any value from stack */
1354 if (state->depth > 0) {
1356 DEBUG_ONLY(op2 = NULL);
1358 /* produce a new value which we will consume immediately */
1359 x87_create_fldz(state, n, op2_reg_idx);
1360 op2_idx = x87_on_stack(state, op2_reg_idx);
1361 assert(op2_idx >= 0);
1364 op2_idx = x87_on_stack(state, op2_reg_idx);
1365 DB((dbg, LEVEL_1, ">>> %+F %s ->\n", n, arch_register_get_name(op2)));
1366 assert(op2_idx >= 0);
1369 depth = x87_get_depth(state);
1371 /* Note: although the value is still live here, it is destroyed because
1372 of the pop. The register allocator is aware of that and introduced a copy
1373 if the value must be alive. */
1375 /* we can only store the tos to memory */
1377 x87_create_fxch(state, n, op2_idx);
1380 x87_patch_insn(n, op_ia32_fisttp);
1382 attr = get_ia32_x87_attr(n);
1383 attr->x87[1] = op2 = &ia32_st_regs[0];
1384 DB((dbg, LEVEL_1, "<<< %s %s ->\n", get_irn_opname(n), arch_register_get_name(op2)));
1389 static int sim_FtstFnstsw(x87_state *state, ir_node *n) {
1390 x87_simulator *sim = state->sim;
1391 ia32_x87_attr_t *attr = get_ia32_x87_attr(n);
1392 ir_node *op1_node = get_irn_n(n, n_ia32_vFtstFnstsw_left);
1393 const arch_register_t *reg1 = x87_get_irn_register(sim, op1_node);
1394 int reg_index_1 = arch_register_get_index(reg1);
1395 int op1_idx = x87_on_stack(state, reg_index_1);
1396 unsigned live = vfp_live_args_after(sim, n, 0);
1398 DB((dbg, LEVEL_1, ">>> %+F %s\n", n, arch_register_get_name(reg1)));
1399 DEBUG_ONLY(vfp_dump_live(live));
1400 DB((dbg, LEVEL_1, "Stack before: "));
1401 DEBUG_ONLY(x87_dump_stack(state));
1402 assert(op1_idx >= 0);
1405 /* bring the value to tos */
1406 x87_create_fxch(state, n, op1_idx);
1410 /* patch the operation */
1411 x87_patch_insn(n, op_ia32_FtstFnstsw);
1412 reg1 = &ia32_st_regs[op1_idx];
1413 attr->x87[0] = reg1;
1414 attr->x87[1] = NULL;
1415 attr->x87[2] = NULL;
1417 if(!is_vfp_live(reg_index_1, live)) {
1418 x87_create_fpop(state, sched_next(n), 1);
1422 return NO_NODE_ADDED;
1426 * @param state the x87 state
1427 * @param n the node that should be simulated (and patched)
1429 static int sim_Fucom(x87_state *state, ir_node *n) {
1432 ia32_x87_attr_t *attr = get_ia32_x87_attr(n);
1434 x87_simulator *sim = state->sim;
1435 ir_node *op1_node = get_irn_n(n, n_ia32_vFucomFnstsw_left);
1436 ir_node *op2_node = get_irn_n(n, n_ia32_vFucomFnstsw_right);
1437 const arch_register_t *op1 = x87_get_irn_register(sim, op1_node);
1438 const arch_register_t *op2 = x87_get_irn_register(sim, op2_node);
1439 int reg_index_1 = arch_register_get_index(op1);
1440 int reg_index_2 = arch_register_get_index(op2);
1441 unsigned live = vfp_live_args_after(sim, n, 0);
1442 int permuted = attr->attr.data.ins_permuted;
1445 int node_added = NO_NODE_ADDED;
1447 DB((dbg, LEVEL_1, ">>> %+F %s, %s\n", n,
1448 arch_register_get_name(op1), arch_register_get_name(op2)));
1449 DEBUG_ONLY(vfp_dump_live(live));
1450 DB((dbg, LEVEL_1, "Stack before: "));
1451 DEBUG_ONLY(x87_dump_stack(state));
1453 op1_idx = x87_on_stack(state, reg_index_1);
1454 assert(op1_idx >= 0);
1456 /* BEWARE: check for comp a,a cases, they might happen */
1457 if (reg_index_2 != REG_VFP_NOREG) {
1458 /* second operand is a vfp register */
1459 op2_idx = x87_on_stack(state, reg_index_2);
1460 assert(op2_idx >= 0);
1462 if (is_vfp_live(reg_index_2, live)) {
1463 /* second operand is live */
1465 if (is_vfp_live(reg_index_1, live)) {
1466 /* both operands are live */
1469 /* res = tos X op */
1470 } else if (op2_idx == 0) {
1471 /* res = op X tos */
1472 permuted = !permuted;
1475 /* bring the first one to tos */
1476 x87_create_fxch(state, n, op1_idx);
1480 /* res = tos X op */
1483 /* second live, first operand is dead here, bring it to tos.
1484 This means further, op1_idx != op2_idx. */
1485 assert(op1_idx != op2_idx);
1487 x87_create_fxch(state, n, op1_idx);
1492 /* res = tos X op, pop */
1496 /* second operand is dead */
1497 if (is_vfp_live(reg_index_1, live)) {
1498 /* first operand is live: bring second to tos.
1499 This means further, op1_idx != op2_idx. */
1500 assert(op1_idx != op2_idx);
1502 x87_create_fxch(state, n, op2_idx);
1507 /* res = op X tos, pop */
1509 permuted = !permuted;
1512 /* both operands are dead here, check first for identity. */
1513 if (op1_idx == op2_idx) {
1514 /* identically, one pop needed */
1516 x87_create_fxch(state, n, op1_idx);
1520 /* res = tos X op, pop */
1523 /* different, move them to st and st(1) and pop both.
1524 The tricky part is to get one into st(1).*/
1525 else if (op2_idx == 1) {
1526 /* good, second operand is already in the right place, move the first */
1528 /* bring the first on top */
1529 x87_create_fxch(state, n, op1_idx);
1530 assert(op2_idx != 0);
1533 /* res = tos X op, pop, pop */
1535 } else if (op1_idx == 1) {
1536 /* good, first operand is already in the right place, move the second */
1538 /* bring the first on top */
1539 x87_create_fxch(state, n, op2_idx);
1540 assert(op1_idx != 0);
1543 /* res = op X tos, pop, pop */
1544 permuted = !permuted;
1548 /* if one is already the TOS, we need two fxch */
1550 /* first one is TOS, move to st(1) */
1551 x87_create_fxch(state, n, 1);
1552 assert(op2_idx != 1);
1554 x87_create_fxch(state, n, op2_idx);
1556 /* res = op X tos, pop, pop */
1558 permuted = !permuted;
1560 } else if (op2_idx == 0) {
1561 /* second one is TOS, move to st(1) */
1562 x87_create_fxch(state, n, 1);
1563 assert(op1_idx != 1);
1565 x87_create_fxch(state, n, op1_idx);
1567 /* res = tos X op, pop, pop */
1570 /* none of them is either TOS or st(1), 3 fxch needed */
1571 x87_create_fxch(state, n, op2_idx);
1572 assert(op1_idx != 0);
1573 x87_create_fxch(state, n, 1);
1575 x87_create_fxch(state, n, op1_idx);
1577 /* res = tos X op, pop, pop */
1584 /* second operand is an address mode */
1585 if (is_vfp_live(reg_index_1, live)) {
1586 /* first operand is live: bring it to TOS */
1588 x87_create_fxch(state, n, op1_idx);
1592 /* first operand is dead: bring it to tos */
1594 x87_create_fxch(state, n, op1_idx);
1601 /* patch the operation */
1602 if (is_ia32_vFucomFnstsw(n)) {
1606 case 0: dst = op_ia32_FucomFnstsw; break;
1607 case 1: dst = op_ia32_FucompFnstsw; break;
1608 case 2: dst = op_ia32_FucomppFnstsw; break;
1609 default: panic("invalid popcount in sim_Fucom");
1612 for(i = 0; i < pops; ++i) {
1615 } else if(is_ia32_vFucomi(n)) {
1617 case 0: dst = op_ia32_Fucomi; break;
1618 case 1: dst = op_ia32_Fucompi; x87_pop(state); break;
1620 dst = op_ia32_Fucompi;
1622 x87_create_fpop(state, sched_next(n), 1);
1623 node_added = NODE_ADDED;
1625 default: panic("invalid popcount in sim_Fucom");
1628 panic("invalid operation %+F in sim_FucomFnstsw", n);
1631 x87_patch_insn(n, dst);
1638 op1 = &ia32_st_regs[op1_idx];
1641 op2 = &ia32_st_regs[op2_idx];
1644 attr->x87[2] = NULL;
1645 attr->attr.data.ins_permuted = permuted;
1648 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(n),
1649 arch_register_get_name(op1), arch_register_get_name(op2)));
1651 DB((dbg, LEVEL_1, "<<< %s %s, [AM]\n", get_irn_opname(n),
1652 arch_register_get_name(op1)));
1658 static int sim_Keep(x87_state *state, ir_node *node)
1661 const arch_register_t *op_reg;
1666 int node_added = NO_NODE_ADDED;
1668 DB((dbg, LEVEL_1, ">>> %+F\n", node));
1670 arity = get_irn_arity(node);
1671 for(i = 0; i < arity; ++i) {
1672 op = get_irn_n(node, i);
1673 op_reg = arch_get_irn_register(state->sim->arch_env, op);
1674 if(arch_register_get_class(op_reg) != &ia32_reg_classes[CLASS_ia32_vfp])
1677 reg_id = arch_register_get_index(op_reg);
1678 live = vfp_live_args_after(state->sim, node, 0);
1680 op_stack_idx = x87_on_stack(state, reg_id);
1681 if(op_stack_idx >= 0 && !is_vfp_live(reg_id, live)) {
1682 x87_create_fpop(state, sched_next(node), 1);
1683 node_added = NODE_ADDED;
1687 DB((dbg, LEVEL_1, "Stack after: "));
1688 DEBUG_ONLY(x87_dump_stack(state));
1694 void keep_float_node_alive(x87_state *state, ir_node *node)
1700 const arch_register_class_t *cls;
1702 irg = get_irn_irg(node);
1703 block = get_nodes_block(node);
1704 cls = arch_get_irn_reg_class(state->sim->arch_env, node, -1);
1706 keep = be_new_Keep(cls, irg, block, 1, in);
1708 assert(sched_is_scheduled(node));
1709 sched_add_after(node, keep);
1713 * Create a copy of a node. Recreate the node if it's a constant.
1715 * @param state the x87 state
1716 * @param n the node to be copied
1718 * @return the copy of n
1720 static ir_node *create_Copy(x87_state *state, ir_node *n) {
1721 x87_simulator *sim = state->sim;
1722 ir_graph *irg = get_irn_irg(n);
1723 dbg_info *n_dbg = get_irn_dbg_info(n);
1724 ir_mode *mode = get_irn_mode(n);
1725 ir_node *block = get_nodes_block(n);
1726 ir_node *pred = get_irn_n(n, 0);
1727 ir_node *(*cnstr)(dbg_info *, ir_graph *, ir_node *, ir_mode *) = NULL;
1729 const arch_register_t *out;
1730 const arch_register_t *op1;
1731 ia32_x87_attr_t *attr;
1733 /* Do not copy constants, recreate them. */
1734 switch (get_ia32_irn_opcode(pred)) {
1735 case iro_ia32_Unknown_VFP:
1737 cnstr = new_rd_ia32_fldz;
1740 cnstr = new_rd_ia32_fld1;
1742 case iro_ia32_fldpi:
1743 cnstr = new_rd_ia32_fldpi;
1745 case iro_ia32_fldl2e:
1746 cnstr = new_rd_ia32_fldl2e;
1748 case iro_ia32_fldl2t:
1749 cnstr = new_rd_ia32_fldl2t;
1751 case iro_ia32_fldlg2:
1752 cnstr = new_rd_ia32_fldlg2;
1754 case iro_ia32_fldln2:
1755 cnstr = new_rd_ia32_fldln2;
1761 out = x87_get_irn_register(sim, n);
1762 op1 = x87_get_irn_register(sim, pred);
1764 if (cnstr != NULL) {
1765 /* copy a constant */
1766 res = (*cnstr)(n_dbg, irg, block, mode);
1768 x87_push(state, arch_register_get_index(out), res);
1770 attr = get_ia32_x87_attr(res);
1771 attr->x87[2] = &ia32_st_regs[0];
1773 int op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1775 res = new_rd_ia32_fpushCopy(n_dbg, irg, block, pred, mode);
1777 x87_push(state, arch_register_get_index(out), res);
1779 attr = get_ia32_x87_attr(res);
1780 attr->x87[0] = &ia32_st_regs[op1_idx];
1781 attr->x87[2] = &ia32_st_regs[0];
1783 arch_set_irn_register(sim->arch_env, res, out);
1789 * Simulate a be_Copy.
1791 * @param state the x87 state
1792 * @param n the node that should be simulated (and patched)
1794 * @return NO_NODE_ADDED
1796 static int sim_Copy(x87_state *state, ir_node *n) {
1797 x87_simulator *sim = state->sim;
1799 const arch_register_t *out;
1800 const arch_register_t *op1;
1801 const arch_register_class_t *cls;
1802 ir_node *node, *next;
1803 ia32_x87_attr_t *attr;
1804 int op1_idx, out_idx;
1807 cls = arch_get_irn_reg_class(sim->arch_env, n, -1);
1808 if (cls->regs != ia32_vfp_regs)
1811 pred = get_irn_n(n, 0);
1812 out = x87_get_irn_register(sim, n);
1813 op1 = x87_get_irn_register(sim, pred);
1814 live = vfp_live_args_after(sim, n, REGMASK(out));
1816 DB((dbg, LEVEL_1, ">>> %+F %s -> %s\n", n,
1817 arch_register_get_name(op1), arch_register_get_name(out)));
1818 DEBUG_ONLY(vfp_dump_live(live));
1820 /* handle the infamous unknown value */
1821 if (arch_register_get_index(op1) == REG_VFP_UKNWN) {
1822 /* Operand is still live, a real copy. We need here an fpush that can
1823 hold a a register, so use the fpushCopy or recreate constants */
1824 node = create_Copy(state, n);
1826 assert(is_ia32_fldz(node));
1827 next = sched_next(n);
1830 sched_add_before(next, node);
1832 DB((dbg, LEVEL_1, "<<< %+F %s -> %s\n", node, op1->name,
1833 arch_get_irn_register(sim->arch_env, node)->name));
1834 return NO_NODE_ADDED;
1837 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1839 if (is_vfp_live(arch_register_get_index(op1), live)) {
1840 ir_node *pred = get_irn_n(n, 0);
1842 /* Operand is still live, a real copy. We need here an fpush that can
1843 hold a a register, so use the fpushCopy or recreate constants */
1844 node = create_Copy(state, n);
1846 /* We have to make sure the old value doesn't go dead (which can happen
1847 * when we recreate constants). As the simulator expected that value in
1848 * the pred blocks. This is unfortunate as removing it would save us 1
1849 * instruction, but we would have to rerun all the simulation to get
1852 next = sched_next(n);
1855 sched_add_before(next, node);
1857 if(get_irn_n_edges(pred) == 0) {
1858 keep_float_node_alive(state, pred);
1861 DB((dbg, LEVEL_1, "<<< %+F %s -> ?\n", node, op1->name));
1863 out_idx = x87_on_stack(state, arch_register_get_index(out));
1865 if (out_idx >= 0 && out_idx != op1_idx) {
1866 /* Matze: out already on stack? how can this happen? */
1869 /* op1 must be killed and placed where out is */
1871 /* best case, simple remove and rename */
1872 x87_patch_insn(n, op_ia32_Pop);
1873 attr = get_ia32_x87_attr(n);
1874 attr->x87[0] = op1 = &ia32_st_regs[0];
1877 x87_set_st(state, arch_register_get_index(out), n, op1_idx - 1);
1879 /* move op1 to tos, store and pop it */
1881 x87_create_fxch(state, n, op1_idx);
1884 x87_patch_insn(n, op_ia32_Pop);
1885 attr = get_ia32_x87_attr(n);
1886 attr->x87[0] = op1 = &ia32_st_regs[out_idx];
1889 x87_set_st(state, arch_register_get_index(out), n, out_idx - 1);
1891 DB((dbg, LEVEL_1, "<<< %+F %s\n", n, op1->name));
1893 /* just a virtual copy */
1894 x87_set_st(state, arch_register_get_index(out), get_unop_op(n), op1_idx);
1895 /* don't remove the node to keep the verifier quiet :),
1896 the emitter won't emit any code for the node */
1899 DB((dbg, LEVEL_1, "<<< KILLED %s\n", get_irn_opname(n)));
1900 exchange(n, get_unop_op(n));
1904 return NO_NODE_ADDED;
1908 * Returns the result proj of the call
1910 static ir_node *get_call_result_proj(ir_node *call) {
1911 const ir_edge_t *edge;
1913 /* search the result proj */
1914 foreach_out_edge(call, edge) {
1915 ir_node *proj = get_edge_src_irn(edge);
1916 long pn = get_Proj_proj(proj);
1918 if (pn == pn_ia32_Call_vf0) {
1924 } /* get_call_result_proj */
1927 * Simulate a ia32_Call.
1929 * @param state the x87 state
1930 * @param n the node that should be simulated
1932 * @return NO_NODE_ADDED
1934 static int sim_Call(x87_state *state, ir_node *n)
1936 ir_type *call_tp = get_ia32_call_attr_const(n)->call_tp;
1940 const arch_register_t *reg;
1942 DB((dbg, LEVEL_1, ">>> %+F\n", n));
1944 /* at the begin of a call the x87 state should be empty */
1945 assert(state->depth == 0 && "stack not empty before call");
1947 if (get_method_n_ress(call_tp) <= 0)
1951 * If the called function returns a float, it is returned in st(0).
1952 * This even happens if the return value is NOT used.
1953 * Moreover, only one return result is supported.
1955 res_type = get_method_res_type(call_tp, 0);
1956 mode = get_type_mode(res_type);
1958 if (mode == NULL || !mode_is_float(mode))
1961 resproj = get_call_result_proj(n);
1962 assert(resproj != NULL);
1964 reg = x87_get_irn_register(state->sim, resproj);
1965 x87_push(state, arch_register_get_index(reg), resproj);
1968 DB((dbg, LEVEL_1, "Stack after: "));
1969 DEBUG_ONLY(x87_dump_stack(state));
1971 return NO_NODE_ADDED;
1975 * Simulate a be_Spill.
1977 * @param state the x87 state
1978 * @param n the node that should be simulated (and patched)
1980 * Should not happen, spills are lowered before x87 simulator see them.
1982 static int sim_Spill(x87_state *state, ir_node *n) {
1983 assert(0 && "Spill not lowered");
1984 return sim_fst(state, n);
1988 * Simulate a be_Reload.
1990 * @param state the x87 state
1991 * @param n the node that should be simulated (and patched)
1993 * Should not happen, reloads are lowered before x87 simulator see them.
1995 static int sim_Reload(x87_state *state, ir_node *n) {
1996 assert(0 && "Reload not lowered");
1997 return sim_fld(state, n);
2001 * Simulate a be_Return.
2003 * @param state the x87 state
2004 * @param n the node that should be simulated (and patched)
2006 * @return NO_NODE_ADDED
2008 static int sim_Return(x87_state *state, ir_node *n) {
2009 int n_res = be_Return_get_n_rets(n);
2010 int i, n_float_res = 0;
2012 /* only floating point return values must resist on stack */
2013 for (i = 0; i < n_res; ++i) {
2014 ir_node *res = get_irn_n(n, be_pos_Return_val + i);
2016 if (mode_is_float(get_irn_mode(res)))
2019 assert(x87_get_depth(state) == n_float_res);
2021 /* pop them virtually */
2022 for (i = n_float_res - 1; i >= 0; --i)
2025 return NO_NODE_ADDED;
2028 typedef struct _perm_data_t {
2029 const arch_register_t *in;
2030 const arch_register_t *out;
2034 * Simulate a be_Perm.
2036 * @param state the x87 state
2037 * @param irn the node that should be simulated (and patched)
2039 * @return NO_NODE_ADDED
2041 static int sim_Perm(x87_state *state, ir_node *irn) {
2043 x87_simulator *sim = state->sim;
2044 ir_node *pred = get_irn_n(irn, 0);
2046 const ir_edge_t *edge;
2048 /* handle only floating point Perms */
2049 if (! mode_is_float(get_irn_mode(pred)))
2050 return NO_NODE_ADDED;
2052 DB((dbg, LEVEL_1, ">>> %+F\n", irn));
2054 /* Perm is a pure virtual instruction on x87.
2055 All inputs must be on the FPU stack and are pairwise
2056 different from each other.
2057 So, all we need to do is to permutate the stack state. */
2058 n = get_irn_arity(irn);
2059 NEW_ARR_A(int, stack_pos, n);
2061 /* collect old stack positions */
2062 for (i = 0; i < n; ++i) {
2063 const arch_register_t *inreg = x87_get_irn_register(sim, get_irn_n(irn, i));
2064 int idx = x87_on_stack(state, arch_register_get_index(inreg));
2066 assert(idx >= 0 && "Perm argument not on x87 stack");
2070 /* now do the permutation */
2071 foreach_out_edge(irn, edge) {
2072 ir_node *proj = get_edge_src_irn(edge);
2073 const arch_register_t *out = x87_get_irn_register(sim, proj);
2074 long num = get_Proj_proj(proj);
2076 assert(0 <= num && num < n && "More Proj's than Perm inputs");
2077 x87_set_st(state, arch_register_get_index(out), proj, stack_pos[(unsigned)num]);
2079 DB((dbg, LEVEL_1, "<<< %+F\n", irn));
2081 return NO_NODE_ADDED;
2084 static int sim_Barrier(x87_state *state, ir_node *node) {
2085 //const arch_env_t *arch_env = state->sim->arch_env;
2088 /* materialize unknown if needed */
2089 arity = get_irn_arity(node);
2090 for(i = 0; i < arity; ++i) {
2091 const arch_register_t *reg;
2094 ia32_x87_attr_t *attr;
2095 ir_node *in = get_irn_n(node, i);
2097 if(!is_ia32_Unknown_VFP(in))
2100 /* TODO: not completely correct... */
2101 reg = &ia32_vfp_regs[REG_VFP_UKNWN];
2104 block = get_nodes_block(node);
2105 zero = new_rd_ia32_fldz(NULL, current_ir_graph, block, mode_E);
2106 x87_push(state, arch_register_get_index(reg), zero);
2108 attr = get_ia32_x87_attr(zero);
2109 attr->x87[2] = &ia32_st_regs[0];
2111 sched_add_before(node, zero);
2113 set_irn_n(node, i, zero);
2116 return NO_NODE_ADDED;
2121 * Kill any dead registers at block start by popping them from the stack.
2123 * @param sim the simulator handle
2124 * @param block the current block
2125 * @param start_state the x87 state at the begin of the block
2127 * @return the x87 state after dead register killed
2129 static x87_state *x87_kill_deads(x87_simulator *sim, ir_node *block, x87_state *start_state) {
2130 x87_state *state = start_state;
2131 ir_node *first_insn = sched_first(block);
2132 ir_node *keep = NULL;
2133 unsigned live = vfp_live_args_after(sim, block, 0);
2135 int i, depth, num_pop;
2138 depth = x87_get_depth(state);
2139 for (i = depth - 1; i >= 0; --i) {
2140 int reg = x87_get_st_reg(state, i);
2142 if (! is_vfp_live(reg, live))
2143 kill_mask |= (1 << i);
2147 /* create a new state, will be changed */
2148 state = x87_clone_state(sim, state);
2150 DB((dbg, LEVEL_1, "Killing deads:\n"));
2151 DEBUG_ONLY(vfp_dump_live(live));
2152 DEBUG_ONLY(x87_dump_stack(state));
2154 if (kill_mask != 0 && live == 0) {
2155 /* special case: kill all registers */
2156 if (ia32_cg_config.use_femms || ia32_cg_config.use_emms) {
2157 if (ia32_cg_config.use_femms) {
2158 /* use FEMMS on AMD processors to clear all */
2159 keep = new_rd_ia32_femms(NULL, get_irn_irg(block), block);
2161 /* use EMMS to clear all */
2162 keep = new_rd_ia32_emms(NULL, get_irn_irg(block), block);
2164 sched_add_before(first_insn, keep);
2170 /* now kill registers */
2172 /* we can only kill from TOS, so bring them up */
2173 if (! (kill_mask & 1)) {
2174 /* search from behind, because we can to a double-pop */
2175 for (i = depth - 1; i >= 0; --i) {
2176 if (kill_mask & (1 << i)) {
2177 kill_mask &= ~(1 << i);
2184 x87_set_st(state, -1, keep, i);
2185 x87_create_fxch(state, first_insn, i);
2188 if ((kill_mask & 3) == 3) {
2189 /* we can do a double-pop */
2193 /* only a single pop */
2198 kill_mask >>= num_pop;
2199 keep = x87_create_fpop(state, first_insn, num_pop);
2204 } /* x87_kill_deads */
2207 * If we have PhiEs with unknown operands then we have to make sure that some
2208 * value is actually put onto the stack.
2210 static void fix_unknown_phis(x87_state *state, ir_node *block,
2211 ir_node *pred_block, int pos)
2215 sched_foreach(block, node) {
2217 const arch_register_t *reg;
2218 ia32_x87_attr_t *attr;
2223 op = get_Phi_pred(node, pos);
2224 if(!is_ia32_Unknown_VFP(op))
2227 reg = arch_get_irn_register(state->sim->arch_env, node);
2229 /* create a zero at end of pred block */
2230 zero = new_rd_ia32_fldz(NULL, current_ir_graph, pred_block, mode_E);
2231 x87_push(state, arch_register_get_index(reg), zero);
2233 attr = get_ia32_x87_attr(zero);
2234 attr->x87[2] = &ia32_st_regs[0];
2236 assert(is_ia32_fldz(zero));
2237 sched_add_before(sched_last(pred_block), zero);
2239 set_Phi_pred(node, pos, zero);
2244 * Run a simulation and fix all virtual instructions for a block.
2246 * @param sim the simulator handle
2247 * @param block the current block
2249 static void x87_simulate_block(x87_simulator *sim, ir_node *block) {
2251 blk_state *bl_state = x87_get_bl_state(sim, block);
2252 x87_state *state = bl_state->begin;
2253 const ir_edge_t *edge;
2254 ir_node *start_block;
2256 assert(state != NULL);
2257 /* already processed? */
2258 if (bl_state->end != NULL)
2261 DB((dbg, LEVEL_1, "Simulate %+F\n", block));
2262 DB((dbg, LEVEL_2, "State at Block begin:\n "));
2263 DEBUG_ONLY(x87_dump_stack(state));
2265 /* at block begin, kill all dead registers */
2266 state = x87_kill_deads(sim, block, state);
2267 /* create a new state, will be changed */
2268 state = x87_clone_state(sim, state);
2270 /* beware, n might change */
2271 for (n = sched_first(block); !sched_is_end(n); n = next) {
2274 ir_op *op = get_irn_op(n);
2276 next = sched_next(n);
2277 if (op->ops.generic == NULL)
2280 func = (sim_func)op->ops.generic;
2283 node_inserted = (*func)(state, n);
2286 sim_func might have added an additional node after n,
2288 beware: n must not be changed by sim_func
2289 (i.e. removed from schedule) in this case
2291 if (node_inserted != NO_NODE_ADDED)
2292 next = sched_next(n);
2295 start_block = get_irg_start_block(get_irn_irg(block));
2297 DB((dbg, LEVEL_2, "State at Block end:\n ")); DEBUG_ONLY(x87_dump_stack(state));
2299 /* check if the state must be shuffled */
2300 foreach_block_succ(block, edge) {
2301 ir_node *succ = get_edge_src_irn(edge);
2302 blk_state *succ_state;
2304 if (succ == start_block)
2307 succ_state = x87_get_bl_state(sim, succ);
2309 fix_unknown_phis(state, succ, block, get_edge_src_pos(edge));
2311 if (succ_state->begin == NULL) {
2312 DB((dbg, LEVEL_2, "Set begin state for succ %+F:\n", succ));
2313 DEBUG_ONLY(x87_dump_stack(state));
2314 succ_state->begin = state;
2316 waitq_put(sim->worklist, succ);
2318 DB((dbg, LEVEL_2, "succ %+F already has a state, shuffling\n", succ));
2319 /* There is already a begin state for the successor, bad.
2320 Do the necessary permutations.
2321 Note that critical edges are removed, so this is always possible:
2322 If the successor has more than one possible input, then it must
2325 x87_shuffle(sim, block, state, succ, succ_state->begin);
2328 bl_state->end = state;
2329 } /* x87_simulate_block */
2331 static void register_sim(ir_op *op, sim_func func)
2333 assert(op->ops.generic == NULL);
2334 op->ops.generic = (op_func) func;
2338 * Create a new x87 simulator.
2340 * @param sim a simulator handle, will be initialized
2341 * @param irg the current graph
2342 * @param arch_env the architecture environment
2344 static void x87_init_simulator(x87_simulator *sim, ir_graph *irg,
2345 const arch_env_t *arch_env)
2347 obstack_init(&sim->obst);
2348 sim->blk_states = pmap_create();
2349 sim->arch_env = arch_env;
2350 sim->n_idx = get_irg_last_idx(irg);
2351 sim->live = obstack_alloc(&sim->obst, sizeof(*sim->live) * sim->n_idx);
2353 DB((dbg, LEVEL_1, "--------------------------------\n"
2354 "x87 Simulator started for %+F\n", irg));
2356 /* set the generic function pointer of instruction we must simulate */
2357 clear_irp_opcodes_generic_func();
2359 register_sim(op_ia32_Call, sim_Call);
2360 register_sim(op_ia32_vfld, sim_fld);
2361 register_sim(op_ia32_vfild, sim_fild);
2362 register_sim(op_ia32_vfld1, sim_fld1);
2363 register_sim(op_ia32_vfldz, sim_fldz);
2364 register_sim(op_ia32_vfadd, sim_fadd);
2365 register_sim(op_ia32_vfsub, sim_fsub);
2366 register_sim(op_ia32_vfmul, sim_fmul);
2367 register_sim(op_ia32_vfdiv, sim_fdiv);
2368 register_sim(op_ia32_vfprem, sim_fprem);
2369 register_sim(op_ia32_vfabs, sim_fabs);
2370 register_sim(op_ia32_vfchs, sim_fchs);
2371 register_sim(op_ia32_vfist, sim_fist);
2372 register_sim(op_ia32_vfisttp, sim_fisttp);
2373 register_sim(op_ia32_vfst, sim_fst);
2374 register_sim(op_ia32_vFtstFnstsw, sim_FtstFnstsw);
2375 register_sim(op_ia32_vFucomFnstsw, sim_Fucom);
2376 register_sim(op_ia32_vFucomi, sim_Fucom);
2377 register_sim(op_be_Copy, sim_Copy);
2378 register_sim(op_be_Spill, sim_Spill);
2379 register_sim(op_be_Reload, sim_Reload);
2380 register_sim(op_be_Return, sim_Return);
2381 register_sim(op_be_Perm, sim_Perm);
2382 register_sim(op_be_Keep, sim_Keep);
2383 register_sim(op_be_Barrier, sim_Barrier);
2384 } /* x87_init_simulator */
2387 * Destroy a x87 simulator.
2389 * @param sim the simulator handle
2391 static void x87_destroy_simulator(x87_simulator *sim) {
2392 pmap_destroy(sim->blk_states);
2393 obstack_free(&sim->obst, NULL);
2394 DB((dbg, LEVEL_1, "x87 Simulator stopped\n\n"));
2395 } /* x87_destroy_simulator */
2398 * Pre-block walker: calculate the liveness information for the block
2399 * and store it into the sim->live cache.
2401 static void update_liveness_walker(ir_node *block, void *data) {
2402 x87_simulator *sim = data;
2403 update_liveness(sim, block);
2404 } /* update_liveness_walker */
2407 * Run a simulation and fix all virtual instructions for a graph.
2409 * @param env the architecture environment
2410 * @param irg the current graph
2412 * Needs a block-schedule.
2414 void x87_simulate_graph(const arch_env_t *arch_env, be_irg_t *birg) {
2415 ir_node *block, *start_block;
2416 blk_state *bl_state;
2418 ir_graph *irg = be_get_birg_irg(birg);
2420 /* create the simulator */
2421 x87_init_simulator(&sim, irg, arch_env);
2423 start_block = get_irg_start_block(irg);
2424 bl_state = x87_get_bl_state(&sim, start_block);
2426 /* start with the empty state */
2427 bl_state->begin = empty;
2430 sim.worklist = new_waitq();
2431 waitq_put(sim.worklist, start_block);
2433 be_assure_liveness(birg);
2434 sim.lv = be_get_birg_liveness(birg);
2435 // sim.lv = be_liveness(be_get_birg_irg(birg));
2436 be_liveness_assure_sets(sim.lv);
2438 /* Calculate the liveness for all nodes. We must precalculate this info,
2439 * because the simulator adds new nodes (possible before Phi nodes) which
2440 * would let a lazy calculation fail.
2441 * On the other hand we reduce the computation amount due to
2442 * precaching from O(n^2) to O(n) at the expense of O(n) cache memory.
2444 irg_block_walk_graph(irg, update_liveness_walker, NULL, &sim);
2448 block = waitq_get(sim.worklist);
2449 x87_simulate_block(&sim, block);
2450 } while (! waitq_empty(sim.worklist));
2453 del_waitq(sim.worklist);
2454 x87_destroy_simulator(&sim);
2455 } /* x87_simulate_graph */
2457 void ia32_init_x87(void) {
2458 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.x87");
2459 } /* ia32_init_x87 */