2 * Copyright (C) 1995-2010 University of Karlsruhe. All rights reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the x87 support and virtual to stack
23 * register translation for the ia32 backend.
24 * @author Michael Beck
33 #include "iredges_t.h"
48 #include "bearch_ia32_t.h"
49 #include "ia32_new_nodes.h"
50 #include "gen_ia32_new_nodes.h"
51 #include "gen_ia32_regalloc_if.h"
53 #include "ia32_architecture.h"
55 /** the debug handle */
56 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
58 /* Forward declaration. */
59 typedef struct x87_simulator x87_simulator;
62 * An exchange template.
63 * Note that our virtual functions have the same inputs
64 * and attributes as the real ones, so we can simple exchange
66 * Further, x87 supports inverse instructions, so we can handle them.
68 typedef struct exchange_tmpl {
69 ir_op *normal_op; /**< the normal one */
70 ir_op *reverse_op; /**< the reverse one if exists */
71 ir_op *normal_pop_op; /**< the normal one with tos pop */
72 ir_op *reverse_pop_op; /**< the reverse one with tos pop */
76 * An entry on the simulated x87 stack.
78 typedef struct st_entry {
79 int reg_idx; /**< the virtual register index of this stack value */
80 ir_node *node; /**< the node that produced this value */
86 typedef struct x87_state {
87 st_entry st[N_ia32_st_REGS]; /**< the register stack */
88 int depth; /**< the current stack depth */
89 x87_simulator *sim; /**< The simulator. */
92 /** An empty state, used for blocks without fp instructions. */
93 static x87_state empty = { { {0, NULL}, }, 0, NULL };
96 * Return values of the instruction simulator functions.
99 NO_NODE_ADDED = 0, /**< No node that needs simulation was added. */
100 NODE_ADDED = 1 /**< A node that must be simulated was added by the simulator
101 in the schedule AFTER the current node. */
105 * The type of an instruction simulator function.
107 * @param state the x87 state
108 * @param n the node to be simulated
110 * @return NODE_ADDED if a node was added AFTER n in schedule that MUST be
112 * NO_NODE_ADDED otherwise
114 typedef int (*sim_func)(x87_state *state, ir_node *n);
117 * A block state: Every block has a x87 state at the beginning and at the end.
119 typedef struct blk_state {
120 x87_state *begin; /**< state at the begin or NULL if not assigned */
121 x87_state *end; /**< state at the end or NULL if not assigned */
124 /** liveness bitset for vfp registers. */
125 typedef unsigned char vfp_liveness;
130 struct x87_simulator {
131 struct obstack obst; /**< An obstack for fast allocating. */
132 pmap *blk_states; /**< Map blocks to states. */
133 be_lv_t *lv; /**< intrablock liveness. */
134 vfp_liveness *live; /**< Liveness information. */
135 unsigned n_idx; /**< The cached get_irg_last_idx() result. */
136 waitq *worklist; /**< Worklist of blocks that must be processed. */
140 * Returns the current stack depth.
142 * @param state the x87 state
144 * @return the x87 stack depth
146 static int x87_get_depth(const x87_state *state)
/**
 * Return a pointer to the entry for stack slot st(pos).
 *
 * The st[] array is filled from its upper end: with a current depth d,
 * st(0) (the top of stack) lives at index N_ia32_st_REGS - d, so pushing
 * grows the occupied region towards index 0.
 */
151 static st_entry *x87_get_entry(x87_state *const state, int const pos)
153 assert(0 <= pos && pos < state->depth);
154 return &state->st[N_ia32_st_REGS - state->depth + pos];
158 * Return the virtual register index at st(pos).
160 * @param state the x87 state
161 * @param pos a stack position
163 * @return the vfp register index that produced the value at st(pos)
165 static int x87_get_st_reg(const x87_state *state, int pos)
167 return x87_get_entry((x87_state*)state, pos)->reg_idx;
172 * Dump the stack for debugging.
174 * @param state the x87 state
176 static void x87_dump_stack(const x87_state *state)
178 for (int i = state->depth; i-- != 0;) {
179 st_entry const *const entry = x87_get_entry((x87_state*)state, i);
180 DB((dbg, LEVEL_2, "vf%d(%+F) ", entry->reg_idx, entry->node));
182 DB((dbg, LEVEL_2, "<-- TOS\n"));
184 #endif /* DEBUG_libfirm */
187 * Set a virtual register to st(pos).
189 * @param state the x87 state
190 * @param reg_idx the vfp register index that should be set
191 * @param node the IR node that produces the value of the vfp register
192 * @param pos the stack position where the new value should be entered
194 static void x87_set_st(x87_state *state, int reg_idx, ir_node *node, int pos)
196 st_entry *const entry = x87_get_entry(state, pos);
/* overwrite the slot in place; the stack depth is unchanged
 * (the assignment of entry->node is elided in this excerpt — TODO confirm) */
197 entry->reg_idx = reg_idx;
200 DB((dbg, LEVEL_2, "After SET_REG: "));
201 DEBUG_ONLY(x87_dump_stack(state);)
205 * Set the tos virtual register.
207 * @param state the x87 state
208 * @param reg_idx the vfp register index that should be set
209 * @param node the IR node that produces the value of the vfp register
211 static void x87_set_tos(x87_state *state, int reg_idx, ir_node *node)
213 x87_set_st(state, reg_idx, node, 0);
217 * Swap st(0) with st(pos).
219 * @param state the x87 state
220 * @param pos the stack position to change the tos with
222 static void x87_fxch(x87_state *state, int pos)
/* fetch both entries and swap them via the temporary t
 * (the actual swap assignments are elided in this excerpt) */
224 st_entry *const a = x87_get_entry(state, pos);
225 st_entry *const b = x87_get_entry(state, 0);
226 st_entry const t = *a;
230 DB((dbg, LEVEL_2, "After FXCH: "));
231 DEBUG_ONLY(x87_dump_stack(state);)
235 * Convert a virtual register to the stack index.
237 * @param state the x87 state
238 * @param reg_idx the register vfp index
240 * @return the stack position where the register is stacked
241 * or -1 if the virtual register was not found
243 static int x87_on_stack(const x87_state *state, int reg_idx)
/* linear scan is fine: the x87 stack holds at most N_ia32_st_REGS (8) entries */
245 for (int i = 0; i < state->depth; ++i) {
246 if (x87_get_st_reg(state, i) == reg_idx)
253 * Push a virtual Register onto the stack, double pushed allowed.
255 * @param state the x87 state
256 * @param reg_idx the register vfp index
257 * @param node the node that produces the value of the vfp register
259 static void x87_push_dbl(x87_state *state, int reg_idx, ir_node *node)
261 assert(state->depth < N_ia32_st_REGS && "stack overrun");
/* NOTE(review): the depth increment is elided in this excerpt;
 * x87_get_entry(state, 0) below must already see the grown stack. */
264 st_entry *const entry = x87_get_entry(state, 0);
265 entry->reg_idx = reg_idx;
268 DB((dbg, LEVEL_2, "After PUSH: ")); DEBUG_ONLY(x87_dump_stack(state);)
272 * Push a virtual Register onto the stack, double pushes are NOT allowed.
274 * @param state the x87 state
275 * @param reg_idx the register vfp index
276 * @param node the node that produces the value of the vfp register
278 static void x87_push(x87_state *state, int reg_idx, ir_node *node)
/* same as x87_push_dbl, but a register may only appear once on the stack */
280 assert(x87_on_stack(state, reg_idx) == -1 && "double push");
282 x87_push_dbl(state, reg_idx, node);
286 * Pop a virtual Register from the stack.
288 * @param state the x87 state
290 static void x87_pop(x87_state *state)
292 assert(state->depth > 0 && "stack underrun");
/* NOTE(review): the depth decrement is elided here; popping only shrinks
 * the logical depth, stale st[] slots are never read (see x87_get_entry). */
296 DB((dbg, LEVEL_2, "After POP: ")); DEBUG_ONLY(x87_dump_stack(state);)
300 * Empty the fpu stack
302 * @param state the x87 state
304 static void x87_emms(x87_state *state)
310 * Returns the block state of a block.
312 * @param sim the x87 simulator handle
313 * @param block the current block
315 * @return the block state
317 static blk_state *x87_get_bl_state(x87_simulator *sim, ir_node *block)
/* lazily allocate the per-block state on first request and cache it in
 * the pmap (the NULL check between lookup and allocation is elided here) */
319 blk_state *res = pmap_get(blk_state, sim->blk_states, block);
322 res = OALLOC(&sim->obst, blk_state);
326 pmap_insert(sim->blk_states, block, res);
335 * @param sim the x87 simulator handle
336 * @param src the x87 state that will be cloned
338 * @return a cloned copy of the src state
340 static x87_state *x87_clone_state(x87_simulator *sim, const x87_state *src)
342 x87_state *const res = OALLOC(&sim->obst, x87_state);
348 * Patch a virtual instruction into a x87 one and return
349 * the node representing the result value.
351 * @param n the IR node to patch
352 * @param op the x87 opcode to patch in
354 static ir_node *x87_patch_insn(ir_node *n, ir_op *op)
356 ir_mode *mode = get_irn_mode(n);
361 if (mode == mode_T) {
362 /* patch all Proj's */
363 foreach_out_edge(n, edge) {
364 ir_node *proj = get_edge_src_irn(edge);
366 mode = get_irn_mode(proj);
367 if (mode_is_float(mode)) {
/* the float result now lives in a concrete x87 stack register:
 * retype the Proj to the st register-class mode */
369 set_irn_mode(proj, ia32_reg_classes[CLASS_ia32_st].mode);
373 } else if (mode_is_float(mode))
/* single float result: retype the node itself */
374 set_irn_mode(n, ia32_reg_classes[CLASS_ia32_st].mode);
379 * Returns the first Proj of a mode_T node having a given mode.
381 * @param n the mode_T node
382 * @param m the desired mode of the Proj
383 * @return The first Proj of mode @p m found or NULL.
385 static ir_node *get_irn_Proj_for_mode(ir_node *n, ir_mode *m)
387 assert(get_irn_mode(n) == mode_T && "Need mode_T node");
/* walk the out edges and return the first Proj whose mode matches m */
389 foreach_out_edge(n, edge) {
390 ir_node *proj = get_edge_src_irn(edge);
391 if (get_irn_mode(proj) == m)
399 * Wrap the arch_* function here so we can check for errors.
401 static inline const arch_register_t *x87_get_irn_register(const ir_node *irn)
403 const arch_register_t *res = arch_get_irn_register(irn);
/* before simulation only virtual fp (vfp) registers may occur here */
405 assert(res->reg_class == &ia32_reg_classes[CLASS_ia32_vfp]);
/** Same as x87_get_irn_register(), but for a specific output position. */
409 static inline const arch_register_t *x87_irn_get_register(const ir_node *irn,
412 const arch_register_t *res = arch_get_irn_register_out(irn, pos);
414 assert(res->reg_class == &ia32_reg_classes[CLASS_ia32_vfp]);
418 static inline const arch_register_t *get_st_reg(int index)
420 return &ia32_registers[REG_ST0 + index];
424 * Create a fxch node before another node.
426 * @param state the x87 state
427 * @param n the node after the fxch
428 * @param pos exchange st(pos) with st(0)
432 static ir_node *x87_create_fxch(x87_state *state, ir_node *n, int pos)
/* update the simulated stack first, then materialize the real fxch */
434 x87_fxch(state, pos);
436 ir_node *const block = get_nodes_block(n);
437 ir_node *const fxch = new_bd_ia32_fxch(NULL, block);
438 ia32_x87_attr_t *const attr = get_ia32_x87_attr(fxch);
/* x87[0] = exchanged slot, x87[2] = tos (result register convention) */
439 attr->x87[0] = get_st_reg(pos);
440 attr->x87[2] = get_st_reg(0);
/* the fxch must execute before n so n sees the permuted stack */
444 sched_add_before(n, fxch);
445 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fxch), attr->x87[0]->name, attr->x87[2]->name));
449 /* -------------- x87 perm --------------- */
452 * Calculate the necessary permutations to reach dst_state.
454 * These permutations are done with fxch instructions and placed
455 * at the end of the block.
457 * Note that critical edges are removed here, so we need only
458 * a shuffle if the current block has only one successor.
460 * @param block the current block
461 * @param state the current x87 stack state, might be modified
462 * @param dst_state destination state
/* NOTE(review): several interior lines (braces, breaks, else branches) are
 * elided in this excerpt; the comments below describe only the visible code. */
466 static x87_state *x87_shuffle(ir_node *block, x87_state *state, const x87_state *dst_state)
468 int i, n_cycles, k, ri;
469 unsigned cycles[4], all_mask;
470 char cycle_idx[4][8];
472 assert(state->depth == dst_state->depth);
474 /* Some mathematics here:
475 * If we have a cycle of length n that includes the tos,
476 * we need n-1 exchange operations.
477 * We can always add the tos and restore it, so we need
478 * n+1 exchange operations for a cycle not containing the tos.
479 * So, the maximum of needed operations is for a cycle of 7
480 * not including the tos == 8.
481 * This is the same number of ops we would need for using stores,
482 * so exchange is cheaper (we save the loads).
483 * On the other hand, we might need an additional exchange
484 * in the next block to bring one operand on top, so the
485 * number of ops in the first case is identical.
486 * Further, no more than 4 cycles can exists (4 x 2). */
487 all_mask = (1 << (state->depth)) - 1;
/* decompose the permutation state -> dst_state into disjoint cycles;
 * all_mask tracks stack slots not yet assigned to a cycle */
489 for (n_cycles = 0; all_mask; ++n_cycles) {
490 int src_idx, dst_idx;
492 /* find the first free slot */
493 for (i = 0; i < state->depth; ++i) {
494 if (all_mask & (1 << i)) {
495 all_mask &= ~(1 << i);
497 /* check if there are differences here */
498 if (x87_get_st_reg(state, i) != x87_get_st_reg(dst_state, i))
504 /* no more cycles found */
509 cycles[n_cycles] = (1 << i);
510 cycle_idx[n_cycles][k++] = i;
/* follow the cycle: where does the value at src_idx have to go? */
511 for (src_idx = i; ; src_idx = dst_idx) {
512 dst_idx = x87_on_stack(dst_state, x87_get_st_reg(state, src_idx));
514 if ((all_mask & (1 << dst_idx)) == 0)
517 cycle_idx[n_cycles][k++] = dst_idx;
518 cycles[n_cycles] |= (1 << dst_idx);
519 all_mask &= ~(1 << dst_idx);
521 cycle_idx[n_cycles][k] = -1;
525 /* no permutation needed */
529 /* Hmm: permutation needed */
530 DB((dbg, LEVEL_2, "\n%+F needs permutation: from\n", block));
531 DEBUG_ONLY(x87_dump_stack(state);)
532 DB((dbg, LEVEL_2, " to\n"));
533 DEBUG_ONLY(x87_dump_stack(dst_state);)
537 DB((dbg, LEVEL_2, "Need %d cycles\n", n_cycles));
538 for (ri = 0; ri < n_cycles; ++ri) {
539 DB((dbg, LEVEL_2, " Ring %d:\n ", ri));
540 for (k = 0; cycle_idx[ri][k] != -1; ++k)
541 DB((dbg, LEVEL_2, " st%d ->", cycle_idx[ri][k]));
542 DB((dbg, LEVEL_2, "\n"));
547 * Find the place node must be insert.
548 * We have only one successor block, so the last instruction should
551 ir_node *const before = sched_last(block);
552 assert(is_cfop(before));
554 /* now do the permutations */
/* a cycle not containing the tos needs an extra fxch before and after
 * to route it through st(0) — see the fxch at the head and tail below */
555 for (ri = 0; ri < n_cycles; ++ri) {
556 if ((cycles[ri] & 1) == 0) {
557 /* this cycle does not include the tos */
558 x87_create_fxch(state, before, cycle_idx[ri][0]);
560 for (k = 1; cycle_idx[ri][k] != -1; ++k) {
561 x87_create_fxch(state, before, cycle_idx[ri][k]);
563 if ((cycles[ri] & 1) == 0) {
564 /* this cycle does not include the tos */
565 x87_create_fxch(state, before, cycle_idx[ri][0]);
572 * Create a fpush before node n.
574 * @param state the x87 state
575 * @param n the node after the fpush
576 * @param pos push st(pos) on stack
577 * @param val the value to push
579 static void x87_create_fpush(x87_state *state, ir_node *n, int pos, ir_node *const val)
581 arch_register_t const *const out = x87_get_irn_register(val);
/* double-push is deliberately allowed: the value now lives twice on the stack */
582 x87_push_dbl(state, arch_register_get_index(out), val);
584 ir_node *const fpush = new_bd_ia32_fpush(NULL, get_nodes_block(n));
585 ia32_x87_attr_t *const attr = get_ia32_x87_attr(fpush);
/* x87[0] = pushed slot, x87[2] = tos (result register convention) */
586 attr->x87[0] = get_st_reg(pos);
587 attr->x87[2] = get_st_reg(0);
590 sched_add_before(n, fpush);
592 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fpush), attr->x87[0]->name, attr->x87[2]->name));
596 * Create a fpop before node n.
598 * @param state the x87 state
599 * @param n the node after the fpop
600 * @param num pop 1 or 2 values
602 * @return the fpop node
604 static ir_node *x87_create_fpop(x87_state *state, ir_node *n, int num)
606 ir_node *fpop = NULL;
607 ia32_x87_attr_t *attr;
/* NOTE(review): the simulated x87_pop() and the loop over num are elided
 * in this excerpt. */
/* emit ffreep instead of fstp st(0) when the architecture config allows it */
612 if (ia32_cg_config.use_ffreep)
613 fpop = new_bd_ia32_ffreep(NULL, get_nodes_block(n));
615 fpop = new_bd_ia32_fpop(NULL, get_nodes_block(n));
616 attr = get_ia32_x87_attr(fpop);
/* both operand and result of a pop are st(0) */
617 attr->x87[0] = get_st_reg(0);
618 attr->x87[1] = get_st_reg(0);
619 attr->x87[2] = get_st_reg(0);
622 sched_add_before(n, fpop);
623 DB((dbg, LEVEL_1, "<<< %s %s\n", get_irn_opname(fpop), attr->x87[0]->name));
628 /* --------------------------------- liveness ------------------------------------------ */
631 * The liveness transfer function.
632 * Updates a live set over a single step from a given node to its predecessor.
633 * Everything defined at the node is removed from the set, the uses of the node get inserted.
635 * @param irn The node at which liveness should be computed.
636 * @param live The bitset of registers live before @p irn. This set gets modified by updating it to
637 * the registers live after irn.
639 * @return The live bitset.
641 static vfp_liveness vfp_liveness_transfer(ir_node *irn, vfp_liveness live)
644 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
/* defs kill: clear the bit of every vfp register this node defines
 * (for mode_T nodes the definitions are the Projs) */
646 if (get_irn_mode(irn) == mode_T) {
647 foreach_out_edge(irn, edge) {
648 ir_node *proj = get_edge_src_irn(edge);
650 if (arch_irn_consider_in_reg_alloc(cls, proj)) {
651 const arch_register_t *reg = x87_get_irn_register(proj);
652 live &= ~(1 << arch_register_get_index(reg));
655 } else if (arch_irn_consider_in_reg_alloc(cls, irn)) {
656 const arch_register_t *reg = x87_get_irn_register(irn);
657 live &= ~(1 << arch_register_get_index(reg));
/* uses gen: set the bit of every vfp register this node reads */
660 for (i = 0, n = get_irn_arity(irn); i < n; ++i) {
661 ir_node *op = get_irn_n(irn, i);
663 if (mode_is_float(get_irn_mode(op)) &&
664 arch_irn_consider_in_reg_alloc(cls, op)) {
665 const arch_register_t *reg = x87_get_irn_register(op);
666 live |= 1 << arch_register_get_index(reg);
673 * Put all live virtual registers at the end of a block into a bitset.
675 * @param sim the simulator handle
676 * @param bl the block
678 * @return The live bitset at the end of this block
680 static vfp_liveness vfp_liveness_end_of_block(x87_simulator *sim, const ir_node *block)
682 vfp_liveness live = 0;
683 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
684 const be_lv_t *lv = sim->lv;
/* collect every vfp-class value live-out of the block into a bitset */
686 be_lv_foreach(lv, block, be_lv_state_end, node) {
687 const arch_register_t *reg;
688 if (!arch_irn_consider_in_reg_alloc(cls, node))
691 reg = x87_get_irn_register(node);
692 live |= 1 << arch_register_get_index(reg);
698 /** get the register mask from an arch_register */
699 #define REGMASK(reg) (1 << (arch_register_get_index(reg)))
702 * Return a bitset of argument registers which are live at the end of a node.
704 * @param sim the simulator handle
705 * @param pos the node
706 * @param kill kill mask for the output registers
708 * @return The live bitset.
710 static unsigned vfp_live_args_after(x87_simulator *sim, const ir_node *pos, unsigned kill)
712 unsigned idx = get_irn_idx(pos);
714 assert(idx < sim->n_idx);
/* look up the liveness cached by update_liveness() and mask out the
 * registers this node itself defines */
715 return sim->live[idx] & ~kill;
719 * Calculate the liveness for a whole block and cache it.
721 * @param sim the simulator handle
722 * @param block the block
724 static void update_liveness(x87_simulator *sim, ir_node *block)
/* start from the live-out set and walk the schedule backwards, caching
 * the live-after set for every instruction in sim->live[] */
726 vfp_liveness live = vfp_liveness_end_of_block(sim, block);
729 /* now iterate through the block backward and cache the results */
730 sched_foreach_reverse(block, irn) {
731 /* stop at the first Phi: this produces the live-in */
735 idx = get_irn_idx(irn);
736 sim->live[idx] = live;
738 live = vfp_liveness_transfer(irn, live);
/* the block's own index caches the live-in set */
740 idx = get_irn_idx(block);
741 sim->live[idx] = live;
745 * Returns true if a register is live in a set.
747 * @param reg_idx the vfp register index
748 * @param live a live bitset
750 #define is_vfp_live(reg_idx, live) ((live) & (1 << (reg_idx)))
754 * Dump liveness info.
756 * @param live the live bitset
758 static void vfp_dump_live(vfp_liveness live)
762 DB((dbg, LEVEL_2, "Live after: "));
763 for (i = 0; i < 8; ++i) {
764 if (live & (1 << i)) {
765 DB((dbg, LEVEL_2, "vf%d ", i));
768 DB((dbg, LEVEL_2, "\n"));
770 #endif /* DEBUG_libfirm */
772 /* --------------------------------- simulators ---------------------------------------- */
775 * Simulate a virtual binop.
777 * @param state the x87 state
778 * @param n the node that should be simulated (and patched)
779 * @param tmpl the template containing the 4 possible x87 opcodes
781 * @return NO_NODE_ADDED
/* NOTE(review): many interior lines (else branches, out_idx/do_pop updates,
 * closing braces) are elided in this excerpt; the comments below describe
 * only the visible code. */
783 static int sim_binop(x87_state *state, ir_node *n, const exchange_tmpl *tmpl)
785 int op2_idx = 0, op1_idx;
786 int out_idx, do_pop = 0;
787 ia32_x87_attr_t *attr;
789 ir_node *patched_insn;
791 x87_simulator *sim = state->sim;
792 ir_node *op1 = get_irn_n(n, n_ia32_binary_left);
793 ir_node *op2 = get_irn_n(n, n_ia32_binary_right);
794 const arch_register_t *op1_reg = x87_get_irn_register(op1);
795 const arch_register_t *op2_reg = x87_get_irn_register(op2);
796 const arch_register_t *out = x87_irn_get_register(n, pn_ia32_res);
797 int reg_index_1 = arch_register_get_index(op1_reg);
798 int reg_index_2 = arch_register_get_index(op2_reg);
799 vfp_liveness live = vfp_live_args_after(sim, n, REGMASK(out));
803 DB((dbg, LEVEL_1, ">>> %+F %s, %s -> %s\n", n,
804 arch_register_get_name(op1_reg), arch_register_get_name(op2_reg),
805 arch_register_get_name(out)));
806 DEBUG_ONLY(vfp_dump_live(live);)
807 DB((dbg, LEVEL_1, "Stack before: "));
808 DEBUG_ONLY(x87_dump_stack(state);)
810 op1_idx = x87_on_stack(state, reg_index_1);
811 assert(op1_idx >= 0);
812 op1_live_after = is_vfp_live(reg_index_1, live);
814 attr = get_ia32_x87_attr(n);
815 permuted = attr->attr.data.ins_permuted;
/* the liveness of both operands decides which of the four opcode variants
 * (normal/reverse, with/without pop) is selected from tmpl */
817 if (reg_index_2 != REG_VFP_VFP_NOREG) {
820 /* second operand is a vfp register */
821 op2_idx = x87_on_stack(state, reg_index_2);
822 assert(op2_idx >= 0);
823 op2_live_after = is_vfp_live(reg_index_2, live);
825 if (op2_live_after) {
826 /* Second operand is live. */
828 if (op1_live_after) {
829 /* Both operands are live: push the first one.
830 This works even for op1 == op2. */
831 x87_create_fpush(state, n, op1_idx, op2);
832 /* now do fxxx (tos=tos X op) */
836 dst = tmpl->normal_op;
838 /* Second live, first operand is dead here, bring it to tos. */
840 x87_create_fxch(state, n, op1_idx);
845 /* now do fxxx (tos=tos X op) */
847 dst = tmpl->normal_op;
850 /* Second operand is dead. */
851 if (op1_live_after) {
852 /* First operand is live: bring second to tos. */
854 x87_create_fxch(state, n, op2_idx);
859 /* now do fxxxr (tos = op X tos) */
861 dst = tmpl->reverse_op;
863 /* Both operands are dead here, pop them from the stack. */
866 /* Both are identically and on tos, no pop needed. */
867 /* here fxxx (tos = tos X tos) */
868 dst = tmpl->normal_op;
871 /* now do fxxxp (op = op X tos, pop) */
872 dst = tmpl->normal_pop_op;
876 } else if (op1_idx == 0) {
877 assert(op1_idx != op2_idx);
878 /* now do fxxxrp (op = tos X op, pop) */
879 dst = tmpl->reverse_pop_op;
883 /* Bring the second on top. */
884 x87_create_fxch(state, n, op2_idx);
885 if (op1_idx == op2_idx) {
886 /* Both are identically and on tos now, no pop needed. */
889 /* use fxxx (tos = tos X tos) */
890 dst = tmpl->normal_op;
893 /* op2 is on tos now */
895 /* use fxxxp (op = op X tos, pop) */
896 dst = tmpl->normal_pop_op;
904 /* second operand is an address mode */
905 if (op1_live_after) {
906 /* first operand is live: push it here */
907 x87_create_fpush(state, n, op1_idx, op1);
910 /* first operand is dead: bring it to tos */
912 x87_create_fxch(state, n, op1_idx);
917 /* use fxxx (tos = tos X mem) */
918 dst = permuted ? tmpl->reverse_op : tmpl->normal_op;
/* replace the virtual opcode and record the result in the simulated stack */
922 patched_insn = x87_patch_insn(n, dst);
923 x87_set_st(state, arch_register_get_index(out), patched_insn, out_idx);
928 /* patch the operation */
929 attr->x87[0] = op1_reg = get_st_reg(op1_idx);
930 if (reg_index_2 != REG_VFP_VFP_NOREG) {
931 attr->x87[1] = op2_reg = get_st_reg(op2_idx);
933 attr->x87[2] = out = get_st_reg(out_idx);
935 if (reg_index_2 != REG_VFP_VFP_NOREG) {
936 DB((dbg, LEVEL_1, "<<< %s %s, %s -> %s\n", get_irn_opname(n),
937 arch_register_get_name(op1_reg), arch_register_get_name(op2_reg),
938 arch_register_get_name(out)));
940 DB((dbg, LEVEL_1, "<<< %s %s, [AM] -> %s\n", get_irn_opname(n),
941 arch_register_get_name(op1_reg),
942 arch_register_get_name(out)));
945 return NO_NODE_ADDED;
949 * Simulate a virtual Unop.
951 * @param state the x87 state
952 * @param n the node that should be simulated (and patched)
953 * @param op the x87 opcode that will replace n's opcode
955 * @return NO_NODE_ADDED
957 static int sim_unop(x87_state *state, ir_node *n, ir_op *op)
959 arch_register_t const *const out = x87_get_irn_register(n);
960 unsigned const live = vfp_live_args_after(state->sim, n, REGMASK(out));
961 DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, out->name));
962 DEBUG_ONLY(vfp_dump_live(live);)
964 ir_node *const op1 = get_irn_n(n, 0);
965 arch_register_t const *const op1_reg = x87_get_irn_register(op1);
966 int const op1_reg_idx = arch_register_get_index(op1_reg);
967 int const op1_idx = x87_on_stack(state, op1_reg_idx);
/* x87 unops operate on the tos: duplicate a live operand, exchange a dead one */
968 if (is_vfp_live(op1_reg_idx, live)) {
969 /* push the operand here */
970 x87_create_fpush(state, n, op1_idx, op1);
972 /* operand is dead, bring it to tos */
974 x87_create_fxch(state, n, op1_idx);
/* the result replaces the value on the tos */
978 x87_set_tos(state, arch_register_get_index(out), x87_patch_insn(n, op));
979 ia32_x87_attr_t *const attr = get_ia32_x87_attr(n);
980 attr->x87[2] = attr->x87[0] = get_st_reg(0);
981 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), attr->x87[2]->name));
983 return NO_NODE_ADDED;
987 * Simulate a virtual Load instruction.
989 * @param state the x87 state
990 * @param n the node that should be simulated (and patched)
991 * @param op the x87 opcode that will replace n's opcode
993 * @return NO_NODE_ADDED
995 static int sim_load(x87_state *state, ir_node *n, ir_op *op, int res_pos)
997 const arch_register_t *out = x87_irn_get_register(n, res_pos);
998 ia32_x87_attr_t *attr;
1000 DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, arch_register_get_name(out)));
/* a load pushes a new value; x87_patch_insn() rewrites n to the real opcode */
1001 x87_push(state, arch_register_get_index(out), x87_patch_insn(n, op));
/* patching must not have changed the node's assigned register */
1002 assert(out == x87_irn_get_register(n, res_pos));
1003 attr = get_ia32_x87_attr(n);
/* the loaded value always ends up on the tos */
1004 attr->x87[2] = out = get_st_reg(0);
1005 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), arch_register_get_name(out)));
1007 return NO_NODE_ADDED;
1011 * Rewire all users of @p old_val to @new_val iff they are scheduled after @p store.
1013 * @param store The store
1014 * @param old_val The former value
1015 * @param new_val The new value
1017 static void collect_and_rewire_users(ir_node *store, ir_node *old_val, ir_node *new_val)
/* safe iteration: set_irn_n() below modifies the out-edge set being walked */
1019 foreach_out_edge_safe(old_val, edge) {
1020 ir_node *user = get_edge_src_irn(edge);
1022 if (! user || user == store)
1025 /* if the user is scheduled after the store: rewire */
1026 if (sched_is_scheduled(user) && sched_comes_after(store, user)) {
1028 /* find the input of the user pointing to the old value */
1029 for (i = get_irn_arity(user) - 1; i >= 0; i--) {
1030 if (get_irn_n(user, i) == old_val)
1031 set_irn_n(user, i, new_val);
1038 * Simulate a virtual Store.
1040 * @param state the x87 state
1041 * @param n the node that should be simulated (and patched)
1042 * @param op the x87 store opcode
1043 * @param op_p the x87 store and pop opcode
/* NOTE(review): several interior lines (else branches, closing braces,
 * the return) are elided in this excerpt; the comments below describe only
 * the visible code. */
1045 static int sim_store(x87_state *state, ir_node *n, ir_op *op, ir_op *op_p)
1047 ir_node *const val = get_irn_n(n, n_ia32_vfst_val);
1048 arch_register_t const *const op2 = x87_get_irn_register(val);
1049 DB((dbg, LEVEL_1, ">>> %+F %s ->\n", n, arch_register_get_name(op2)));
1051 int insn = NO_NODE_ADDED;
1052 int const op2_reg_idx = arch_register_get_index(op2);
1053 int const op2_idx = x87_on_stack(state, op2_reg_idx);
1054 unsigned const live = vfp_live_args_after(state->sim, n, 0);
1055 int const live_after_node = is_vfp_live(op2_reg_idx, live);
1056 assert(op2_idx >= 0);
1057 if (live_after_node) {
1058 /* Problem: fst doesn't support 80bit modes (spills), only fstp does
1059 * fist doesn't support 64bit mode, only fistp
1061 * - stack not full: push value and fstp
1062 * - stack full: fstp value and load again
1063 * Note that we cannot test on mode_E, because floats might be 80bit ... */
1064 ir_mode *const mode = get_ia32_ls_mode(n);
1065 if (get_mode_size_bits(mode) > (mode_is_int(mode) ? 32 : 64)) {
1066 if (x87_get_depth(state) < N_ia32_st_REGS) {
1067 /* ok, we have a free register: push + fstp */
1068 x87_create_fpush(state, n, op2_idx, val);
1070 x87_patch_insn(n, op_p);
1072 /* stack full here: need fstp + load */
1074 x87_patch_insn(n, op_p);
/* build a vfld that reloads the just-stored value from the same address */
1076 ir_node *const block = get_nodes_block(n);
1077 ir_graph *const irg = get_irn_irg(n);
1078 ir_node *const nomem = get_irg_no_mem(irg);
1079 ir_node *const vfld = new_bd_ia32_vfld(NULL, block, get_irn_n(n, 0), get_irn_n(n, 1), nomem, mode);
1081 /* copy all attributes */
1082 set_ia32_frame_ent(vfld, get_ia32_frame_ent(n));
1083 if (is_ia32_use_frame(n))
1084 set_ia32_use_frame(vfld);
1085 set_ia32_op_type(vfld, ia32_AddrModeS);
1086 add_ia32_am_offs_int(vfld, get_ia32_am_offs_int(n));
1087 set_ia32_am_sc(vfld, get_ia32_am_sc(n));
1088 set_ia32_ls_mode(vfld, mode);
1090 ir_node *const rproj = new_r_Proj(vfld, mode, pn_ia32_vfld_res);
1091 ir_node *const mproj = new_r_Proj(vfld, mode_M, pn_ia32_vfld_M);
1092 ir_node *const mem = get_irn_Proj_for_mode(n, mode_M);
1094 assert(mem && "Store memory not found");
/* the reload must produce the value in the same vfp register */
1096 arch_set_irn_register(rproj, op2);
1098 /* reroute all former users of the store memory to the load memory */
1099 edges_reroute(mem, mproj);
1100 /* set the memory input of the load to the store memory */
1101 set_irn_n(vfld, n_ia32_vfld_mem, mem);
1103 sched_add_after(n, vfld);
1104 sched_add_after(vfld, rproj);
1106 /* rewire all users, scheduled after the store, to the loaded value */
1107 collect_and_rewire_users(n, val, rproj);
1112 /* we can only store the tos to memory */
1114 x87_create_fxch(state, n, op2_idx);
1116 /* mode size 64 or smaller -> use normal fst */
1117 x87_patch_insn(n, op);
1120 /* we can only store the tos to memory */
1122 x87_create_fxch(state, n, op2_idx);
/* value is dead afterwards: use the popping store variant */
1125 x87_patch_insn(n, op_p);
1128 ia32_x87_attr_t *const attr = get_ia32_x87_attr(n);
1129 attr->x87[1] = get_st_reg(0);
1130 DB((dbg, LEVEL_1, "<<< %s %s ->\n", get_irn_opname(n), arch_register_get_name(attr->x87[1])));
1135 #define _GEN_BINOP(op, rev) \
1136 static int sim_##op(x87_state *state, ir_node *n) { \
1137 exchange_tmpl tmpl = { op_ia32_##op, op_ia32_##rev, op_ia32_##op##p, op_ia32_##rev##p }; \
1138 return sim_binop(state, n, &tmpl); \
1141 #define GEN_BINOP(op) _GEN_BINOP(op, op)
1142 #define GEN_BINOPR(op) _GEN_BINOP(op, op##r)
1144 #define GEN_LOAD(op) \
1145 static int sim_##op(x87_state *state, ir_node *n) { \
1146 return sim_load(state, n, op_ia32_##op, pn_ia32_v##op##_res); \
1149 #define GEN_UNOP(op) \
1150 static int sim_##op(x87_state *state, ir_node *n) { \
1151 return sim_unop(state, n, op_ia32_##op); \
1154 #define GEN_STORE(op) \
1155 static int sim_##op(x87_state *state, ir_node *n) { \
1156 return sim_store(state, n, op_ia32_##op, op_ia32_##op##p); \
1178 * Simulate a virtual fisttp.
1180 * @param state the x87 state
1181 * @param n the node that should be simulated (and patched)
1183 * @return NO_NODE_ADDED
1185 static int sim_fisttp(x87_state *state, ir_node *n)
1187 ir_node *val = get_irn_n(n, n_ia32_vfst_val);
1188 const arch_register_t *op2 = x87_get_irn_register(val);
1189 ia32_x87_attr_t *attr;
1190 int op2_reg_idx, op2_idx;
1192 op2_reg_idx = arch_register_get_index(op2);
1193 op2_idx = x87_on_stack(state, op2_reg_idx);
1194 DB((dbg, LEVEL_1, ">>> %+F %s ->\n", n, arch_register_get_name(op2)));
1195 assert(op2_idx >= 0);
1197 /* Note: although the value is still live here, it is destroyed because
1198 of the pop. The register allocator is aware of that and introduced a copy
1199 if the value must be alive. */
1201 /* we can only store the tos to memory */
1203 x87_create_fxch(state, n, op2_idx);
/* fisttp always pops; the simulated x87_pop() is elided in this excerpt */
1206 x87_patch_insn(n, op_ia32_fisttp);
1208 attr = get_ia32_x87_attr(n);
1209 attr->x87[1] = op2 = get_st_reg(0);
1210 DB((dbg, LEVEL_1, "<<< %s %s ->\n", get_irn_opname(n), arch_register_get_name(op2)));
1212 return NO_NODE_ADDED;
1216 * Simulate a virtual FtstFnstsw.
1218 * @param state the x87 state
1219 * @param n the node that should be simulated (and patched)
1221 * @return NO_NODE_ADDED
1223 static int sim_FtstFnstsw(x87_state *state, ir_node *n)
1225 x87_simulator *sim = state->sim;
1226 ia32_x87_attr_t *attr = get_ia32_x87_attr(n);
1227 ir_node *op1_node = get_irn_n(n, n_ia32_vFtstFnstsw_left);
1228 const arch_register_t *reg1 = x87_get_irn_register(op1_node);
1229 int reg_index_1 = arch_register_get_index(reg1);
1230 int op1_idx = x87_on_stack(state, reg_index_1);
1231 unsigned live = vfp_live_args_after(sim, n, 0);
1233 DB((dbg, LEVEL_1, ">>> %+F %s\n", n, arch_register_get_name(reg1)));
1234 DEBUG_ONLY(vfp_dump_live(live);)
1235 DB((dbg, LEVEL_1, "Stack before: "));
1236 DEBUG_ONLY(x87_dump_stack(state);)
1237 assert(op1_idx >= 0);
/* ftst only tests st(0), so the operand must be on the tos */
1240 /* bring the value to tos */
1241 x87_create_fxch(state, n, op1_idx);
1245 /* patch the operation */
1246 x87_patch_insn(n, op_ia32_FtstFnstsw);
1247 reg1 = get_st_reg(op1_idx);
1248 attr->x87[0] = reg1;
1249 attr->x87[1] = NULL;
1250 attr->x87[2] = NULL;
/* ftst does not pop; emit an explicit fpop after n if the value is dead */
1252 if (!is_vfp_live(reg_index_1, live))
1253 x87_create_fpop(state, sched_next(n), 1);
1255 return NO_NODE_ADDED;
 * Simulate a virtual Fucom/FucomFnstsw/Fucomi compare node.
1261 * @param state the x87 state
1262 * @param n the node that should be simulated (and patched)
1264 * @return NO_NODE_ADDED
1266 static int sim_Fucom(x87_state *state, ir_node *n)
/* Bring both operands into stack positions the real x87 compare can use,
 * then patch the node to the concrete opcode (with 0, 1 or 2 pops,
 * depending on which operands die at this node). */
1270 ia32_x87_attr_t *attr = get_ia32_x87_attr(n);
1272 x87_simulator *sim = state->sim;
1273 ir_node *op1_node = get_irn_n(n, n_ia32_vFucomFnstsw_left);
1274 ir_node *op2_node = get_irn_n(n, n_ia32_vFucomFnstsw_right);
1275 const arch_register_t *op1 = x87_get_irn_register(op1_node);
1276 const arch_register_t *op2 = x87_get_irn_register(op2_node);
1277 int reg_index_1 = arch_register_get_index(op1);
1278 int reg_index_2 = arch_register_get_index(op2);
1279 unsigned live = vfp_live_args_after(sim, n, 0);
/* ins_permuted is toggled whenever the emitted compare ends up with
 * swapped operand order (res = op X tos instead of res = tos X op) */
1280 bool permuted = attr->attr.data.ins_permuted;
1284 DB((dbg, LEVEL_1, ">>> %+F %s, %s\n", n,
1285 arch_register_get_name(op1), arch_register_get_name(op2)));
1286 DEBUG_ONLY(vfp_dump_live(live);)
1287 DB((dbg, LEVEL_1, "Stack before: "));
1288 DEBUG_ONLY(x87_dump_stack(state);)
1290 op1_idx = x87_on_stack(state, reg_index_1);
1291 assert(op1_idx >= 0);
1293 /* BEWARE: check for comp a,a cases, they might happen */
/* REG_VFP_VFP_NOREG marks an address-mode (memory) second operand */
1294 if (reg_index_2 != REG_VFP_VFP_NOREG) {
1295 /* second operand is a vfp register */
1296 op2_idx = x87_on_stack(state, reg_index_2);
1297 assert(op2_idx >= 0);
1299 if (is_vfp_live(reg_index_2, live)) {
1300 /* second operand is live */
1302 if (is_vfp_live(reg_index_1, live)) {
1303 /* both operands are live */
1306 /* res = tos X op */
1307 } else if (op2_idx == 0) {
1308 /* res = op X tos */
1309 permuted = !permuted;
1312 /* bring the first one to tos */
1313 x87_create_fxch(state, n, op1_idx);
1314 if (op1_idx == op2_idx) {
1316 } else if (op2_idx == 0) {
1320 /* res = tos X op */
1323 /* second live, first operand is dead here, bring it to tos.
1324 This means further, op1_idx != op2_idx. */
1325 assert(op1_idx != op2_idx);
1327 x87_create_fxch(state, n, op1_idx);
1332 /* res = tos X op, pop */
1336 /* second operand is dead */
1337 if (is_vfp_live(reg_index_1, live)) {
1338 /* first operand is live: bring second to tos.
1339 This means further, op1_idx != op2_idx. */
1340 assert(op1_idx != op2_idx);
1342 x87_create_fxch(state, n, op2_idx);
1347 /* res = op X tos, pop */
1349 permuted = !permuted;
1352 /* both operands are dead here, check first for identity. */
1353 if (op1_idx == op2_idx) {
1354 /* identical, one pop needed */
1356 x87_create_fxch(state, n, op1_idx);
1360 /* res = tos X op, pop */
1363 /* different, move them to st and st(1) and pop both.
1364 The tricky part is to get one into st(1).*/
1365 else if (op2_idx == 1) {
1366 /* good, second operand is already in the right place, move the first */
1368 /* bring the first on top */
1369 x87_create_fxch(state, n, op1_idx);
1370 assert(op2_idx != 0);
1373 /* res = tos X op, pop, pop */
1375 } else if (op1_idx == 1) {
1376 /* good, first operand is already in the right place, move the second */
1378 /* bring the first on top */
1379 x87_create_fxch(state, n, op2_idx);
1380 assert(op1_idx != 0);
1383 /* res = op X tos, pop, pop */
1384 permuted = !permuted;
1388 /* if one is already the TOS, we need two fxch */
1390 /* first one is TOS, move to st(1) */
1391 x87_create_fxch(state, n, 1);
1392 assert(op2_idx != 1);
1394 x87_create_fxch(state, n, op2_idx);
1396 /* res = op X tos, pop, pop */
1398 permuted = !permuted;
1400 } else if (op2_idx == 0) {
1401 /* second one is TOS, move to st(1) */
1402 x87_create_fxch(state, n, 1);
1403 assert(op1_idx != 1);
1405 x87_create_fxch(state, n, op1_idx);
1407 /* res = tos X op, pop, pop */
1410 /* none of them is either TOS or st(1), 3 fxch needed */
1411 x87_create_fxch(state, n, op2_idx);
1412 assert(op1_idx != 0);
1413 x87_create_fxch(state, n, 1);
1415 x87_create_fxch(state, n, op1_idx);
1417 /* res = tos X op, pop, pop */
1424 /* second operand is an address mode */
1425 if (is_vfp_live(reg_index_1, live)) {
1426 /* first operand is live: bring it to TOS */
1428 x87_create_fxch(state, n, op1_idx);
1432 /* first operand is dead: bring it to tos */
1434 x87_create_fxch(state, n, op1_idx);
1441 /* patch the operation */
/* choose the concrete opcode by the number of stack pops needed */
1442 if (is_ia32_vFucomFnstsw(n)) {
1446 case 0: dst = op_ia32_FucomFnstsw; break;
1447 case 1: dst = op_ia32_FucompFnstsw; break;
1448 case 2: dst = op_ia32_FucomppFnstsw; break;
1449 default: panic("invalid popcount");
1452 for (i = 0; i < pops; ++i) {
1455 } else if (is_ia32_vFucomi(n)) {
1457 case 0: dst = op_ia32_Fucomi; break;
1458 case 1: dst = op_ia32_Fucompi; x87_pop(state); break;
/* fucomi has no double-pop variant: emit fucompi plus an extra fpop */
1460 dst = op_ia32_Fucompi;
1462 x87_create_fpop(state, sched_next(n), 1);
1464 default: panic("invalid popcount");
1467 panic("invalid operation %+F", n);
1470 x87_patch_insn(n, dst);
/* rewrite the operand attributes to the real st(i) registers */
1477 op1 = get_st_reg(op1_idx);
1480 op2 = get_st_reg(op2_idx);
1483 attr->x87[2] = NULL;
1484 attr->attr.data.ins_permuted = permuted;
1487 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(n),
1488 arch_register_get_name(op1), arch_register_get_name(op2)));
1490 DB((dbg, LEVEL_1, "<<< %s %s, [AM]\n", get_irn_opname(n),
1491 arch_register_get_name(op1)));
1494 return NO_NODE_ADDED;
 * Simulate a be_Keep.
1500 * @param state the x87 state
1501 * @param n the node that should be simulated (and patched)
1503 * @return NO_NODE_ADDED
1505 static int sim_Keep(x87_state *state, ir_node *node)
/* Pop every kept vfp value that is dead after the Keep, so it does not
 * linger on the x87 stack. */
1508 const arch_register_t *op_reg;
1514 DB((dbg, LEVEL_1, ">>> %+F\n", node));
1516 arity = get_irn_arity(node);
1517 for (i = 0; i < arity; ++i) {
1518 op = get_irn_n(node, i);
1519 op_reg = arch_get_irn_register(op);
/* only vfp (virtual floating point) operands are of interest here */
1520 if (arch_register_get_class(op_reg) != &ia32_reg_classes[CLASS_ia32_vfp])
1523 reg_id = arch_register_get_index(op_reg);
1524 live = vfp_live_args_after(state->sim, node, 0);
/* value is still on the simulated stack but dead -> pop it */
1526 op_stack_idx = x87_on_stack(state, reg_id);
1527 if (op_stack_idx >= 0 && !is_vfp_live(reg_id, live))
1528 x87_create_fpop(state, sched_next(node), 1);
1531 DB((dbg, LEVEL_1, "Stack after: "));
1532 DEBUG_ONLY(x87_dump_stack(state);)
1534 return NO_NODE_ADDED;
1538 * Keep the given node alive by adding a be_Keep.
1540 * @param node the node to be kept alive
1542 static void keep_float_node_alive(ir_node *node)
1544 ir_node *block = get_nodes_block(node);
1545 ir_node *keep = be_new_Keep(block, 1, &node);
/* the keep must be scheduled right after the node it keeps alive */
1547 assert(sched_is_scheduled(node));
1548 sched_add_after(node, keep);
1552 * Create a copy of a node. Recreate the node if it's a constant.
1554 * @param state the x87 state
1555 * @param n the node to be copied
1557 * @return the copy of n
1559 static ir_node *create_Copy(x87_state *state, ir_node *n)
1561 dbg_info *n_dbg = get_irn_dbg_info(n);
1562 ir_mode *mode = get_irn_mode(n);
1563 ir_node *block = get_nodes_block(n);
1564 ir_node *pred = get_irn_n(n, 0);
/* constructor of the fld-constant to recreate, NULL if pred is no constant */
1565 ir_node *(*cnstr)(dbg_info *, ir_node *, ir_mode *) = NULL;
1567 const arch_register_t *out;
1568 const arch_register_t *op1;
1569 ia32_x87_attr_t *attr;
1571 /* Do not copy constants, recreate them. */
1572 switch (get_ia32_irn_opcode(pred)) {
1574 cnstr = new_bd_ia32_fldz;
1577 cnstr = new_bd_ia32_fld1;
1579 case iro_ia32_fldpi:
1580 cnstr = new_bd_ia32_fldpi;
1582 case iro_ia32_fldl2e:
1583 cnstr = new_bd_ia32_fldl2e;
1585 case iro_ia32_fldl2t:
1586 cnstr = new_bd_ia32_fldl2t;
1588 case iro_ia32_fldlg2:
1589 cnstr = new_bd_ia32_fldlg2;
1591 case iro_ia32_fldln2:
1592 cnstr = new_bd_ia32_fldln2;
1598 out = x87_get_irn_register(n);
1599 op1 = x87_get_irn_register(pred);
1601 if (cnstr != NULL) {
1602 /* copy a constant: reload it instead of duplicating the stack slot */
1603 res = (*cnstr)(n_dbg, block, mode);
1605 x87_push(state, arch_register_get_index(out), res);
1607 attr = get_ia32_x87_attr(res);
1608 attr->x87[2] = get_st_reg(0);
/* a real copy: duplicate the operand with fld st(i) (fpushCopy) */
1610 int op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1612 res = new_bd_ia32_fpushCopy(n_dbg, block, pred, mode);
1614 x87_push(state, arch_register_get_index(out), res);
1616 attr = get_ia32_x87_attr(res);
1617 attr->x87[0] = get_st_reg(op1_idx);
1618 attr->x87[2] = get_st_reg(0);
1620 arch_set_irn_register(res, out);
1626 * Simulate a be_Copy.
1628 * @param state the x87 state
1629 * @param n the node that should be simulated (and patched)
1631 * @return NO_NODE_ADDED
1633 static int sim_Copy(x87_state *state, ir_node *n)
/* non-vfp copies need no x87 treatment at all */
1635 arch_register_class_t const *const cls = arch_get_irn_reg_class(n);
1636 if (cls != &ia32_reg_classes[CLASS_ia32_vfp])
1637 return NO_NODE_ADDED;
1639 ir_node *const pred = be_get_Copy_op(n);
1640 arch_register_t const *const op1 = x87_get_irn_register(pred);
1641 arch_register_t const *const out = x87_get_irn_register(n);
1642 unsigned const live = vfp_live_args_after(state->sim, n, REGMASK(out));
1644 DB((dbg, LEVEL_1, ">>> %+F %s -> %s\n", n,
1645 arch_register_get_name(op1), arch_register_get_name(out)));
1646 DEBUG_ONLY(vfp_dump_live(live);)
1648 if (is_vfp_live(arch_register_get_index(op1), live)) {
1649 /* Operand is still live, a real copy. We need here an fpush that can
1650 hold a register, so use the fpushCopy or recreate constants */
1651 ir_node *const node = create_Copy(state, n);
1653 /* We have to make sure the old value doesn't go dead (which can happen
1654 * when we recreate constants). As the simulator expected that value in
1655 * the pred blocks. This is unfortunate as removing it would save us 1
1656 * instruction, but we would have to rerun all the simulation to get
1659 ir_node *const next = sched_next(n);
1662 sched_add_before(next, node);
/* recreating a constant may leave the old value unused: keep it alive */
1664 if (get_irn_n_edges(pred) == 0) {
1665 keep_float_node_alive(pred);
1668 DB((dbg, LEVEL_1, "<<< %+F %s -> ?\n", node, op1->name));
/* operand is dead: the copy degenerates to a rename (and maybe a pop) */
1670 int const op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1671 int const out_idx = x87_on_stack(state, arch_register_get_index(out));
1672 if (out_idx >= 0 && out_idx != op1_idx) {
1673 /* Matze: out already on stack? how can this happen? */
1674 panic("invalid stack state");
1677 /* op1 must be killed and placed where out is */
1679 ia32_x87_attr_t *attr;
1680 /* best case, simple remove and rename */
1681 x87_patch_insn(n, op_ia32_Pop);
1682 attr = get_ia32_x87_attr(n);
1683 attr->x87[0] = op1 = get_st_reg(0);
1686 x87_set_st(state, arch_register_get_index(out), n, op1_idx - 1);
1688 ia32_x87_attr_t *attr;
1689 /* move op1 to tos, store and pop it */
1691 x87_create_fxch(state, n, op1_idx);
1694 x87_patch_insn(n, op_ia32_Pop);
1695 attr = get_ia32_x87_attr(n);
1696 attr->x87[0] = op1 = get_st_reg(out_idx);
1699 x87_set_st(state, arch_register_get_index(out), n, out_idx - 1);
1701 DB((dbg, LEVEL_1, "<<< %+F %s\n", n, op1->name));
1704 /* just a virtual copy */
1705 x87_set_st(state, arch_register_get_index(out), pred, op1_idx);
1706 /* don't remove the node to keep the verifier quiet :),
1707 the emitter won't emit any code for the node */
1710 DB((dbg, LEVEL_1, "<<< KILLED %s\n", get_irn_opname(n)));
1715 return NO_NODE_ADDED;
1719 * Returns the vf0 result Proj of a Call.
1721 * @param call the Call node
1723 static ir_node *get_call_result_proj(ir_node *call)
1725 /* search the result proj */
1726 foreach_out_edge(call, edge) {
1727 ir_node *proj = get_edge_src_irn(edge);
1728 long pn = get_Proj_proj(proj);
1730 if (pn == pn_ia32_Call_vf0)
/* every float-returning Call must have its vf0 Proj */
1734 panic("result Proj missing");
1737 static int sim_Asm(x87_state *const state, ir_node *const n)
/* Inline assembler with x87 register constraints is not supported:
 * reject any vfp-class input or output requirement. */
1741 for (size_t i = get_irn_arity(n); i-- != 0;) {
1742 arch_register_req_t const *const req = arch_get_irn_register_req_in(n, i);
1743 if (req->cls == &ia32_reg_classes[CLASS_ia32_vfp])
1744 panic("cannot handle %+F with x87 constraints", n);
1747 for (size_t i = arch_get_irn_n_outs(n); i-- != 0;) {
1748 arch_register_req_t const *const req = arch_get_irn_register_req_out(n, i);
1749 if (req->cls == &ia32_reg_classes[CLASS_ia32_vfp])
1750 panic("cannot handle %+F with x87 constraints", n);
1753 return NO_NODE_ADDED;
1757 * Simulate a ia32_Call.
1759 * @param state the x87 state
1760 * @param n the node that should be simulated (and patched)
1762 * @return NO_NODE_ADDED
1764 static int sim_Call(x87_state *state, ir_node *n)
1766 DB((dbg, LEVEL_1, ">>> %+F\n", n));
1768 /* at the begin of a call the x87 state should be empty */
1769 assert(state->depth == 0 && "stack not empty before call");
1771 ir_type *const call_tp = get_ia32_call_attr_const(n)->call_tp;
1772 if (get_method_n_ress(call_tp) != 0) {
1773 /* If the called function returns a float, it is returned in st(0).
1774 * This even happens if the return value is NOT used.
1775 * Moreover, only one return result is supported. */
1776 ir_type *const res_type = get_method_res_type(call_tp, 0);
1777 ir_mode *const mode = get_type_mode(res_type);
1778 if (mode && mode_is_float(mode)) {
/* model the ABI: push the float result onto the simulated stack */
1779 ir_node *const resproj = get_call_result_proj(n);
1780 arch_register_t const *const reg = x87_get_irn_register(resproj);
1781 x87_push(state, arch_register_get_index(reg), resproj);
1784 DB((dbg, LEVEL_1, "Stack after: "));
1785 DEBUG_ONLY(x87_dump_stack(state);)
1787 return NO_NODE_ADDED;
1791 * Simulate a be_Return.
1793 * @param state the x87 state
1794 * @param n the node that should be simulated (and patched)
1796 * @return NO_NODE_ADDED
1798 static int sim_Return(x87_state *state, ir_node *n)
1800 #ifdef DEBUG_libfirm
1801 /* only floating point return values must reside on stack */
1802 int n_float_res = 0;
1803 int const n_res = be_Return_get_n_rets(n);
1804 for (int i = 0; i < n_res; ++i) {
1805 ir_node *const res = get_irn_n(n, n_be_Return_val + i);
1806 if (mode_is_float(get_irn_mode(res)))
/* the simulated stack must hold exactly the float results at a return */
1809 assert(x87_get_depth(state) == n_float_res);
1812 /* pop them virtually */
1814 return NO_NODE_ADDED;
1818 * Simulate a be_Perm.
1820 * @param state the x87 state
1821 * @param irn the node that should be simulated (and patched)
1823 * @return NO_NODE_ADDED
1825 static int sim_Perm(x87_state *state, ir_node *irn)
1828 ir_node *pred = get_irn_n(irn, 0);
1831 /* handle only floating point Perms */
1832 if (! mode_is_float(get_irn_mode(pred)))
1833 return NO_NODE_ADDED;
1835 DB((dbg, LEVEL_1, ">>> %+F\n", irn));
1837 /* Perm is a pure virtual instruction on x87.
1838 All inputs must be on the FPU stack and are pairwise
1839 different from each other.
1840 So, all we need to do is to permute the stack state. */
1841 n = get_irn_arity(irn);
1842 NEW_ARR_A(int, stack_pos, n);
1844 /* collect old stack positions */
1845 for (i = 0; i < n; ++i) {
1846 const arch_register_t *inreg = x87_get_irn_register(get_irn_n(irn, i));
1847 int idx = x87_on_stack(state, arch_register_get_index(inreg));
1849 assert(idx >= 0 && "Perm argument not on x87 stack");
1853 /* now do the permutation */
/* each Proj i takes over the stack slot of input i; no code is emitted */
1854 foreach_out_edge(irn, edge) {
1855 ir_node *proj = get_edge_src_irn(edge);
1856 const arch_register_t *out = x87_get_irn_register(proj);
1857 long num = get_Proj_proj(proj);
1859 assert(0 <= num && num < n && "More Proj's than Perm inputs");
1860 x87_set_st(state, arch_register_get_index(out), proj, stack_pos[(unsigned)num]);
1862 DB((dbg, LEVEL_1, "<<< %+F\n", irn));
1864 return NO_NODE_ADDED;
1868 * Kill any dead registers at block start by popping them from the stack.
1870 * @param sim the simulator handle
1871 * @param block the current block
1872 * @param state the x87 state at the begin of the block
1874 static void x87_kill_deads(x87_simulator *const sim, ir_node *const block, x87_state *const state)
1876 ir_node *first_insn = sched_first(block);
1877 ir_node *keep = NULL;
1878 unsigned live = vfp_live_args_after(sim, block, 0);
1880 int i, depth, num_pop;
/* build a bitmask of stack slots whose value is dead at block entry */
1883 depth = x87_get_depth(state);
1884 for (i = depth - 1; i >= 0; --i) {
1885 int reg = x87_get_st_reg(state, i);
1887 if (! is_vfp_live(reg, live))
1888 kill_mask |= (1 << i);
1892 DB((dbg, LEVEL_1, "Killing deads:\n"));
1893 DEBUG_ONLY(vfp_dump_live(live);)
1894 DEBUG_ONLY(x87_dump_stack(state);)
1896 if (kill_mask != 0 && live == 0) {
1897 /* special case: kill all registers */
1898 if (ia32_cg_config.use_femms || ia32_cg_config.use_emms) {
1899 if (ia32_cg_config.use_femms) {
1900 /* use FEMMS on AMD processors to clear all */
1901 keep = new_bd_ia32_femms(NULL, block);
1903 /* use EMMS to clear all */
1904 keep = new_bd_ia32_emms(NULL, block);
1906 sched_add_before(first_insn, keep);
1912 /* now kill registers */
1914 /* we can only kill from TOS, so bring them up */
1915 if (! (kill_mask & 1)) {
1916 /* search from behind, because we can do a double-pop */
1917 for (i = depth - 1; i >= 0; --i) {
1918 if (kill_mask & (1 << i)) {
1919 kill_mask &= ~(1 << i);
/* slot i is now considered empty; exchange it up to the TOS */
1926 x87_set_st(state, -1, keep, i);
1927 x87_create_fxch(state, first_insn, i);
1930 if ((kill_mask & 3) == 3) {
1931 /* we can do a double-pop */
1935 /* only a single pop */
1940 kill_mask >>= num_pop;
1941 keep = x87_create_fpop(state, first_insn, num_pop);
1948 * Run a simulation and fix all virtual instructions for a block.
1950 * @param sim the simulator handle
1951 * @param block the current block
1953 static void x87_simulate_block(x87_simulator *sim, ir_node *block)
1956 blk_state *bl_state = x87_get_bl_state(sim, block);
1957 x87_state *state = bl_state->begin;
1958 ir_node *start_block;
1960 assert(state != NULL);
1961 /* already processed? */
1962 if (bl_state->end != NULL)
1965 DB((dbg, LEVEL_1, "Simulate %+F\n", block));
1966 DB((dbg, LEVEL_2, "State at Block begin:\n "));
1967 DEBUG_ONLY(x87_dump_stack(state);)
1969 /* create a new state, will be changed */
1970 state = x87_clone_state(sim, state);
1971 /* at block begin, kill all dead registers */
1972 x87_kill_deads(sim, block, state);
1974 /* beware, n might change */
1975 for (n = sched_first(block); !sched_is_end(n); n = next) {
1978 ir_op *op = get_irn_op(n);
1981 * get the next node to be simulated here.
1982 * n might be completely removed from the schedule-
1984 next = sched_next(n);
/* dispatch to the sim_* handler registered for this opcode, if any */
1985 if (op->ops.generic != NULL) {
1986 func = (sim_func)op->ops.generic;
1989 node_inserted = (*func)(state, n);
1992 * sim_func might have added an additional node after n,
1993 * so update next node
1994 * beware: n must not be changed by sim_func
1995 * (i.e. removed from schedule) in this case
1997 if (node_inserted != NO_NODE_ADDED)
1998 next = sched_next(n);
2002 start_block = get_irg_start_block(get_irn_irg(block));
2004 DB((dbg, LEVEL_2, "State at Block end:\n ")); DEBUG_ONLY(x87_dump_stack(state);)
2006 /* check if the state must be shuffled */
2007 foreach_block_succ(block, edge) {
2008 ir_node *succ = get_edge_src_irn(edge);
2009 blk_state *succ_state;
2011 if (succ == start_block)
2014 succ_state = x87_get_bl_state(sim, succ);
2016 if (succ_state->begin == NULL) {
2017 DB((dbg, LEVEL_2, "Set begin state for succ %+F:\n", succ));
2018 DEBUG_ONLY(x87_dump_stack(state);)
2019 succ_state->begin = state;
2021 waitq_put(sim->worklist, succ);
2023 DB((dbg, LEVEL_2, "succ %+F already has a state, shuffling\n", succ));
2024 /* There is already a begin state for the successor, bad.
2025 Do the necessary permutations.
2026 Note that critical edges are removed, so this is always possible:
2027 If the successor has more than one possible input, then it must
2030 x87_shuffle(block, state, succ_state->begin);
2033 bl_state->end = state;
2037 * Register a simulator function.
2039 * @param op the opcode to simulate
2040 * @param func the simulator function for the opcode
2042 static void register_sim(ir_op *op, sim_func func)
/* each opcode may have at most one simulator; stored in the generic slot */
2044 assert(op->ops.generic == NULL);
2045 op->ops.generic = (op_func) func;
2049 * Create a new x87 simulator.
2051 * @param sim a simulator handle, will be initialized
2052 * @param irg the current graph
2054 static void x87_init_simulator(x87_simulator *sim, ir_graph *irg)
2056 obstack_init(&sim->obst);
2057 sim->blk_states = pmap_create();
2058 sim->n_idx = get_irg_last_idx(irg);
2059 sim->live = OALLOCN(&sim->obst, vfp_liveness, sim->n_idx);
2061 DB((dbg, LEVEL_1, "--------------------------------\n"
2062 "x87 Simulator started for %+F\n", irg));
2064 /* set the generic function pointer of instruction we must simulate */
2065 ir_clear_opcodes_generic_func();
2067 register_sim(op_ia32_Asm, sim_Asm);
2068 register_sim(op_ia32_Call, sim_Call);
2069 register_sim(op_ia32_vfld, sim_fld);
2070 register_sim(op_ia32_vfild, sim_fild);
2071 register_sim(op_ia32_vfld1, sim_fld1);
2072 register_sim(op_ia32_vfldz, sim_fldz);
2073 register_sim(op_ia32_vfadd, sim_fadd);
2074 register_sim(op_ia32_vfsub, sim_fsub);
2075 register_sim(op_ia32_vfmul, sim_fmul);
2076 register_sim(op_ia32_vfdiv, sim_fdiv);
2077 register_sim(op_ia32_vfprem, sim_fprem);
2078 register_sim(op_ia32_vfabs, sim_fabs);
2079 register_sim(op_ia32_vfchs, sim_fchs);
2080 register_sim(op_ia32_vfist, sim_fist);
2081 register_sim(op_ia32_vfisttp, sim_fisttp);
2082 register_sim(op_ia32_vfst, sim_fst);
2083 register_sim(op_ia32_vFtstFnstsw, sim_FtstFnstsw);
2084 register_sim(op_ia32_vFucomFnstsw, sim_Fucom);
2085 register_sim(op_ia32_vFucomi, sim_Fucom);
2086 register_sim(op_be_Copy, sim_Copy);
2087 register_sim(op_be_Return, sim_Return);
2088 register_sim(op_be_Perm, sim_Perm);
2089 register_sim(op_be_Keep, sim_Keep);
2093 * Destroy a x87 simulator.
2095 * @param sim the simulator handle
2097 static void x87_destroy_simulator(x87_simulator *sim)
/* frees the block-state map and all obstack-allocated simulator data */
2099 pmap_destroy(sim->blk_states);
2100 obstack_free(&sim->obst, NULL);
2101 DB((dbg, LEVEL_1, "x87 Simulator stopped\n\n"));
2105 * Pre-block walker: calculate the liveness information for the block
2106 * and store it into the sim->live cache.
2108 static void update_liveness_walker(ir_node *block, void *data)
2110 x87_simulator *sim = (x87_simulator*)data;
2111 update_liveness(sim, block);
2115 * Run a simulation and fix all virtual instructions for a graph.
2116 * Replaces all virtual floating point instructions and registers
2119 void ia32_x87_simulate_graph(ir_graph *irg)
2121 /* TODO improve code quality (less executed fxch) by using execfreqs */
2123 ir_node *block, *start_block;
2124 blk_state *bl_state;
2127 /* create the simulator */
2128 x87_init_simulator(&sim, irg);
2130 start_block = get_irg_start_block(irg);
2131 bl_state = x87_get_bl_state(&sim, start_block);
2133 /* start with the empty state */
2135 bl_state->begin = &empty;
2137 sim.worklist = new_waitq();
2138 waitq_put(sim.worklist, start_block);
2140 be_assure_live_sets(irg);
2141 sim.lv = be_get_irg_liveness(irg);
2143 /* Calculate the liveness for all nodes. We must precalculate this info,
2144 * because the simulator adds new nodes (possible before Phi nodes) which
2145 * would let a lazy calculation fail.
2146 * On the other hand we reduce the computation amount due to
2147 * precaching from O(n^2) to O(n) at the expense of O(n) cache memory.
2149 irg_block_walk_graph(irg, update_liveness_walker, NULL, &sim);
/* process blocks from the worklist until a fixpoint is reached */
2153 block = (ir_node*)waitq_get(sim.worklist);
2154 x87_simulate_block(&sim, block);
2155 } while (! waitq_empty(sim.worklist));
2158 del_waitq(sim.worklist);
2159 x87_destroy_simulator(&sim);
2162 /* Initializes the x87 simulator. */
2163 void ia32_init_x87(void)
2165 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.x87");