2 * Copyright (C) 1995-2010 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the x87 support and virtual to stack
23 * register translation for the ia32 backend.
24 * @author Michael Beck
33 #include "iredges_t.h"
48 #include "bearch_ia32_t.h"
49 #include "ia32_new_nodes.h"
50 #include "gen_ia32_new_nodes.h"
51 #include "gen_ia32_regalloc_if.h"
53 #include "ia32_architecture.h"
55 /** the debug handle */
56 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
58 /* Forward declaration. */
59 typedef struct x87_simulator x87_simulator;
62 * An entry on the simulated x87 stack.
64 typedef struct st_entry {
65 int reg_idx; /**< the virtual register index of this stack value */
66 ir_node *node; /**< the node that produced this value */
72 typedef struct x87_state {
73 st_entry st[N_ia32_st_REGS]; /**< the register stack */
74 int depth; /**< the current stack depth */
75 x87_simulator *sim; /**< The simulator. */
78 /** An empty state, used for blocks without fp instructions. */
79 static x87_state empty = { { {0, NULL}, }, 0, NULL };
82 * Return values of the instruction simulator functions.
85 NO_NODE_ADDED = 0, /**< No node that needs simulation was added. */
86 NODE_ADDED = 1 /**< A node that must be simulated was added by the simulator
87 in the schedule AFTER the current node. */
91 * The type of an instruction simulator function.
93 * @param state the x87 state
94 * @param n the node to be simulated
96 * @return NODE_ADDED if a node was added AFTER n in schedule that MUST be
98 * NO_NODE_ADDED otherwise
100 typedef int (*sim_func)(x87_state *state, ir_node *n);
103 * A block state: Every block has a x87 state at the beginning and at the end.
105 typedef struct blk_state {
106 x87_state *begin; /**< state at the begin or NULL if not assigned */
107 x87_state *end; /**< state at the end or NULL if not assigned */
110 /** liveness bitset for vfp registers. */
111 typedef unsigned char vfp_liveness;
116 struct x87_simulator {
117 struct obstack obst; /**< An obstack for fast allocating. */
118 pmap *blk_states; /**< Map blocks to states. */
119 be_lv_t *lv; /**< intrablock liveness. */
120 vfp_liveness *live; /**< Liveness information. */
121 unsigned n_idx; /**< The cached get_irg_last_idx() result. */
122 waitq *worklist; /**< Worklist of blocks that must be processed. */
126 * Returns the current stack depth.
128 * @param state the x87 state
130 * @return the x87 stack depth
132 static int x87_get_depth(const x87_state *state)
137 static st_entry *x87_get_entry(x87_state *const state, int const pos)
139 assert(0 <= pos && pos < state->depth);
140 return &state->st[N_ia32_st_REGS - state->depth + pos];
144 * Return the virtual register index at st(pos).
146 * @param state the x87 state
147 * @param pos a stack position
149 * @return the vfp register index that produced the value at st(pos)
151 static int x87_get_st_reg(const x87_state *state, int pos)
153 return x87_get_entry((x87_state*)state, pos)->reg_idx;
158 * Dump the stack for debugging.
160 * @param state the x87 state
162 static void x87_dump_stack(const x87_state *state)
164 for (int i = state->depth; i-- != 0;) {
165 st_entry const *const entry = x87_get_entry((x87_state*)state, i);
166 DB((dbg, LEVEL_2, "vf%d(%+F) ", entry->reg_idx, entry->node));
168 DB((dbg, LEVEL_2, "<-- TOS\n"));
170 #endif /* DEBUG_libfirm */
173 * Set a virtual register to st(pos).
175 * @param state the x87 state
176 * @param reg_idx the vfp register index that should be set
177 * @param node the IR node that produces the value of the vfp register
178 * @param pos the stack position where the new value should be entered
180 static void x87_set_st(x87_state *state, int reg_idx, ir_node *node, int pos)
182 st_entry *const entry = x87_get_entry(state, pos);
183 entry->reg_idx = reg_idx;
186 DB((dbg, LEVEL_2, "After SET_REG: "));
187 DEBUG_ONLY(x87_dump_stack(state);)
191 * Swap st(0) with st(pos).
193 * @param state the x87 state
194 * @param pos the stack position to change the tos with
196 static void x87_fxch(x87_state *state, int pos)
198 st_entry *const a = x87_get_entry(state, pos);
199 st_entry *const b = x87_get_entry(state, 0);
200 st_entry const t = *a;
204 DB((dbg, LEVEL_2, "After FXCH: "));
205 DEBUG_ONLY(x87_dump_stack(state);)
209 * Convert a virtual register to the stack index.
211 * @param state the x87 state
212 * @param reg_idx the register vfp index
214 * @return the stack position where the register is stacked
215 * or -1 if the virtual register was not found
217 static int x87_on_stack(const x87_state *state, int reg_idx)
219 for (int i = 0; i < state->depth; ++i) {
220 if (x87_get_st_reg(state, i) == reg_idx)
227 * Push a virtual Register onto the stack, double pushes are NOT allowed.
229 * @param state the x87 state
230 * @param reg_idx the register vfp index
231 * @param node the node that produces the value of the vfp register
233 static void x87_push(x87_state *state, int reg_idx, ir_node *node)
235 assert(x87_on_stack(state, reg_idx) == -1 && "double push");
236 assert(state->depth < N_ia32_st_REGS && "stack overrun");
239 st_entry *const entry = x87_get_entry(state, 0);
240 entry->reg_idx = reg_idx;
243 DB((dbg, LEVEL_2, "After PUSH: ")); DEBUG_ONLY(x87_dump_stack(state);)
247 * Pop a virtual Register from the stack.
249 * @param state the x87 state
251 static void x87_pop(x87_state *state)
253 assert(state->depth > 0 && "stack underrun");
257 DB((dbg, LEVEL_2, "After POP: ")); DEBUG_ONLY(x87_dump_stack(state);)
261 * Empty the fpu stack
263 * @param state the x87 state
265 static void x87_emms(x87_state *state)
271 * Returns the block state of a block.
273 * @param sim the x87 simulator handle
274 * @param block the current block
276 * @return the block state
278 static blk_state *x87_get_bl_state(x87_simulator *sim, ir_node *block)
280 blk_state *res = pmap_get(blk_state, sim->blk_states, block);
283 res = OALLOC(&sim->obst, blk_state);
287 pmap_insert(sim->blk_states, block, res);
296 * @param sim the x87 simulator handle
297 * @param src the x87 state that will be cloned
299 * @return a cloned copy of the src state
301 static x87_state *x87_clone_state(x87_simulator *sim, const x87_state *src)
303 x87_state *const res = OALLOC(&sim->obst, x87_state);
309 * Patch a virtual instruction into a x87 one and return
310 * the node representing the result value.
312 * @param n the IR node to patch
313 * @param op the x87 opcode to patch in
315 static ir_node *x87_patch_insn(ir_node *n, ir_op *op)
317 ir_mode *mode = get_irn_mode(n);
322 if (mode == mode_T) {
323 /* patch all Proj's */
324 foreach_out_edge(n, edge) {
325 ir_node *proj = get_edge_src_irn(edge);
327 mode = get_irn_mode(proj);
328 if (mode_is_float(mode)) {
330 set_irn_mode(proj, ia32_reg_classes[CLASS_ia32_st].mode);
334 } else if (mode_is_float(mode))
335 set_irn_mode(n, ia32_reg_classes[CLASS_ia32_st].mode);
340 * Returns the first Proj of a mode_T node having a given mode.
342 * @param n the mode_T node
343 * @param m the desired mode of the Proj
344 * @return The first Proj of mode @p m found.
346 static ir_node *get_irn_Proj_for_mode(ir_node *n, ir_mode *m)
348 assert(get_irn_mode(n) == mode_T && "Need mode_T node");
350 foreach_out_edge(n, edge) {
351 ir_node *proj = get_edge_src_irn(edge);
352 if (get_irn_mode(proj) == m)
356 panic("Proj not found");
360 * Wrap the arch_* function here so we can check for errors.
362 static inline const arch_register_t *x87_get_irn_register(const ir_node *irn)
364 const arch_register_t *res = arch_get_irn_register(irn);
366 assert(res->reg_class == &ia32_reg_classes[CLASS_ia32_vfp]);
370 static inline const arch_register_t *x87_irn_get_register(const ir_node *irn,
373 const arch_register_t *res = arch_get_irn_register_out(irn, pos);
375 assert(res->reg_class == &ia32_reg_classes[CLASS_ia32_vfp]);
379 static inline const arch_register_t *get_st_reg(int index)
381 return &ia32_registers[REG_ST0 + index];
385 * Create a fxch node before another node.
387 * @param state the x87 state
388 * @param n the node after the fxch
389 * @param pos exchange st(pos) with st(0)
391 static void x87_create_fxch(x87_state *state, ir_node *n, int pos)
393 x87_fxch(state, pos);
395 ir_node *const block = get_nodes_block(n);
396 ir_node *const fxch = new_bd_ia32_fxch(NULL, block);
397 ia32_x87_attr_t *const attr = get_ia32_x87_attr(fxch);
398 attr->reg = get_st_reg(pos);
402 sched_add_before(n, fxch);
403 DB((dbg, LEVEL_1, "<<< %s %s\n", get_irn_opname(fxch), attr->reg->name));
406 /* -------------- x87 perm --------------- */
409 * Calculate the necessary permutations to reach dst_state.
411 * These permutations are done with fxch instructions and placed
412 * at the end of the block.
414 * Note that critical edges are removed here, so we need only
415 * a shuffle if the current block has only one successor.
417 * @param block the current block
418 * @param state the current x87 stack state, might be modified
419 * @param dst_state destination state
423 static x87_state *x87_shuffle(ir_node *block, x87_state *state, const x87_state *dst_state)
425 int i, n_cycles, k, ri;
426 unsigned cycles[4], all_mask;
427 char cycle_idx[4][8];
429 assert(state->depth == dst_state->depth);
431 /* Some mathematics here:
432 * If we have a cycle of length n that includes the tos,
433 * we need n-1 exchange operations.
434 * We can always add the tos and restore it, so we need
435 * n+1 exchange operations for a cycle not containing the tos.
436 * So, the maximum of needed operations is for a cycle of 7
437 * not including the tos == 8.
438 * This is the same number of ops we would need for using stores,
439 * so exchange is cheaper (we save the loads).
440 * On the other hand, we might need an additional exchange
441 * in the next block to bring one operand on top, so the
442 * number of ops in the first case is identical.
443 * Further, no more than 4 cycles can exists (4 x 2). */
444 all_mask = (1 << (state->depth)) - 1;
446 for (n_cycles = 0; all_mask; ++n_cycles) {
447 int src_idx, dst_idx;
449 /* find the first free slot */
450 for (i = 0; i < state->depth; ++i) {
451 if (all_mask & (1 << i)) {
452 all_mask &= ~(1 << i);
454 /* check if there are differences here */
455 if (x87_get_st_reg(state, i) != x87_get_st_reg(dst_state, i))
461 /* no more cycles found */
466 cycles[n_cycles] = (1 << i);
467 cycle_idx[n_cycles][k++] = i;
468 for (src_idx = i; ; src_idx = dst_idx) {
469 dst_idx = x87_on_stack(dst_state, x87_get_st_reg(state, src_idx));
471 if ((all_mask & (1 << dst_idx)) == 0)
474 cycle_idx[n_cycles][k++] = dst_idx;
475 cycles[n_cycles] |= (1 << dst_idx);
476 all_mask &= ~(1 << dst_idx);
478 cycle_idx[n_cycles][k] = -1;
482 /* no permutation needed */
486 /* Hmm: permutation needed */
487 DB((dbg, LEVEL_2, "\n%+F needs permutation: from\n", block));
488 DEBUG_ONLY(x87_dump_stack(state);)
489 DB((dbg, LEVEL_2, " to\n"));
490 DEBUG_ONLY(x87_dump_stack(dst_state);)
494 DB((dbg, LEVEL_2, "Need %d cycles\n", n_cycles));
495 for (ri = 0; ri < n_cycles; ++ri) {
496 DB((dbg, LEVEL_2, " Ring %d:\n ", ri));
497 for (k = 0; cycle_idx[ri][k] != -1; ++k)
498 DB((dbg, LEVEL_2, " st%d ->", cycle_idx[ri][k]));
499 DB((dbg, LEVEL_2, "\n"));
504 * Find the place node must be insert.
505 * We have only one successor block, so the last instruction should
508 ir_node *const before = sched_last(block);
509 assert(is_cfop(before));
511 /* now do the permutations */
512 for (ri = 0; ri < n_cycles; ++ri) {
513 if ((cycles[ri] & 1) == 0) {
514 /* this cycle does not include the tos */
515 x87_create_fxch(state, before, cycle_idx[ri][0]);
517 for (k = 1; cycle_idx[ri][k] != -1; ++k) {
518 x87_create_fxch(state, before, cycle_idx[ri][k]);
520 if ((cycles[ri] & 1) == 0) {
521 /* this cycle does not include the tos */
522 x87_create_fxch(state, before, cycle_idx[ri][0]);
529 * Create a fpush before node n.
531 * @param state the x87 state
532 * @param n the node after the fpush
533 * @param pos push st(pos) on stack
534 * @param val the value to push
536 static void x87_create_fpush(x87_state *state, ir_node *n, int pos, int const out_reg_idx, ir_node *const val)
538 x87_push(state, out_reg_idx, val);
540 ir_node *const fpush = new_bd_ia32_fpush(NULL, get_nodes_block(n));
541 ia32_x87_attr_t *const attr = get_ia32_x87_attr(fpush);
542 attr->reg = get_st_reg(pos);
545 sched_add_before(n, fpush);
547 DB((dbg, LEVEL_1, "<<< %s %s\n", get_irn_opname(fpush), attr->reg->name));
551 * Create a fpop before node n.
553 * @param state the x87 state
554 * @param n the node after the fpop
556 * @return the fpop node
558 static ir_node *x87_create_fpop(x87_state *const state, ir_node *const n)
561 ir_node *const block = get_nodes_block(n);
562 ir_node *const fpop = ia32_cg_config.use_ffreep ?
563 new_bd_ia32_ffreep(NULL, block) :
564 new_bd_ia32_fpop( NULL, block);
565 ia32_x87_attr_t *const attr = get_ia32_x87_attr(fpop);
566 attr->reg = get_st_reg(0);
569 sched_add_before(n, fpop);
570 DB((dbg, LEVEL_1, "<<< %s %s\n", get_irn_opname(fpop), attr->reg->name));
574 /* --------------------------------- liveness ------------------------------------------ */
577 * The liveness transfer function.
578 * Updates a live set over a single step from a given node to its predecessor.
579 * Everything defined at the node is removed from the set, the uses of the node get inserted.
581 * @param irn The node at which liveness should be computed.
582 * @param live The bitset of registers live before @p irn. This set gets modified by updating it to
583 * the registers live after irn.
585 * @return The live bitset.
587 static vfp_liveness vfp_liveness_transfer(ir_node *irn, vfp_liveness live)
590 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
592 if (get_irn_mode(irn) == mode_T) {
593 foreach_out_edge(irn, edge) {
594 ir_node *proj = get_edge_src_irn(edge);
596 if (arch_irn_consider_in_reg_alloc(cls, proj)) {
597 const arch_register_t *reg = x87_get_irn_register(proj);
598 live &= ~(1 << reg->index);
601 } else if (arch_irn_consider_in_reg_alloc(cls, irn)) {
602 const arch_register_t *reg = x87_get_irn_register(irn);
603 live &= ~(1 << reg->index);
606 for (i = 0, n = get_irn_arity(irn); i < n; ++i) {
607 ir_node *op = get_irn_n(irn, i);
609 if (mode_is_float(get_irn_mode(op)) &&
610 arch_irn_consider_in_reg_alloc(cls, op)) {
611 const arch_register_t *reg = x87_get_irn_register(op);
612 live |= 1 << reg->index;
619 * Put all live virtual registers at the end of a block into a bitset.
621 * @param sim the simulator handle
622 * @param bl the block
624 * @return The live bitset at the end of this block
626 static vfp_liveness vfp_liveness_end_of_block(x87_simulator *sim, const ir_node *block)
628 vfp_liveness live = 0;
629 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
630 const be_lv_t *lv = sim->lv;
632 be_lv_foreach(lv, block, be_lv_state_end, node) {
633 const arch_register_t *reg;
634 if (!arch_irn_consider_in_reg_alloc(cls, node))
637 reg = x87_get_irn_register(node);
638 live |= 1 << reg->index;
644 /** get the register mask from an arch_register */
645 #define REGMASK(reg) (1 << (reg->index))
648 * Return a bitset of argument registers which are live at the end of a node.
650 * @param sim the simulator handle
651 * @param pos the node
652 * @param kill kill mask for the output registers
654 * @return The live bitset.
656 static unsigned vfp_live_args_after(x87_simulator *sim, const ir_node *pos, unsigned kill)
658 unsigned idx = get_irn_idx(pos);
660 assert(idx < sim->n_idx);
661 return sim->live[idx] & ~kill;
665 * Calculate the liveness for a whole block and cache it.
667 * @param sim the simulator handle
668 * @param block the block
670 static void update_liveness(x87_simulator *sim, ir_node *block)
672 vfp_liveness live = vfp_liveness_end_of_block(sim, block);
675 /* now iterate through the block backward and cache the results */
676 sched_foreach_reverse(block, irn) {
677 /* stop at the first Phi: this produces the live-in */
681 idx = get_irn_idx(irn);
682 sim->live[idx] = live;
684 live = vfp_liveness_transfer(irn, live);
686 idx = get_irn_idx(block);
687 sim->live[idx] = live;
691 * Returns true if a register is live in a set.
693 * @param reg_idx the vfp register index
694 * @param live a live bitset
696 #define is_vfp_live(reg_idx, live) ((live) & (1 << (reg_idx)))
700 * Dump liveness info.
702 * @param live the live bitset
704 static void vfp_dump_live(vfp_liveness live)
708 DB((dbg, LEVEL_2, "Live after: "));
709 for (i = 0; i < 8; ++i) {
710 if (live & (1 << i)) {
711 DB((dbg, LEVEL_2, "vf%d ", i));
714 DB((dbg, LEVEL_2, "\n"));
716 #endif /* DEBUG_libfirm */
718 /* --------------------------------- simulators ---------------------------------------- */
721 * Simulate a virtual binop.
723 * @param state the x87 state
724 * @param n the node that should be simulated (and patched)
726 * @return NO_NODE_ADDED
728 static int sim_binop(x87_state *const state, ir_node *const n, ir_op *const op)
730 ir_node *patched_insn;
731 x87_simulator *sim = state->sim;
732 ir_node *op1 = get_irn_n(n, n_ia32_binary_left);
733 ir_node *op2 = get_irn_n(n, n_ia32_binary_right);
734 const arch_register_t *op1_reg = x87_get_irn_register(op1);
735 const arch_register_t *op2_reg = x87_get_irn_register(op2);
736 const arch_register_t *out = x87_irn_get_register(n, pn_ia32_res);
737 int reg_index_1 = op1_reg->index;
738 int reg_index_2 = op2_reg->index;
739 vfp_liveness live = vfp_live_args_after(sim, n, REGMASK(out));
743 DB((dbg, LEVEL_1, ">>> %+F %s, %s -> %s\n", n, op1_reg->name, op2_reg->name, out->name));
744 DEBUG_ONLY(vfp_dump_live(live);)
745 DB((dbg, LEVEL_1, "Stack before: "));
746 DEBUG_ONLY(x87_dump_stack(state);)
748 int op1_idx = x87_on_stack(state, reg_index_1);
749 assert(op1_idx >= 0);
750 op1_live_after = is_vfp_live(reg_index_1, live);
755 int const out_reg_idx = out->index;
756 ia32_x87_attr_t *const attr = get_ia32_x87_attr(n);
757 if (reg_index_2 != REG_VFP_VFP_NOREG) {
758 /* second operand is a vfp register */
759 op2_idx = x87_on_stack(state, reg_index_2);
760 assert(op2_idx >= 0);
761 op2_live_after = is_vfp_live(reg_index_2, live);
763 if (op2_live_after) {
764 /* Second operand is live. */
766 if (op1_live_after) {
767 /* Both operands are live: push the first one.
768 * This works even for op1 == op2. */
769 x87_create_fpush(state, n, op1_idx, out_reg_idx, op1);
770 /* now do fxxx (tos=tos X op) */
775 /* Second live, first operand is dead: Overwrite first. */
776 if (op1_idx != 0 && op2_idx != 0) {
777 /* Bring one operand to tos. */
778 x87_create_fxch(state, n, op1_idx);
784 /* Second operand is dead. */
785 if (op1_live_after) {
786 /* First operand is live, second is dead: Overwrite second. */
787 if (op1_idx != 0 && op2_idx != 0) {
788 /* Bring one operand to tos. */
789 x87_create_fxch(state, n, op2_idx);
794 /* Both operands are dead. */
795 if (op1_idx != 0 && op2_idx != 0) {
796 /* Bring one operand to tos. */
797 x87_create_fxch(state, n, op1_idx);
798 if (op2_idx == op1_idx) op2_idx = 0;
801 out_idx = op1_idx != 0 ? op1_idx : op2_idx;
802 /* Only pop if the operands are differnt. */
803 pop = op1_idx != op2_idx;
807 /* second operand is an address mode */
808 if (op1_live_after) {
809 /* first operand is live: push it here */
810 x87_create_fpush(state, n, op1_idx, out_reg_idx, op1);
812 /* first operand is dead: bring it to tos */
814 x87_create_fxch(state, n, op1_idx);
817 op1_idx = attr->attr.data.ins_permuted ? -1 : 0;
818 op2_idx = attr->attr.data.ins_permuted ? 0 : -1;
821 assert(op1_idx == 0 || op2_idx == 0);
822 assert(out_idx == op1_idx || out_idx == op2_idx);
824 patched_insn = x87_patch_insn(n, op);
825 x87_set_st(state, out_reg_idx, patched_insn, out_idx);
829 /* patch the operation */
830 int const reg_idx = op1_idx != 0 ? op1_idx : op2_idx;
831 attr->reg = reg_idx >= 0 ? get_st_reg(reg_idx) : NULL;
832 attr->attr.data.ins_permuted = op1_idx != 0;
833 attr->res_in_reg = out_idx != 0;
837 char const *const l = op1_idx >= 0 ? get_st_reg(op1_idx)->name : "[AM]";
838 char const *const r = op2_idx >= 0 ? get_st_reg(op2_idx)->name : "[AM]";
839 char const *const o = get_st_reg(out_idx)->name;
840 DB((dbg, LEVEL_1, "<<< %s %s, %s -> %s\n", get_irn_opname(n), l, r, o));
843 return NO_NODE_ADDED;
847 * Simulate a virtual Unop.
849 * @param state the x87 state
850 * @param n the node that should be simulated (and patched)
851 * @param op the x87 opcode that will replace n's opcode
853 * @return NO_NODE_ADDED
855 static int sim_unop(x87_state *state, ir_node *n, ir_op *op)
857 arch_register_t const *const out = x87_get_irn_register(n);
858 unsigned const live = vfp_live_args_after(state->sim, n, REGMASK(out));
859 DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, out->name));
860 DEBUG_ONLY(vfp_dump_live(live);)
862 ir_node *const op1 = get_irn_n(n, 0);
863 arch_register_t const *const op1_reg = x87_get_irn_register(op1);
864 int const op1_reg_idx = op1_reg->index;
865 int const op1_idx = x87_on_stack(state, op1_reg_idx);
866 int const out_reg_idx = out->index;
867 if (is_vfp_live(op1_reg_idx, live)) {
868 /* push the operand here */
869 x87_create_fpush(state, n, op1_idx, out_reg_idx, op1);
871 /* operand is dead, bring it to tos */
873 x87_create_fxch(state, n, op1_idx);
877 x87_set_st(state, out_reg_idx, x87_patch_insn(n, op), 0);
878 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), get_st_reg(0)->name));
880 return NO_NODE_ADDED;
884 * Simulate a virtual Load instruction.
886 * @param state the x87 state
887 * @param n the node that should be simulated (and patched)
888 * @param op the x87 opcode that will replace n's opcode
890 * @return NO_NODE_ADDED
892 static int sim_load(x87_state *state, ir_node *n, ir_op *op, int res_pos)
894 const arch_register_t *out = x87_irn_get_register(n, res_pos);
896 DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, out->name));
897 x87_push(state, out->index, x87_patch_insn(n, op));
898 assert(out == x87_irn_get_register(n, res_pos));
899 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), get_st_reg(0)->name));
901 return NO_NODE_ADDED;
905 * Rewire all users of @p old_val to @new_val iff they are scheduled after @p store.
907 * @param store The store
908 * @param old_val The former value
909 * @param new_val The new value
911 static void collect_and_rewire_users(ir_node *store, ir_node *old_val, ir_node *new_val)
913 foreach_out_edge_safe(old_val, edge) {
914 ir_node *user = get_edge_src_irn(edge);
915 /* if the user is scheduled after the store: rewire */
916 if (sched_is_scheduled(user) && sched_comes_after(store, user)) {
917 set_irn_n(user, get_edge_src_pos(edge), new_val);
923 * Simulate a virtual Store.
925 * @param state the x87 state
926 * @param n the node that should be simulated (and patched)
927 * @param op the x87 store opcode
929 static int sim_store(x87_state *state, ir_node *n, ir_op *op)
931 ir_node *const val = get_irn_n(n, n_ia32_vfst_val);
932 arch_register_t const *const op2 = x87_get_irn_register(val);
933 DB((dbg, LEVEL_1, ">>> %+F %s ->\n", n, op2->name));
936 int insn = NO_NODE_ADDED;
937 int const op2_reg_idx = op2->index;
938 int const op2_idx = x87_on_stack(state, op2_reg_idx);
939 unsigned const live = vfp_live_args_after(state->sim, n, 0);
940 int const live_after_node = is_vfp_live(op2_reg_idx, live);
941 assert(op2_idx >= 0);
942 if (live_after_node) {
943 /* Problem: fst doesn't support 80bit modes (spills), only fstp does
944 * fist doesn't support 64bit mode, only fistp
946 * - stack not full: push value and fstp
947 * - stack full: fstp value and load again
948 * Note that we cannot test on mode_E, because floats might be 80bit ... */
949 ir_mode *const mode = get_ia32_ls_mode(n);
950 if (get_mode_size_bits(mode) > (mode_is_int(mode) ? 32 : 64)) {
951 if (x87_get_depth(state) < N_ia32_st_REGS) {
952 /* ok, we have a free register: push + fstp */
953 x87_create_fpush(state, n, op2_idx, REG_VFP_VFP_NOREG, val);
954 x87_patch_insn(n, op);
957 /* stack full here: need fstp + load */
958 x87_patch_insn(n, op);
961 ir_node *const block = get_nodes_block(n);
962 ir_node *const mem = get_irn_Proj_for_mode(n, mode_M);
963 ir_node *const vfld = new_bd_ia32_vfld(NULL, block, get_irn_n(n, 0), get_irn_n(n, 1), mem, mode);
965 /* copy all attributes */
966 set_ia32_frame_ent(vfld, get_ia32_frame_ent(n));
967 if (is_ia32_use_frame(n))
968 set_ia32_use_frame(vfld);
969 set_ia32_op_type(vfld, ia32_AddrModeS);
970 add_ia32_am_offs_int(vfld, get_ia32_am_offs_int(n));
971 set_ia32_am_sc(vfld, get_ia32_am_sc(n));
972 set_ia32_ls_mode(vfld, mode);
974 ir_node *const rproj = new_r_Proj(vfld, mode, pn_ia32_vfld_res);
975 ir_node *const mproj = new_r_Proj(vfld, mode_M, pn_ia32_vfld_M);
977 arch_set_irn_register(rproj, op2);
979 /* reroute all former users of the store memory to the load memory */
980 edges_reroute_except(mem, mproj, vfld);
982 sched_add_after(n, vfld);
984 /* rewire all users, scheduled after the store, to the loaded value */
985 collect_and_rewire_users(n, val, rproj);
990 /* we can only store the tos to memory */
992 x87_create_fxch(state, n, op2_idx);
994 /* mode size 64 or smaller -> use normal fst */
995 x87_patch_insn(n, op);
998 /* we can only store the tos to memory */
1000 x87_create_fxch(state, n, op2_idx);
1002 x87_patch_insn(n, op);
1009 ia32_x87_attr_t *const attr = get_ia32_x87_attr(n);
1011 DB((dbg, LEVEL_1, "<<< %s %s ->\n", get_irn_opname(n), get_st_reg(0)->name));
1016 #define GEN_BINOP(op) \
1017 static int sim_##op(x87_state *state, ir_node *n) { \
1018 return sim_binop(state, n, op_ia32_##op); \
1021 #define GEN_LOAD(op) \
1022 static int sim_##op(x87_state *state, ir_node *n) { \
1023 return sim_load(state, n, op_ia32_##op, pn_ia32_v##op##_res); \
1026 #define GEN_UNOP(op) \
1027 static int sim_##op(x87_state *state, ir_node *n) { \
1028 return sim_unop(state, n, op_ia32_##op); \
1031 #define GEN_STORE(op) \
1032 static int sim_##op(x87_state *state, ir_node *n) { \
1033 return sim_store(state, n, op_ia32_##op); \
1053 static int sim_fprem(x87_state *const state, ir_node *const n)
1057 panic("TODO implement");
1058 return NO_NODE_ADDED;
1062 * Simulate a virtual fisttp.
1064 * @param state the x87 state
1065 * @param n the node that should be simulated (and patched)
1067 * @return NO_NODE_ADDED
1069 static int sim_fisttp(x87_state *state, ir_node *n)
1071 ir_node *val = get_irn_n(n, n_ia32_vfst_val);
1072 const arch_register_t *op2 = x87_get_irn_register(val);
1074 int const op2_idx = x87_on_stack(state, op2->index);
1075 DB((dbg, LEVEL_1, ">>> %+F %s ->\n", n, op2->name));
1076 assert(op2_idx >= 0);
1078 /* Note: although the value is still live here, it is destroyed because
1079 of the pop. The register allocator is aware of that and introduced a copy
1080 if the value must be alive. */
1082 /* we can only store the tos to memory */
1084 x87_create_fxch(state, n, op2_idx);
1087 x87_patch_insn(n, op_ia32_fisttp);
1089 DB((dbg, LEVEL_1, "<<< %s %s ->\n", get_irn_opname(n), get_st_reg(0)->name));
1091 return NO_NODE_ADDED;
1095 * Simulate a virtual FtstFnstsw.
1097 * @param state the x87 state
1098 * @param n the node that should be simulated (and patched)
1100 * @return NO_NODE_ADDED
1102 static int sim_FtstFnstsw(x87_state *state, ir_node *n)
1104 x87_simulator *sim = state->sim;
1105 ir_node *op1_node = get_irn_n(n, n_ia32_vFtstFnstsw_left);
1106 const arch_register_t *reg1 = x87_get_irn_register(op1_node);
1107 int reg_index_1 = reg1->index;
1108 int op1_idx = x87_on_stack(state, reg_index_1);
1109 unsigned live = vfp_live_args_after(sim, n, 0);
1111 DB((dbg, LEVEL_1, ">>> %+F %s\n", n, reg1->name));
1112 DEBUG_ONLY(vfp_dump_live(live);)
1113 DB((dbg, LEVEL_1, "Stack before: "));
1114 DEBUG_ONLY(x87_dump_stack(state);)
1115 assert(op1_idx >= 0);
1118 /* bring the value to tos */
1119 x87_create_fxch(state, n, op1_idx);
1122 /* patch the operation */
1123 x87_patch_insn(n, op_ia32_FtstFnstsw);
1125 if (!is_vfp_live(reg_index_1, live))
1126 x87_create_fpop(state, sched_next(n));
1128 return NO_NODE_ADDED;
1134 * @param state the x87 state
1135 * @param n the node that should be simulated (and patched)
1137 * @return NO_NODE_ADDED
1139 static int sim_Fucom(x87_state *state, ir_node *n)
1141 ia32_x87_attr_t *attr = get_ia32_x87_attr(n);
1143 x87_simulator *sim = state->sim;
1144 ir_node *op1_node = get_irn_n(n, n_ia32_vFucomFnstsw_left);
1145 ir_node *op2_node = get_irn_n(n, n_ia32_vFucomFnstsw_right);
1146 const arch_register_t *op1 = x87_get_irn_register(op1_node);
1147 const arch_register_t *op2 = x87_get_irn_register(op2_node);
1148 int reg_index_1 = op1->index;
1149 int reg_index_2 = op2->index;
1150 unsigned live = vfp_live_args_after(sim, n, 0);
1152 DB((dbg, LEVEL_1, ">>> %+F %s, %s\n", n, op1->name, op2->name));
1153 DEBUG_ONLY(vfp_dump_live(live);)
1154 DB((dbg, LEVEL_1, "Stack before: "));
1155 DEBUG_ONLY(x87_dump_stack(state);)
1157 int op1_idx = x87_on_stack(state, reg_index_1);
1158 assert(op1_idx >= 0);
1162 /* BEWARE: check for comp a,a cases, they might happen */
1163 if (reg_index_2 != REG_VFP_VFP_NOREG) {
1164 /* second operand is a vfp register */
1165 op2_idx = x87_on_stack(state, reg_index_2);
1166 assert(op2_idx >= 0);
1168 if (is_vfp_live(reg_index_2, live)) {
1169 /* second operand is live */
1171 if (is_vfp_live(reg_index_1, live)) {
1172 /* both operands are live */
1173 if (op1_idx != 0 && op2_idx != 0) {
1174 /* bring the first one to tos */
1175 x87_create_fxch(state, n, op1_idx);
1176 if (op1_idx == op2_idx)
1179 /* res = tos X op */
1182 /* second live, first operand is dead here, bring it to tos.
1183 This means further, op1_idx != op2_idx. */
1184 assert(op1_idx != op2_idx);
1186 x87_create_fxch(state, n, op1_idx);
1191 /* res = tos X op, pop */
1195 /* second operand is dead */
1196 if (is_vfp_live(reg_index_1, live)) {
1197 /* first operand is live: bring second to tos.
1198 This means further, op1_idx != op2_idx. */
1199 assert(op1_idx != op2_idx);
1201 x87_create_fxch(state, n, op2_idx);
1206 /* res = op X tos, pop */
1209 /* both operands are dead here, check first for identity. */
1210 if (op1_idx == op2_idx) {
1211 /* identically, one pop needed */
1213 x87_create_fxch(state, n, op1_idx);
1217 /* res = tos X op, pop */
1220 /* different, move them to st and st(1) and pop both.
1221 The tricky part is to get one into st(1).*/
1222 else if (op2_idx == 1) {
1223 /* good, second operand is already in the right place, move the first */
1225 /* bring the first on top */
1226 x87_create_fxch(state, n, op1_idx);
1227 assert(op2_idx != 0);
1230 /* res = tos X op, pop, pop */
1232 } else if (op1_idx == 1) {
1233 /* good, first operand is already in the right place, move the second */
1235 /* bring the first on top */
1236 x87_create_fxch(state, n, op2_idx);
1237 assert(op1_idx != 0);
1240 /* res = op X tos, pop, pop */
1243 /* if one is already the TOS, we need two fxch */
1245 /* first one is TOS, move to st(1) */
1246 x87_create_fxch(state, n, 1);
1247 assert(op2_idx != 1);
1249 x87_create_fxch(state, n, op2_idx);
1251 /* res = op X tos, pop, pop */
1253 } else if (op2_idx == 0) {
1254 /* second one is TOS, move to st(1) */
1255 x87_create_fxch(state, n, 1);
1256 assert(op1_idx != 1);
1258 x87_create_fxch(state, n, op1_idx);
1260 /* res = tos X op, pop, pop */
1263 /* none of them is either TOS or st(1), 3 fxch needed */
1264 x87_create_fxch(state, n, op2_idx);
1265 assert(op1_idx != 0);
1266 x87_create_fxch(state, n, 1);
1268 x87_create_fxch(state, n, op1_idx);
1270 /* res = tos X op, pop, pop */
1277 /* second operand is an address mode */
1279 x87_create_fxch(state, n, op1_idx);
1280 /* Pop first operand, if it is dead. */
1281 if (!is_vfp_live(reg_index_1, live))
1284 op1_idx = attr->attr.data.ins_permuted ? -1 : 0;
1285 op2_idx = attr->attr.data.ins_permuted ? 0 : -1;
1287 assert(op1_idx == 0 || op2_idx == 0);
1289 /* patch the operation */
1290 if (is_ia32_vFucomFnstsw(n)) {
1291 dst = pops == 2 ? op_ia32_FucomppFnstsw : op_ia32_FucomFnstsw;
1292 for (int i = 0; i < pops; ++i)
1294 } else if (is_ia32_vFucomi(n)) {
1295 dst = op_ia32_Fucomi;
1299 x87_create_fpop(state, sched_next(n));
1301 panic("invalid operation %+F", n);
1304 x87_patch_insn(n, dst);
1306 int const reg_idx = op1_idx != 0 ? op1_idx : op2_idx;
1307 attr->reg = reg_idx >= 0 ? get_st_reg(reg_idx) : NULL;
1308 attr->attr.data.ins_permuted = op1_idx != 0;
1309 attr->pop = pops != 0;
1312 char const *const l = op1_idx >= 0 ? get_st_reg(op1_idx)->name : "[AM]";
1313 char const *const r = op2_idx >= 0 ? get_st_reg(op2_idx)->name : "[AM]";
1314 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(n), l, r));
1317 return NO_NODE_ADDED;
1323 * @param state the x87 state
1324 * @param n the node that should be simulated (and patched)
1326 * @return NO_NODE_ADDED
1328 static int sim_Keep(x87_state *state, ir_node *node)
1331 const arch_register_t *op_reg;
1337 DB((dbg, LEVEL_1, ">>> %+F\n", node));
1339 arity = get_irn_arity(node);
1340 for (i = 0; i < arity; ++i) {
1341 op = get_irn_n(node, i);
1342 op_reg = arch_get_irn_register(op);
1343 if (op_reg->reg_class != &ia32_reg_classes[CLASS_ia32_vfp])
1346 reg_id = op_reg->index;
1347 live = vfp_live_args_after(state->sim, node, 0);
1349 op_stack_idx = x87_on_stack(state, reg_id);
1350 if (op_stack_idx >= 0 && !is_vfp_live(reg_id, live))
1351 x87_create_fpop(state, sched_next(node));
1354 DB((dbg, LEVEL_1, "Stack after: "));
1355 DEBUG_ONLY(x87_dump_stack(state);)
1357 return NO_NODE_ADDED;
1361 * Keep the given node alive by adding a be_Keep.
1363 * @param node the node to kept alive
1365 static void keep_float_node_alive(ir_node *node)
1367 ir_node *block = get_nodes_block(node);
1368 ir_node *keep = be_new_Keep(block, 1, &node);
1369 sched_add_after(node, keep);
1373 * Create a copy of a node. Recreate the node if it's a constant.
1375 * @param state the x87 state
1376 * @param n the node to be copied
1378 * @return the copy of n
1380 static ir_node *create_Copy(x87_state *state, ir_node *n)
1382 dbg_info *n_dbg = get_irn_dbg_info(n);
1383 ir_mode *mode = get_irn_mode(n);
1384 ir_node *block = get_nodes_block(n);
1385 ir_node *pred = get_irn_n(n, 0);
1386 ir_node *(*cnstr)(dbg_info *, ir_node *, ir_mode *) = NULL;
1388 const arch_register_t *out;
1389 const arch_register_t *op1;
1391 /* Do not copy constants, recreate them. */
1392 switch (get_ia32_irn_opcode(pred)) {
1394 cnstr = new_bd_ia32_fldz;
1397 cnstr = new_bd_ia32_fld1;
1399 case iro_ia32_fldpi:
1400 cnstr = new_bd_ia32_fldpi;
1402 case iro_ia32_fldl2e:
1403 cnstr = new_bd_ia32_fldl2e;
1405 case iro_ia32_fldl2t:
1406 cnstr = new_bd_ia32_fldl2t;
1408 case iro_ia32_fldlg2:
1409 cnstr = new_bd_ia32_fldlg2;
1411 case iro_ia32_fldln2:
1412 cnstr = new_bd_ia32_fldln2;
1418 out = x87_get_irn_register(n);
1419 op1 = x87_get_irn_register(pred);
1421 if (cnstr != NULL) {
1422 /* copy a constant */
1423 res = (*cnstr)(n_dbg, block, mode);
1425 x87_push(state, out->index, res);
1427 int op1_idx = x87_on_stack(state, op1->index);
1429 res = new_bd_ia32_fpushCopy(n_dbg, block, pred, mode);
1431 x87_push(state, out->index, res);
1433 ia32_x87_attr_t *const attr = get_ia32_x87_attr(res);
1434 attr->reg = get_st_reg(op1_idx);
1436 arch_set_irn_register(res, out);
1442 * Simulate a be_Copy.
1444 * @param state the x87 state
1445 * @param n the node that should be simulated (and patched)
1447 * @return NO_NODE_ADDED
1449 static int sim_Copy(x87_state *state, ir_node *n)
1451 arch_register_class_t const *const cls = arch_get_irn_reg_class(n);
1452 if (cls != &ia32_reg_classes[CLASS_ia32_vfp])
1453 return NO_NODE_ADDED;
1455 ir_node *const pred = be_get_Copy_op(n);
1456 arch_register_t const *const op1 = x87_get_irn_register(pred);
1457 arch_register_t const *const out = x87_get_irn_register(n);
1458 unsigned const live = vfp_live_args_after(state->sim, n, REGMASK(out));
1460 DB((dbg, LEVEL_1, ">>> %+F %s -> %s\n", n, op1->name, out->name));
1461 DEBUG_ONLY(vfp_dump_live(live);)
1463 if (is_vfp_live(op1->index, live)) {
1464 /* Operand is still live, a real copy. We need here an fpush that can
1465 hold a a register, so use the fpushCopy or recreate constants */
1466 ir_node *const node = create_Copy(state, n);
1468 /* We have to make sure the old value doesn't go dead (which can happen
1469 * when we recreate constants). As the simulator expected that value in
1470 * the pred blocks. This is unfortunate as removing it would save us 1
1471 * instruction, but we would have to rerun all the simulation to get
1474 ir_node *const next = sched_next(n);
1477 sched_add_before(next, node);
1479 if (get_irn_n_edges(pred) == 0) {
1480 keep_float_node_alive(pred);
1483 DB((dbg, LEVEL_1, "<<< %+F %s -> ?\n", node, op1->name));
1485 /* Just a virtual copy. */
1486 int const op1_idx = x87_on_stack(state, op1->index);
1487 x87_set_st(state, out->index, n, op1_idx);
1489 return NO_NODE_ADDED;
1493 * Returns the vf0 result Proj of a Call.
1495 * @para call the Call node
1497 static ir_node *get_call_result_proj(ir_node *call)
1499 /* search the result proj */
1500 foreach_out_edge(call, edge) {
1501 ir_node *proj = get_edge_src_irn(edge);
1502 long pn = get_Proj_proj(proj);
1504 if (pn == pn_ia32_Call_vf0)
1508 panic("result Proj missing");
1511 static int sim_Asm(x87_state *const state, ir_node *const n)
1515 for (size_t i = get_irn_arity(n); i-- != 0;) {
1516 arch_register_req_t const *const req = arch_get_irn_register_req_in(n, i);
1517 if (req->cls == &ia32_reg_classes[CLASS_ia32_vfp])
1518 panic("cannot handle %+F with x87 constraints", n);
1521 for (size_t i = arch_get_irn_n_outs(n); i-- != 0;) {
1522 arch_register_req_t const *const req = arch_get_irn_register_req_out(n, i);
1523 if (req->cls == &ia32_reg_classes[CLASS_ia32_vfp])
1524 panic("cannot handle %+F with x87 constraints", n);
1527 return NO_NODE_ADDED;
1531 * Simulate a ia32_Call.
1533 * @param state the x87 state
1534 * @param n the node that should be simulated (and patched)
1536 * @return NO_NODE_ADDED
1538 static int sim_Call(x87_state *state, ir_node *n)
1540 DB((dbg, LEVEL_1, ">>> %+F\n", n));
1542 /* at the begin of a call the x87 state should be empty */
1543 assert(state->depth == 0 && "stack not empty before call");
1545 ir_type *const call_tp = get_ia32_call_attr_const(n)->call_tp;
1546 if (get_method_n_ress(call_tp) != 0) {
1547 /* If the called function returns a float, it is returned in st(0).
1548 * This even happens if the return value is NOT used.
1549 * Moreover, only one return result is supported. */
1550 ir_type *const res_type = get_method_res_type(call_tp, 0);
1551 ir_mode *const mode = get_type_mode(res_type);
1552 if (mode && mode_is_float(mode)) {
1553 ir_node *const resproj = get_call_result_proj(n);
1554 arch_register_t const *const reg = x87_get_irn_register(resproj);
1555 x87_push(state, reg->index, resproj);
1558 DB((dbg, LEVEL_1, "Stack after: "));
1559 DEBUG_ONLY(x87_dump_stack(state);)
1561 return NO_NODE_ADDED;
1565 * Simulate a be_Return.
1567 * @param state the x87 state
1568 * @param n the node that should be simulated (and patched)
1570 * @return NO_NODE_ADDED
1572 static int sim_Return(x87_state *state, ir_node *n)
1574 #ifdef DEBUG_libfirm
1575 /* only floating point return values must reside on stack */
1576 int n_float_res = 0;
1577 int const n_res = be_Return_get_n_rets(n);
1578 for (int i = 0; i < n_res; ++i) {
1579 ir_node *const res = get_irn_n(n, n_be_Return_val + i);
1580 if (mode_is_float(get_irn_mode(res)))
1583 assert(x87_get_depth(state) == n_float_res);
1586 /* pop them virtually */
1588 return NO_NODE_ADDED;
1592 * Simulate a be_Perm.
1594 * @param state the x87 state
1595 * @param irn the node that should be simulated (and patched)
1597 * @return NO_NODE_ADDED
1599 static int sim_Perm(x87_state *state, ir_node *irn)
1602 ir_node *pred = get_irn_n(irn, 0);
1605 /* handle only floating point Perms */
1606 if (! mode_is_float(get_irn_mode(pred)))
1607 return NO_NODE_ADDED;
1609 DB((dbg, LEVEL_1, ">>> %+F\n", irn));
1611 /* Perm is a pure virtual instruction on x87.
1612 All inputs must be on the FPU stack and are pairwise
1613 different from each other.
1614 So, all we need to do is to permutate the stack state. */
1615 n = get_irn_arity(irn);
1616 NEW_ARR_A(int, stack_pos, n);
1618 /* collect old stack positions */
1619 for (i = 0; i < n; ++i) {
1620 const arch_register_t *inreg = x87_get_irn_register(get_irn_n(irn, i));
1621 int idx = x87_on_stack(state, inreg->index);
1623 assert(idx >= 0 && "Perm argument not on x87 stack");
1627 /* now do the permutation */
1628 foreach_out_edge(irn, edge) {
1629 ir_node *proj = get_edge_src_irn(edge);
1630 const arch_register_t *out = x87_get_irn_register(proj);
1631 long num = get_Proj_proj(proj);
1633 assert(0 <= num && num < n && "More Proj's than Perm inputs");
1634 x87_set_st(state, out->index, proj, stack_pos[(unsigned)num]);
1636 DB((dbg, LEVEL_1, "<<< %+F\n", irn));
1638 return NO_NODE_ADDED;
1642 * Kill any dead registers at block start by popping them from the stack.
1644 * @param sim the simulator handle
1645 * @param block the current block
1646 * @param state the x87 state at the begin of the block
1648 static void x87_kill_deads(x87_simulator *const sim, ir_node *const block, x87_state *const state)
1650 ir_node *first_insn = sched_first(block);
1651 ir_node *keep = NULL;
1652 unsigned live = vfp_live_args_after(sim, block, 0);
1657 depth = x87_get_depth(state);
1658 for (i = depth - 1; i >= 0; --i) {
1659 int reg = x87_get_st_reg(state, i);
1661 if (! is_vfp_live(reg, live))
1662 kill_mask |= (1 << i);
1666 DB((dbg, LEVEL_1, "Killing deads:\n"));
1667 DEBUG_ONLY(vfp_dump_live(live);)
1668 DEBUG_ONLY(x87_dump_stack(state);)
1670 if (kill_mask != 0 && live == 0) {
1671 /* special case: kill all registers */
1672 if (ia32_cg_config.use_femms || ia32_cg_config.use_emms) {
1673 if (ia32_cg_config.use_femms) {
1674 /* use FEMMS on AMD processors to clear all */
1675 keep = new_bd_ia32_femms(NULL, block);
1677 /* use EMMS to clear all */
1678 keep = new_bd_ia32_emms(NULL, block);
1680 sched_add_before(first_insn, keep);
1686 /* now kill registers */
1688 /* we can only kill from TOS, so bring them up */
1689 if (! (kill_mask & 1)) {
1690 /* search from behind, because we can to a double-pop */
1691 for (i = depth - 1; i >= 0; --i) {
1692 if (kill_mask & (1 << i)) {
1693 kill_mask &= ~(1 << i);
1700 x87_set_st(state, -1, keep, i);
1701 x87_create_fxch(state, first_insn, i);
1706 keep = x87_create_fpop(state, first_insn);
1713 * Run a simulation and fix all virtual instructions for a block.
1715 * @param sim the simulator handle
1716 * @param block the current block
1718 static void x87_simulate_block(x87_simulator *sim, ir_node *block)
1721 blk_state *bl_state = x87_get_bl_state(sim, block);
1722 x87_state *state = bl_state->begin;
1723 ir_node *start_block;
1725 assert(state != NULL);
1726 /* already processed? */
1727 if (bl_state->end != NULL)
1730 DB((dbg, LEVEL_1, "Simulate %+F\n", block));
1731 DB((dbg, LEVEL_2, "State at Block begin:\n "));
1732 DEBUG_ONLY(x87_dump_stack(state);)
1734 /* create a new state, will be changed */
1735 state = x87_clone_state(sim, state);
1736 /* at block begin, kill all dead registers */
1737 x87_kill_deads(sim, block, state);
1739 /* beware, n might change */
1740 for (n = sched_first(block); !sched_is_end(n); n = next) {
1743 ir_op *op = get_irn_op(n);
1746 * get the next node to be simulated here.
1747 * n might be completely removed from the schedule-
1749 next = sched_next(n);
1750 if (op->ops.generic != NULL) {
1751 func = (sim_func)op->ops.generic;
1754 node_inserted = (*func)(state, n);
1757 * sim_func might have added an additional node after n,
1758 * so update next node
1759 * beware: n must not be changed by sim_func
1760 * (i.e. removed from schedule) in this case
1762 if (node_inserted != NO_NODE_ADDED)
1763 next = sched_next(n);
1767 start_block = get_irg_start_block(get_irn_irg(block));
1769 DB((dbg, LEVEL_2, "State at Block end:\n ")); DEBUG_ONLY(x87_dump_stack(state);)
1771 /* check if the state must be shuffled */
1772 foreach_block_succ(block, edge) {
1773 ir_node *succ = get_edge_src_irn(edge);
1774 blk_state *succ_state;
1776 if (succ == start_block)
1779 succ_state = x87_get_bl_state(sim, succ);
1781 if (succ_state->begin == NULL) {
1782 DB((dbg, LEVEL_2, "Set begin state for succ %+F:\n", succ));
1783 DEBUG_ONLY(x87_dump_stack(state);)
1784 succ_state->begin = state;
1786 waitq_put(sim->worklist, succ);
1788 DB((dbg, LEVEL_2, "succ %+F already has a state, shuffling\n", succ));
1789 /* There is already a begin state for the successor, bad.
1790 Do the necessary permutations.
1791 Note that critical edges are removed, so this is always possible:
1792 If the successor has more than one possible input, then it must
1795 x87_shuffle(block, state, succ_state->begin);
1798 bl_state->end = state;
1802 * Register a simulator function.
1804 * @param op the opcode to simulate
1805 * @param func the simulator function for the opcode
1807 static void register_sim(ir_op *op, sim_func func)
1809 assert(op->ops.generic == NULL);
1810 op->ops.generic = (op_func) func;
1814 * Create a new x87 simulator.
1816 * @param sim a simulator handle, will be initialized
1817 * @param irg the current graph
1819 static void x87_init_simulator(x87_simulator *sim, ir_graph *irg)
1821 obstack_init(&sim->obst);
1822 sim->blk_states = pmap_create();
1823 sim->n_idx = get_irg_last_idx(irg);
1824 sim->live = OALLOCN(&sim->obst, vfp_liveness, sim->n_idx);
1826 DB((dbg, LEVEL_1, "--------------------------------\n"
1827 "x87 Simulator started for %+F\n", irg));
1829 /* set the generic function pointer of instruction we must simulate */
1830 ir_clear_opcodes_generic_func();
1832 register_sim(op_ia32_Asm, sim_Asm);
1833 register_sim(op_ia32_Call, sim_Call);
1834 register_sim(op_ia32_vfld, sim_fld);
1835 register_sim(op_ia32_vfild, sim_fild);
1836 register_sim(op_ia32_vfld1, sim_fld1);
1837 register_sim(op_ia32_vfldz, sim_fldz);
1838 register_sim(op_ia32_vfadd, sim_fadd);
1839 register_sim(op_ia32_vfsub, sim_fsub);
1840 register_sim(op_ia32_vfmul, sim_fmul);
1841 register_sim(op_ia32_vfdiv, sim_fdiv);
1842 register_sim(op_ia32_vfprem, sim_fprem);
1843 register_sim(op_ia32_vfabs, sim_fabs);
1844 register_sim(op_ia32_vfchs, sim_fchs);
1845 register_sim(op_ia32_vfist, sim_fist);
1846 register_sim(op_ia32_vfisttp, sim_fisttp);
1847 register_sim(op_ia32_vfst, sim_fst);
1848 register_sim(op_ia32_vFtstFnstsw, sim_FtstFnstsw);
1849 register_sim(op_ia32_vFucomFnstsw, sim_Fucom);
1850 register_sim(op_ia32_vFucomi, sim_Fucom);
1851 register_sim(op_be_Copy, sim_Copy);
1852 register_sim(op_be_Return, sim_Return);
1853 register_sim(op_be_Perm, sim_Perm);
1854 register_sim(op_be_Keep, sim_Keep);
1858 * Destroy a x87 simulator.
1860 * @param sim the simulator handle
1862 static void x87_destroy_simulator(x87_simulator *sim)
1864 pmap_destroy(sim->blk_states);
1865 obstack_free(&sim->obst, NULL);
1866 DB((dbg, LEVEL_1, "x87 Simulator stopped\n\n"));
1870 * Pre-block walker: calculate the liveness information for the block
1871 * and store it into the sim->live cache.
1873 static void update_liveness_walker(ir_node *block, void *data)
1875 x87_simulator *sim = (x87_simulator*)data;
1876 update_liveness(sim, block);
1880 * Run a simulation and fix all virtual instructions for a graph.
1881 * Replaces all virtual floating point instructions and registers
1884 void ia32_x87_simulate_graph(ir_graph *irg)
1886 /* TODO improve code quality (less executed fxch) by using execfreqs */
1888 ir_node *block, *start_block;
1889 blk_state *bl_state;
1892 /* create the simulator */
1893 x87_init_simulator(&sim, irg);
1895 start_block = get_irg_start_block(irg);
1896 bl_state = x87_get_bl_state(&sim, start_block);
1898 /* start with the empty state */
1900 bl_state->begin = ∅
1902 sim.worklist = new_waitq();
1903 waitq_put(sim.worklist, start_block);
1905 be_assure_live_sets(irg);
1906 sim.lv = be_get_irg_liveness(irg);
1908 /* Calculate the liveness for all nodes. We must precalculate this info,
1909 * because the simulator adds new nodes (possible before Phi nodes) which
1910 * would let a lazy calculation fail.
1911 * On the other hand we reduce the computation amount due to
1912 * precaching from O(n^2) to O(n) at the expense of O(n) cache memory.
1914 irg_block_walk_graph(irg, update_liveness_walker, NULL, &sim);
1918 block = (ir_node*)waitq_get(sim.worklist);
1919 x87_simulate_block(&sim, block);
1920 } while (! waitq_empty(sim.worklist));
1923 del_waitq(sim.worklist);
1924 x87_destroy_simulator(&sim);
1927 /* Initializes the x87 simulator. */
1928 void ia32_init_x87(void)
1930 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.x87");