2 * Copyright (C) 1995-2007 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 * This file implements the x87 support and virtual to stack
22 * register translation for the ia32 backend.
24 * @author: Michael Beck
37 #include "iredges_t.h"
48 #include "../belive_t.h"
49 #include "../besched_t.h"
50 #include "../benode_t.h"
51 #include "ia32_new_nodes.h"
52 #include "gen_ia32_new_nodes.h"
53 #include "gen_ia32_regalloc_if.h"
58 /* first and second binop index */
65 /* the store val index */
66 #define STORE_VAL_IDX 2
68 #define MASK_TOS(x) ((x) & (N_x87_REGS - 1))
70 /** the debug handle */
71 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
73 /* Forward declaration. */
74 typedef struct _x87_simulator x87_simulator;
77 * An exchange template.
78 * Note that our virtual functions have the same inputs
79 * and attributes as the real ones, so we can simple exchange
81 * Further, x87 supports inverse instructions, so we can handle them.
83 typedef struct _exchange_tmpl {
84 ir_op *normal_op; /**< the normal one */
85 ir_op *reverse_op; /**< the reverse one if exists */
86 ir_op *normal_pop_op; /**< the normal one with tos pop */
87 ir_op *reverse_pop_op; /**< the reverse one with tos pop */
91 * An entry on the simulated x87 stack.
93 typedef struct _st_entry {
94 int reg_idx; /**< the virtual register index of this stack value */
95 ir_node *node; /**< the node that produced this value */
101 typedef struct _x87_state {
102 st_entry st[N_x87_REGS]; /**< the register stack */
103 int depth; /**< the current stack depth */
104 int tos; /**< position of the tos */
105 x87_simulator *sim; /**< The simulator. */
108 /** An empty state, used for blocks without fp instructions. */
109 static x87_state _empty = { { {0, NULL}, }, 0, 0 };
110 static x87_state *empty = (x87_state *)&_empty;
113 NO_NODE_ADDED = 0, /**< No node was added. */
114 NODE_ADDED = 1 /**< A node was added by the simulator in the schedule. */
118 * The type of an instruction simulator function.
120 * @param state the x87 state
121 * @param n the node to be simulated
123 * @return NODE_ADDED if a node was added AFTER n in schedule,
126 typedef int (*sim_func)(x87_state *state, ir_node *n);
129 * A block state: Every block has a x87 state at the beginning and at the end.
131 typedef struct _blk_state {
132 x87_state *begin; /**< state at the begin or NULL if not assigned */
133 x87_state *end; /**< state at the end or NULL if not assigned */
136 #define PTR_TO_BLKSTATE(p) ((blk_state *)(p))
138 /** liveness bitset for vfp registers. */
139 typedef unsigned char vfp_liveness;
144 struct _x87_simulator {
145 struct obstack obst; /**< An obstack for fast allocating. */
146 pmap *blk_states; /**< Map blocks to states. */
147 const arch_env_t *arch_env; /**< The architecture environment. */
148 be_lv_t *lv; /**< intrablock liveness. */
149 vfp_liveness *live; /**< Liveness information. */
150 unsigned n_idx; /**< The cached get_irg_last_idx() result. */
151 waitq *worklist; /**< Worklist of blocks that must be processed. */
155 * Returns the current stack depth.
157 * @param state the x87 state
159 * @return the x87 stack depth
161 static int x87_get_depth(const x87_state *state) {
163 } /* x87_get_depth */
166 * Return the virtual register index at st(pos).
168 * @param state the x87 state
169 * @param pos a stack position
171 * @return the vfp register index that produced the value at st(pos)
173 static int x87_get_st_reg(const x87_state *state, int pos) {
174 assert(pos < state->depth);
175 return state->st[MASK_TOS(state->tos + pos)].reg_idx;
176 } /* x87_get_st_reg */
179 * Return the node at st(pos).
181 * @param state the x87 state
182 * @param pos a stack position
184 * @return the IR node that produced the value at st(pos)
186 static ir_node *x87_get_st_node(const x87_state *state, int pos) {
187 assert(pos < state->depth);
188 return state->st[MASK_TOS(state->tos + pos)].node;
189 } /* x87_get_st_node */
193 * Dump the stack for debugging.
195 * @param state the x87 state
197 static void x87_dump_stack(const x87_state *state) {
200 for (i = state->depth - 1; i >= 0; --i) {
201 DB((dbg, LEVEL_2, "vf%d(%+F) ", x87_get_st_reg(state, i),
202 x87_get_st_node(state, i)));
204 DB((dbg, LEVEL_2, "<-- TOS\n"));
205 } /* x87_dump_stack */
206 #endif /* DEBUG_libfirm */
209 * Set a virtual register to st(pos).
211 * @param state the x87 state
212 * @param reg_idx the vfp register index that should be set
213 * @param node the IR node that produces the value of the vfp register
214 * @param pos the stack position where the new value should be entered
216 static void x87_set_st(x87_state *state, int reg_idx, ir_node *node, int pos) {
217 assert(0 < state->depth);
218 state->st[MASK_TOS(state->tos + pos)].reg_idx = reg_idx;
219 state->st[MASK_TOS(state->tos + pos)].node = node;
221 DB((dbg, LEVEL_2, "After SET_REG: "));
222 DEBUG_ONLY(x87_dump_stack(state));
226 * Set the tos virtual register.
228 * @param state the x87 state
229 * @param reg_idx the vfp register index that should be set
230 * @param node the IR node that produces the value of the vfp register
232 static void x87_set_tos(x87_state *state, int reg_idx, ir_node *node) {
233 x87_set_st(state, reg_idx, node, 0);
237 * Swap st(0) with st(pos).
239 * @param state the x87 state
240 * @param pos the stack position to change the tos with
242 static void x87_fxch(x87_state *state, int pos) {
244 assert(pos < state->depth);
246 entry = state->st[MASK_TOS(state->tos + pos)];
247 state->st[MASK_TOS(state->tos + pos)] = state->st[MASK_TOS(state->tos)];
248 state->st[MASK_TOS(state->tos)] = entry;
250 DB((dbg, LEVEL_2, "After FXCH: ")); DEBUG_ONLY(x87_dump_stack(state));
254 * Convert a virtual register to the stack index.
256 * @param state the x87 state
257 * @param reg_idx the register vfp index
259 * @return the stack position where the register is stacked
260 * or -1 if the virtual register was not found
262 static int x87_on_stack(const x87_state *state, int reg_idx) {
263 int i, tos = state->tos;
265 for (i = 0; i < state->depth; ++i)
266 if (state->st[MASK_TOS(tos + i)].reg_idx == reg_idx)
272 * Push a virtual Register onto the stack, double pushed allowed.
274 * @param state the x87 state
275 * @param reg_idx the register vfp index
276 * @param node the node that produces the value of the vfp register
278 static void x87_push_dbl(x87_state *state, int reg_idx, ir_node *node) {
279 assert(state->depth < N_x87_REGS && "stack overrun");
282 state->tos = MASK_TOS(state->tos - 1);
283 state->st[state->tos].reg_idx = reg_idx;
284 state->st[state->tos].node = node;
286 DB((dbg, LEVEL_2, "After PUSH: ")); DEBUG_ONLY(x87_dump_stack(state));
290 * Push a virtual Register onto the stack, double pushes are NOT allowed.
292 * @param state the x87 state
293 * @param reg_idx the register vfp index
294 * @param node the node that produces the value of the vfp register
 * Note: double pushes are rejected here by an assertion; use x87_push_dbl()
 * when pushing an already-stacked register is intended.
297 static void x87_push(x87_state *state, int reg_idx, ir_node *node) {
298 assert(x87_on_stack(state, reg_idx) == -1 && "double push");
300 x87_push_dbl(state, reg_idx, node);
304 * Pop a virtual Register from the stack.
306 * @param state the x87 state
308 static void x87_pop(x87_state *state) {
309 assert(state->depth > 0 && "stack underrun");
312 state->tos = MASK_TOS(state->tos + 1);
314 DB((dbg, LEVEL_2, "After POP: ")); DEBUG_ONLY(x87_dump_stack(state));
318 * Returns the block state of a block.
320 * @param sim the x87 simulator handle
321 * @param block the current block
323 * @return the block state
325 static blk_state *x87_get_bl_state(x87_simulator *sim, ir_node *block) {
326 pmap_entry *entry = pmap_find(sim->blk_states, block);
329 blk_state *bl_state = obstack_alloc(&sim->obst, sizeof(*bl_state));
330 bl_state->begin = NULL;
331 bl_state->end = NULL;
333 pmap_insert(sim->blk_states, block, bl_state);
337 return PTR_TO_BLKSTATE(entry->value);
338 } /* x87_get_bl_state */
341 * Creates a new x87 state.
343 * @param sim the x87 simulator handle
345 * @return a new x87 state
347 static x87_state *x87_alloc_state(x87_simulator *sim) {
348 x87_state *res = obstack_alloc(&sim->obst, sizeof(*res));
352 } /* x87_alloc_state */
357 * @param sim the x87 simulator handle
358 * @param src the x87 state that will be cloned
360 * @return a cloned copy of the src state
362 static x87_state *x87_clone_state(x87_simulator *sim, const x87_state *src) {
363 x87_state *res = x87_alloc_state(sim);
365 memcpy(res, src, sizeof(*res));
367 } /* x87_clone_state */
370 * Patch a virtual instruction into a x87 one and return
371 * the node representing the result value.
373 * @param n the IR node to patch
374 * @param op the x87 opcode to patch in
376 static ir_node *x87_patch_insn(ir_node *n, ir_op *op) {
377 ir_mode *mode = get_irn_mode(n);
382 if (mode == mode_T) {
383 /* patch all Proj's */
384 const ir_edge_t *edge;
386 foreach_out_edge(n, edge) {
387 ir_node *proj = get_edge_src_irn(edge);
389 mode = get_irn_mode(proj);
390 if (mode_is_float(mode)) {
392 set_irn_mode(proj, mode_E);
396 } else if (mode_is_float(mode))
397 set_irn_mode(n, mode_E);
399 } /* x87_patch_insn */
402 * Returns the first Proj of a mode_T node having a given mode.
404 * @param n the mode_T node
405 * @param m the desired mode of the Proj
406 * @return The first Proj of mode @p m found or NULL.
408 static ir_node *get_irn_Proj_for_mode(ir_node *n, ir_mode *m) {
409 const ir_edge_t *edge;
411 assert(get_irn_mode(n) == mode_T && "Need mode_T node");
413 foreach_out_edge(n, edge) {
414 ir_node *proj = get_edge_src_irn(edge);
415 if (get_irn_mode(proj) == m)
420 } /* get_irn_Proj_for_mode */
423 * Wrap the arch_* function here so we can check for errors.
425 static INLINE const arch_register_t *x87_get_irn_register(x87_simulator *sim, const ir_node *irn) {
426 const arch_register_t *res;
428 res = arch_get_irn_register(sim->arch_env, irn);
429 assert(res->reg_class->regs == ia32_vfp_regs);
431 } /* x87_get_irn_register */
433 /* -------------- x87 perm --------------- */
436 * Creates a fxch for shuffle.
438 * @param state the x87 state
439 * @param pos parameter for fxch
440 * @param block the block were fxch is inserted
442 * Creates a new fxch node and reroute the user of the old node
445 * @return the fxch node
447 static ir_node *x87_fxch_shuffle(x87_state *state, int pos, ir_node *block) {
451 fxch = new_rd_ia32_fxch(NULL, get_irn_irg(block), block, mode_E);
452 attr = get_ia32_attr(fxch);
453 attr->x87[0] = &ia32_st_regs[pos];
454 attr->x87[2] = &ia32_st_regs[0];
458 x87_fxch(state, pos);
460 } /* x87_fxch_shuffle */
463 * Calculate the necessary permutations to reach dst_state.
465 * These permutations are done with fxch instructions and placed
466 * at the end of the block.
468 * Note that critical edges are removed here, so we need only
469 * a shuffle if the current block has only one successor.
471 * @param sim the simulator handle
472 * @param block the current block
473 * @param state the current x87 stack state, might be modified
474 * @param dst_block the destination block
475 * @param dst_state destination state
479 static x87_state *x87_shuffle(x87_simulator *sim, ir_node *block, x87_state *state, ir_node *dst_block, const x87_state *dst_state) {
480 int i, n_cycles, k, ri;
481 unsigned cycles[4], all_mask;
482 char cycle_idx[4][8];
483 ir_node *fxch, *before, *after;
485 assert(state->depth == dst_state->depth);
487 /* Some mathematics here:
488 If we have a cycle of length n that includes the tos,
489 we need n-1 exchange operations.
490 We can always add the tos and restore it, so we need
491 n+1 exchange operations for a cycle not containing the tos.
492 So, the maximum of needed operations is for a cycle of 7
493 not including the tos == 8.
494 This is the same number of ops we would need for using stores,
495 so exchange is cheaper (we save the loads).
496 On the other hand, we might need an additional exchange
497 in the next block to bring one operand on top, so the
498 number of ops in the first case is identical.
499 Further, no more than 4 cycles can exists (4 x 2).
501 all_mask = (1 << (state->depth)) - 1;
503 for (n_cycles = 0; all_mask; ++n_cycles) {
504 int src_idx, dst_idx;
506 /* find the first free slot */
507 for (i = 0; i < state->depth; ++i) {
508 if (all_mask & (1 << i)) {
509 all_mask &= ~(1 << i);
511 /* check if there are differences here */
512 if (x87_get_st_reg(state, i) != x87_get_st_reg(dst_state, i))
518 /* no more cycles found */
523 cycles[n_cycles] = (1 << i);
524 cycle_idx[n_cycles][k++] = i;
525 for (src_idx = i; ; src_idx = dst_idx) {
526 dst_idx = x87_on_stack(dst_state, x87_get_st_reg(state, src_idx));
528 if ((all_mask & (1 << dst_idx)) == 0)
531 cycle_idx[n_cycles][k++] = dst_idx;
532 cycles[n_cycles] |= (1 << dst_idx);
533 all_mask &= ~(1 << dst_idx);
535 cycle_idx[n_cycles][k] = -1;
539 /* no permutation needed */
543 /* Hmm: permutation needed */
544 DB((dbg, LEVEL_2, "\n%+F needs permutation: from\n", block));
545 DEBUG_ONLY(x87_dump_stack(state));
546 DB((dbg, LEVEL_2, " to\n"));
547 DEBUG_ONLY(x87_dump_stack(dst_state));
551 DB((dbg, LEVEL_2, "Need %d cycles\n", n_cycles));
552 for (ri = 0; ri < n_cycles; ++ri) {
553 DB((dbg, LEVEL_2, " Ring %d:\n ", ri));
554 for (k = 0; cycle_idx[ri][k] != -1; ++k)
555 DB((dbg, LEVEL_2, " st%d ->", cycle_idx[ri][k]));
556 DB((dbg, LEVEL_2, "\n"));
563 * Find the place node must be insert.
564 * We have only one successor block, so the last instruction should
567 before = sched_last(block);
568 assert(is_cfop(before));
570 /* now do the permutations */
571 for (ri = 0; ri < n_cycles; ++ri) {
572 if ((cycles[ri] & 1) == 0) {
573 /* this cycle does not include the tos */
574 fxch = x87_fxch_shuffle(state, cycle_idx[ri][0], block);
576 sched_add_after(after, fxch);
578 sched_add_before(before, fxch);
581 for (k = 1; cycle_idx[ri][k] != -1; ++k) {
582 fxch = x87_fxch_shuffle(state, cycle_idx[ri][k], block);
584 sched_add_after(after, fxch);
586 sched_add_before(before, fxch);
589 if ((cycles[ri] & 1) == 0) {
590 /* this cycle does not include the tos */
591 fxch = x87_fxch_shuffle(state, cycle_idx[ri][0], block);
592 sched_add_after(after, fxch);
599 * Create a fxch node before another node.
601 * @param state the x87 state
602 * @param n the node after the fxch
603 * @param pos exchange st(pos) with st(0)
604 * @param op_idx if >= 0, replace input op_idx of n with the fxch result
608 static ir_node *x87_create_fxch(x87_state *state, ir_node *n, int pos, int op_idx) {
611 ir_graph *irg = get_irn_irg(n);
612 ir_node *block = get_nodes_block(n);
614 x87_fxch(state, pos);
616 fxch = new_rd_ia32_fxch(NULL, irg, block, mode_E);
617 attr = get_ia32_attr(fxch);
618 attr->x87[0] = &ia32_st_regs[pos];
619 attr->x87[2] = &ia32_st_regs[0];
623 sched_add_before(n, fxch);
624 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fxch), attr->x87[0]->name, attr->x87[2]->name));
626 } /* x87_create_fxch */
629 * Create a fpush before node n.
631 * @param state the x87 state
632 * @param n the node after the fpush
633 * @param pos push st(pos) on stack
634 * @param op_idx replace input op_idx of n with the fpush result
636 static void x87_create_fpush(x87_state *state, ir_node *n, int pos, int op_idx) {
637 ir_node *fpush, *pred = get_irn_n(n, op_idx);
639 const arch_register_t *out = x87_get_irn_register(state->sim, pred);
641 x87_push_dbl(state, arch_register_get_index(out), pred);
643 fpush = new_rd_ia32_fpush(NULL, get_irn_irg(n), get_nodes_block(n), mode_E);
644 attr = get_ia32_attr(fpush);
645 attr->x87[0] = &ia32_st_regs[pos];
646 attr->x87[2] = &ia32_st_regs[0];
649 sched_add_before(n, fpush);
651 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fpush), attr->x87[0]->name, attr->x87[2]->name));
652 } /* x87_create_fpush */
655 * Create a fpop before node n.
657 * @param state the x87 state
658 * @param n the node after the fpop
659 * @param num pop 1 or 2 values
660 * @param pred node to use as predecessor of the fpop
662 * @return the fpop node
664 static ir_node *x87_create_fpop(x87_state *state, ir_node *n, int num, ir_node *pred) {
665 ir_node *fpop = pred;
672 fpop = new_rd_ia32_fpop(NULL, get_irn_irg(n), get_nodes_block(n), mode_E);
673 attr = get_ia32_attr(fpop);
674 attr->x87[0] = &ia32_st_regs[0];
675 attr->x87[1] = &ia32_st_regs[0];
676 attr->x87[2] = &ia32_st_regs[0];
679 sched_add_before(n, fpop);
680 DB((dbg, LEVEL_1, "<<< %s %s\n", get_irn_opname(fpop), attr->x87[0]->name));
686 } /* x87_create_fpop */
689 * Creates an fldz before node n
691 * @param state the x87 state
692 * @param n the node after the fldz
694 * @return the fldz node
696 static ir_node *x87_create_fldz(x87_state *state, ir_node *n, int regidx) {
697 ir_graph *irg = get_irn_irg(n);
698 ir_node *block = get_nodes_block(n);
701 fldz = new_rd_ia32_fldz(NULL, irg, block, mode_E);
703 sched_add_before(n, fldz);
704 DB((dbg, LEVEL_1, "<<< %s\n", get_irn_opname(fldz)));
707 x87_push(state, regidx, fldz);
712 /* --------------------------------- liveness ------------------------------------------ */
715 * The liveness transfer function.
716 * Updates a live set over a single step from a given node to its predecessor.
717 * Everything defined at the node is removed from the set, the uses of the node get inserted.
719 * @param sim The simulator handle.
720 * @param irn The node at which liveness should be computed.
721 * @param live The bitset of registers live before @p irn. This set gets modified by updating it to
722 * the registers live after irn.
724 * @return The live bitset.
726 static vfp_liveness vfp_liveness_transfer(x87_simulator *sim, ir_node *irn, vfp_liveness live)
729 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
730 const arch_env_t *arch_env = sim->arch_env;
732 if (arch_irn_consider_in_reg_alloc(arch_env, cls, irn)) {
733 const arch_register_t *reg = x87_get_irn_register(sim, irn);
734 live &= ~(1 << arch_register_get_index(reg));
737 for (i = 0, n = get_irn_arity(irn); i < n; ++i) {
738 ir_node *op = get_irn_n(irn, i);
740 if (mode_is_float(get_irn_mode(op)) && arch_irn_consider_in_reg_alloc(arch_env, cls, op)) {
741 const arch_register_t *reg = x87_get_irn_register(sim, op);
742 live |= 1 << arch_register_get_index(reg);
746 } /* vfp_liveness_transfer */
749 * Put all live virtual registers at the end of a block into a bitset.
751 * @param sim the simulator handle
752 * @param lv the liveness information
753 * @param bl the block
755 * @return The live bitset at the end of this block
757 static vfp_liveness vfp_liveness_end_of_block(x87_simulator *sim, const ir_node *block)
760 vfp_liveness live = 0;
761 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
762 const arch_env_t *arch_env = sim->arch_env;
763 const be_lv_t *lv = sim->lv;
765 be_lv_foreach(lv, block, be_lv_state_end, i) {
766 const arch_register_t *reg;
767 const ir_node *node = be_lv_get_irn(lv, block, i);
768 if (!arch_irn_consider_in_reg_alloc(arch_env, cls, node))
771 reg = x87_get_irn_register(sim, node);
772 live |= 1 << arch_register_get_index(reg);
776 } /* vfp_liveness_end_of_block */
778 /** get the register mask from an arch_register */
779 #define REGMASK(reg) (1 << (arch_register_get_index(reg)))
782 * Return a bitset of argument registers which are live at the end of a node.
784 * @param sim the simulator handle
785 * @param pos the node
786 * @param kill kill mask for the output registers
788 * @return The live bitset.
790 static unsigned vfp_live_args_after(x87_simulator *sim, const ir_node *pos, unsigned kill)
792 unsigned idx = get_irn_idx(pos);
794 assert(idx < sim->n_idx);
795 return sim->live[idx] & ~kill;
796 } /* vfp_live_args_after */
799 * Calculate the liveness for a whole block and cache it.
801 * @param sim the simulator handle
802 * @param lv the liveness handle
803 * @param block the block
805 static void update_liveness(x87_simulator *sim, ir_node *block) {
806 vfp_liveness live = vfp_liveness_end_of_block(sim, block);
810 /* now iterate through the block backward and cache the results */
811 sched_foreach_reverse(block, irn) {
812 /* stop at the first Phi: this produces the live-in */
816 idx = get_irn_idx(irn);
817 sim->live[idx] = live;
819 live = vfp_liveness_transfer(sim, irn, live);
821 idx = get_irn_idx(block);
822 sim->live[idx] = live;
823 } /* update_liveness */
826 * Returns true if a register is live in a set.
828 * @param reg_idx the vfp register index
829 * @param live a live bitset
831 #define is_vfp_live(reg_idx, live) ((live) & (1 << (reg_idx)))
835 * Dump liveness info.
837 * @param live the live bitset
839 static void vfp_dump_live(vfp_liveness live) {
842 DB((dbg, LEVEL_2, "Live after: "));
843 for (i = 0; i < 8; ++i) {
844 if (live & (1 << i)) {
845 DB((dbg, LEVEL_2, "vf%d ", i));
848 DB((dbg, LEVEL_2, "\n"));
849 } /* vfp_dump_live */
850 #endif /* DEBUG_libfirm */
852 /* --------------------------------- simulators ---------------------------------------- */
854 #define XCHG(a, b) do { int t = (a); (a) = (b); (b) = t; } while (0)
857 * Simulate a virtual binop.
859 * @param state the x87 state
860 * @param n the node that should be simulated (and patched)
861 * @param tmpl the template containing the 4 possible x87 opcodes
863 * @return NO_NODE_ADDED
865 static int sim_binop(x87_state *state, ir_node *n, const exchange_tmpl *tmpl) {
866 int op2_idx = 0, op1_idx;
867 int out_idx, do_pop = 0;
869 ir_node *patched_insn;
871 x87_simulator *sim = state->sim;
872 const arch_register_t *op1 = x87_get_irn_register(sim, get_irn_n(n, BINOP_IDX_1));
873 const arch_register_t *op2 = x87_get_irn_register(sim, get_irn_n(n, BINOP_IDX_2));
874 const arch_register_t *out = x87_get_irn_register(sim, n);
875 int reg_index_1 = arch_register_get_index(op1);
876 int reg_index_2 = arch_register_get_index(op2);
877 vfp_liveness live = vfp_live_args_after(sim, n, REGMASK(out));
879 DB((dbg, LEVEL_1, ">>> %+F %s, %s -> %s\n", n,
880 arch_register_get_name(op1), arch_register_get_name(op2),
881 arch_register_get_name(out)));
882 DEBUG_ONLY(vfp_dump_live(live));
883 DB((dbg, LEVEL_1, "Stack before: "));
884 DEBUG_ONLY(x87_dump_stack(state));
886 op1_idx = x87_on_stack(state, reg_index_1);
887 assert(op1_idx >= 0);
889 if (reg_index_2 != REG_VFP_NOREG) {
890 /* second operand is a vfp register */
891 op2_idx = x87_on_stack(state, reg_index_2);
892 assert(op2_idx >= 0);
894 if (is_vfp_live(arch_register_get_index(op2), live)) {
895 /* Second operand is live. */
897 if (is_vfp_live(arch_register_get_index(op1), live)) {
898 /* Both operands are live: push the first one.
899 This works even for op1 == op2. */
900 x87_create_fpush(state, n, op1_idx, BINOP_IDX_2);
901 /* now do fxxx (tos=tos X op) */
905 dst = tmpl->normal_op;
907 /* Second live, first operand is dead here, bring it to tos. */
909 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
914 /* now do fxxx (tos=tos X op) */
916 dst = tmpl->normal_op;
919 /* Second operand is dead. */
920 if (is_vfp_live(arch_register_get_index(op1), live)) {
921 /* First operand is live: bring second to tos. */
923 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
928 /* now do fxxxr (tos = op X tos) */
930 dst = tmpl->reverse_op;
932 /* Both operands are dead here, pop them from the stack. */
935 /* Both are identically and on tos, no pop needed. */
936 /* here fxxx (tos = tos X tos) */
937 dst = tmpl->normal_op;
940 /* now do fxxxp (op = op X tos, pop) */
941 dst = tmpl->normal_pop_op;
945 } else if (op1_idx == 0) {
946 assert(op1_idx != op2_idx);
947 /* now do fxxxrp (op = tos X op, pop) */
948 dst = tmpl->reverse_pop_op;
952 /* Bring the second on top. */
953 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
954 if (op1_idx == op2_idx) {
955 /* Both are identically and on tos now, no pop needed. */
958 /* use fxxx (tos = tos X tos) */
959 dst = tmpl->normal_op;
962 /* op2 is on tos now */
964 /* use fxxxp (op = op X tos, pop) */
965 dst = tmpl->normal_pop_op;
973 /* second operand is an address mode */
974 if (is_vfp_live(arch_register_get_index(op1), live)) {
975 /* first operand is live: push it here */
976 x87_create_fpush(state, n, op1_idx, BINOP_IDX_1);
978 /* use fxxx (tos = tos X mem) */
979 dst = tmpl->normal_op;
982 /* first operand is dead: bring it to tos */
984 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
988 /* use fxxxp (tos = tos X mem) */
989 dst = tmpl->normal_op;
994 patched_insn = x87_patch_insn(n, dst);
995 x87_set_st(state, arch_register_get_index(out), patched_insn, out_idx);
1000 /* patch the operation */
1001 attr = get_ia32_attr(n);
1002 attr->x87[0] = op1 = &ia32_st_regs[op1_idx];
1003 if (reg_index_2 != REG_VFP_NOREG) {
1004 attr->x87[1] = op2 = &ia32_st_regs[op2_idx];
1006 attr->x87[2] = out = &ia32_st_regs[out_idx];
1008 if (reg_index_2 != REG_VFP_NOREG) {
1009 DB((dbg, LEVEL_1, "<<< %s %s, %s -> %s\n", get_irn_opname(n),
1010 arch_register_get_name(op1), arch_register_get_name(op2),
1011 arch_register_get_name(out)));
1013 DB((dbg, LEVEL_1, "<<< %s %s, [AM] -> %s\n", get_irn_opname(n),
1014 arch_register_get_name(op1),
1015 arch_register_get_name(out)));
1018 return NO_NODE_ADDED;
1022 * Simulate a virtual Unop.
1024 * @param state the x87 state
1025 * @param n the node that should be simulated (and patched)
1026 * @param op the x87 opcode that will replace n's opcode
1028 * @return NO_NODE_ADDED
1030 static int sim_unop(x87_state *state, ir_node *n, ir_op *op) {
1031 int op1_idx, out_idx;
1032 x87_simulator *sim = state->sim;
1033 const arch_register_t *op1 = x87_get_irn_register(sim, get_irn_n(n, UNOP_IDX));
1034 const arch_register_t *out = x87_get_irn_register(sim, n);
1036 unsigned live = vfp_live_args_after(sim, n, REGMASK(out));
1038 DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, out->name));
1039 DEBUG_ONLY(vfp_dump_live(live));
1041 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1043 if (is_vfp_live(arch_register_get_index(op1), live)) {
1044 /* push the operand here */
1045 x87_create_fpush(state, n, op1_idx, UNOP_IDX);
1049 /* operand is dead, bring it to tos */
1051 x87_create_fxch(state, n, op1_idx, UNOP_IDX);
1056 x87_set_tos(state, arch_register_get_index(out), x87_patch_insn(n, op));
1058 attr = get_ia32_attr(n);
1059 attr->x87[0] = op1 = &ia32_st_regs[0];
1060 attr->x87[2] = out = &ia32_st_regs[0];
1061 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), out->name));
1063 return NO_NODE_ADDED;
1067 * Simulate a virtual Load instruction.
1069 * @param state the x87 state
1070 * @param n the node that should be simulated (and patched)
1071 * @param op the x87 opcode that will replace n's opcode
1073 * @return NO_NODE_ADDED
1075 static int sim_load(x87_state *state, ir_node *n, ir_op *op) {
1076 const arch_register_t *out = x87_get_irn_register(state->sim, n);
1079 DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, arch_register_get_name(out)));
1080 x87_push(state, arch_register_get_index(out), x87_patch_insn(n, op));
1081 assert(out == x87_get_irn_register(state->sim, n));
1082 attr = get_ia32_attr(n);
1083 attr->x87[2] = out = &ia32_st_regs[0];
1084 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), arch_register_get_name(out)));
1086 return NO_NODE_ADDED;
1090 * Rewire all users of @p old_val to @new_val iff they are scheduled after @p store.
1092 * @param store The store
1093 * @param old_val The former value
1094 * @param new_val The new value
1096 static void collect_and_rewire_users(ir_node *store, ir_node *old_val, ir_node *new_val) {
1097 const ir_edge_t *edge, *ne;
1099 foreach_out_edge_safe(old_val, edge, ne) {
1100 ir_node *user = get_edge_src_irn(edge);
1102 if (! user || user == store)
1105 /* if the user is scheduled after the store: rewire */
1106 if (sched_is_scheduled(user) && sched_comes_after(store, user)) {
1108 /* find the input of the user pointing to the old value */
1109 for (i = get_irn_arity(user) - 1; i >= 0; i--) {
1110 if (get_irn_n(user, i) == old_val)
1111 set_irn_n(user, i, new_val);
1115 } /* collect_and_rewire_users */
1118 * Simulate a virtual Store.
1120 * @param state the x87 state
1121 * @param n the node that should be simulated (and patched)
1122 * @param op the x87 store opcode
1123 * @param op_p the x87 store and pop opcode
1125 static int sim_store(x87_state *state, ir_node *n, ir_op *op, ir_op *op_p) {
1126 x87_simulator *sim = state->sim;
1127 ir_node *val = get_irn_n(n, STORE_VAL_IDX);
1128 const arch_register_t *op2 = x87_get_irn_register(sim, val);
1129 unsigned live = vfp_live_args_after(sim, n, 0);
1130 int insn = NO_NODE_ADDED;
1132 int op2_reg_idx, op2_idx, depth;
1133 int live_after_node;
1136 op2_reg_idx = arch_register_get_index(op2);
1137 if (op2_reg_idx == REG_VFP_UKNWN) {
1138 /* just take any value from stack */
1139 if(state->depth > 0) {
1141 DEBUG_ONLY(op2 = NULL);
1142 live_after_node = 1;
1144 /* produce a new value which we will consume immediately */
1145 x87_create_fldz(state, n, op2_reg_idx);
1146 live_after_node = 0;
1147 op2_idx = x87_on_stack(state, op2_reg_idx);
1148 assert(op2_idx >= 0);
1151 op2_idx = x87_on_stack(state, op2_reg_idx);
1152 live_after_node = is_vfp_live(arch_register_get_index(op2), live);
1153 assert(op2_idx >= 0);
1154 DB((dbg, LEVEL_1, ">>> %+F %s ->\n", n, arch_register_get_name(op2)));
1157 mode = get_ia32_ls_mode(n);
1158 depth = x87_get_depth(state);
1160 if (live_after_node) {
1162 Problem: fst doesn't support mode_E (spills), only fstp does
1164 - stack not full: push value and fstp
1165 - stack full: fstp value and load again
1167 if (mode == mode_E) {
1168 if (depth < N_x87_REGS) {
1169 /* ok, we have a free register: push + fstp */
1170 x87_create_fpush(state, n, op2_idx, STORE_VAL_IDX);
1172 x87_patch_insn(n, op_p);
1174 ir_node *vfld, *mem, *block, *rproj, *mproj;
1177 /* stack full here: need fstp + load */
1179 x87_patch_insn(n, op_p);
1181 block = get_nodes_block(n);
1182 irg = get_irn_irg(n);
1183 vfld = new_rd_ia32_vfld(NULL, irg, block, get_irn_n(n, 0), get_irn_n(n, 1), new_rd_NoMem(irg));
1185 /* copy all attributes */
1186 set_ia32_frame_ent(vfld, get_ia32_frame_ent(n));
1187 if (is_ia32_use_frame(n))
1188 set_ia32_use_frame(vfld);
1189 set_ia32_am_flavour(vfld, get_ia32_am_flavour(n));
1190 set_ia32_op_type(vfld, ia32_am_Source);
1191 add_ia32_am_offs_int(vfld, get_ia32_am_offs_int(n));
1192 set_ia32_am_sc(vfld, get_ia32_am_sc(n));
1193 set_ia32_ls_mode(vfld, get_ia32_ls_mode(n));
1195 rproj = new_r_Proj(irg, block, vfld, get_ia32_ls_mode(vfld), pn_ia32_vfld_res);
1196 mproj = new_r_Proj(irg, block, vfld, mode_M, pn_ia32_vfld_M);
1197 mem = get_irn_Proj_for_mode(n, mode_M);
1199 assert(mem && "Store memory not found");
1201 arch_set_irn_register(sim->arch_env, rproj, op2);
1203 /* reroute all former users of the store memory to the load memory */
1204 edges_reroute(mem, mproj, irg);
1205 /* set the memory input of the load to the store memory */
1206 set_irn_n(vfld, 2, mem);
1208 sched_add_after(n, vfld);
1209 sched_add_after(vfld, rproj);
1211 /* rewire all users, scheduled after the store, to the loaded value */
1212 collect_and_rewire_users(n, val, rproj);
1217 /* we can only store the tos to memory */
1219 x87_create_fxch(state, n, op2_idx, STORE_VAL_IDX);
1221 /* mode != mode_E -> use normal fst */
1222 x87_patch_insn(n, op);
1225 /* we can only store the tos to memory */
1227 x87_create_fxch(state, n, op2_idx, STORE_VAL_IDX);
1230 x87_patch_insn(n, op_p);
1233 attr = get_ia32_attr(n);
1234 attr->x87[1] = op2 = &ia32_st_regs[0];
1235 DB((dbg, LEVEL_1, "<<< %s %s ->\n", get_irn_opname(n), arch_register_get_name(op2)));
1241 * Simulate a virtual Phi.
1242 * Just for cosmetic reasons change the mode of Phi nodes to mode_E.
1244 * @param state the x87 state
1245 * @param n the node that should be simulated (and patched)
1246 * @param arch_env the architecture environment
1248 * @return NO_NODE_ADDED
/**
 * Simulate a virtual Phi.
 * Just for cosmetic reasons change the mode of floating point Phi nodes
 * to mode_E (the x87 stack slots are all extended precision anyway);
 * no stack transformation is required.
 *
 * @param state     the x87 state (not used here)
 * @param n         the Phi node that should be patched
 * @param arch_env  the architecture environment (not used here)
 * @return NO_NODE_ADDED (no node was inserted into the schedule)
 */
1250 static int sim_Phi(x87_state *state, ir_node *n, const arch_env_t *arch_env) {
1251 ir_mode *mode = get_irn_mode(n);
1253 if (mode_is_float(mode))
1254 set_irn_mode(n, mode_E);
1256 return NO_NODE_ADDED;
/* Generator macro for binop simulators: creates sim_<op>() which forwards
   to sim_binop() with an exchange template holding the normal, reverse,
   and TOS-popping opcode variants. */
1259 #define _GEN_BINOP(op, rev) \
1260 static int sim_##op(x87_state *state, ir_node *n) { \
1261 exchange_tmpl tmpl = { op_ia32_##op, op_ia32_##rev, op_ia32_##op##p, op_ia32_##rev##p }; \
1262 return sim_binop(state, n, &tmpl); \
/* GEN_BINOP: commutative ops (reverse op == normal op);
   GEN_BINOPR: non-commutative ops with an explicit <op>r reverse form. */
1265 #define GEN_BINOP(op) _GEN_BINOP(op, op)
1266 #define GEN_BINOPR(op) _GEN_BINOP(op, op##r)
/* Generator macro for load simulators: sim_<op>() forwards to sim_load()
   with the real (non-virtual) opcode <nop>. */
1268 #define GEN_LOAD2(op, nop) \
1269 static int sim_##op(x87_state *state, ir_node *n) { \
1270 return sim_load(state, n, op_ia32_##nop); \
1273 #define GEN_LOAD(op) GEN_LOAD2(op, op)
/* Generator macro for unop simulators (single stack operand). */
1275 #define GEN_UNOP(op) \
1276 static int sim_##op(x87_state *state, ir_node *n) { \
1277 return sim_unop(state, n, op_ia32_##op); \
/* Generator macro for store simulators: passes both the plain store
   opcode and its TOS-popping variant to sim_store(). */
1280 #define GEN_STORE(op) \
1281 static int sim_##op(x87_state *state, ir_node *n) { \
1282 return sim_store(state, n, op_ia32_##op, op_ia32_##op##p); \
1307 * Simulate a fCondJmp.
1309 * @param state the x87 state
1310 * @param n the node that should be simulated (and patched)
1312 * @return NO_NODE_ADDED
/**
 * Simulate a virtual fCondJmp (floating point compare and jump).
 *
 * x87 compares always involve the TOS, so operands may first have to be
 * brought into position with fxch; operands that are dead after the
 * compare are popped by selecting the -p/-pp opcode variants.  The
 * selected real opcode is patched into n and the x87 register
 * attributes are filled in.
 *
 * @param state  the x87 state
 * @param n      the node that should be simulated (and patched)
 * @return NO_NODE_ADDED
 */
1314 static int sim_fCondJmp(x87_state *state, ir_node *n) {
1320 x87_simulator *sim = state->sim;
1321 const arch_register_t *op1 = x87_get_irn_register(sim, get_irn_n(n, BINOP_IDX_1));
1322 const arch_register_t *op2 = x87_get_irn_register(sim, get_irn_n(n, BINOP_IDX_2));
1323 int reg_index_1 = arch_register_get_index(op1);
1324 int reg_index_2 = arch_register_get_index(op2);
1325 unsigned live = vfp_live_args_after(sim, n, 0);
1327 DB((dbg, LEVEL_1, ">>> %+F %s, %s\n", n,
1328 arch_register_get_name(op1), arch_register_get_name(op2)));
1329 DEBUG_ONLY(vfp_dump_live(live));
1330 DB((dbg, LEVEL_1, "Stack before: "));
1331 DEBUG_ONLY(x87_dump_stack(state));
/* first operand must already be on the simulated stack */
1333 op1_idx = x87_on_stack(state, reg_index_1);
1334 assert(op1_idx >= 0);
1336 /* BEWARE: check for comp a,a cases, they might happen */
1337 if (reg_index_2 != REG_VFP_NOREG) {
1338 /* second operand is a vfp register */
1339 op2_idx = x87_on_stack(state, reg_index_2);
1340 assert(op2_idx >= 0);
1342 if (is_vfp_live(arch_register_get_index(op2), live)) {
1343 /* second operand is live */
1345 if (is_vfp_live(arch_register_get_index(op1), live)) {
/* both live: no pops allowed, pick plain fcom/fcomr depending on
   which operand already sits at the TOS */
1346 /* both operands are live */
1349 /* res = tos X op */
1350 dst = op_ia32_fcomJmp;
1351 } else if (op2_idx == 0) {
1352 /* res = op X tos */
1353 dst = op_ia32_fcomrJmp;
1355 /* bring the first one to tos */
1356 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1360 /* res = tos X op */
1361 dst = op_ia32_fcomJmp;
1364 /* second live, first operand is dead here, bring it to tos.
1365 This means further, op1_idx != op2_idx. */
1366 assert(op1_idx != op2_idx);
1368 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1373 /* res = tos X op, pop */
1374 dst = op_ia32_fcompJmp;
1378 /* second operand is dead */
1379 if (is_vfp_live(arch_register_get_index(op1), live)) {
1380 /* first operand is live: bring second to tos.
1381 This means further, op1_idx != op2_idx. */
1382 assert(op1_idx != op2_idx);
1384 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1389 /* res = op X tos, pop */
1390 dst = op_ia32_fcomrpJmp;
1393 /* both operands are dead here, check first for identity. */
1394 if (op1_idx == op2_idx) {
1395 /* identical, one pop needed */
1397 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1401 /* res = tos X op, pop */
1402 dst = op_ia32_fcompJmp;
1405 /* different, move them to st and st(1) and pop both.
1406 The tricky part is to get one into st(1).*/
1407 else if (op2_idx == 1) {
1408 /* good, second operand is already in the right place, move the first */
1410 /* bring the first on top */
1411 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1412 assert(op2_idx != 0);
1415 /* res = tos X op, pop, pop */
1416 dst = op_ia32_fcomppJmp;
1418 } else if (op1_idx == 1) {
1419 /* good, first operand is already in the right place, move the second */
1421 /* bring the first on top */
1422 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1423 assert(op1_idx != 0);
1426 dst = op_ia32_fcomrppJmp;
1429 /* if one is already the TOS, we need two fxch */
1431 /* first one is TOS, move to st(1) */
1432 x87_create_fxch(state, n, 1, BINOP_IDX_1);
1433 assert(op2_idx != 1);
1435 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1437 /* res = op X tos, pop, pop */
1438 dst = op_ia32_fcomrppJmp;
1440 } else if (op2_idx == 0) {
1441 /* second one is TOS, move to st(1) */
1442 x87_create_fxch(state, n, 1, BINOP_IDX_2);
1443 assert(op1_idx != 1);
1445 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1447 /* res = tos X op, pop, pop */
1448 dst = op_ia32_fcomppJmp;
1451 /* none of them is either TOS or st(1), 3 fxch needed */
1452 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1453 assert(op1_idx != 0);
1454 x87_create_fxch(state, n, 1, BINOP_IDX_2);
1456 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1458 /* res = tos X op, pop, pop */
1459 dst = op_ia32_fcomppJmp;
1466 /* second operand is an address mode */
1467 if (is_vfp_live(arch_register_get_index(op1), live)) {
1468 /* first operand is live: bring it to TOS */
1470 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1473 dst = op_ia32_fcomJmp;
1475 /* first operand is dead: bring it to tos */
1477 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1480 dst = op_ia32_fcompJmp;
/* patch the virtual opcode with the selected real compare opcode */
1485 x87_patch_insn(n, dst);
1486 assert(pop_cnt < 3);
1492 /* patch the operation */
1493 attr = get_ia32_attr(n);
1494 op1 = &ia32_st_regs[op1_idx];
1497 op2 = &ia32_st_regs[op2_idx];
1500 attr->x87[2] = NULL;
1503 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(n),
1504 arch_register_get_name(op1), arch_register_get_name(op2)));
1506 DB((dbg, LEVEL_1, "<<< %s %s, [AM]\n", get_irn_opname(n),
1507 arch_register_get_name(op1)));
1509 return NO_NODE_ADDED;
1510 } /* sim_fCondJmp */
1513 * Create a copy of a node. Recreate the node if it's a constant.
1515 * @param state the x87 state
1516 * @param n the node to be copied
1518 * @return the copy of n
/**
 * Create a copy of a node.  If the predecessor is one of the known x87
 * load-constant instructions (fldz, fld1, fldpi, ...) the constant is
 * recreated instead of copied; otherwise an fpushCopy is emitted.  In
 * both cases the result is pushed onto the simulated stack and bound to
 * the output register of n.
 *
 * @param state  the x87 state
 * @param n      the node to be copied
 * @return the copy of n (a new constant-load or fpushCopy node)
 */
1520 static ir_node *create_Copy(x87_state *state, ir_node *n) {
1521 x87_simulator *sim = state->sim;
1522 ir_graph *irg = get_irn_irg(n);
1523 dbg_info *n_dbg = get_irn_dbg_info(n);
1524 ir_mode *mode = get_irn_mode(n);
1525 ir_node *block = get_nodes_block(n);
1526 ir_node *pred = get_irn_n(n, 0);
/* constructor for the recreated constant, NULL if pred is no constant */
1527 ir_node *(*cnstr)(dbg_info *, ir_graph *, ir_node *, ir_mode *) = NULL;
1529 const arch_register_t *out;
1530 const arch_register_t *op1;
1533 /* Do not copy constants, recreate them. */
1534 switch (get_ia32_irn_opcode(pred)) {
1535 case iro_ia32_Unknown_VFP:
1537 cnstr = new_rd_ia32_fldz;
1540 cnstr = new_rd_ia32_fld1;
1542 case iro_ia32_fldpi:
1543 cnstr = new_rd_ia32_fldpi;
1545 case iro_ia32_fldl2e:
1546 cnstr = new_rd_ia32_fldl2e;
1548 case iro_ia32_fldl2t:
1549 cnstr = new_rd_ia32_fldl2t;
1551 case iro_ia32_fldlg2:
1552 cnstr = new_rd_ia32_fldlg2;
1554 case iro_ia32_fldln2:
1555 cnstr = new_rd_ia32_fldln2;
1561 out = x87_get_irn_register(sim, n);
1562 op1 = x87_get_irn_register(sim, pred);
1564 if (cnstr != NULL) {
1565 /* copy a constant: emit a fresh load-constant, result lands on TOS */
1566 res = (*cnstr)(n_dbg, irg, block, mode);
1568 x87_push(state, arch_register_get_index(out), res);
1570 attr = get_ia32_attr(res);
1571 attr->x87[2] = &ia32_st_regs[0];
/* generic case: duplicate the stack slot holding op1 via fpushCopy */
1573 int op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1575 res = new_rd_ia32_fpushCopy(n_dbg, irg, block, pred, mode);
1577 x87_push(state, arch_register_get_index(out), res);
1579 attr = get_ia32_attr(res);
1580 attr->x87[0] = &ia32_st_regs[op1_idx];
1581 attr->x87[2] = &ia32_st_regs[0];
1583 arch_set_irn_register(sim->arch_env, res, out);
1589 * Simulate a be_Copy.
1591 * @param state the x87 state
1592 * @param n the node that should be simulated (and patched)
1594 * @return NO_NODE_ADDED
/**
 * Simulate a be_Copy.
 *
 * Non-float copies are ignored.  If the copied operand is still live an
 * fpushCopy (or recreated constant) is inserted; if it is dead the copy
 * degenerates to a rename of the stack slot (and possibly a Pop to move
 * the value into place), and the Copy node itself is exchanged away.
 *
 * @param state  the x87 state
 * @param n      the node that should be simulated (and patched)
 * @return NO_NODE_ADDED
 */
1596 static int sim_Copy(x87_state *state, ir_node *n) {
1599 const arch_register_t *out;
1600 const arch_register_t *op1;
1601 ir_node *node, *next;
1603 int op1_idx, out_idx;
1606 ir_mode *mode = get_irn_mode(n);
1608 if (!mode_is_float(mode))
1612 pred = get_irn_n(n, 0);
1613 out = x87_get_irn_register(sim, n);
1614 op1 = x87_get_irn_register(sim, pred);
1615 live = vfp_live_args_after(sim, n, REGMASK(out));
1617 DB((dbg, LEVEL_1, ">>> %+F %s -> %s\n", n,
1618 arch_register_get_name(op1), arch_register_get_name(out)));
1619 DEBUG_ONLY(vfp_dump_live(live));
1621 /* handle the infamous unknown value */
1622 if (arch_register_get_index(op1) == REG_VFP_UKNWN) {
1623 /* Operand is still live, a real copy. We need here an fpush that can
1624 hold a register, so use the fpushCopy or recreate constants */
1625 node = create_Copy(state, n);
1627 assert(is_ia32_fldz(node));
1628 next = sched_next(n);
1631 sched_add_before(next, node);
1633 DB((dbg, LEVEL_1, "<<< %+F %s -> %s\n", node, op1->name,
1634 arch_get_irn_register(sim->arch_env, node)->name));
1635 return NO_NODE_ADDED;
1638 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1640 if (is_vfp_live(arch_register_get_index(op1), live)) {
1641 /* Operand is still live, a real copy. We need here an fpush that can
1642 hold a register, so use the fpushCopy or recreate constants */
1643 node = create_Copy(state, n);
1645 next = sched_next(n);
1648 sched_add_before(next, node);
1649 DB((dbg, LEVEL_1, "<<< %+F %s -> %s\n", node, op1->name,
1650 arch_get_irn_register(sim->arch_env, node)->name));
/* operand is dead: kill op1 and let out take its place */
1652 out_idx = x87_on_stack(state, arch_register_get_index(out));
1654 if (out_idx >= 0 && out_idx != op1_idx) {
1655 /* Matze: out already on stack? how can this happen? */
1658 /* op1 must be killed and placed where out is */
1660 /* best case, simple remove and rename */
1661 x87_patch_insn(n, op_ia32_Pop);
1662 attr = get_ia32_attr(n);
1663 attr->x87[0] = op1 = &ia32_st_regs[0];
1666 x87_set_st(state, arch_register_get_index(out), n, op1_idx - 1);
1668 /* move op1 to tos, store and pop it */
1670 x87_create_fxch(state, n, op1_idx, 0);
1673 x87_patch_insn(n, op_ia32_Pop);
1674 attr = get_ia32_attr(n);
1675 attr->x87[0] = op1 = &ia32_st_regs[out_idx];
1678 x87_set_st(state, arch_register_get_index(out), n, out_idx - 1);
1680 DB((dbg, LEVEL_1, "<<< %+F %s\n", n, op1->name));
1682 /* just a virtual copy */
1683 x87_set_st(state, arch_register_get_index(out), get_unop_op(n), op1_idx);
1684 /* don't remove the node to keep the verifier quiet :),
1685 the emitter won't emit any code for the node */
1688 DB((dbg, LEVEL_1, "<<< KILLED %s\n", get_irn_opname(n)));
1689 exchange(n, get_unop_op(n));
1693 return NO_NODE_ADDED;
1697 * Returns the result proj of the call, or NULL if the result is not used
/**
 * Returns the result proj of the call, or NULL if the result is not used.
 *
 * Scans the out-edges of the call for the Proj with number
 * pn_be_Call_first_res; a result that is only consumed by be_Keep nodes
 * counts as unused.
 *
 * @param call  the be_Call node to inspect
 * @return the result Proj, or NULL when the result is unused
 */
1699 static ir_node *get_call_result_proj(ir_node *call) {
1700 const ir_edge_t *edge;
1701 ir_node *resproj = NULL;
1703 /* search the result proj */
1704 foreach_out_edge(call, edge) {
1705 ir_node *proj = get_edge_src_irn(edge);
1706 long pn = get_Proj_proj(proj);
1708 if (pn == pn_be_Call_first_res) {
1713 if (resproj == NULL) {
1717 /* the result proj is connected to a Keep and maybe other nodes */
1718 foreach_out_edge(resproj, edge) {
1719 ir_node *pred = get_edge_src_irn(edge);
1720 if (!be_is_Keep(pred)) {
1725 /* only be_Keep found, so result is not used */
1727 } /* get_call_result_proj */
1730 * Simulate a be_Call.
1732 * @param state the x87 state
1733 * @param n the node that should be simulated
1734 * @param arch_env the architecture environment
1736 * @return NO_NODE_ADDED
/**
 * Simulate a be_Call.
 *
 * If the called function returns a float, the value arrives in st(0);
 * the used result proj is therefore pushed onto the simulated stack.
 * Only a single (first) floating point return result is supported.
 *
 * @param state     the x87 state
 * @param n         the be_Call node that should be simulated
 * @param arch_env  the architecture environment
 * @return NO_NODE_ADDED
 */
1738 static int sim_Call(x87_state *state, ir_node *n, const arch_env_t *arch_env) {
1739 ir_type *call_tp = be_Call_get_type(n);
1743 const arch_register_t *reg;
1745 /* at the begin of a call the x87 state should be empty */
1746 assert(state->depth == 0 && "stack not empty before call");
1748 if (get_method_n_ress(call_tp) <= 0)
1749 return NO_NODE_ADDED;
1752 * If the called function returns a float, it is returned in st(0).
1753 * This even happens if the return value is NOT used.
1754 * Moreover, only one return result is supported.
1756 res_type = get_method_res_type(call_tp, 0);
1757 mode = get_type_mode(res_type);
1759 if (mode == NULL || !mode_is_float(mode))
1760 return NO_NODE_ADDED;
1762 resproj = get_call_result_proj(n);
1763 if (resproj == NULL)
1764 return NO_NODE_ADDED;
/* model the float result arriving on the stack top */
1766 reg = x87_get_irn_register(state->sim, resproj);
1767 x87_push(state, arch_register_get_index(reg), resproj);
1769 return NO_NODE_ADDED;
1773 * Simulate a be_Spill.
1775 * @param state the x87 state
1776 * @param n the node that should be simulated (and patched)
1778 * Should not happen, spills are lowered before the x87 simulator sees them.
/**
 * Simulate a be_Spill.
 *
 * Must never be reached: spills are lowered to stores before the x87
 * simulator runs, hence the assert.  The fallback forwards to sim_fst.
 *
 * @param state  the x87 state
 * @param n      the node that should be simulated (and patched)
 */
1780 static int sim_Spill(x87_state *state, ir_node *n) {
1781 assert(0 && "Spill not lowered");
1782 return sim_fst(state, n);
1786 * Simulate a be_Reload.
1788 * @param state the x87 state
1789 * @param n the node that should be simulated (and patched)
1791 * Should not happen, reloads are lowered before the x87 simulator sees them.
/**
 * Simulate a be_Reload.
 *
 * Must never be reached: reloads are lowered to loads before the x87
 * simulator runs, hence the assert.  The fallback forwards to sim_fld.
 *
 * @param state  the x87 state
 * @param n      the node that should be simulated (and patched)
 */
1793 static int sim_Reload(x87_state *state, ir_node *n) {
1794 assert(0 && "Reload not lowered");
1795 return sim_fld(state, n);
1799 * Simulate a be_Return.
1801 * @param state the x87 state
1802 * @param n the node that should be simulated (and patched)
1804 * @return NO_NODE_ADDED
/**
 * Simulate a be_Return.
 *
 * Counts the floating point return values and virtually pops them from
 * the simulated stack; the stack depth must match the number of float
 * results at this point.
 *
 * @param state  the x87 state
 * @param n      the node that should be simulated (and patched)
 * @return NO_NODE_ADDED
 */
1806 static int sim_Return(x87_state *state, ir_node *n) {
1807 int n_res = be_Return_get_n_rets(n);
1808 int i, n_float_res = 0;
1810 /* only floating point return values must reside on the stack */
1811 for (i = 0; i < n_res; ++i) {
1812 ir_node *res = get_irn_n(n, be_pos_Return_val + i);
1814 if (mode_is_float(get_irn_mode(res)))
1817 assert(x87_get_depth(state) == n_float_res);
1819 /* pop them virtually */
1820 for (i = n_float_res - 1; i >= 0; --i)
1823 return NO_NODE_ADDED;
/** Pairing of an input and output register for a Perm. */
1826 typedef struct _perm_data_t {
1827 const arch_register_t *in; /**< input register of the Perm */
1828 const arch_register_t *out; /**< output register of the Perm */
1832 * Simulate a be_Perm.
1834 * @param state the x87 state
1835 * @param irn the node that should be simulated (and patched)
1837 * @return NO_NODE_ADDED
/**
 * Simulate a be_Perm.
 *
 * On x87 a Perm is purely virtual: all inputs are already on the FPU
 * stack and pairwise different, so only the simulated stack state is
 * permuted — the Proj outputs are written into the stack positions of
 * the corresponding inputs.  No code is generated.
 *
 * @param state  the x87 state
 * @param irn    the node that should be simulated (and patched)
 * @return NO_NODE_ADDED
 */
1839 static int sim_Perm(x87_state *state, ir_node *irn) {
1841 x87_simulator *sim = state->sim;
1842 ir_node *pred = get_irn_n(irn, 0);
1844 const ir_edge_t *edge;
1846 /* handle only floating point Perms */
1847 if (! mode_is_float(get_irn_mode(pred)))
1848 return NO_NODE_ADDED;
1850 DB((dbg, LEVEL_1, ">>> %+F\n", irn));
1852 /* Perm is a pure virtual instruction on x87.
1853 All inputs must be on the FPU stack and are pairwise
1854 different from each other.
1855 So, all we need to do is to permute the stack state. */
1856 n = get_irn_arity(irn);
1857 NEW_ARR_A(int, stack_pos, n);
1859 /* collect old stack positions */
1860 for (i = 0; i < n; ++i) {
1861 const arch_register_t *inreg = x87_get_irn_register(sim, get_irn_n(irn, i));
1862 int idx = x87_on_stack(state, arch_register_get_index(inreg));
1864 assert(idx >= 0 && "Perm argument not on x87 stack");
1868 /* now do the permutation */
1869 foreach_out_edge(irn, edge) {
1870 ir_node *proj = get_edge_src_irn(edge);
1871 const arch_register_t *out = x87_get_irn_register(sim, proj);
1872 long num = get_Proj_proj(proj);
1874 assert(0 <= num && num < n && "More Proj's than Perm inputs");
1875 x87_set_st(state, arch_register_get_index(out), proj, stack_pos[(unsigned)num]);
1877 DB((dbg, LEVEL_1, "<<< %+F\n", irn));
1879 return NO_NODE_ADDED;
1883 * Kill any dead registers at block start by popping them from the stack.
1885 * @param sim the simulator handle
1886 * @param block the current block
1887 * @param start_state the x87 state at the begin of the block
1889 * @return the x87 state after dead register killed
/**
 * Kill any dead registers at block start by popping them from the stack.
 *
 * Builds a bit mask of stack slots whose vfp registers are not live at
 * the block entry, then pops them from the TOS (using fxch to bring
 * them up, and double-pops where two adjacent kills allow it).
 *
 * @param sim          the simulator handle
 * @param block        the current block
 * @param start_state  the x87 state at the begin of the block
 * @return the x87 state after the dead registers were killed
 */
1891 static x87_state *x87_kill_deads(x87_simulator *sim, ir_node *block, x87_state *start_state) {
1892 x87_state *state = start_state;
1893 ir_node *first_insn = sched_first(block);
1894 ir_node *keep = NULL;
1895 unsigned live = vfp_live_args_after(sim, block, 0);
1897 int i, depth, num_pop;
/* bit i of kill_mask is set if stack slot i holds a dead register */
1900 depth = x87_get_depth(state);
1901 for (i = depth - 1; i >= 0; --i) {
1902 int reg = x87_get_st_reg(state, i);
1904 if (! is_vfp_live(reg, live))
1905 kill_mask |= (1 << i);
1909 /* create a new state, will be changed */
1910 state = x87_clone_state(sim, state);
1912 DB((dbg, LEVEL_1, "Killing deads:\n"));
1913 DEBUG_ONLY(vfp_dump_live(live));
1914 DEBUG_ONLY(x87_dump_stack(state));
1916 /* now kill registers */
1918 /* we can only kill from TOS, so bring them up */
1919 if (! (kill_mask & 1)) {
1920 /* search from behind, because we can do a double-pop */
1921 for (i = depth - 1; i >= 0; --i) {
1922 if (kill_mask & (1 << i)) {
1923 kill_mask &= ~(1 << i);
1930 x87_set_st(state, -1, keep, i);
1931 keep = x87_create_fxch(state, first_insn, i, -1);
1934 keep = x87_get_st_node(state, 0);
1936 if ((kill_mask & 3) == 3) {
1937 /* we can do a double-pop */
1941 /* only a single pop */
1946 kill_mask >>= num_pop;
1947 keep = x87_create_fpop(state, first_insn, num_pop, keep);
1952 } /* x87_kill_deads */
1955 * Run a simulation and fix all virtual instructions for a block.
1957 * @param sim the simulator handle
1958 * @param block the current block
/**
 * Run a simulation and fix all virtual instructions for a block.
 *
 * Kills dead registers at block entry, then walks the schedule calling
 * the per-opcode sim_* handler registered in op->ops.generic.  At block
 * end the resulting state is propagated to all successors; successors
 * that already have a begin state get a shuffle inserted instead.
 *
 * @param sim    the simulator handle
 * @param block  the current block
 */
1960 static void x87_simulate_block(x87_simulator *sim, ir_node *block) {
1962 blk_state *bl_state = x87_get_bl_state(sim, block);
1963 x87_state *state = bl_state->begin;
1964 const ir_edge_t *edge;
1965 ir_node *start_block;
1967 assert(state != NULL);
1968 /* already processed? */
1969 if (bl_state->end != NULL)
1972 DB((dbg, LEVEL_1, "Simulate %+F\n", block));
1973 DB((dbg, LEVEL_2, "State at Block begin:\n "));
1974 DEBUG_ONLY(x87_dump_stack(state));
1976 /* at block begin, kill all dead registers */
1977 state = x87_kill_deads(sim, block, state);
1979 /* beware, n might change */
1980 for (n = sched_first(block); !sched_is_end(n); n = next) {
1983 ir_op *op = get_irn_op(n);
1985 next = sched_next(n);
/* only opcodes with a registered sim_* handler need simulation */
1986 if (op->ops.generic == NULL)
1989 func = (sim_func)op->ops.generic;
1991 /* have work to do */
1992 if (state == bl_state->begin) {
1993 /* create a new state, will be changed */
1994 state = x87_clone_state(sim, state);
1998 node_inserted = (*func)(state, n);
2001 sim_func might have added an additional node after n,
2003 beware: n must not be changed by sim_func
2004 (i.e. removed from schedule) in this case
2006 if (node_inserted != NO_NODE_ADDED)
2007 next = sched_next(n);
2010 start_block = get_irg_start_block(get_irn_irg(block));
2012 /* check if the state must be shuffled */
2013 foreach_block_succ(block, edge) {
2014 ir_node *succ = get_edge_src_irn(edge);
2015 blk_state *succ_state;
2017 if (succ == start_block)
2020 succ_state = x87_get_bl_state(sim, succ);
2022 if (succ_state->begin == NULL) {
2023 succ_state->begin = state;
2024 waitq_put(sim->worklist, succ);
2026 /* There is already a begin state for the successor, bad.
2027 Do the necessary permutations.
2028 Note that critical edges are removed, so this is always possible:
2029 If the successor has more than one possible input, then it must
2032 x87_shuffle(sim, block, state, succ, succ_state->begin);
2035 bl_state->end = state;
2037 DB((dbg, LEVEL_2, "State at Block end:\n ")); DEBUG_ONLY(x87_dump_stack(state));
2038 } /* x87_simulate_block */
2041 * Create a new x87 simulator.
2043 * @param sim a simulator handle, will be initialized
2044 * @param irg the current graph
2045 * @param arch_env the architecture environment
/**
 * Create a new x87 simulator.
 *
 * Initializes the obstack, the block-state map and the liveness cache,
 * and registers the sim_* handlers in the generic function pointers of
 * all opcodes that must be simulated.
 *
 * @param sim       a simulator handle, will be initialized
 * @param irg       the current graph
 * @param arch_env  the architecture environment
 */
2047 static void x87_init_simulator(x87_simulator *sim, ir_graph *irg,
2048 const arch_env_t *arch_env)
2050 obstack_init(&sim->obst);
2051 sim->blk_states = pmap_create();
2052 sim->arch_env = arch_env;
2053 sim->n_idx = get_irg_last_idx(irg);
/* one liveness cache entry per node index */
2054 sim->live = obstack_alloc(&sim->obst, sizeof(*sim->live) * sim->n_idx);
2056 DB((dbg, LEVEL_1, "--------------------------------\n"
2057 "x87 Simulator started for %+F\n", irg));
2059 /* set the generic function pointer of instruction we must simulate */
2060 clear_irp_opcodes_generic_func();
2062 #define ASSOC(op) (op_ ## op)->ops.generic = (op_func)(sim_##op)
2063 #define ASSOC_IA32(op) (op_ia32_v ## op)->ops.generic = (op_func)(sim_##op)
2064 #define ASSOC_BE(op) (op_be_ ## op)->ops.generic = (op_func)(sim_##op)
2081 ASSOC_IA32(fCondJmp);
2092 } /* x87_init_simulator */
2095 * Destroy a x87 simulator.
2097 * @param sim the simulator handle
2099 static void x87_destroy_simulator(x87_simulator *sim) {
2100 pmap_destroy(sim->blk_states);
2101 obstack_free(&sim->obst, NULL);
2102 DB((dbg, LEVEL_1, "x87 Simulator stopped\n\n"));
2103 } /* x87_destroy_simulator */
2106 * Pre-block walker: calculate the liveness information for the block
2107 * and store it into the sim->live cache.
2109 static void update_liveness_walker(ir_node *block, void *data) {
2110 x87_simulator *sim = data;
2111 update_liveness(sim, block);
2112 } /* update_liveness_walker */
2115 * Run a simulation and fix all virtual instructions for a graph.
2117 * @param env the architecture environment
2118 * @param irg the current graph
2120 * Needs a block-schedule.
/**
 * Run a simulation and fix all virtual instructions for a graph.
 *
 * Creates the simulator, precalculates the liveness information for all
 * blocks (new nodes added later would break a lazy calculation), then
 * processes blocks from a worklist starting at the start block with an
 * empty x87 stack.
 *
 * @param arch_env  the architecture environment
 * @param birg      the be-irg containing the current graph
 *
 * Needs a block-schedule.
 */
2122 void x87_simulate_graph(const arch_env_t *arch_env, be_irg_t *birg) {
2123 ir_node *block, *start_block;
2124 blk_state *bl_state;
2126 ir_graph *irg = be_get_birg_irg(birg);
2128 /* create the simulator */
2129 x87_init_simulator(&sim, irg, arch_env);
2131 start_block = get_irg_start_block(irg);
2132 bl_state = x87_get_bl_state(&sim, start_block);
2134 /* start with the empty state */
2135 bl_state->begin = empty;
2138 sim.worklist = new_waitq();
2139 waitq_put(sim.worklist, start_block);
2141 be_invalidate_liveness(birg);
2142 be_assure_liveness(birg);
2143 sim.lv = be_get_birg_liveness(birg);
2145 /* Calculate the liveness for all nodes. We must precalculate this info,
2146 * because the simulator adds new nodes (possible before Phi nodes) which
2147 * would let a lazy calculation fail.
2148 * On the other hand we reduce the computation amount due to
2149 * precaching from O(n^2) to O(n) at the expense of O(n) cache memory.
2151 irg_block_walk_graph(irg, update_liveness_walker, NULL, &sim);
/* process blocks until the worklist is drained */
2155 block = waitq_get(sim.worklist);
2156 x87_simulate_block(&sim, block);
2157 } while (! waitq_empty(sim.worklist));
2160 del_waitq(sim.worklist);
2161 x87_destroy_simulator(&sim);
2162 } /* x87_simulate_graph */
2164 void ia32_init_x87(void) {
2165 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.x87");
2166 } /* ia32_init_x87 */