2 * This file implements the x87 support and virtual to stack
3 * register translation for the ia32 backend.
5 * @author: Michael Beck
12 #endif /* HAVE_CONFIG_H */
19 #include "iredges_t.h"
28 #include "../belive_t.h"
29 #include "../besched_t.h"
30 #include "../benode_t.h"
31 #include "ia32_new_nodes.h"
32 #include "gen_ia32_new_nodes.h"
33 #include "gen_ia32_regalloc_if.h"
38 /* first and second binop index */
45 /* the store val index */
46 #define STORE_VAL_IDX 2
48 #define MASK_TOS(x) ((x) & (N_x87_REGS - 1))
50 /** the debug handle */
51 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
53 /* Forward declaration. */
54 typedef struct _x87_simulator x87_simulator;
57 * An exchange template.
58 * Note that our virtual functions have the same inputs
59 * and attributes as the real ones, so we can simply exchange
61 * Further, x87 supports inverse instructions, so we can handle them.
63 typedef struct _exchange_tmpl {
64 ir_op *normal_op; /**< the normal one */
65 ir_op *reverse_op; /**< the reverse one if exists */
66 ir_op *normal_pop_op; /**< the normal one with tos pop */
67 ir_op *reverse_pop_op; /**< the reverse one with tos pop */
71 * An entry on the simulated x87 stack.
73 typedef struct _st_entry {
74 int reg_idx; /**< the virtual register index of this stack value */
75 ir_node *node; /**< the node that produced this value */
81 typedef struct _x87_state {
82 st_entry st[N_x87_REGS]; /**< the register stack */
83 int depth; /**< the current stack depth */
84 int tos; /**< position of the tos */
85 x87_simulator *sim; /**< The simulator. */
88 /** An empty state, used for blocks without fp instructions. */
89 static x87_state _empty = { { {0, NULL}, }, 0, 0 };
90 static x87_state *empty = (x87_state *)&_empty;
92 /** The type of an instruction simulator function. */
93 typedef int (*sim_func)(x87_state *state, ir_node *n);
96 * A block state: Every block has a x87 state at the beginning and at the end.
98 typedef struct _blk_state {
99 x87_state *begin; /**< state at the begin or NULL if not assigned */
100 x87_state *end; /**< state at the end or NULL if not assigned */
103 #define PTR_TO_BLKSTATE(p) ((blk_state *)(p))
108 struct _x87_simulator {
109 struct obstack obst; /**< An obstack for fast allocating. */
110 pmap *blk_states; /**< Map blocks to states. */
111 const arch_env_t *env; /**< The architecture environment. */
112 unsigned char *live; /**< Liveness information. */
113 unsigned n_idx; /**< The cached get_irg_last_idx() result. */
117 * Returns the current stack depth.
119 * @param state the x87 state
121 * @return the x87 stack depth
123 static int x87_get_depth(const x87_state *state) {
128 * Check if the state is empty.
130 * @param state the x87 state
132 * @return non-zero if the x87 stack is empty
134 static int x87_state_is_empty(const x87_state *state) {
135 return state->depth == 0;
139 * Return the virtual register index at st(pos).
141 * @param state the x87 state
142 * @param pos a stack position
144 * @return the vfp register index that produced the value at st(pos)
146 static int x87_get_st_reg(const x87_state *state, int pos) {
147 assert(pos < state->depth);
148 return state->st[MASK_TOS(state->tos + pos)].reg_idx;
152 * Return the node at st(pos).
154 * @param state the x87 state
155 * @param pos a stack position
157 * @return the IR node that produced the value at st(pos)
/* Looks up the simulated stack slot st(pos), relative to the current tos,
   using MASK_TOS for circular wrap-around, and returns the IR node that
   produced the value stored there. */
159 static ir_node *x87_get_st_node(const x87_state *state, int pos) {
160 	assert(pos < state->depth);
161 	return state->st[MASK_TOS(state->tos + pos)].node;
162 } /* x87_get_st_node */
166 * Dump the stack for debugging.
168 * @param state the x87 state
170 static void x87_dump_stack(const x87_state *state) {
173 for (i = state->depth - 1; i >= 0; --i) {
174 DB((dbg, LEVEL_2, "vf%d ", x87_get_st_reg(state, i)));
176 DB((dbg, LEVEL_2, "<-- TOS\n"));
177 } /* x87_dump_stack */
178 #endif /* DEBUG_libfirm */
181 * Set a virtual register to st(pos).
183 * @param state the x87 state
184 * @param reg_idx the vfp register index that should be set
185 * @param node the IR node that produces the value of the vfp register
186 * @param pos the stack position where the new value should be entered
188 static void x87_set_st(x87_state *state, int reg_idx, ir_node *node, int pos) {
189 assert(0 < state->depth);
190 state->st[MASK_TOS(state->tos + pos)].reg_idx = reg_idx;
191 state->st[MASK_TOS(state->tos + pos)].node = node;
193 DB((dbg, LEVEL_2, "After SET_REG:\n ")); DEBUG_ONLY(x87_dump_stack(state));
197 * Set the tos virtual register.
199 * @param state the x87 state
200 * @param reg_idx the vfp register index that should be set
201 * @param node the IR node that produces the value of the vfp register
203 static void x87_set_tos(x87_state *state, int reg_idx, ir_node *node) {
204 x87_set_st(state, reg_idx, node, 0);
209 * Flush the x87 stack.
211 * @param state the x87 state
213 static void x87_flush(x87_state *state) {
220 * Swap st(0) with st(pos).
222 * @param state the x87 state
223 * @param pos the stack position to change the tos with
225 static void x87_fxch(x87_state *state, int pos) {
227 assert(pos < state->depth);
229 entry = state->st[MASK_TOS(state->tos + pos)];
230 state->st[MASK_TOS(state->tos + pos)] = state->st[MASK_TOS(state->tos)];
231 state->st[MASK_TOS(state->tos)] = entry;
233 DB((dbg, LEVEL_2, "After FXCH:\n ")); DEBUG_ONLY(x87_dump_stack(state));
237 * Convert a virtual register to the stack index.
239 * @param state the x87 state
240 * @param reg_idx the register vfp index
242 * @return the stack position where the register is stacked
243 * or -1 if the virtual register was not found
245 static int x87_on_stack(const x87_state *state, int reg_idx) {
246 int i, tos = state->tos;
248 for (i = 0; i < state->depth; ++i)
249 if (state->st[MASK_TOS(tos + i)].reg_idx == reg_idx)
255 * Push a virtual Register onto the stack; double pushes are allowed.
257 * @param state the x87 state
258 * @param reg_idx the register vfp index
259 * @param node the node that produces the value of the vfp register
261 static void x87_push_dbl(x87_state *state, int reg_idx, ir_node *node) {
262 assert(state->depth < N_x87_REGS && "stack overrun");
265 state->tos = MASK_TOS(state->tos - 1);
266 state->st[state->tos].reg_idx = reg_idx;
267 state->st[state->tos].node = node;
269 DB((dbg, LEVEL_2, "After PUSH:\n ")); DEBUG_ONLY(x87_dump_stack(state));
273 * Push a virtual Register onto the stack; double pushes are NOT allowed.
275 * @param state the x87 state
276 * @param reg_idx the register vfp index
277 * @param node the node that produces the value of the vfp register
278 * @param dbl_push if != 0 double pushes are allowed
280 static void x87_push(x87_state *state, int reg_idx, ir_node *node) {
281 assert(x87_on_stack(state, reg_idx) == -1 && "double push");
283 x87_push_dbl(state, reg_idx, node);
287 * Pop a virtual Register from the stack.
289 static void x87_pop(x87_state *state) {
290 assert(state->depth > 0 && "stack underrun");
293 state->tos = MASK_TOS(state->tos + 1);
295 DB((dbg, LEVEL_2, "After POP:\n ")); DEBUG_ONLY(x87_dump_stack(state));
299 * Returns the block state of a block.
301 * @param sim the x87 simulator handle
302 * @param block the current block
304 * @return the block state
306 static blk_state *x87_get_bl_state(x87_simulator *sim, ir_node *block) {
307 pmap_entry *entry = pmap_find(sim->blk_states, block);
310 blk_state *bl_state = obstack_alloc(&sim->obst, sizeof(*bl_state));
311 bl_state->begin = NULL;
312 bl_state->end = NULL;
314 pmap_insert(sim->blk_states, block, bl_state);
318 return PTR_TO_BLKSTATE(entry->value);
319 } /* x87_get_bl_state */
322 * Creates a new x87 state.
324 * @param sim the x87 simulator handle
326 * @return a new x87 state
328 static x87_state *x87_alloc_state(x87_simulator *sim) {
329 x87_state *res = obstack_alloc(&sim->obst, sizeof(*res));
333 } /* x87_alloc_state */
337 * Create a new empty x87 state.
339 * @param sim the x87 simulator handle
341 * @return a new empty x87 state
343 static x87_state *x87_alloc_empty_state(x87_simulator *sim) {
344 x87_state *res = x87_alloc_state(sim);
348 } /* x87_alloc_empty_state */
354 * @param sim the x87 simulator handle
355 * @param src the x87 state that will be cloned
357 * @return a cloned copy of the src state
359 static x87_state *x87_clone_state(x87_simulator *sim, const x87_state *src) {
360 x87_state *res = x87_alloc_state(sim);
362 memcpy(res, src, sizeof(*res));
364 } /* x87_clone_state */
367 * Patch a virtual instruction into a x87 one and return
370 * @param n the IR node to patch
371 * @param op the x87 opcode to patch in
373 static ir_node *x87_patch_insn(ir_node *n, ir_op *op) {
374 ir_mode *mode = get_irn_mode(n);
379 if (mode == mode_T) {
380 /* patch all Proj's */
381 const ir_edge_t *edge;
383 foreach_out_edge(n, edge) {
384 ir_node *proj = get_edge_src_irn(edge);
386 mode = get_irn_mode(proj);
387 if (mode_is_float(mode)) {
389 set_irn_mode(proj, mode_E);
394 else if (mode_is_float(mode))
395 set_irn_mode(n, mode_E);
397 } /* x87_patch_insn */
400 * Returns the first Proj of a mode_T node having a given mode.
402 * @param n the mode_T node
403 * @param m the desired mode of the Proj
404 * @return The first Proj of mode @p m found or NULL.
406 static ir_node *get_irn_Proj_for_mode(ir_node *n, ir_mode *m) {
407 const ir_edge_t *edge;
409 assert(get_irn_mode(n) == mode_T && "Need mode_T node");
411 foreach_out_edge(n, edge) {
412 ir_node *proj = get_edge_src_irn(edge);
413 if (get_irn_mode(proj) == m)
418 } /* get_irn_Proj_for_mode */
421 * Wrap the arch_* function here so we can check for errors.
423 static const arch_register_t *x87_get_irn_register(x87_simulator *sim, const ir_node *irn) {
424 const arch_register_t *res;
426 res = arch_get_irn_register(sim->env, irn);
427 assert(res->reg_class->regs == ia32_vfp_regs);
431 /* -------------- x87 perm --------------- */
434 * Creates a fxch for shuffle.
436 * @param state the x87 state
437 * @param pos parameter for fxch
438 * @param block the block were fxch is inserted
440 * Creates a new fxch node and reroute the user of the old node
443 * @return the fxch node
445 static ir_node *x87_fxch_shuffle(x87_state *state, int pos, ir_node *block)
450 fxch = new_rd_ia32_fxch(NULL, get_irn_irg(block), block, mode_E);
451 attr = get_ia32_attr(fxch);
452 attr->x87[0] = &ia32_st_regs[pos];
453 attr->x87[2] = &ia32_st_regs[0];
457 x87_fxch(state, pos);
459 } /* x87_fxch_shuffle */
462 * Calculate the necessary permutations to reach dst_state.
464 * These permutations are done with fxch instructions and placed
465 * at the end of the block.
467 * Note that critical edges are removed here, so we need only
468 * a shuffle if the current block has only one successor.
470 * @param sim the simulator handle
471 * @param block the current block
472 * @param state the current x87 stack state, might be modified
473 * @param dst_block the destination block
474 * @param dst_state destination state
478 static x87_state *x87_shuffle(x87_simulator *sim, ir_node *block, x87_state *state, ir_node *dst_block, const x87_state *dst_state) {
479 int i, n_cycles, k, ri;
480 unsigned cycles[4], all_mask;
481 char cycle_idx[4][8];
482 ir_node *fxch, *before, *after;
484 assert(state->depth == dst_state->depth);
486 /* Some mathematics here:
487 If we have a cycle of length n that includes the tos,
488 we need n-1 exchange operations.
489 We can always add the tos and restore it, so we need
490 n+1 exchange operations for a cycle not containing the tos.
491 So, the maximum of needed operations is for a cycle of 7
492 not including the tos == 8.
493 This is the same number of ops we would need for using stores,
494 so exchange is cheaper (we save the loads).
495 On the other hand, we might need an additional exchange
496 in the next block to bring one operand on top, so the
497 number of ops in the first case is identical.
498 Further, no more than 4 cycles can exists (4 x 2).
500 all_mask = (1 << (state->depth)) - 1;
502 for (n_cycles = 0; all_mask; ++n_cycles) {
503 int src_idx, dst_idx;
505 /* find the first free slot */
506 for (i = 0; i < state->depth; ++i) {
507 if (all_mask & (1 << i)) {
508 all_mask &= ~(1 << i);
510 /* check if there are differences here */
511 if (x87_get_st_reg(state, i) != x87_get_st_reg(dst_state, i))
517 /* no more cycles found */
522 cycles[n_cycles] = (1 << i);
523 cycle_idx[n_cycles][k++] = i;
524 for (src_idx = i; ; src_idx = dst_idx) {
525 dst_idx = x87_on_stack(dst_state, x87_get_st_reg(state, src_idx));
527 if ((all_mask & (1 << dst_idx)) == 0)
530 cycle_idx[n_cycles][k++] = dst_idx;
531 cycles[n_cycles] |= (1 << dst_idx);
532 all_mask &= ~(1 << dst_idx);
534 cycle_idx[n_cycles][k] = -1;
538 /* no permutation needed */
542 /* Hmm: permutation needed */
543 DB((dbg, LEVEL_2, "\n%+F needs permutation: from\n", block));
544 DEBUG_ONLY(x87_dump_stack(state));
545 DB((dbg, LEVEL_2, " to\n"));
546 DEBUG_ONLY(x87_dump_stack(dst_state));
550 DB((dbg, LEVEL_2, "Need %d cycles\n", n_cycles));
551 for (ri = 0; ri < n_cycles; ++ri) {
552 DB((dbg, LEVEL_2, " Ring %d:\n ", ri));
553 for (k = 0; cycle_idx[ri][k] != -1; ++k)
554 DB((dbg, LEVEL_2, " st%d ->", cycle_idx[ri][k]));
555 DB((dbg, LEVEL_2, "\n"));
562 * Find the place where the node must be inserted.
563 * We have only one successor block, so the last instruction should
566 before = sched_last(block);
567 assert(is_cfop(before));
569 /* now do the permutations */
570 for (ri = 0; ri < n_cycles; ++ri) {
571 if ((cycles[ri] & 1) == 0) {
572 /* this cycle does not include the tos */
573 fxch = x87_fxch_shuffle(state, cycle_idx[ri][0], block);
575 sched_add_after(after, fxch);
577 sched_add_before(before, fxch);
580 for (k = 1; cycle_idx[ri][k] != -1; ++k) {
581 fxch = x87_fxch_shuffle(state, cycle_idx[ri][k], block);
583 sched_add_after(after, fxch);
585 sched_add_before(before, fxch);
588 if ((cycles[ri] & 1) == 0) {
589 /* this cycle does not include the tos */
590 fxch = x87_fxch_shuffle(state, cycle_idx[ri][0], block);
591 sched_add_after(after, fxch);
598 * Create a fxch node before another node.
600 * @param state the x87 state
601 * @param n the node after the fxch
602 * @param pos exchange st(pos) with st(0)
603 * @param op_idx if >= 0, replace input op_idx of n with the fxch result
607 static ir_node *x87_create_fxch(x87_state *state, ir_node *n, int pos, int op_idx) {
611 x87_fxch(state, pos);
613 fxch = new_rd_ia32_fxch(NULL, get_irn_irg(n), get_nodes_block(n), mode_E);
614 attr = get_ia32_attr(fxch);
615 attr->x87[0] = &ia32_st_regs[pos];
616 attr->x87[2] = &ia32_st_regs[0];
620 sched_add_before(n, fxch);
621 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fxch), attr->x87[0]->name, attr->x87[2]->name));
623 } /* x87_create_fxch */
626 * Create a fpush before node n.
628 * @param state the x87 state
629 * @param n the node after the fpush
630 * @param pos push st(pos) on stack
631 * @param op_idx replace input op_idx of n with the fpush result
633 static void x87_create_fpush(x87_state *state, ir_node *n, int pos, int op_idx) {
634 ir_node *fpush, *pred = get_irn_n(n, op_idx);
636 const arch_register_t *out = x87_get_irn_register(state->sim, pred);
638 x87_push_dbl(state, arch_register_get_index(out), pred);
640 fpush = new_rd_ia32_fpush(NULL, get_irn_irg(n), get_nodes_block(n), mode_E);
641 attr = get_ia32_attr(fpush);
642 attr->x87[0] = &ia32_st_regs[pos];
643 attr->x87[2] = &ia32_st_regs[0];
646 sched_add_before(n, fpush);
648 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fpush), attr->x87[0]->name, attr->x87[2]->name));
649 } /* x87_create_fpush */
652 * Create a fpop before node n.
654 * @param state the x87 state
655 * @param n the node after the fpop
656 * @param num pop 1 or 2 values
657 * @param pred node to use as predecessor of the fpop
659 * @return the fpop node
661 static ir_node *x87_create_fpop(x87_state *state, ir_node *n, int num, ir_node *pred) {
662 ir_node *fpop = pred;
669 fpop = new_rd_ia32_fpop(NULL, get_irn_irg(n), get_nodes_block(n), mode_E);
670 attr = get_ia32_attr(fpop);
671 attr->x87[0] = &ia32_st_regs[0];
672 attr->x87[1] = &ia32_st_regs[0];
673 attr->x87[2] = &ia32_st_regs[0];
675 sched_add_before(n, fpop);
676 DB((dbg, LEVEL_1, "<<< %s %s\n", get_irn_opname(fpop), attr->x87[0]->name));
682 } /* x87_create_fpop */
684 /* --------------------------------- liveness ------------------------------------------ */
687 * The liveness transfer function.
688 * Updates a live set over a single step from a given node to its predecessor.
689 * Everything defined at the node is removed from the set, the uses of the node get inserted.
691 * @param sim The simulator handle.
692 * @param irn The node at which liveness should be computed.
693 * @param live The bitset of registers live before @p irn. This set gets modified by updating it to
694 * the registers live after irn.
696 * @return The live bitset.
698 static unsigned vfp_liveness_transfer(x87_simulator *sim, ir_node *irn, unsigned live)
701 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
702 const arch_env_t *arch_env = sim->env;
704 if (arch_irn_consider_in_reg_alloc(arch_env, cls, irn)) {
705 const arch_register_t *reg = x87_get_irn_register(sim, irn);
706 live &= ~(1 << arch_register_get_index(reg));
709 for (i = 0, n = get_irn_arity(irn); i < n; ++i) {
710 ir_node *op = get_irn_n(irn, i);
712 if (mode_is_float(get_irn_mode(op)) && arch_irn_consider_in_reg_alloc(arch_env, cls, op)) {
713 const arch_register_t *reg = x87_get_irn_register(sim, op);
714 live |= 1 << arch_register_get_index(reg);
718 } /* vfp_liveness_transfer */
721 * Put all live virtual registers at the end of a block into a bitset.
723 * @param sim the simulator handle
724 * @param lv the liveness information
725 * @param bl the block
727 * @return The live bitset at the end of this block
729 static unsigned vfp_liveness_end_of_block(x87_simulator *sim, be_lv_t *lv, const ir_node *bl)
733 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
734 const arch_env_t *arch_env = sim->env;
736 be_lv_foreach(lv, bl, be_lv_state_end, i) {
737 ir_node *irn = be_lv_get_irn(lv, bl, i);
738 if (arch_irn_consider_in_reg_alloc(arch_env, cls, irn)) {
739 const arch_register_t *reg = x87_get_irn_register(sim, irn);
740 live |= 1 << arch_register_get_index(reg);
745 } /* vfp_liveness_end_of_block */
747 /** get the register mask from an arch_register */
748 #define REGMASK(reg) (1 << (arch_register_get_index(reg)))
751 * Return a bitset of argument registers which are live at the end of a node.
753 * @param sim the simulator handle
754 * @param pos the node
755 * @param kill kill mask for the output registers
757 * @return The live bitset.
759 static unsigned vfp_live_args_after(x87_simulator *sim, const ir_node *pos, unsigned kill)
761 unsigned idx = get_irn_idx(pos);
763 assert(idx < sim->n_idx);
764 return sim->live[idx] & ~kill;
765 } /* vfp_live_args_after */
768 * Calculate the liveness for a whole block and cache it.
770 * @param sim the simulator handle
771 * @param lv the liveness handle
772 * @param blk the block
774 static void update_liveness(x87_simulator *sim, be_lv_t *lv, ir_node *blk) {
775 unsigned live = vfp_liveness_end_of_block(sim, lv, blk);
779 /* now iterate through the block backward and cache the results */
780 sched_foreach_reverse(blk, irn) {
781 /* stop at the first Phi: this produces the live-in */
785 idx = get_irn_idx(irn);
786 sim->live[idx] = live;
788 live = vfp_liveness_transfer(sim, irn, live);
790 idx = get_irn_idx(blk);
791 sim->live[idx] = live;
792 } /* update_liveness */
795 * Returns true if a register is live in a set.
797 * @param reg_idx the vfp register index
798 * @param live a live bitset
800 #define is_vfp_live(reg_idx, live) ((live) & (1 << (reg_idx)))
804 * Dump liveness info.
806 * @param live the live bitset
808 static void vfp_dump_live(unsigned live) {
811 DB((dbg, LEVEL_2, "Live registers here: \n"));
812 for (i = 0; i < 8; ++i) {
813 if (live & (1 << i)) {
814 DB((dbg, LEVEL_2, " vf%d", i));
817 DB((dbg, LEVEL_2, "\n"));
818 } /* vfp_dump_live */
819 #endif /* DEBUG_libfirm */
821 /* --------------------------------- simulators ---------------------------------------- */
823 #define XCHG(a, b) do { int t = (a); (a) = (b); (b) = t; } while (0)
826 * Simulate a virtual binop.
828 * @param state the x87 state
829 * @param n the node that should be simulated (and patched)
830 * @param tmpl the template containing the 4 possible x87 opcodes
832 static int sim_binop(x87_state *state, ir_node *n, const exchange_tmpl *tmpl) {
833 int op2_idx, op1_idx = -1;
834 int out_idx, do_pop =0;
837 x87_simulator *sim = state->sim;
838 const arch_register_t *op1 = x87_get_irn_register(sim, get_irn_n(n, BINOP_IDX_1));
839 const arch_register_t *op2 = x87_get_irn_register(sim, get_irn_n(n, BINOP_IDX_2));
840 const arch_register_t *out = x87_get_irn_register(sim, n);
841 unsigned live = vfp_live_args_after(sim, n, REGMASK(out));
843 DB((dbg, LEVEL_1, ">>> %+F %s, %s -> %s\n", n,
844 arch_register_get_name(op1), arch_register_get_name(op2),
845 arch_register_get_name(out)));
846 DEBUG_ONLY(vfp_dump_live(live));
848 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
849 op2_idx = x87_on_stack(state, arch_register_get_index(op2));
851 if (arch_register_get_index(op2) != REG_VFP_NOREG) {
852 /* second operand is a vfp register */
854 if (is_vfp_live(arch_register_get_index(op2), live)) {
855 /* Second operand is live. */
857 if (is_vfp_live(arch_register_get_index(op1), live)) {
858 /* Both operands are live: push the first one.
859 This works even for op1 == op2. */
860 x87_create_fpush(state, n, op2_idx, BINOP_IDX_2);
861 out_idx = op2_idx = 0;
863 dst = tmpl->normal_op;
867 /* Second live, first operand is dead here, bring it to tos. */
869 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
873 op1_idx = out_idx = 0;
874 dst = tmpl->normal_op;
879 /* Second operand is dead. */
880 if (is_vfp_live(arch_register_get_index(op1), live)) {
881 /* First operand is live: bring second to tos. */
883 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
887 op2_idx = out_idx = 0;
888 dst = tmpl->reverse_op;
892 /* Both operands are dead here, pop them from the stack. */
895 if (op1_idx == op2_idx) {
896 /* Both are identical, no pop needed. */
897 dst = tmpl->normal_op;
901 dst = tmpl->normal_pop_op;
905 else if (op1_idx == 0) {
907 XCHG(op2_idx, op1_idx);
908 if (op1_idx == op2_idx) {
909 /* Both are identical, no pop needed. */
910 dst = tmpl->reverse_op;
914 dst = tmpl->reverse_pop_op;
919 /* Bring the second on top. */
920 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
921 if (op1_idx == op2_idx) {
922 /* Both are identical, no pop needed. */
923 out_idx = op1_idx = op2_idx = 0;
924 dst = tmpl->normal_op;
930 dst = tmpl->normal_pop_op;
938 /* second operand is an address mode */
939 if (is_vfp_live(arch_register_get_index(op1), live)) {
940 /* first operand is live: push it here */
941 x87_create_fpush(state, n, op1_idx, BINOP_IDX_1);
944 /* first operand is dead: bring it to tos */
946 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
948 op1_idx = out_idx = 0;
949 dst = tmpl->normal_op;
953 x87_set_st(state, arch_register_get_index(out), x87_patch_insn(n, dst), out_idx);
957 /* patch the operation */
958 attr = get_ia32_attr(n);
959 attr->x87[0] = op1 = &ia32_st_regs[op1_idx];
961 attr->x87[1] = op2 = &ia32_st_regs[op2_idx];
962 attr->x87[2] = out = &ia32_st_regs[out_idx];
965 DB((dbg, LEVEL_1, "<<< %s %s, %s -> %s\n", get_irn_opname(n),
966 arch_register_get_name(op1), arch_register_get_name(op2),
967 arch_register_get_name(out)));
969 DB((dbg, LEVEL_1, "<<< %s %s, [AM] -> %s\n", get_irn_opname(n),
970 arch_register_get_name(op1),
971 arch_register_get_name(out)));
977 * Simulate a virtual Unop.
979 * @param state the x87 state
980 * @param n the node that should be simulated (and patched)
981 * @param op the x87 opcode that will replace n's opcode
983 static int sim_unop(x87_state *state, ir_node *n, ir_op *op) {
984 int op1_idx, out_idx;
985 x87_simulator *sim = state->sim;
986 const arch_register_t *op1 = x87_get_irn_register(sim, get_irn_n(n, UNOP_IDX));
987 const arch_register_t *out = x87_get_irn_register(sim, n);
989 unsigned live = vfp_live_args_after(sim, n, REGMASK(out));
991 DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, out->name));
992 DEBUG_ONLY(vfp_dump_live(live));
994 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
996 if (is_vfp_live(arch_register_get_index(op1), live)) {
997 /* push the operand here */
998 x87_create_fpush(state, n, op1_idx, UNOP_IDX);
1001 /* operand is dead, bring it to tos */
1003 x87_create_fxch(state, n, op1_idx, UNOP_IDX);
1006 x87_set_tos(state, arch_register_get_index(out), x87_patch_insn(n, op));
1007 op1_idx = out_idx = 0;
1008 attr = get_ia32_attr(n);
1009 attr->x87[0] = op1 = &ia32_st_regs[0];
1010 attr->x87[2] = out = &ia32_st_regs[0];
1011 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), out->name));
1017 * Simulate a virtual Load instruction.
1019 * @param state the x87 state
1020 * @param n the node that should be simulated (and patched)
1021 * @param op the x87 opcode that will replace n's opcode
1023 static int sim_load(x87_state *state, ir_node *n, ir_op *op) {
1024 const arch_register_t *out = x87_get_irn_register(state->sim, n);
1027 DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, arch_register_get_name(out)));
1028 x87_push(state, arch_register_get_index(out), x87_patch_insn(n, op));
1029 assert(out == x87_get_irn_register(state->sim, n));
1030 attr = get_ia32_attr(n);
1031 attr->x87[2] = out = &ia32_st_regs[0];
1032 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), arch_register_get_name(out)));
1038 * Rewire all users of @p old_val to @p new_val iff they are scheduled after @p store.
1040 * @param store The store
1041 * @param old_val The former value
1042 * @param new_val The new value
1044 static void collect_and_rewire_users(ir_node *store, ir_node *old_val, ir_node *new_val) {
1045 const ir_edge_t *edge, *ne;
1047 foreach_out_edge_safe(old_val, edge, ne) {
1048 ir_node *user = get_edge_src_irn(edge);
1050 if (! user || user == store)
1053 /* if the user is scheduled after the store: rewire */
1054 if (sched_is_scheduled(user) && sched_comes_after(store, user)) {
1056 /* find the input of the user pointing to the old value */
1057 for (i = get_irn_arity(user) - 1; i >= 0; i--) {
1058 if (get_irn_n(user, i) == old_val)
1059 set_irn_n(user, i, new_val);
1063 } /* collect_and_rewire_users */
1066 * Simulate a virtual Store.
1068 * @param state the x87 state
1069 * @param n the node that should be simulated (and patched)
1070 * @param op the x87 store opcode
1071 * @param op_p the x87 store and pop opcode
1073 static int sim_store(x87_state *state, ir_node *n, ir_op *op, ir_op *op_p) {
1074 x87_simulator *sim = state->sim;
1075 ir_node *val = get_irn_n(n, STORE_VAL_IDX);
1076 const arch_register_t *op2 = x87_get_irn_register(sim, val);
1077 unsigned live = vfp_live_args_after(sim, n, 0);
1083 op2_idx = x87_on_stack(state, arch_register_get_index(op2));
1084 assert(op2_idx >= 0);
1086 DB((dbg, LEVEL_1, ">>> %+F %s ->\n", n, arch_register_get_name(op2)));
1088 mode = get_ia32_ls_mode(n);
1089 depth = x87_get_depth(state);
1092 We can only store the tos to memory.
1093 A store of mode_E with free registers
1094 pushes value to tos, so skip it here.
1096 if (! (mode == mode_E && depth < N_x87_REGS) && op2_idx != 0)
1097 x87_create_fxch(state, n, op2_idx, STORE_VAL_IDX);
1099 if (is_vfp_live(arch_register_get_index(op2), live)) {
1101 Problem: fst doesn't support mode_E (spills), only fstp does
1103 - stack not full: push value and fstp
1104 - stack full: fstp value and load again
1106 if (mode == mode_E) {
1107 if (depth < N_x87_REGS) {
1108 /* ok, we have a free register: push + fstp */
1109 x87_create_fpush(state, n, op2_idx, STORE_VAL_IDX);
1111 x87_patch_insn(n, op_p);
1114 ir_node *vfld, *mem, *block, *rproj, *mproj;
1117 /* stack full here: need fstp + load */
1119 x87_patch_insn(n, op_p);
1121 block = get_nodes_block(n);
1122 irg = get_irn_irg(n);
1123 vfld = new_rd_ia32_vfld(NULL, irg, block, get_irn_n(n, 0), get_irn_n(n, 1), new_rd_NoMem(irg));
1125 /* copy all attributes */
1126 set_ia32_frame_ent(vfld, get_ia32_frame_ent(n));
1127 if (is_ia32_use_frame(n))
1128 set_ia32_use_frame(vfld);
1129 set_ia32_am_flavour(vfld, get_ia32_am_flavour(n));
1130 set_ia32_op_type(vfld, ia32_am_Source);
1131 add_ia32_am_offs(vfld, get_ia32_am_offs(n));
1132 set_ia32_am_sc(vfld, get_ia32_am_sc(n));
1133 set_ia32_ls_mode(vfld, get_ia32_ls_mode(n));
1135 rproj = new_r_Proj(irg, block, vfld, get_ia32_ls_mode(vfld), pn_ia32_vfld_res);
1136 mproj = new_r_Proj(irg, block, vfld, mode_M, pn_ia32_vfld_M);
1137 mem = get_irn_Proj_for_mode(n, mode_M);
1139 assert(mem && "Store memory not found");
1141 arch_set_irn_register(sim->env, rproj, op2);
1143 /* reroute all former users of the store memory to the load memory */
1144 edges_reroute(mem, mproj, irg);
1145 /* set the memory input of the load to the store memory */
1146 set_irn_n(vfld, 2, mem);
1148 sched_add_after(n, vfld);
1149 sched_add_after(vfld, rproj);
1151 /* rewire all users, scheduled after the store, to the loaded value */
1152 collect_and_rewire_users(n, val, rproj);
1158 /* mode != mode_E -> use normal fst */
1159 x87_patch_insn(n, op);
1164 x87_patch_insn(n, op_p);
1167 attr = get_ia32_attr(n);
1168 attr->x87[1] = op2 = &ia32_st_regs[0];
1169 DB((dbg, LEVEL_1, "<<< %s %s ->\n", get_irn_opname(n), arch_register_get_name(op2)));
1175 * Simulate a virtual Phi.
1176 * Just for cosmetic reasons change the mode of Phi nodes to mode_E.
1178 * @param state the x87 state
1179 * @param n the node that should be simulated (and patched)
1180 * @param env the architecture environment
1182 static int sim_Phi(x87_state *state, ir_node *n, const arch_env_t *env) {
1183 ir_mode *mode = get_irn_mode(n);
1185 if (mode_is_float(mode))
1186 set_irn_mode(n, mode_E);
1192 #define _GEN_BINOP(op, rev) \
1193 static int sim_##op(x87_state *state, ir_node *n) { \
1194 exchange_tmpl tmpl = { op_ia32_##op, op_ia32_##rev, op_ia32_##op##p, op_ia32_##rev##p }; \
1195 return sim_binop(state, n, &tmpl); \
1198 #define GEN_BINOP(op) _GEN_BINOP(op, op)
1199 #define GEN_BINOPR(op) _GEN_BINOP(op, op##r)
1201 #define GEN_LOAD2(op, nop) \
1202 static int sim_##op(x87_state *state, ir_node *n) { \
1203 return sim_load(state, n, op_ia32_##nop); \
1206 #define GEN_LOAD(op) GEN_LOAD2(op, op)
1208 #define GEN_UNOP(op) \
1209 static int sim_##op(x87_state *state, ir_node *n) { \
1210 return sim_unop(state, n, op_ia32_##op); \
1213 #define GEN_STORE(op) \
1214 static int sim_##op(x87_state *state, ir_node *n) { \
1215 return sim_store(state, n, op_ia32_##op, op_ia32_##op##p); \
1234 GEN_LOAD2(fConst, fldConst)
1240 * Simulate a fCondJmp.
1242 * @param state the x87 state
1243 * @param n the node that should be simulated (and patched)
1245 static int sim_fCondJmp(x87_state *state, ir_node *n) {
1246 int op2_idx, op1_idx = -1, pop_cnt = 0;
1249 x87_simulator *sim = state->sim;
1250 const arch_register_t *op1 = x87_get_irn_register(sim, get_irn_n(n, BINOP_IDX_1));
1251 const arch_register_t *op2 = x87_get_irn_register(sim, get_irn_n(n, BINOP_IDX_2));
1252 unsigned live = vfp_live_args_after(sim, n, 0);
1254 DB((dbg, LEVEL_1, ">>> %+F %s, %s\n", n,
1255 arch_register_get_name(op1), arch_register_get_name(op2)));
1256 DEBUG_ONLY(vfp_dump_live(live));
/* Map both virtual registers to their current x87 stack slots. */
1258 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1259 op2_idx = x87_on_stack(state, arch_register_get_index(op2));
1261 /* BEWARE: check for comp a,a cases, they might happen */
/* The concrete compare opcode (dst) is chosen from which operands stay
   live after the compare (dead ones get popped: fcomp*/fcom*pp* forms)
   and which operand ends up at TOS (reversed fcomr* forms). fxch nodes
   are inserted as needed to bring operands into position. */
1262 if (arch_register_get_index(op2) != REG_VFP_NOREG) {
1263 /* second operand is a vfp register */
1265 if (is_vfp_live(arch_register_get_index(op2), live)) {
1266 /* second operand is live */
1268 if (is_vfp_live(arch_register_get_index(op1), live)) {
1269 /* both operands are live: move one of them to tos */
1271 XCHG(op2_idx, op1_idx);
1272 dst = op_ia32_fcomrJmp;
1274 else if (op1_idx == 0) {
1275 dst = op_ia32_fcomJmp;
1278 /* bring the first on top */
1279 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1280 if (op1_idx == op2_idx)
1283 dst = op_ia32_fcomJmp;
1287 /* second live, first operand is dead here, bring it to tos.
1288 This means further, op1_idx != op2_idx. */
1290 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1295 dst = op_ia32_fcompJmp;
1300 /* second operand is dead */
1301 if (is_vfp_live(arch_register_get_index(op1), live)) {
1302 /* first operand is live: bring second to tos.
1303 This means further, op1_idx != op2_idx. */
1305 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1310 dst = op_ia32_fcomrpJmp;
1314 /* both operands are dead here, check first for identity. */
1315 if (op1_idx == op2_idx) {
1316 /* identically, one one needed */
1318 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1319 op1_idx = op2_idx = 0;
1321 dst = op_ia32_fcompJmp;
1324 /* different, move them to st and st(1) and pop both.
1325 The tricky part is to get one into st(1).*/
1326 else if (op2_idx == 1) {
1327 /* good, second operand is already in the right place, move the first */
1329 /* bring the first on top */
1330 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1333 dst = op_ia32_fcomppJmp;
1336 else if (op1_idx == 1) {
1337 /* good, first operand is already in the right place, move the second */
1339 /* bring the first on top */
1340 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1343 dst = op_ia32_fcomrppJmp;
1347 /* if one is already the TOS, we need two fxch */
1349 /* first one is TOS, move to st(1) */
1350 x87_create_fxch(state, n, 1, BINOP_IDX_1);
1352 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1354 dst = op_ia32_fcomrppJmp;
1357 else if (op2_idx == 0) {
1358 /* second one is TOS, move to st(1) */
1359 x87_create_fxch(state, n, 1, BINOP_IDX_2);
1361 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1363 dst = op_ia32_fcomrppJmp;
1367 /* none of them is either TOS or st(1), 3 fxch needed */
1368 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1369 x87_create_fxch(state, n, 1, BINOP_IDX_2);
1370 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1373 dst = op_ia32_fcomppJmp;
1381 /* second operand is an address mode */
1382 if (is_vfp_live(arch_register_get_index(op1), live)) {
1383 /* first operand is live: bring it to TOS */
1385 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1388 dst = op_ia32_fcomJmp;
1391 /* first operand is dead: bring it to tos */
1393 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1397 dst = op_ia32_fcompJmp;
/* Replace the virtual fCondJmp by the selected concrete compare op. */
1401 x87_patch_insn(n, dst);
1407 /* patch the operation */
/* Record the final physical st(i) registers in the node attribute
   (x87[1] = first operand, x87[2] = second operand). */
1408 attr = get_ia32_attr(n);
1409 attr->x87[1] = op1 = &ia32_st_regs[op1_idx];
1411 attr->x87[2] = op2 = &ia32_st_regs[op2_idx];
1414 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(n),
1415 arch_register_get_name(op1), arch_register_get_name(op2)));
/* Address-mode case: only the first operand is a stack register. */
1417 DB((dbg, LEVEL_1, "<<< %s %s, [AM]\n", get_irn_opname(n),
1418 arch_register_get_name(op1)));
1421 } /* sim_fCondJmp */
1424 * Simulate a be_Copy.
1426 * @param state the x87 state
1427 * @param n the node that should be simulated (and patched)
1429 static int sim_Copy(x87_state *state, ir_node *n) {
1430 ir_mode *mode = get_irn_mode(n);
/* Non-float copies are none of our business; only vfp values live on
   the x87 stack. */
1432 if (mode_is_float(mode)) {
1433 x87_simulator *sim = state->sim;
1434 ir_node *pred = get_irn_n(n, 0);
1435 const arch_register_t *out = x87_get_irn_register(sim, n);
1436 const arch_register_t *op1 = x87_get_irn_register(sim, pred);
1437 ir_node *node, *next;
1439 int op1_idx, out_idx;
1440 unsigned live = vfp_live_args_after(sim, n, REGMASK(out));
/* Constructor for re-materializing a constant-load predecessor. */
1441 ir_node *(*cnstr)(dbg_info *, ir_graph *, ir_node *, ir_mode *);
1443 DB((dbg, LEVEL_1, ">>> %+F %s -> %s\n", n,
1444 arch_register_get_name(op1), arch_register_get_name(out)));
1445 DEBUG_ONLY(vfp_dump_live(live));
1447 /* Do not copy constants, recreate them. */
/* Copying a constant-producing node (fldz/fld1/fldpi/...) is replaced
   by simply emitting the same constant load again. */
1448 switch (get_ia32_irn_opcode(pred)) {
1450 cnstr = new_rd_ia32_fldz;
1453 cnstr = new_rd_ia32_fld1;
1455 case iro_ia32_fldpi:
1456 cnstr = new_rd_ia32_fldpi;
1458 case iro_ia32_fldl2e:
1459 cnstr = new_rd_ia32_fldl2e;
1461 case iro_ia32_fldl2t:
1462 cnstr = new_rd_ia32_fldl2t;
1464 case iro_ia32_fldlg2:
1465 cnstr = new_rd_ia32_fldlg2;
1467 case iro_ia32_fldln2:
1468 cnstr = new_rd_ia32_fldln2;
1474 /* copy a constant */
1475 node = (*cnstr)(get_irn_dbg_info(n), get_irn_irg(n), get_nodes_block(n), mode);
1476 arch_set_irn_register(sim->env, node, out);
/* The reloaded constant is pushed; it now lives in st(0). */
1478 x87_push(state, arch_register_get_index(out), node);
1480 attr = get_ia32_attr(node);
1481 attr->x87[2] = out = &ia32_st_regs[0];
1483 next = sched_next(n);
1486 sched_add_before(next, node);
1487 DB((dbg, LEVEL_1, ">>> %+F -> %s\n", node, arch_register_get_name(out)));
1491 /* handle the infamous unknown value */
1492 if (arch_register_get_index(op1) == REG_VFP_UKNWN) {
1493 /* This happens before Phi nodes */
1494 if (x87_state_is_empty(state)) {
1495 /* create some value */
/* Empty stack: just turn the copy itself into an fldz. */
1496 x87_patch_insn(n, op_ia32_fldz);
1497 attr = get_ia32_attr(n);
1498 attr->x87[2] = out = &ia32_st_regs[0];
1499 DB((dbg, LEVEL_1, "<<< %+F -> %s\n", n,
1500 arch_register_get_name(out)));
1502 /* Just copy one. We need here an fpush that can hold a
1503 a register, so use the fpushCopy. */
1504 node = new_rd_ia32_fpushCopy(get_irn_dbg_info(n), get_irn_irg(n), get_nodes_block(n), get_irn_n(n, 0), mode);
1505 arch_set_irn_register(sim->env, node, out);
1507 x87_push(state, arch_register_get_index(out), node);
1509 attr = get_ia32_attr(node);
1510 attr->x87[0] = op1 =
1511 attr->x87[2] = out = &ia32_st_regs[0];
1513 next = sched_next(n);
1516 sched_add_before(next, node);
1517 DB((dbg, LEVEL_1, "<<< %+F %s -> %s\n", node,
1518 arch_register_get_name(op1),
1519 arch_register_get_name(out)));
/* Regular copy: locate the source value on the stack. */
1524 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1526 if (is_vfp_live(arch_register_get_index(op1), live)) {
1527 /* Operand is still live,a real copy. We need here an fpush that can hold a
1528 a register, so use the fpushCopy. */
1529 node = new_rd_ia32_fpushCopy(get_irn_dbg_info(n), get_irn_irg(n), get_nodes_block(n), get_irn_n(n, 0), mode);
1530 arch_set_irn_register(sim->env, node, out);
1532 x87_push(state, arch_register_get_index(out), node);
1534 attr = get_ia32_attr(node);
1535 attr->x87[0] = op1 = &ia32_st_regs[op1_idx];
1536 attr->x87[2] = out = &ia32_st_regs[0];
1538 next = sched_next(n);
1541 sched_add_before(next, node);
1542 DB((dbg, LEVEL_1, ">>> %+F %s -> %s\n", node, op1->name, out->name));
/* Source is dead after the copy: no duplication needed, rename/kill. */
1545 out_idx = x87_on_stack(state, arch_register_get_index(out));
1547 if (out_idx >= 0 && out_idx != op1_idx) {
1548 /* op1 must be killed and placed where out is */
1550 /* best case, simple remove and rename */
1551 x87_patch_insn(n, op_ia32_Pop);
1552 attr = get_ia32_attr(n);
1553 attr->x87[0] = op1 = &ia32_st_regs[0];
/* The pop shifts the remaining entries up by one slot. */
1556 x87_set_st(state, arch_register_get_index(out), n, op1_idx - 1);
1559 /* move op1 to tos, store and pop it */
1561 x87_create_fxch(state, n, op1_idx, 0);
1564 x87_patch_insn(n, op_ia32_Pop);
1565 attr = get_ia32_attr(n);
1566 attr->x87[0] = op1 = &ia32_st_regs[out_idx];
1569 x87_set_st(state, arch_register_get_index(out), n, out_idx - 1);
1571 DB((dbg, LEVEL_1, ">>> %+F %s\n", n, op1->name));
1574 /* just a virtual copy */
/* No code emitted: rebind the stack slot to the new register and
   remove the Copy node from the graph. */
1575 x87_set_st(state, arch_register_get_index(out), get_unop_op(n), op1_idx);
1577 DB((dbg, LEVEL_1, ">>> KILLED %s\n", get_irn_opname(n)));
1578 exchange(n, get_unop_op(n));
1586 * Simulate a be_Call.
1588 * @param state the x87 state
1589 * @param n the node that should be simulated
1590 * @param env the architecture environment
1592 static int sim_Call(x87_state *state, ir_node *n, const arch_env_t *env) {
1593 ir_type *call_tp = be_Call_get_type(n);
1595 /* at the begin of a call the x87 state should be empty */
/* The x87 calling convention requires a flushed FPU stack at the call. */
1596 assert(state->depth == 0 && "stack not empty before call");
1599 * If the called function returns a float, it is returned in st(0).
1600 * This even happens if the return value is NOT used.
1601 * Moreover, only one return result is supported.
1603 if (get_method_n_ress(call_tp) > 0) {
1604 ir_type *res_type = get_method_res_type(call_tp, 0);
/* mode may be NULL for non-primitive result types; skip those. */
1605 ir_mode *mode = get_type_mode(res_type);
1607 if (mode && mode_is_float(mode)) {
1609 * TODO: what to push here? The result might be unused and currently
1610 * we have no possibility to detect this :-(
1612 x87_push(state, 0, n);
1620 * Simulate a be_Spill.
1622 * @param state the x87 state
1623 * @param n the node that should be simulated (and patched)
1624 * @param env the architecture environment
1626 * Should not happen, spills are lowered before x87 simulator see them.
1628 static int sim_Spill(x87_state *state, ir_node *n) {
/* Spills must have been lowered to fst before the simulator runs. */
1629 assert(0 && "Spill not lowered");
/* Fallback for NDEBUG builds: treat the spill like a store. */
1630 return sim_fst(state, n);
1634 * Simulate a be_Reload.
1636 * @param state the x87 state
1637 * @param n the node that should be simulated (and patched)
1638 * @param env the architecture environment
1640 * Should not happen, reloads are lowered before x87 simulator see them.
1642 static int sim_Reload(x87_state *state, ir_node *n) {
/* Reloads must have been lowered to fld before the simulator runs. */
1643 assert(0 && "Reload not lowered");
/* Fallback for NDEBUG builds: treat the reload like a load. */
1644 return sim_fld(state, n);
1648 * Simulate a be_Return.
1650 * @param state the x87 state
1651 * @param n the node that should be simulated (and patched)
1652 * @param env the architecture environment
1654 static int sim_Return(x87_state *state, ir_node *n) {
1655 int n_res = be_Return_get_n_rets(n);
1656 int i, n_float_res = 0;
1658 /* only floating point return values must resist on stack */
/* Count the float results among the Return's value operands. */
1659 for (i = 0; i < n_res; ++i) {
1660 ir_node *res = get_irn_n(n, be_pos_Return_val + i);
1662 if (mode_is_float(get_irn_mode(res)))
/* At the return, exactly the float results may remain on the stack. */
1665 assert(x87_get_depth(state) == n_float_res);
1667 /* pop them virtually */
/* Only the simulated state is adjusted; the hardware pop is implicit
   in the return convention. */
1668 for (i = n_float_res - 1; i >= 0; --i)
/** Helper pairing an input with its output register for a Perm. */
1674 typedef struct _perm_data_t {
1675 const arch_register_t *in; /**< the input register */
1676 const arch_register_t *out; /**< the output register */
1680 * Simulate a be_Perm.
1682 * @param state the x87 state
1683 * @param irn the node that should be simulated (and patched)
1685 static int sim_Perm(x87_state *state, ir_node *irn) {
1687 x87_simulator *sim = state->sim;
1688 ir_node *pred = get_irn_n(irn, 0);
1690 const ir_edge_t *edge;
1692 /* handle only floating point Perms */
/* The mode of the first input decides; integer Perms are ignored. */
1693 if (! mode_is_float(get_irn_mode(pred)))
1696 DB((dbg, LEVEL_1, ">>> %+F\n", irn));
1698 /* Perm is a pure virtual instruction on x87.
1699 All inputs must be on the FPU stack and are pairwise
1700 different from each other.
1701 So, all we need to do is to permutate the stack state. */
1702 n = get_irn_arity(irn);
/* stack_pos[i] = current stack slot of the i-th Perm input
   (allocated on the C stack via NEW_ARR_A). */
1703 NEW_ARR_A(int, stack_pos, n);
1705 /* collect old stack positions */
1706 for (i = 0; i < n; ++i) {
1707 const arch_register_t *inreg = x87_get_irn_register(sim, get_irn_n(irn, i));
1708 int idx = x87_on_stack(state, arch_register_get_index(inreg));
1710 assert(idx >= 0 && "Perm argument not on x87 stack");
1714 /* now do the permutation */
/* Each Proj's output register takes over the stack slot that the
   corresponding input (same Proj number) occupied. */
1715 foreach_out_edge(irn, edge) {
1716 ir_node *proj = get_edge_src_irn(edge);
1717 const arch_register_t *out = x87_get_irn_register(sim, proj);
1718 long num = get_Proj_proj(proj);
1720 assert(0 <= num && num < n && "More Proj's than Perm inputs");
1721 x87_set_st(state, arch_register_get_index(out), proj, stack_pos[(unsigned)num]);
1723 DB((dbg, LEVEL_1, "<<< %+F\n", irn));
1729 * Kill any dead registers at block start by popping them from the stack.
1731 * @param sim the simulator handle
1732 * @param block the current block
1733 * @param start_state the x87 state at the begin of the block
1735 static x87_state *x87_kill_deads(x87_simulator *sim, ir_node *block, x87_state *start_state) {
1736 x87_state *state = start_state;
1737 ir_node *first_insn = sched_first(block);
1738 ir_node *keep = NULL;
1739 unsigned live = vfp_live_args_after(sim, block, 0);
1741 int i, depth, num_pop;
/* Build a bitmask of stack slots whose vfp register is dead at block
   entry; bit i corresponds to st(i). */
1744 depth = x87_get_depth(state);
1745 for (i = depth - 1; i >= 0; --i) {
1746 int reg = x87_get_st_reg(state, i);
1748 if (! is_vfp_live(reg, live))
1749 kill_mask |= (1 << i);
1753 /* create a new state, will be changed */
/* Do not mutate the incoming state: it may be shared with a
   predecessor's end state. */
1754 state = x87_clone_state(sim, state);
1756 DB((dbg, LEVEL_1, "Killing deads:\n"));
1757 DEBUG_ONLY(vfp_dump_live(live));
1758 DEBUG_ONLY(x87_dump_stack(state));
1760 /* now kill registers */
1762 /* we can only kill from TOS, so bring them up */
1763 if (! (kill_mask & 1)) {
1764 /* search from behind, because we can to a double-pop */
1765 for (i = depth - 1; i >= 0; --i) {
1766 if (kill_mask & (1 << i)) {
1767 kill_mask &= ~(1 << i);
/* Swap a dead value into TOS with an fxch inserted before the
   first real instruction of the block. */
1774 x87_set_st(state, -1, keep, i);
1775 keep = x87_create_fxch(state, first_insn, i, -1);
1778 keep = x87_get_st_node(state, 0);
1780 if ((kill_mask & 3) == 3) {
1781 /* we can do a double-pop */
1785 /* only a single pop */
/* Emit the fpop(s) and shift the mask past the removed slots. */
1790 kill_mask >>= num_pop;
1791 keep = x87_create_fpop(state, first_insn, num_pop, keep);
1796 } /* x87_kill_deads */
1799 * Run a simulation and fix all virtual instructions for a block.
1801 * @param sim the simulator handle
1802 * @param block the current block
1804 * @return non-zero if simulation is complete,
1805 * zero if the simulation must be rerun
1807 static int x87_simulate_block(x87_simulator *sim, ir_node *block) {
1809 blk_state *bl_state = x87_get_bl_state(sim, block);
1810 x87_state *state = bl_state->begin;
1811 const ir_edge_t *edge;
1812 ir_node *start_block;
1814 /* if we have no assigned start state, we must wait ... */
/* A block may not have been processed yet (begin == NULL); the caller
   re-queues it and the worklist loop retries later. */
1818 assert(bl_state->end == NULL);
1820 DB((dbg, LEVEL_1, "Simulate %+F\n", block));
1821 DB((dbg, LEVEL_2, "State at Block begin:\n ")); DEBUG_ONLY(x87_dump_stack(state));
1823 /* at block begin, kill all dead registers */
1824 state = x87_kill_deads(sim, block, state);
1826 /* beware, n might changed */
/* Walk the schedule; nodes with a registered sim_func (see
   x87_init_simulator) are virtual fp instructions that must be
   patched into concrete x87 ops. */
1827 for (n = sched_first(block); !sched_is_end(n); n = next) {
1828 ir_op *op = get_irn_op(n);
1830 next = sched_next(n);
1831 if (op->ops.generic) {
1833 sim_func func = (sim_func)op->ops.generic;
1835 /* have work to do */
1836 if (state == bl_state->begin) {
1837 /* create a new state, will be changed */
/* Copy-on-write: keep the shared begin state intact. */
1838 state = x87_clone_state(sim, state);
1842 node_inserted = (*func)(state, n);
1845 sim_func might have added additional nodes after n,
1847 beware: n must not be changed by sim_func
1848 (i.e. removed from schedule) in this case
/* Re-read the successor so newly inserted nodes get simulated too. */
1851 next = sched_next(n);
1855 start_block = get_irg_start_block(get_irn_irg(block));
1857 /* check if the state must be shuffled */
1858 foreach_block_succ(block, edge) {
1859 ir_node *succ = get_edge_src_irn(edge);
1860 blk_state *succ_state = x87_get_bl_state(sim, succ);
1862 if (succ_state->begin && succ != start_block) {
1863 /* There is already a begin state for this block, bad.
1864 Do the necessary permutations.
1865 Note that critical edges are removed, so this is always possible. */
1866 x87_shuffle(sim, block, state, succ, succ_state->begin);
1868 /* Note further, that there can be only one such situation,
1869 so we can break here. */
1873 bl_state->end = state;
1875 /* now propagate the state to all successor blocks */
1876 foreach_block_succ(block, edge) {
1877 ir_node *succ = get_edge_src_irn(edge);
1878 blk_state *succ_state = x87_get_bl_state(sim, succ);
1880 if (! succ_state->begin)
1881 succ_state->begin = state;
1884 DB((dbg, LEVEL_2, "State at Block end:\n ")); DEBUG_ONLY(x87_dump_stack(state));
1887 } /* x87_simulate_block */
1890 * Create a new x87 simulator.
1892 * @param sim a simulator handle, will be initialized
1893 * @param irg the current graph
1894 * @param env the architecture environment
1896 static void x87_init_simulator(x87_simulator *sim, ir_graph *irg, const arch_env_t *env)
1898 obstack_init(&sim->obst);
1899 sim->blk_states = pmap_create();
/* Per-node liveness cache, indexed by node index (see update_liveness). */
1901 sim->n_idx = get_irg_last_idx(irg);
1902 sim->live = obstack_alloc(&sim->obst, sizeof(*sim->live) * sim->n_idx);
1904 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.x87");
1906 DB((dbg, LEVEL_1, "--------------------------------\n"
1907 "x87 Simulator started for %+F\n", irg));
1909 /* set the generic function pointer of instruction we must simulate */
/* Reset all generic pointers first so only the ops registered below
   are dispatched by x87_simulate_block. */
1910 clear_irp_opcodes_generic_func();
/* ASSOC* hook sim_<op> into the op's generic function pointer:
   ASSOC for plain firm ops, ASSOC_IA32 for virtual ia32 ops (v-prefix),
   ASSOC_BE for backend (be_) ops. */
1912 #define ASSOC(op) (op_ ## op)->ops.generic = (op_func)(sim_##op)
1913 #define ASSOC_IA32(op) (op_ia32_v ## op)->ops.generic = (op_func)(sim_##op)
1914 #define ASSOC_BE(op) (op_be_ ## op)->ops.generic = (op_func)(sim_##op)
1931 ASSOC_IA32(fCondJmp);
1942 } /* x87_init_simulator */
1945 * Destroy a x87 simulator.
1947 * @param sim the simulator handle
1949 static void x87_destroy_simulator(x87_simulator *sim) {
/* Release the block-state map and everything allocated on the obstack
   (including the liveness cache). */
1950 pmap_destroy(sim->blk_states);
1951 obstack_free(&sim->obst, NULL);
1952 DB((dbg, LEVEL_1, "x87 Simulator stopped\n\n"));
1953 } /* x87_destroy_simulator */
1956 * Run a simulation and fix all virtual instructions for a graph.
1958 * @param env the architecture environment
1959 * @param irg the current graph
1960 * @param blk_list the block schedule list
1962 * Needs a block-schedule.
1964 void x87_simulate_graph(const arch_env_t *env, ir_graph *irg, ir_node **blk_list) {
1965 ir_node *block, *start_block;
1967 blk_state *bl_state;
/* Save/restore current_ir_graph around the simulation. */
1971 ir_graph *rem = current_ir_graph;
1973 current_ir_graph = irg;
1975 /* create the simulator */
1976 x87_init_simulator(&sim, irg, env);
1978 start_block = get_irg_start_block(irg);
1979 bl_state = x87_get_bl_state(&sim, start_block);
1981 /* start with the empty state */
/* The start block begins with an empty FPU stack (shared constant). */
1982 bl_state->begin = empty;
1985 worklist = new_waitq();
1987 /* Create the worklist for the schedule and calculate the liveness
1988 for all nodes. We must precalculate this info, because the
1989 simulator adds new nodes (and possible before Phi nodes) which
1990 let fail the old lazy calculation.
1991 On the other hand we reduce the computation amount due to
1992 precaching from O(n^2) to O(n) at the expense of O(n) cache memory.
1994 lv = be_liveness(irg);
1995 for (i = 0, n = ARR_LEN(blk_list); i < n; ++i) {
1996 update_liveness(&sim, lv, blk_list[i]);
1997 waitq_put(worklist, blk_list[i]);
1999 be_liveness_free(lv);
/* Worklist loop: a block whose begin state is not known yet reports
   failure from x87_simulate_block and is re-queued for a later pass. */
2003 block = waitq_get(worklist);
2004 if (! x87_simulate_block(&sim, block)) {
2005 waitq_put(worklist, block);
2008 } while (! pdeq_empty(worklist));
2011 del_waitq(worklist);
2012 x87_destroy_simulator(&sim);
2013 current_ir_graph = rem;
2014 } /* x87_simulate_graph */