2 * This file implements the x87 support and virtual to stack
3 * register translation for the ia32 backend.
5 * @author: Michael Beck
12 #endif /* HAVE_CONFIG_H */
19 #include "iredges_t.h"
28 #include "../belive_t.h"
29 #include "../besched_t.h"
30 #include "../benode_t.h"
31 #include "ia32_new_nodes.h"
32 #include "gen_ia32_new_nodes.h"
33 #include "gen_ia32_regalloc_if.h"
38 /* first and second binop index */
45 /* the store val index */
46 #define STORE_VAL_IDX 2
48 #define MASK_TOS(x) ((x) & (N_x87_REGS - 1))
50 /** the debug handle */
51 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
53 /* Forward declaration. */
54 typedef struct _x87_simulator x87_simulator;
57 * An exchange template.
58 * Note that our virtual functions have the same inputs
59 * and attributes as the real ones, so we can simply exchange
61 * Further, x87 supports inverse instructions, so we can handle them.
63 typedef struct _exchange_tmpl {
64 ir_op *normal_op; /**< the normal one */
65 ir_op *reverse_op; /**< the reverse one if exists */
66 ir_op *normal_pop_op; /**< the normal one with tos pop */
67 ir_op *reverse_pop_op; /**< the reverse one with tos pop */
/* One slot of the simulated x87 register stack: which vfp register sits
   there and which IR node produced that value. */
71 * An entry on the simulated x87 stack.
73 typedef struct _st_entry {
74 int reg_idx; /**< the virtual register index of this stack value */
75 ir_node *node; /**< the node that produced this value */
/* The full simulated x87 stack: a circular buffer of N_x87_REGS entries
   addressed through MASK_TOS(tos + pos). */
81 typedef struct _x87_state {
82 st_entry st[N_x87_REGS]; /**< the register stack */
83 int depth; /**< the current stack depth */
84 int tos; /**< position of the tos */
85 x87_simulator *sim; /**< The simulator. */
88 /** An empty state, used for blocks without fp instructions. */
89 static x87_state _empty = { { {0, NULL}, }, 0, 0 };
90 static x87_state *empty = (x87_state *)&_empty;
92 /** The type of an instruction simulator function. */
93 typedef int (*sim_func)(x87_state *state, ir_node *n, const arch_env_t *env);
96 * A block state: Every block has a x87 state at the beginning and at the end.
98 typedef struct _blk_state {
99 x87_state *begin; /**< state at the begin or NULL if not assigned */
100 x87_state *end; /**< state at the end or NULL if not assigned */
/* Helper cast for values stored in the blk_states pmap. */
103 #define PTR_TO_BLKSTATE(p) ((blk_state *)(p))
/** The x87 simulator: per-irg state shared by all simulation passes
    (allocator, per-block x87 states, cached vfp liveness per node index). */
108 struct _x87_simulator {
109 struct obstack obst; /**< an obstack for fast allocating */
110 pmap *blk_states; /**< map blocks to states */
111 const arch_env_t *env; /**< architecture environment */
/* NOTE(review): one byte per node index — presumably the 8 vfp registers
   fit into a byte-sized live mask; confirm against update_liveness(). */
112 unsigned char *live; /**< Liveness information. */
113 unsigned n_idx; /**< cached get_irg_last_idx() result */
117 * Returns the current stack depth.
119 * @param state the x87 state
121 * @return the x87 stack depth
123 static int x87_get_depth(const x87_state *state) {
128 * Check if the state is empty.
130 * @param state the x87 state
132 * returns non-zero if the x87 stack is empty
134 static int x87_state_is_empty(const x87_state *state) {
135 return state->depth == 0;
139 * Return the virtual register index at st(pos).
141 * @param state the x87 state
142 * @param pos a stack position
144 * @return the vfp register index that produced the value at st(pos)
146 static int x87_get_st_reg(const x87_state *state, int pos) {
147 assert(pos < state->depth);
/* st[] is a circular buffer: MASK_TOS wraps tos+pos into [0, N_x87_REGS). */
148 return state->st[MASK_TOS(state->tos + pos)].reg_idx;
152 * Return the node at st(pos).
154 * @param state the x87 state
155 * @param pos a stack position
157 * @return the IR node that produced the value at st(pos)
159 static ir_node *x87_get_st_node(const x87_state *state, int pos) {
160 assert(pos < state->depth);
/* Same circular addressing as x87_get_st_reg(). */
161 return state->st[MASK_TOS(state->tos + pos)].node;
162 } /* x87_get_st_node */
166 * Dump the stack for debugging.
168 * @param state the x87 state
/* Prints the stack from the deepest slot down to st(0), which is marked
   as TOS. Only compiled in DEBUG_libfirm builds (see #endif below). */
170 static void x87_dump_stack(const x87_state *state) {
173 for (i = state->depth - 1; i >= 0; --i) {
174 DB((dbg, LEVEL_2, "vf%d ", x87_get_st_reg(state, i)));
176 DB((dbg, LEVEL_2, "<-- TOS\n"));
177 } /* x87_dump_stack */
178 #endif /* DEBUG_libfirm */
181 * Set a virtual register to st(pos).
183 * @param state the x87 state
184 * @param reg_idx the vfp register index that should be set
185 * @param node the IR node that produces the value of the vfp register
186 * @param pos the stack position where the new value should be entered
188 static void x87_set_st(x87_state *state, int reg_idx, ir_node *node, int pos) {
/* Overwrites an existing slot — the stack must be non-empty. */
189 assert(0 < state->depth);
190 state->st[MASK_TOS(state->tos + pos)].reg_idx = reg_idx;
191 state->st[MASK_TOS(state->tos + pos)].node = node;
193 DB((dbg, LEVEL_2, "After SET_REG:\n ")); DEBUG_ONLY(x87_dump_stack(state));
197 * Set the tos virtual register.
199 * @param state the x87 state
200 * @param reg_idx the vfp register index that should be set
201 * @param node the IR node that produces the value of the vfp register
/* Convenience wrapper: x87_set_st() at position 0 (the top of stack). */
203 static void x87_set_tos(x87_state *state, int reg_idx, ir_node *node) {
204 x87_set_st(state, reg_idx, node, 0);
208 * Flush the x87 stack.
210 * @param state the x87 state
/* NOTE(review): body not visible in this excerpt — presumably resets
   depth/tos to empty; confirm in the full source. */
212 static void x87_flush(x87_state *state) {
218 * Swap st(0) with st(pos).
220 * @param state the x87 state
221 * @param pos the stack position to change the tos with
/* Simulates the x87 FXCH instruction on the virtual stack by swapping
   the two st_entry slots. */
223 static void x87_fxch(x87_state *state, int pos) {
225 assert(pos < state->depth);
227 entry = state->st[MASK_TOS(state->tos + pos)];
228 state->st[MASK_TOS(state->tos + pos)] = state->st[MASK_TOS(state->tos)];
229 state->st[MASK_TOS(state->tos)] = entry;
231 DB((dbg, LEVEL_2, "After FXCH:\n ")); DEBUG_ONLY(x87_dump_stack(state));
235 * Convert a virtual register to the stack index.
237 * @param state the x87 state
238 * @param reg_idx the register vfp index
240 * @return the stack position where the register is stacked
241 * or -1 if the virtual register was not found
/* Linear scan over the current stack depth; the stack holds at most
   N_x87_REGS entries, so this is cheap. */
243 static int x87_on_stack(const x87_state *state, int reg_idx) {
244 int i, tos = state->tos;
246 for (i = 0; i < state->depth; ++i)
247 if (state->st[MASK_TOS(tos + i)].reg_idx == reg_idx)
253 * Push a virtual Register onto the stack, double pushes allowed.
255 * @param state the x87 state
256 * @param reg_idx the register vfp index
257 * @param node the node that produces the value of the vfp register
259 static void x87_push_dbl(x87_state *state, int reg_idx, ir_node *node) {
260 assert(state->depth < N_x87_REGS && "stack overrun");
/* The stack grows downwards in the circular buffer: decrement tos, then
   store the new entry at the new tos slot. */
263 state->tos = MASK_TOS(state->tos - 1);
264 state->st[state->tos].reg_idx = reg_idx;
265 state->st[state->tos].node = node;
267 DB((dbg, LEVEL_2, "After PUSH:\n ")); DEBUG_ONLY(x87_dump_stack(state));
271 * Push a virtual Register onto the stack, double pushes are NOT allowed.
273 * @param state the x87 state
274 * @param reg_idx the register vfp index
275 * @param node the node that produces the value of the vfp register
276 * @param dbl_push if != 0 double pushes are allowed
/* Asserting wrapper around x87_push_dbl(): the register must not already
   be on the stack. (The @param dbl_push above is stale — the parameter
   does not exist; the docs should be updated in the full source.) */
278 static void x87_push(x87_state *state, int reg_idx, ir_node *node) {
279 assert(x87_on_stack(state, reg_idx) == -1 && "double push");
281 x87_push_dbl(state, reg_idx, node);
285 * Pop a virtual Register from the stack.
/* Inverse of x87_push_dbl(): advance tos; the old slot is simply
   abandoned (no clearing needed, depth bounds all accesses). */
287 static void x87_pop(x87_state *state) {
288 assert(state->depth > 0 && "stack underrun");
291 state->tos = MASK_TOS(state->tos + 1);
293 DB((dbg, LEVEL_2, "After POP:\n ")); DEBUG_ONLY(x87_dump_stack(state));
297 * Returns the block state of a block.
299 * @param sim the x87 simulator handle
300 * @param block the current block
302 * @return the block state
/* Lazily creates a blk_state (begin/end both NULL) on the simulator
   obstack the first time a block is seen, caching it in blk_states. */
304 static blk_state *x87_get_bl_state(x87_simulator *sim, ir_node *block) {
305 pmap_entry *entry = pmap_find(sim->blk_states, block);
308 blk_state *bl_state = obstack_alloc(&sim->obst, sizeof(*bl_state));
309 bl_state->begin = NULL;
310 bl_state->end = NULL;
312 pmap_insert(sim->blk_states, block, bl_state);
316 return PTR_TO_BLKSTATE(entry->value);
317 } /* x87_get_bl_state */
320 * Creates a new x87 state.
322 * @param sim the x87 simulator handle
324 * @return a new x87 state
/* Raw allocation on the simulator obstack; fields are initialized by the
   callers (x87_alloc_empty_state / x87_clone_state). */
326 static x87_state *x87_alloc_state(x87_simulator *sim) {
327 x87_state *res = obstack_alloc(&sim->obst, sizeof(*res));
331 } /* x87_alloc_state */
334 * Create a new empty x87 state.
336 * @param sim the x87 simulator handle
338 * @return a new empty x87 state
340 static x87_state *x87_alloc_empty_state(x87_simulator *sim) {
341 x87_state *res = x87_alloc_state(sim);
345 } /* x87_alloc_empty_state */
/** Clone an x87 state. */
350 * @param sim the x87 simulator handle
351 * @param src the x87 state that will be cloned
353 * @return a cloned copy of the src state
/* Shallow memcpy is sufficient: st_entry holds only an int and a node
   pointer, and nodes are shared, not owned. */
355 static x87_state *x87_clone_state(x87_simulator *sim, const x87_state *src) {
356 x87_state *res = x87_alloc_state(sim);
358 memcpy(res, src, sizeof(*res));
360 } /* x87_clone_state */
363 * Patch a virtual instruction into a x87 one and return
366 * @param n the IR node to patch
367 * @param op the x87 opcode to patch in
/* Besides swapping the opcode (done in lines not visible in this excerpt),
   all float results are retyped to mode_E: the x87 stack always holds
   80-bit extended values. For mode_T nodes the float Projs are patched
   instead of the node itself. */
369 static ir_node *x87_patch_insn(ir_node *n, ir_op *op) {
370 ir_mode *mode = get_irn_mode(n);
375 if (mode == mode_T) {
376 /* patch all Proj's */
377 const ir_edge_t *edge;
379 foreach_out_edge(n, edge) {
380 ir_node *proj = get_edge_src_irn(edge);
382 mode = get_irn_mode(proj);
383 if (mode_is_float(mode)) {
385 set_irn_mode(proj, mode_E);
390 else if (mode_is_float(mode))
391 set_irn_mode(n, mode_E);
393 } /* x87_patch_insn */
396 * Returns the first Proj of a mode_T node having a given mode.
398 * @param n the mode_T node
399 * @param m the desired mode of the Proj
400 * @return The first Proj of mode @p m found or NULL.
402 static ir_node *get_irn_Proj_for_mode(ir_node *n, ir_mode *m) {
403 const ir_edge_t *edge;
405 assert(get_irn_mode(n) == mode_T && "Need mode_T node");
/* Out-edges of a mode_T node are exactly its Projs. */
407 foreach_out_edge(n, edge) {
408 ir_node *proj = get_edge_src_irn(edge);
409 if (get_irn_mode(proj) == m)
414 } /* get_irn_Proj_for_mode */
416 /* -------------- x87 perm --------------- */
419 * Creates a fxch for shuffle.
421 * @param state the x87 state
422 * @param pos parameter for fxch
423 * @param dst_block the block of the user
425 * Creates a new fxch node and reroute the user of the old node
428 * @return the fxch node
430 static ir_node *x87_fxch_shuffle(x87_state *state, int pos, ir_node *block, ir_node *dst_block)
432 const ir_edge_t *edge;
433 ir_node *n = x87_get_st_node(state, pos);
434 ir_node *user = NULL;
439 if (block == get_nodes_block(n)) {
440 /* this is a node from our block: change its user */
441 foreach_out_edge(n, edge) {
442 ir_node *succ = get_edge_src_irn(edge);
/* Only Phis in the destination block use the permuted value across
   this control-flow edge. */
444 if (is_Phi(succ) && get_nodes_block(succ) == dst_block) {
446 node_idx = get_edge_src_pos(edge);
/* x87[0] = st(pos) (source), x87[2] = st(0) (destination) of the fxch. */
453 fxch = new_rd_ia32_fxch(NULL, get_irn_irg(block), block, n, get_irn_mode(n));
454 attr = get_ia32_attr(fxch);
455 attr->x87[0] = &ia32_st_regs[pos];
456 attr->x87[2] = &ia32_st_regs[0];
459 DB((dbg, LEVEL_2, "%+F replaced input %d of %+F\n", fxch, node_idx, user));
460 set_irn_n(user, node_idx, fxch);
464 * This is a node from a dominator block. Changing its user might be wrong,
465 * so just keep it alive.
466 * The "right" solution would require a new Phi, but we don't care here.
/* Mirror the hardware exchange in the simulated state. */
471 x87_fxch(state, pos);
473 } /* x87_fxch_shuffle */
476 * Calculate the necessary permutations to reach dst_state.
478 * These permutations are done with fxch instructions and placed
479 * at the end of the block.
481 * Note that critical edges are removed here, so we need only
482 * a shuffle if the current block has only one successor.
484 * @param sim the simulator handle
485 * @param block the current block
486 * @param state the current x87 stack state, might be modified
487 * @param dst_block the destination block
488 * @param dst_state destination state
492 static x87_state *x87_shuffle(x87_simulator *sim, ir_node *block, x87_state *state, ir_node *dst_block, const x87_state *dst_state) {
493 int i, n_cycles, k, ri;
494 unsigned cycles[4], all_mask;
495 char cycle_idx[4][8];
497 ir_node *before, *after;
499 assert(state->depth == dst_state->depth);
501 /* Some mathematics here:
502 If we have a cycle of length n that includes the tos,
503 we need n-1 exchange operations.
504 We can always add the tos and restore it, so we need
505 n+1 exchange operations for a cycle not containing the tos.
506 So, the maximum of needed operations is for a cycle of 7
507 not including the tos == 8.
508 This is the same number of ops we would need for using stores,
509 so exchange is cheaper (we save the loads).
510 On the other hand, we might need an additional exchange
511 in the next block to bring one operand on top, so the
512 number of ops in the first case is identical.
513 Further, no more than 4 cycles can exists (4 x 2).
/* Phase 1: decompose the permutation into cycles. all_mask tracks the
   still-unvisited stack slots as a bitset of positions. */
515 all_mask = (1 << (state->depth)) - 1;
517 for (n_cycles = 0; all_mask; ++n_cycles) {
518 int src_idx, dst_idx;
520 /* find the first free slot */
521 for (i = 0; i < state->depth; ++i) {
522 if (all_mask & (1 << i)) {
523 all_mask &= ~(1 << i);
525 /* check if there are differences here */
526 if (x87_get_st_reg(state, i) != x87_get_st_reg(dst_state, i))
532 /* no more cycles found */
/* cycles[ri] is a position bitmask; cycle_idx[ri] lists the positions
   of cycle ri in order, terminated by -1. */
537 cycles[n_cycles] = (1 << i);
538 cycle_idx[n_cycles][k++] = i;
539 for (src_idx = i; ; src_idx = dst_idx) {
540 dst_idx = x87_on_stack(dst_state, x87_get_st_reg(state, src_idx));
542 if ((all_mask & (1 << dst_idx)) == 0)
545 cycle_idx[n_cycles][k++] = dst_idx;
546 cycles[n_cycles] |= (1 << dst_idx);
547 all_mask &= ~(1 << dst_idx);
549 cycle_idx[n_cycles][k] = -1;
553 /* no permutation needed */
557 /* Hmm: permutation needed */
558 DB((dbg, LEVEL_2, "\n%+F needs permutation: from\n", block));
559 DEBUG_ONLY(x87_dump_stack(state));
560 DB((dbg, LEVEL_2, " to\n"));
561 DEBUG_ONLY(x87_dump_stack(dst_state));
565 DB((dbg, LEVEL_2, "Need %d cycles\n", n_cycles));
566 for (ri = 0; ri < n_cycles; ++ri) {
567 DB((dbg, LEVEL_2, " Ring %d:\n ", ri));
568 for (k = 0; cycle_idx[ri][k] != -1; ++k)
569 DB((dbg, LEVEL_2, " st%d ->", cycle_idx[ri][k]));
570 DB((dbg, LEVEL_2, "\n"));
577 * Find the place node must be insert.
578 * We have only one successor block, so the last instruction should
581 before = sched_last(block);
582 assert(is_cfop(before));
/* Phase 2: emit fxch instructions per cycle. Bit 0 of cycles[ri] tells
   whether st(0) participates; if it does not, an extra fxch at the start
   (and another at the end, below) temporarily brings st(0) into the cycle. */
584 /* now do the permutations */
585 for (ri = 0; ri < n_cycles; ++ri) {
586 if ((cycles[ri] & 1) == 0) {
587 /* this cycle does not include the tos */
588 fxch = x87_fxch_shuffle(state, cycle_idx[ri][0], block, dst_block);
590 sched_add_after(after, fxch);
592 sched_add_before(before, fxch);
595 for (k = 1; cycle_idx[ri][k] != -1; ++k) {
596 fxch = x87_fxch_shuffle(state, cycle_idx[ri][k], block, dst_block);
598 sched_add_after(after, fxch);
600 sched_add_before(before, fxch);
603 if ((cycles[ri] & 1) == 0) {
604 /* this cycle does not include the tos: restore the tos */
605 fxch = x87_fxch_shuffle(state, cycle_idx[ri][0], block, dst_block);
606 sched_add_after(after, fxch);
613 * Create a fxch node before another node.
615 * @param state the x87 state
616 * @param n the node before the fxch
617 * @param pos exchange st(pos) with st(0)
618 * @param op_idx if >= 0, replace input op_idx of n with the fxch result
622 static ir_node *x87_create_fxch(x87_state *state, ir_node *n, int pos, int op_idx) {
623 ir_node *fxch, *pred;
/* Update the simulated stack first, then materialize the real fxch. */
626 x87_fxch(state, pos);
629 pred = get_irn_n(n, op_idx);
631 pred = x87_get_st_node(state, pos);
633 fxch = new_rd_ia32_fxch(NULL, get_irn_irg(n), get_nodes_block(n), pred, get_irn_mode(pred));
634 attr = get_ia32_attr(fxch);
/* x87[0] = st(pos) operand, x87[2] = st(0) result — same convention as
   x87_fxch_shuffle(). */
635 attr->x87[0] = &ia32_st_regs[pos];
636 attr->x87[2] = &ia32_st_regs[0];
639 set_irn_n(n, op_idx, fxch);
641 sched_add_before(n, fxch);
642 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fxch), attr->x87[0]->name, attr->x87[2]->name));
644 } /* x87_create_fxch */
647 * Create a fpush before node n.
649 * @param state the x87 state
650 * @param n the node before the fpush
651 * @param pos push st(pos) on stack
652 * @param op_idx if >= 0, replace input op_idx of n with the fpush result
654 static void x87_create_fpush(const arch_env_t *env, x87_state *state, ir_node *n, int pos, int op_idx) {
655 ir_node *fpush, *pred = get_irn_n(n, op_idx);
657 const arch_register_t *out = arch_get_irn_register(env, pred);
/* A push duplicates a value already on the stack, so the double-push
   variant is used deliberately here. */
659 x87_push_dbl(state, arch_register_get_index(out), pred);
661 fpush = new_rd_ia32_fpush(NULL, get_irn_irg(n), get_nodes_block(n), pred, get_irn_mode(pred));
662 attr = get_ia32_attr(fpush);
663 attr->x87[0] = &ia32_st_regs[pos];
664 attr->x87[2] = &ia32_st_regs[0];
666 set_irn_n(n, op_idx, fpush);
668 sched_add_before(n, fpush);
669 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fpush), attr->x87[0]->name, attr->x87[2]->name));
670 } /* x87_create_fpush */
673 * Create a fpop before node n.
675 * @param state the x87 state
676 * @param n the node before the fpop
677 * @param num pop 1 or 2 values
678 * @param pred node to use as predecessor of the fpop
680 * @return the fpop node
682 static ir_node *x87_create_fpop(const arch_env_t *env, x87_state *state, ir_node *n, int num, ir_node *pred) {
/* fpop always discards st(0); all three x87 attr slots refer to st(0). */
688 fpop = new_rd_ia32_fpop(NULL, get_irn_irg(n), get_nodes_block(n), mode_E);
689 attr = get_ia32_attr(fpop);
690 attr->x87[0] = &ia32_st_regs[0];
691 attr->x87[1] = &ia32_st_regs[0];
692 attr->x87[2] = &ia32_st_regs[0];
694 sched_add_before(n, fpop);
695 DB((dbg, LEVEL_1, "<<< %s %s\n", get_irn_opname(fpop), attr->x87[0]->name));
/* NOTE(review): looping for num == 2 happens in lines not visible here —
   confirm in the full source. */
701 } /* x87_create_fpop */
703 /* --------------------------------- liveness ------------------------------------------ */
706 * The liveness transfer function.
707 * Updates a live set over a single step from a given node to its predecessor.
708 * Everything defined at the node is removed from the set, the uses of the node get inserted.
710 * @param arch_env The architecture environment.
711 * @param irn The node at which liveness should be computed.
712 * @param live The bitset of registers live before @p irn. This set gets modified by updating it to
713 * the registers live after irn.
715 * @return The live bitset.
717 static unsigned vfp_liveness_transfer(const arch_env_t *arch_env, ir_node *irn, unsigned live)
720 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
/* Kill: the register defined by irn (if it is a vfp register). */
722 if (arch_irn_consider_in_reg_alloc(arch_env, cls, irn)) {
723 const arch_register_t *reg = arch_get_irn_register(arch_env, irn);
724 live &= ~(1 << arch_register_get_index(reg));
/* Gen: every float vfp operand of irn becomes live. */
727 for (i = 0, n = get_irn_arity(irn); i < n; ++i) {
728 ir_node *op = get_irn_n(irn, i);
730 if (mode_is_float(get_irn_mode(op)) && arch_irn_consider_in_reg_alloc(arch_env, cls, op)) {
731 const arch_register_t *reg = arch_get_irn_register(arch_env, op);
732 live |= 1 << arch_register_get_index(reg);
736 } /* vfp_liveness_transfer */
739 * Put all live virtual registers at the end of a block into a bitset.
741 * @param env the architecture environment
742 * @param lv the liveness information
743 * @param bl the block
745 * @return The live bitset at the end of this block
747 static unsigned vfp_liveness_end_of_block(const arch_env_t *env, be_lv_t *lv, const ir_node *bl)
751 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
/* Collect all vfp registers live-out of the block from the be liveness info. */
753 be_lv_foreach(lv, bl, be_lv_state_end, i) {
754 ir_node *irn = be_lv_get_irn(lv, bl, i);
755 if (arch_irn_consider_in_reg_alloc(env, cls, irn)) {
756 const arch_register_t *reg = arch_get_irn_register(env, irn);
757 live |= 1 << arch_register_get_index(reg);
762 } /* vfp_liveness_end_of_block */
764 /** get the register mask from an arch_register */
765 #define REGMASK(reg) (1 << (arch_register_get_index(reg)))
768 * Return a bitset of argument registers which are live at the end of a node.
770 * @param sim the simulator handle
771 * @param pos the node
772 * @param kill kill mask for the output registers
774 * @return The live bitset.
/* Pure cache lookup into sim->live (filled by update_liveness()), with the
   node's own definitions masked out via @p kill. */
776 static unsigned vfp_live_args_after(x87_simulator *sim, const ir_node *pos, unsigned kill)
778 unsigned idx = get_irn_idx(pos);
780 assert(idx < sim->n_idx);
781 return sim->live[idx] & ~kill;
782 } /* vfp_live_args_after */
785 * Calculate the liveness for a whole block and cache it.
787 * @param sim the simulator handle
788 * @param lv the liveness handle
789 * @param blk the block
791 static void update_liveness(x87_simulator *sim, be_lv_t *lv, ir_node *blk) {
/* Standard backward dataflow: start from the live-out set and step back
   through the schedule, caching the live-after set per node index. */
792 unsigned live = vfp_liveness_end_of_block(sim->env, lv, blk);
796 /* now iterate through the block backward and cache the results */
797 sched_foreach_reverse(blk, irn) {
798 /* stop at the first Phi: this produces the live-in */
802 idx = get_irn_idx(irn);
803 sim->live[idx] = live;
805 live = vfp_liveness_transfer(sim->env, irn, live);
/* The block's own index caches its live-in set. */
807 idx = get_irn_idx(blk);
808 sim->live[idx] = live;
809 } /* update_liveness */
812 * Returns true if a register is live in a set.
814 * @param reg_idx the vfp register index
815 * @param live a live bitset
817 #define is_vfp_live(reg_idx, live) ((live) & (1 << (reg_idx)))
821 * Dump liveness info.
823 * @param live the live bitset
/* Debug-only (see #endif below): print the 8 possible vfp registers
   contained in the live mask. */
825 static void vfp_dump_live(unsigned live) {
828 DB((dbg, LEVEL_2, "Live registers here: \n"));
829 for (i = 0; i < 8; ++i) {
830 if (live & (1 << i)) {
831 DB((dbg, LEVEL_2, " vf%d", i));
834 DB((dbg, LEVEL_2, "\n"));
835 } /* vfp_dump_live */
836 #endif /* DEBUG_libfirm */
838 /* --------------------------------- simulators ---------------------------------------- */
/* Swap two int lvalues; do/while(0) keeps it statement-safe. */
840 #define XCHG(a, b) do { int t = (a); (a) = (b); (b) = t; } while (0)
843 * Simulate a virtual binop.
845 * @param state the x87 state
846 * @param n the node that should be simulated (and patched)
847 * @param env the architecture environment
848 * @param tmpl the template containing the 4 possible x87 opcodes
/* Case analysis on the liveness of both operands selects one of the four
   template opcodes (normal/reverse, with/without pop) and inserts the
   fxch/fpush instructions needed to bring the operands into position. */
850 static int sim_binop(x87_state *state, ir_node *n, const arch_env_t *env, const exchange_tmpl *tmpl) {
851 int op2_idx, op1_idx = -1;
852 int out_idx, do_pop =0;
855 const arch_register_t *op1 = arch_get_irn_register(env, get_irn_n(n, BINOP_IDX_1));
856 const arch_register_t *op2 = arch_get_irn_register(env, get_irn_n(n, BINOP_IDX_2));
857 const arch_register_t *out = arch_get_irn_register(env, n);
858 unsigned live = vfp_live_args_after(state->sim, n, REGMASK(out));
860 DB((dbg, LEVEL_1, ">>> %+F %s, %s -> %s\n", n,
861 arch_register_get_name(op1), arch_register_get_name(op2),
862 arch_register_get_name(out)));
863 DEBUG_ONLY(vfp_dump_live(live));
865 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
866 op2_idx = x87_on_stack(state, arch_register_get_index(op2));
868 if (arch_register_get_index(op2) != REG_VFP_NOREG) {
869 /* second operand is a vfp register */
871 if (is_vfp_live(arch_register_get_index(op2), live)) {
872 /* Second operand is live. */
874 if (is_vfp_live(arch_register_get_index(op1), live)) {
875 /* Both operands are live: push the first one.
876 This works even for op1 == op2. */
877 x87_create_fpush(env, state, n, op2_idx, BINOP_IDX_2);
878 out_idx = op2_idx = 0;
880 dst = tmpl->normal_op;
884 /* Second live, first operand is dead here, bring it to tos. */
886 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
890 op1_idx = out_idx = 0;
891 dst = tmpl->normal_op;
896 /* Second operand is dead. */
897 if (is_vfp_live(arch_register_get_index(op1), live)) {
898 /* First operand is live: bring second to tos. */
900 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
904 op2_idx = out_idx = 0;
905 dst = tmpl->reverse_op;
909 /* Both operands are dead here, pop them from the stack. */
912 if (op1_idx == op2_idx) {
913 /* Both are identical, no pop needed. */
914 dst = tmpl->normal_op;
918 dst = tmpl->normal_pop_op;
922 else if (op1_idx == 0) {
/* op1 already on tos: swap roles and use the reverse opcode. */
924 XCHG(op2_idx, op1_idx);
925 if (op1_idx == op2_idx) {
926 /* Both are identical, no pop needed. */
927 dst = tmpl->reverse_op;
931 dst = tmpl->reverse_pop_op;
936 /* Bring the second on top. */
937 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
938 if (op1_idx == op2_idx) {
939 /* Both are identical, no pop needed. */
940 out_idx = op1_idx = op2_idx = 0;
941 dst = tmpl->normal_op;
947 dst = tmpl->normal_pop_op;
955 /* second operand is an address mode */
956 if (is_vfp_live(arch_register_get_index(op1), live)) {
957 /* first operand is live: push it here */
958 x87_create_fpush(env, state, n, op1_idx, BINOP_IDX_1);
961 /* first operand is dead: bring it to tos */
963 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
965 op1_idx = out_idx = 0;
966 dst = tmpl->normal_op;
/* Record the result register at its stack slot and patch the opcode. */
970 x87_set_st(state, arch_register_get_index(out), x87_patch_insn(n, dst), out_idx);
974 /* patch the operation */
975 attr = get_ia32_attr(n);
976 attr->x87[0] = op1 = &ia32_st_regs[op1_idx];
978 attr->x87[1] = op2 = &ia32_st_regs[op2_idx];
979 attr->x87[2] = out = &ia32_st_regs[out_idx];
982 DB((dbg, LEVEL_1, "<<< %s %s, %s -> %s\n", get_irn_opname(n),
983 arch_register_get_name(op1), arch_register_get_name(op2),
984 arch_register_get_name(out)));
986 DB((dbg, LEVEL_1, "<<< %s %s, [AM] -> %s\n", get_irn_opname(n),
987 arch_register_get_name(op1),
988 arch_register_get_name(out)));
994 * Simulate a virtual Unop.
996 * @param state the x87 state
997 * @param n the node that should be simulated (and patched)
998 * @param env the architecture environment
999 * @param op the x87 opcode that will replace n's opcode
/* x87 unops work in-place on st(0): push the operand if it is still live,
   otherwise fxch it to the top, then patch the opcode. */
1001 static int sim_unop(x87_state *state, ir_node *n, const arch_env_t *env, ir_op *op) {
1002 int op1_idx, out_idx;
1003 const arch_register_t *op1 = arch_get_irn_register(env, get_irn_n(n, UNOP_IDX));
1004 const arch_register_t *out = arch_get_irn_register(env, n);
1006 unsigned live = vfp_live_args_after(state->sim, n, REGMASK(out));
1008 DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, out->name));
1009 DEBUG_ONLY(vfp_dump_live(live));
1011 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1013 if (is_vfp_live(arch_register_get_index(op1), live)) {
1014 /* push the operand here */
1015 x87_create_fpush(env, state, n, op1_idx, UNOP_IDX);
1018 /* operand is dead, bring it to tos */
1020 x87_create_fxch(state, n, op1_idx, UNOP_IDX);
/* Result overwrites st(0). */
1023 x87_set_tos(state, arch_register_get_index(out), x87_patch_insn(n, op));
1024 op1_idx = out_idx = 0;
1025 attr = get_ia32_attr(n);
1026 attr->x87[0] = op1 = &ia32_st_regs[0];
1027 attr->x87[2] = out = &ia32_st_regs[0];
1028 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), out->name));
1034 * Simulate a virtual Load instruction.
1036 * @param state the x87 state
1037 * @param n the node that should be simulated (and patched)
1038 * @param env the architecture environment
1039 * @param op the x87 opcode that will replace n's opcode
/* A load always pushes its result onto the x87 stack (result in st(0)). */
1041 static int sim_load(x87_state *state, ir_node *n, const arch_env_t *env, ir_op *op) {
1042 const arch_register_t *out = arch_get_irn_register(env, n);
1045 DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, arch_register_get_name(out)));
1046 x87_push(state, arch_register_get_index(out), x87_patch_insn(n, op));
1047 attr = get_ia32_attr(n);
1048 attr->x87[2] = out = &ia32_st_regs[0];
1049 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), arch_register_get_name(out)));
1055 * Rewire all users of @p old_val to @p new_val iff they are scheduled after @p store.
1057 * @param store The store
1058 * @param old_val The former value
1059 * @param new_val The new value
/* Uses the _safe edge iterator because set_irn_n() mutates the out-edge
   list while we walk it. */
1061 static void collect_and_rewire_users(ir_node *store, ir_node *old_val, ir_node *new_val) {
1062 const ir_edge_t *edge, *ne;
1064 foreach_out_edge_safe(old_val, edge, ne) {
1065 ir_node *user = get_edge_src_irn(edge);
1067 if (! user || user == store)
1070 /* if the user is scheduled after the store: rewire */
1071 if (sched_is_scheduled(user) && sched_comes_after(store, user)) {
1073 /* find the input of the user pointing to the old value */
1074 for (i = get_irn_arity(user) - 1; i >= 0; i--) {
1075 if (get_irn_n(user, i) == old_val)
1076 set_irn_n(user, i, new_val);
1080 } /* collect_and_rewire_users */
1083 * Simulate a virtual Store.
1085 * @param state the x87 state
1086 * @param n the node that should be simulated (and patched)
1087 * @param env the architecture environment
1088 * @param op the x87 store opcode
1089 * @param op_p the x87 store and pop opcode
1091 static int sim_store(x87_state *state, ir_node *n, const arch_env_t *env, ir_op *op, ir_op *op_p) {
1092 ir_node *val = get_irn_n(n, STORE_VAL_IDX);
1093 const arch_register_t *op2 = arch_get_irn_register(env, val);
1094 unsigned live = vfp_live_args_after(state->sim, n, 0);
1100 op2_idx = x87_on_stack(state, arch_register_get_index(op2));
1101 assert(op2_idx >= 0);
1103 DB((dbg, LEVEL_1, ">>> %+F %s ->\n", n, arch_register_get_name(op2)));
1105 mode = get_ia32_ls_mode(n);
1106 depth = x87_get_depth(state);
1109 We can only store the tos to memory.
1110 A store of mode_E with free registers
1111 pushes value to tos, so skip it here.
1113 if (! (mode == mode_E && depth < N_x87_REGS) && op2_idx != 0)
1114 x87_create_fxch(state, n, op2_idx, STORE_VAL_IDX);
1116 if (is_vfp_live(arch_register_get_index(op2), live)) {
1118 Problem: fst doesn't support mode_E (spills), only fstp does
1120 - stack not full: push value and fstp
1121 - stack full: fstp value and load again
1123 if (mode == mode_E) {
1124 if (depth < N_x87_REGS) {
1125 /* ok, we have a free register: push + fstp */
1126 x87_create_fpush(env, state, n, op2_idx, STORE_VAL_IDX);
1128 x87_patch_insn(n, op_p);
1131 ir_node *vfld, *mem, *block, *rproj, *mproj;
1134 /* stack full here: need fstp + load */
1136 x87_patch_insn(n, op_p);
/* Build a vfld that reloads the just-stored value so that later users of
   the (still live) value see it again. */
1138 block = get_nodes_block(n);
1139 irg = get_irn_irg(n);
1140 vfld = new_rd_ia32_vfld(NULL, irg, block, get_irn_n(n, 0), get_irn_n(n, 1), new_rd_NoMem(irg));
1142 /* copy all attributes */
1143 set_ia32_frame_ent(vfld, get_ia32_frame_ent(n));
1144 if (is_ia32_use_frame(n))
1145 set_ia32_use_frame(vfld);
1146 set_ia32_am_flavour(vfld, get_ia32_am_flavour(n));
1147 set_ia32_op_type(vfld, ia32_am_Source);
1148 add_ia32_am_offs(vfld, get_ia32_am_offs(n));
1149 set_ia32_am_sc(vfld, get_ia32_am_sc(n));
1150 set_ia32_ls_mode(vfld, get_ia32_ls_mode(n));
1152 rproj = new_r_Proj(irg, block, vfld, get_ia32_ls_mode(vfld), pn_ia32_vfld_res);
1153 mproj = new_r_Proj(irg, block, vfld, mode_M, pn_ia32_vfld_M);
1154 mem = get_irn_Proj_for_mode(n, mode_M);
1156 assert(mem && "Store memory not found");
1158 arch_set_irn_register(env, rproj, op2);
1160 /* reroute all former users of the store memory to the load memory */
1161 edges_reroute(mem, mproj, irg);
1162 /* set the memory input of the load to the store memory */
1163 set_irn_n(vfld, 2, mem);
1165 sched_add_after(n, vfld);
1166 sched_add_after(vfld, rproj);
1168 /* rewire all users, scheduled after the store, to the loaded value */
1169 collect_and_rewire_users(n, val, rproj);
1175 /* mode != mode_E -> use normal fst (value stays live on the stack) */
1176 x87_patch_insn(n, op);
/* Value is dead: use the popping store. */
1181 x87_patch_insn(n, op_p);
1184 attr = get_ia32_attr(n);
1185 attr->x87[1] = op2 = &ia32_st_regs[0];
1186 DB((dbg, LEVEL_1, "<<< %s %s ->\n", get_irn_opname(n), arch_register_get_name(op2)));
1192 * Simulate a virtual Phi.
1193 * Just for cosmetic reasons change the mode of Phi nodes to mode_E.
1195 * @param state the x87 state
1196 * @param n the node that should be simulated (and patched)
1197 * @param env the architecture environment
1199 static int sim_Phi(x87_state *state, ir_node *n, const arch_env_t *env) {
1200 ir_mode *mode = get_irn_mode(n);
/* Float Phis carry 80-bit extended values on the x87 stack. */
1202 if (mode_is_float(mode))
1203 set_irn_mode(n, mode_E);
/* Generator macros: stamp out one sim_<op> wrapper per virtual opcode,
   each delegating to the generic sim_binop/sim_load/sim_unop/sim_store. */
1209 #define _GEN_BINOP(op, rev) \
1210 static int sim_##op(x87_state *state, ir_node *n, const arch_env_t *env) { \
1211 exchange_tmpl tmpl = { op_ia32_##op, op_ia32_##rev, op_ia32_##op##p, op_ia32_##rev##p }; \
1212 return sim_binop(state, n, env, &tmpl); \
1215 #define GEN_BINOP(op) _GEN_BINOP(op, op)
1216 #define GEN_BINOPR(op) _GEN_BINOP(op, op##r)
1218 #define GEN_LOAD2(op, nop) \
1219 static int sim_##op(x87_state *state, ir_node *n, const arch_env_t *env) { \
1220 return sim_load(state, n, env, op_ia32_##nop); \
1223 #define GEN_LOAD(op) GEN_LOAD2(op, op)
1225 #define GEN_UNOP(op) \
1226 static int sim_##op(x87_state *state, ir_node *n, const arch_env_t *env) { \
1227 return sim_unop(state, n, env, op_ia32_##op); \
1230 #define GEN_STORE(op) \
1231 static int sim_##op(x87_state *state, ir_node *n, const arch_env_t *env) { \
1232 return sim_store(state, n, env, op_ia32_##op, op_ia32_##op##p); \
/* fConst loads are simulated as fldConst loads. */
1251 GEN_LOAD2(fConst, fldConst)
1257 * Simulate a fCondJmp.
1259 * @param state the x87 state
1260 * @param n the node that should be simulated (and patched)
1261 * @param env the architecture environment
1263 static int sim_fCondJmp(x87_state *state, ir_node *n, const arch_env_t *env) {
1264 int op2_idx, op1_idx = -1, pop_cnt = 0;
1267 const arch_register_t *op1 = arch_get_irn_register(env, get_irn_n(n, BINOP_IDX_1));
1268 const arch_register_t *op2 = arch_get_irn_register(env, get_irn_n(n, BINOP_IDX_2));
1269 unsigned live = vfp_live_args_after(state->sim, n, 0);
1271 DB((dbg, LEVEL_1, ">>> %+F %s, %s\n", n,
1272 arch_register_get_name(op1), arch_register_get_name(op2)));
1273 DEBUG_ONLY(vfp_dump_live(live));
/* Translate both virtual registers into current stack positions. */
1275 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1276 op2_idx = x87_on_stack(state, arch_register_get_index(op2));
1278 /* BEWARE: check for comp a,a cases, they might happen */
/* Two top-level cases: op2 is a real vfp register, or an address-mode
   operand (index == REG_VFP_NOREG, handled in the else branch below). */
1279 if (arch_register_get_index(op2) != REG_VFP_NOREG) {
1280 /* second operand is a vfp register */
1282 if (is_vfp_live(arch_register_get_index(op2), live)) {
1283 /* second operand is live */
1285 if (is_vfp_live(arch_register_get_index(op1), live)) {
1286 /* both operands are live: move one of them to tos */
1288 XCHG(op2_idx, op1_idx);
1289 dst = op_ia32_fcomrJmp;
1291 else if (op1_idx == 0) {
1292 dst = op_ia32_fcomJmp;
1295 /* bring the first on top */
1296 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1297 if (op1_idx == op2_idx)
1300 dst = op_ia32_fcomJmp;
1304 /* second live, first operand is dead here, bring it to tos.
1305 This means further, op1_idx != op2_idx. */
1307 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1312 dst = op_ia32_fcompJmp;
1317 /* second operand is dead */
1318 if (is_vfp_live(arch_register_get_index(op1), live)) {
1319 /* first operand is live: bring second to tos.
1320 This means further, op1_idx != op2_idx. */
1322 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1327 dst = op_ia32_fcomrpJmp;
1331 /* both operands are dead here, check first for identity. */
1332 if (op1_idx == op2_idx) {
1333 /* identically, one one needed */
1335 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1336 op1_idx = op2_idx = 0;
1338 dst = op_ia32_fcompJmp;
1341 /* different, move them to st and st(1) and pop both.
1342 The tricky part is to get one into st(1).*/
1343 else if (op2_idx == 1) {
1344 /* good, second operand is already in the right place, move the first */
1346 /* bring the first on top */
1347 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1350 dst = op_ia32_fcomppJmp;
1353 else if (op1_idx == 1) {
1354 /* good, first operand is already in the right place, move the second */
1356 /* bring the first on top */
1357 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1360 dst = op_ia32_fcomrppJmp;
1364 /* if one is already the TOS, we need two fxch */
1366 /* first one is TOS, move to st(1) */
1367 x87_create_fxch(state, n, 1, BINOP_IDX_1);
1369 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1371 dst = op_ia32_fcomrppJmp;
1374 else if (op2_idx == 0) {
1375 /* second one is TOS, move to st(1) */
1376 x87_create_fxch(state, n, 1, BINOP_IDX_2);
1378 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1380 dst = op_ia32_fcomrppJmp;
1384 /* none of them is either TOS or st(1), 3 fxch needed */
1385 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1386 x87_create_fxch(state, n, 1, BINOP_IDX_2);
1387 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1390 dst = op_ia32_fcomppJmp;
1398 /* second operand is an address mode */
1399 if (is_vfp_live(arch_register_get_index(op1), live)) {
1400 /* first operand is live: bring it to TOS */
1402 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1405 dst = op_ia32_fcomJmp;
1408 /* first operand is dead: bring it to tos */
1410 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1414 dst = op_ia32_fcompJmp;
/* dst now names the concrete fcom/fcomp/fcompp/... variant chosen above;
   rewrite the virtual node to it and fill in the real st registers. */
1418 x87_patch_insn(n, dst);
1424 /* patch the operation */
1425 attr = get_ia32_attr(n);
1426 attr->x87[1] = op1 = &ia32_st_regs[op1_idx];
1428 attr->x87[2] = op2 = &ia32_st_regs[op2_idx];
1431 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(n),
1432 arch_register_get_name(op1), arch_register_get_name(op2)));
1434 DB((dbg, LEVEL_1, "<<< %s %s, [AM]\n", get_irn_opname(n),
1435 arch_register_get_name(op1)));
1438 } /* sim_fCondJmp */
1441 * Simulate a be_Copy.
1443 * @param state the x87 state
1444 * @param n the node that should be simulated (and patched)
1445 * @param env the architecture environment
1447 static int sim_Copy(x87_state *state, ir_node *n, const arch_env_t *env) {
1448 ir_mode *mode = get_irn_mode(n);
/* Non-float copies need no x87 handling; everything below is float-only. */
1450 if (mode_is_float(mode)) {
1451 ir_node *pred = get_irn_n(n, 0);
1452 ir_op *op = get_irn_op(pred);
1453 const arch_register_t *out = arch_get_irn_register(env, n);
1454 const arch_register_t *op1 = arch_get_irn_register(env, pred);
1455 ir_node *node, *next;
1457 int op1_idx, out_idx;
1458 unsigned live = vfp_live_args_after(state->sim, n, REGMASK(out));
1460 DB((dbg, LEVEL_1, ">>> %+F %s -> %s\n", n,
1461 arch_register_get_name(op1), arch_register_get_name(out)));
1462 DEBUG_ONLY(vfp_dump_live(live));
1464 /* FIXME: check here for all possible constants */
1465 if (op == op_ia32_fldz || op == op_ia32_fld1) {
1466 /* copy a constant */
/* Copying a constant: re-materialize it with a fresh fldz/fld1 instead of
   duplicating the stack slot.  set_irn_op() restores the original opcode. */
1467 node = new_rd_ia32_fldz(get_irn_dbg_info(n), get_irn_irg(n), get_nodes_block(n), mode);
1468 set_irn_op(node, op);
1469 arch_set_irn_register(env, node, out);
1471 x87_push(state, arch_register_get_index(out), node);
1473 attr = get_ia32_attr(node);
1474 attr->x87[2] = out = &ia32_st_regs[0];
1476 next = sched_next(n);
1479 sched_add_before(next, node);
1480 DB((dbg, LEVEL_1, ">>> %+F -> %s\n", node, arch_register_get_name(out)));
1484 /* handle the infamous unknown value */
1485 if (arch_register_get_index(op1) == REG_VFP_UKNWN) {
1486 /* This happens before Phi nodes */
1487 if (x87_state_is_empty(state)) {
1488 /* create some value */
1489 x87_patch_insn(n, op_ia32_fldz);
1490 attr = get_ia32_attr(n);
1491 attr->x87[2] = out = &ia32_st_regs[0];
1492 DB((dbg, LEVEL_1, "<<< %+F -> %s\n", n,
1493 arch_register_get_name(out)));
/* Stack not empty: duplicate some existing value with an fpush instead. */
1496 node = new_rd_ia32_fpush(get_irn_dbg_info(n), get_irn_irg(n), get_nodes_block(n), get_irn_n(n, 0), mode);
1497 arch_set_irn_register(env, node, out);
1499 x87_push(state, arch_register_get_index(out), node);
1501 attr = get_ia32_attr(node);
1502 attr->x87[0] = op1 =
1503 attr->x87[2] = out = &ia32_st_regs[0];
1505 next = sched_next(n);
1508 sched_add_before(next, node);
1509 DB((dbg, LEVEL_1, "<<< %+F %s -> %s\n", node,
1510 arch_register_get_name(op1),
1511 arch_register_get_name(out)));
1516 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1518 if (is_vfp_live(arch_register_get_index(op1), live)) {
1519 /* operand is still live,a real copy */
1520 node = new_rd_ia32_fpush(get_irn_dbg_info(n), get_irn_irg(n), get_nodes_block(n), get_irn_n(n, 0), mode);
1521 arch_set_irn_register(env, node, out);
1523 x87_push(state, arch_register_get_index(out), node);
1525 attr = get_ia32_attr(node);
1526 attr->x87[0] = op1 = &ia32_st_regs[op1_idx];
1527 attr->x87[2] = out = &ia32_st_regs[0];
1529 next = sched_next(n);
1532 sched_add_before(next, node);
1533 DB((dbg, LEVEL_1, ">>> %+F %s -> %s\n", node, op1->name, out->name));
/* op1 is dead after the copy: out may take over op1's stack slot. */
1536 out_idx = x87_on_stack(state, arch_register_get_index(out));
1538 if (out_idx >= 0 && out_idx != op1_idx) {
1539 /* op1 must be killed and placed where out is */
1541 /* best case, simple remove and rename */
1542 x87_patch_insn(n, op_ia32_Pop);
1543 attr = get_ia32_attr(n);
1544 attr->x87[0] = op1 = &ia32_st_regs[0];
1547 x87_set_st(state, arch_register_get_index(out), n, op1_idx - 1);
1550 /* move op1 to tos, store and pop it */
1552 x87_create_fxch(state, n, op1_idx, 0);
1555 x87_patch_insn(n, op_ia32_Pop);
1556 attr = get_ia32_attr(n);
1557 attr->x87[0] = op1 = &ia32_st_regs[out_idx];
1560 x87_set_st(state, arch_register_get_index(out), n, out_idx - 1);
1562 DB((dbg, LEVEL_1, ">>> %+F %s\n", n, op1->name));
1565 /* just a virtual copy */
1566 x87_set_st(state, arch_register_get_index(out), get_unop_op(n), op1_idx);
1568 DB((dbg, LEVEL_1, ">>> KILLED %s\n", get_irn_opname(n)));
1569 exchange(n, get_unop_op(n));
1578 * Simulate a be_Call.
1580 * @param state the x87 state
1581 * @param n the node that should be simulated
1582 * @param env the architecture environment
1584 static int sim_Call(x87_state *state, ir_node *n, const arch_env_t *env) {
1585 ir_type *call_tp = be_Call_get_type(n);
1587 /* at the begin of a call the x87 state should be empty */
1588 assert(state->depth == 0 && "stack not empty before call");
/* The x87 calling convention requires all fp stack slots to be free at a
   call; the ABI returns a float result in st(0), which we model by pushing
   a slot for the call node below. */
1591 * If the called function returns a float, it is returned in st(0).
1592 * This even happens if the return value is NOT used.
1593 * Moreover, only one return result is supported.
1595 if (get_method_n_ress(call_tp) > 0) {
1596 ir_type *res_type = get_method_res_type(call_tp, 0);
1597 ir_mode *mode = get_type_mode(res_type);
1599 if (mode && mode_is_float(mode)) {
1601 * TODO: what to push here? The result might be unused and currently
1602 * we have no possibility to detect this :-(
1604 x87_push(state, 0, n);
1612 * Simulate a be_Spill.
1614 * @param state the x87 state
1615 * @param n the node that should be simulated (and patched)
1616 * @param env the architecture environment
1618 * Should not happen, spills are lowered before x87 simulator see them.
1620 static int sim_Spill(x87_state *state, ir_node *n, const arch_env_t *env) {
/* Unreachable in a correct pipeline; in NDEBUG builds we still fall back
   to treating the Spill like an fst. */
1621 assert(0 && "Spill not lowered");
1622 return sim_fst(state, n, env);
1626 * Simulate a be_Reload.
1628 * @param state the x87 state
1629 * @param n the node that should be simulated (and patched)
1630 * @param env the architecture environment
1632 * Should not happen, reloads are lowered before x87 simulator see them.
1634 static int sim_Reload(x87_state *state, ir_node *n, const arch_env_t *env) {
/* Unreachable in a correct pipeline; in NDEBUG builds we still fall back
   to treating the Reload like an fld. */
1635 assert(0 && "Reload not lowered");
1636 return sim_fld(state, n, env);
1640 * Simulate a be_Return.
1642 * @param state the x87 state
1643 * @param n the node that should be simulated (and patched)
1644 * @param env the architecture environment
1646 static int sim_Return(x87_state *state, ir_node *n, const arch_env_t *env) {
1647 int n_res = be_Return_get_n_rets(n);
1648 int i, n_float_res = 0;
1650 /* only floating point return values must resist on stack */
1651 for (i = 0; i < n_res; ++i) {
1652 ir_node *res = get_irn_n(n, be_pos_Return_val + i);
1654 if (mode_is_float(get_irn_mode(res)))
/* At the return point exactly the float results may remain on the stack;
   anything else would leave the fp stack unbalanced. */
1657 assert(x87_get_depth(state) == n_float_res);
1659 /* pop them virtually */
1660 for (i = n_float_res - 1; i >= 0; --i)
/* Input/output register pair describing one operand of a Perm
   (presumably a helper for sim_Perm below — struct tail not visible here). */
1666 typedef struct _perm_data_t {
1667 const arch_register_t *in;
1668 const arch_register_t *out;
1672 * Simulate a be_Perm.
1674 * @param state the x87 state
1675 * @param irn the node that should be simulated (and patched)
1676 * @param env the architecture environment
1678 static int sim_Perm(x87_state *state, ir_node *irn, const arch_env_t *env) {
1680 ir_node *pred = get_irn_n(irn, 0);
1682 const ir_edge_t *edge;
1684 /* handle only floating point Perms */
1685 if (! mode_is_float(get_irn_mode(pred)))
1688 DB((dbg, LEVEL_1, ">>> %+F\n", irn));
1690 /* Perm is a pure virtual instruction on x87.
1691 All inputs must be on the FPU stack and are pairwise
1692 different from each other.
1693 So, all we need to do is to permutate the stack state. */
1694 n = get_irn_arity(irn);
1695 NEW_ARR_A(int, stack_pos, n);
1697 /* collect old stack positions */
1698 for (i = 0; i < n; ++i) {
1699 const arch_register_t *inreg = arch_get_irn_register(env, get_irn_n(irn, i));
1700 int idx = x87_on_stack(state, arch_register_get_index(inreg));
1702 assert(idx >= 0 && "Perm argument not on x87 stack");
/* Rebind each output Proj's register to the stack slot of the matching
   input (Proj number == input position); no real instruction is emitted. */
1706 /* now do the permutation */
1707 foreach_out_edge(irn, edge) {
1708 ir_node *proj = get_edge_src_irn(edge);
1709 const arch_register_t *out = arch_get_irn_register(env, proj);
1710 long num = get_Proj_proj(proj);
1712 assert(0 <= num && num < n && "More Proj's than Perm inputs");
1713 x87_set_st(state, arch_register_get_index(out), proj, stack_pos[(unsigned)num]);
1715 DB((dbg, LEVEL_1, "<<< %+F\n", irn));
1721 * Kill any dead registers at block start by popping them from the stack.
1723 * @param sim the simulator handle
1724 * @param block the current block
1725 * @param start_state the x87 state at the begin of the block
1727 static x87_state *x87_kill_deads(x87_simulator *sim, ir_node *block, x87_state *start_state) {
1728 x87_state *state = start_state;
1729 ir_node *first_insn = sched_first(block);
1730 ir_node *keep = NULL;
1731 unsigned live = vfp_live_args_after(sim, block, 0);
1733 int i, depth, num_pop;
/* Build a bitmask of stack slots whose vfp register is dead at block entry;
   bit i corresponds to st(i). */
1736 depth = x87_get_depth(state);
1737 for (i = depth - 1; i >= 0; --i) {
1738 int reg = x87_get_st_reg(state, i);
1740 if (! is_vfp_live(reg, live))
1741 kill_mask |= (1 << i);
1745 /* create a new state, will be changed */
1746 state = x87_clone_state(sim, state);
1748 DB((dbg, LEVEL_1, "Killing deads:\n"));
1749 DEBUG_ONLY(vfp_dump_live(live));
1750 DEBUG_ONLY(x87_dump_stack(state));
1752 /* now kill registers */
1754 /* we can only kill from TOS, so bring them up */
1755 if (! (kill_mask & 1)) {
1756 /* search from behind, because we can to a double-pop */
1757 for (i = depth - 1; i >= 0; --i) {
1758 if (kill_mask & (1 << i)) {
1759 kill_mask &= ~(1 << i);
1766 x87_set_st(state, -1, keep, i);
1767 keep = x87_create_fxch(state, first_insn, i, -1);
1770 keep = x87_get_st_node(state, 0);
/* Pop one or two dead values at once; fpop* return a node that must be
   kept alive so the scheduler does not drop it. */
1772 if ((kill_mask & 3) == 3) {
1773 /* we can do a double-pop */
1777 /* only a single pop */
1782 kill_mask >>= num_pop;
1783 keep = x87_create_fpop(sim->env, state, first_insn, num_pop, keep);
1785 add_End_keepalive(get_irg_end(get_irn_irg(block)), keep);
1788 } /* x87_kill_deads */
1791 * Run a simulation and fix all virtual instructions for a block.
1793 * @param sim the simulator handle
1794 * @param block the current block
1796 * @return non-zero if simulation is complete,
1797 * zero if the simulation must be rerun
1799 static int x87_simulate_block(x87_simulator *sim, ir_node *block) {
1801 blk_state *bl_state = x87_get_bl_state(sim, block);
1802 x87_state *state = bl_state->begin;
1803 const ir_edge_t *edge;
1804 ir_node *start_block;
1806 /* if we have no assigned start state, we must wait ... */
1810 assert(bl_state->end == NULL);
1812 DB((dbg, LEVEL_1, "Simulate %+F\n", block));
1813 DB((dbg, LEVEL_2, "State at Block begin:\n ")); DEBUG_ONLY(x87_dump_stack(state));
1815 /* at block begin, kill all dead registers */
1816 state = x87_kill_deads(sim, block, state);
1818 /* beware, n might changed */
/* Walk the schedule; only opcodes whose generic function pointer was set in
   x87_init_simulator() get simulated. */
1819 for (n = sched_first(block); !sched_is_end(n); n = next) {
1820 ir_op *op = get_irn_op(n);
1822 next = sched_next(n);
1823 if (op->ops.generic) {
1825 sim_func func = (sim_func)op->ops.generic;
1827 /* have work to do */
1828 if (state == bl_state->begin) {
1829 /* create a new state, will be changed */
1830 state = x87_clone_state(sim, state);
1834 node_inserted = (*func)(state, n, sim->env);
1837 sim_func might have added additional nodes after n,
1839 beware: n must not be changed by sim_func
1840 (i.e. removed from schedule) in this case
1843 next = sched_next(n);
1847 start_block = get_irg_start_block(get_irn_irg(block));
1849 /* check if the state must be shuffled */
1850 foreach_block_succ(block, edge) {
1851 ir_node *succ = get_edge_src_irn(edge);
1852 blk_state *succ_state = x87_get_bl_state(sim, succ);
1854 if (succ_state->begin && succ != start_block) {
1855 /* There is already a begin state for this block, bad.
1856 Do the necessary permutations.
1857 Note that critical edges are removed, so this is always possible. */
1858 x87_shuffle(sim, block, state, succ, succ_state->begin);
1860 /* Note further, that there can be only one such situation,
1861 so we can break here. */
1865 bl_state->end = state;
1867 /* now propagate the state to all successor blocks */
1868 foreach_block_succ(block, edge) {
1869 ir_node *succ = get_edge_src_irn(edge);
1870 blk_state *succ_state = x87_get_bl_state(sim, succ);
1872 if (! succ_state->begin)
1873 succ_state->begin = state;
1876 DB((dbg, LEVEL_2, "State at Block end:\n ")); DEBUG_ONLY(x87_dump_stack(state));
1879 } /* x87_simulate_block */
1882 * Create a new x87 simulator.
1884 * @param sim a simulator handle, will be initialized
1885 * @param irg the current graph
1886 * @param env the architecture environment
1888 static void x87_init_simulator(x87_simulator *sim, ir_graph *irg, const arch_env_t *env)
1890 obstack_init(&sim->obst);
1891 sim->blk_states = pmap_create();
1893 sim->n_idx = get_irg_last_idx(irg);
1894 sim->live = obstack_alloc(&sim->obst, sizeof(*sim->live) * sim->n_idx);
1896 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.x87");
1898 DB((dbg, LEVEL_1, "--------------------------------\n"
1899 "x87 Simulator started for %+F\n", irg));
1901 /* set the generic function pointer of instruction we must simulate */
1902 clear_irp_opcodes_generic_func();
/* The ASSOC* macros bind the matching sim_<op>() handler to each opcode's
   generic function pointer, which x87_simulate_block() dispatches on. */
1904 #define ASSOC(op) (op_ ## op)->ops.generic = (op_func)(sim_##op)
1905 #define ASSOC_IA32(op) (op_ia32_v ## op)->ops.generic = (op_func)(sim_##op)
1906 #define ASSOC_BE(op) (op_be_ ## op)->ops.generic = (op_func)(sim_##op)
1923 ASSOC_IA32(fCondJmp);
1934 } /* x87_init_simulator */
1937 * Destroy a x87 simulator.
1939 * @param sim the simulator handle
1941 static void x87_destroy_simulator(x87_simulator *sim) {
1942 pmap_destroy(sim->blk_states);
1943 obstack_free(&sim->obst, NULL);
1944 DB((dbg, LEVEL_1, "x87 Simulator stopped\n\n"));
1945 } /* x87_destroy_simulator */
1948 * Run a simulation and fix all virtual instructions for a graph.
1950 * @param env the architecture environment
1951 * @param irg the current graph
1952 * @param blk_list the block schedule list
1954 * Needs a block-schedule.
1956 void x87_simulate_graph(const arch_env_t *env, ir_graph *irg, ir_node **blk_list) {
1957 ir_node *block, *start_block;
1959 blk_state *bl_state;
1964 /* create the simulator */
1965 x87_init_simulator(&sim, irg, env);
1967 start_block = get_irg_start_block(irg);
1968 bl_state = x87_get_bl_state(&sim, start_block);
1970 /* start with the empty state */
1971 bl_state->begin = empty;
1974 worklist = new_waitq();
1976 /* Create the worklist for the schedule and calculate the liveness
1977 for all nodes. We must precalculate this info, because the
1978 simulator adds new nodes (and possible before Phi nodes) which
1979 let fail the old lazy calculation.
1980 On the other hand we reduce the computation amount due to
1981 precaching from O(n^2) to O(n) at the expense of O(n) cache memory.
1983 lv = be_liveness(irg);
1984 for (i = 0, n = ARR_LEN(blk_list); i < n; ++i) {
1985 update_liveness(&sim, lv, blk_list[i]);
1986 waitq_put(worklist, blk_list[i]);
1988 be_liveness_free(lv);
/* Fixed-point iteration: a block whose simulation reports "rerun" (see
   x87_simulate_block) is put back on the worklist until all blocks settle. */
1992 block = waitq_get(worklist);
1993 if (! x87_simulate_block(&sim, block)) {
1994 waitq_put(worklist, block);
1997 } while (! pdeq_empty(worklist));
2000 del_waitq(worklist);
2001 x87_destroy_simulator(&sim);
2002 } /* x87_simulate_graph */