/**
 * This file implements the x87 support and virtual to stack
 * register translation for the ia32 backend.
 *
 * @author: Michael Beck
 */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif /* HAVE_CONFIG_H */
19 #include "iredges_t.h"
28 #include "../belive_t.h"
29 #include "../besched_t.h"
30 #include "../benode_t.h"
31 #include "ia32_new_nodes.h"
32 #include "gen_ia32_new_nodes.h"
33 #include "gen_ia32_regalloc_if.h"
38 /* first and second binop index */
45 /* the store val index */
46 #define STORE_VAL_IDX 2
48 #define MASK_TOS(x) ((x) & (N_x87_REGS - 1))
50 /** the debug handle */
51 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
53 /* Forward declaration. */
54 typedef struct _x87_simulator x87_simulator;
57 * An exchange template.
58 * Note that our virtual functions have the same inputs
59 * and attributes as the real ones, so we can simple exchange
61 * Further, x87 supports inverse instructions, so we can handle them.
63 typedef struct _exchange_tmpl {
64 ir_op *normal_op; /**< the normal one */
65 ir_op *reverse_op; /**< the reverse one if exists */
66 ir_op *normal_pop_op; /**< the normal one with tos pop */
67 ir_op *reverse_pop_op; /**< the reverse one with tos pop */
71 * An entry on the simulated x87 stack.
73 typedef struct _st_entry {
74 int reg_idx; /**< the virtual register index of this stack value */
75 ir_node *node; /**< the node that produced this value */
81 typedef struct _x87_state {
82 st_entry st[N_x87_REGS]; /**< the register stack */
83 int depth; /**< the current stack depth */
84 int tos; /**< position of the tos */
85 x87_simulator *sim; /**< The simulator. */
88 /** An empty state, used for blocks without fp instructions. */
89 static x87_state _empty = { { {0, NULL}, }, 0, 0 };
90 static x87_state *empty = (x87_state *)&_empty;
92 /** The type of an instruction simulator function. */
93 typedef int (*sim_func)(x87_state *state, ir_node *n, const arch_env_t *env);
96 * A block state: Every block has a x87 state at the beginning and at the end.
98 typedef struct _blk_state {
99 x87_state *begin; /**< state at the begin or NULL if not assigned */
100 x87_state *end; /**< state at the end or NULL if not assigned */
103 #define PTR_TO_BLKSTATE(p) ((blk_state *)(p))
108 struct _x87_simulator {
109 struct obstack obst; /**< an obstack for fast allocating */
110 pmap *blk_states; /**< map blocks to states */
111 const arch_env_t *env; /**< architecture environment */
112 unsigned char *live; /**< Liveness information. */
113 unsigned n_idx; /**< cached get_irg_last_idx() result */
117 * Returns the current stack depth.
119 * @param state the x87 state
121 * @return the x87 stack depth
123 static int x87_get_depth(const x87_state *state) {
128 * Check if the state is empty.
130 * @param state the x87 state
132 * returns non-zero if the x87 stack is empty
134 static int x87_state_is_empty(const x87_state *state) {
135 return state->depth == 0;
139 * Return the virtual register index at st(pos).
141 * @param state the x87 state
142 * @param pos a stack position
144 * @return the vfp register index that produced the value at st(pos)
146 static int x87_get_st_reg(const x87_state *state, int pos) {
147 assert(pos < state->depth);
148 return state->st[MASK_TOS(state->tos + pos)].reg_idx;
152 * Return the node at st(pos).
154 * @param state the x87 state
155 * @param pos a stack position
157 * @return the IR node that produced the value at st(pos)
159 static ir_node *x87_get_st_node(const x87_state *state, int pos) {
160 assert(pos < state->depth);
161 return state->st[MASK_TOS(state->tos + pos)].node;
162 } /* x87_get_st_node */
#ifdef DEBUG_libfirm
/**
 * Dump the stack for debugging.
 *
 * @param state  the x87 state
 */
static void x87_dump_stack(const x87_state *state) {
	int i;

	/* print from the deepest entry up to the tos */
	for (i = state->depth - 1; i >= 0; --i) {
		DB((dbg, LEVEL_2, "vf%d ", x87_get_st_reg(state, i)));
	}
	DB((dbg, LEVEL_2, "<-- TOS\n"));
} /* x87_dump_stack */
#endif /* DEBUG_libfirm */
181 * Set a virtual register to st(pos).
183 * @param state the x87 state
184 * @param reg_idx the vfp register index that should be set
185 * @param node the IR node that produces the value of the vfp register
186 * @param pos the stack position where the new value should be entered
188 static void x87_set_st(x87_state *state, int reg_idx, ir_node *node, int pos) {
189 assert(0 < state->depth);
190 state->st[MASK_TOS(state->tos + pos)].reg_idx = reg_idx;
191 state->st[MASK_TOS(state->tos + pos)].node = node;
193 DB((dbg, LEVEL_2, "After SET_REG:\n ")); DEBUG_ONLY(x87_dump_stack(state));
197 * Set the tos virtual register.
199 * @param state the x87 state
200 * @param reg_idx the vfp register index that should be set
201 * @param node the IR node that produces the value of the vfp register
203 static void x87_set_tos(x87_state *state, int reg_idx, ir_node *node) {
204 x87_set_st(state, reg_idx, node, 0);
208 * Flush the x87 stack.
210 * @param state the x87 state
212 static void x87_flush(x87_state *state) {
218 * Swap st(0) with st(pos).
220 * @param state the x87 state
221 * @param pos the stack position to change the tos with
223 static void x87_fxch(x87_state *state, int pos) {
225 assert(pos < state->depth);
227 entry = state->st[MASK_TOS(state->tos + pos)];
228 state->st[MASK_TOS(state->tos + pos)] = state->st[MASK_TOS(state->tos)];
229 state->st[MASK_TOS(state->tos)] = entry;
231 DB((dbg, LEVEL_2, "After FXCH:\n ")); DEBUG_ONLY(x87_dump_stack(state));
235 * Convert a virtual register to the stack index.
237 * @param state the x87 state
238 * @param reg_idx the register vfp index
240 * @return the stack position where the register is stacked
241 * or -1 if the virtual register was not found
243 static int x87_on_stack(const x87_state *state, int reg_idx) {
244 int i, tos = state->tos;
246 for (i = 0; i < state->depth; ++i)
247 if (state->st[MASK_TOS(tos + i)].reg_idx == reg_idx)
253 * Push a virtual Register onto the stack, double pushed allowed.
255 * @param state the x87 state
256 * @param reg_idx the register vfp index
257 * @param node the node that produces the value of the vfp register
259 static void x87_push_dbl(x87_state *state, int reg_idx, ir_node *node) {
260 assert(state->depth < N_x87_REGS && "stack overrun");
263 state->tos = MASK_TOS(state->tos - 1);
264 state->st[state->tos].reg_idx = reg_idx;
265 state->st[state->tos].node = node;
267 DB((dbg, LEVEL_2, "After PUSH:\n ")); DEBUG_ONLY(x87_dump_stack(state));
271 * Push a virtual Register onto the stack, double pushes are NOT allowed..
273 * @param state the x87 state
274 * @param reg_idx the register vfp index
275 * @param node the node that produces the value of the vfp register
276 * @param dbl_push if != 0 double pushes are allowed
278 static void x87_push(x87_state *state, int reg_idx, ir_node *node) {
279 assert(x87_on_stack(state, reg_idx) == -1 && "double push");
281 x87_push_dbl(state, reg_idx, node);
285 * Pop a virtual Register from the stack.
287 static void x87_pop(x87_state *state) {
288 assert(state->depth > 0 && "stack underrun");
291 state->tos = MASK_TOS(state->tos + 1);
293 DB((dbg, LEVEL_2, "After POP:\n ")); DEBUG_ONLY(x87_dump_stack(state));
297 * Returns the block state of a block.
299 * @param sim the x87 simulator handle
300 * @param block the current block
302 * @return the block state
304 static blk_state *x87_get_bl_state(x87_simulator *sim, ir_node *block) {
305 pmap_entry *entry = pmap_find(sim->blk_states, block);
308 blk_state *bl_state = obstack_alloc(&sim->obst, sizeof(*bl_state));
309 bl_state->begin = NULL;
310 bl_state->end = NULL;
312 pmap_insert(sim->blk_states, block, bl_state);
316 return PTR_TO_BLKSTATE(entry->value);
317 } /* x87_get_bl_state */
320 * Creates a new x87 state.
322 * @param sim the x87 simulator handle
324 * @return a new x87 state
326 static x87_state *x87_alloc_state(x87_simulator *sim) {
327 x87_state *res = obstack_alloc(&sim->obst, sizeof(*res));
331 } /* x87_alloc_state */
334 * Create a new empty x87 state.
336 * @param sim the x87 simulator handle
338 * @return a new empty x87 state
340 static x87_state *x87_alloc_empty_state(x87_simulator *sim) {
341 x87_state *res = x87_alloc_state(sim);
345 } /* x87_alloc_empty_state */
350 * @param sim the x87 simulator handle
351 * @param src the x87 state that will be cloned
353 * @return a cloned copy of the src state
355 static x87_state *x87_clone_state(x87_simulator *sim, const x87_state *src) {
356 x87_state *res = x87_alloc_state(sim);
358 memcpy(res, src, sizeof(*res));
360 } /* x87_clone_state */
363 * Patch a virtual instruction into a x87 one and return
366 * @param n the IR node to patch
367 * @param op the x87 opcode to patch in
369 static ir_node *x87_patch_insn(ir_node *n, ir_op *op) {
370 ir_mode *mode = get_irn_mode(n);
375 if (mode == mode_T) {
376 /* patch all Proj's */
377 const ir_edge_t *edge;
379 foreach_out_edge(n, edge) {
380 ir_node *proj = get_edge_src_irn(edge);
382 mode = get_irn_mode(proj);
383 if (mode_is_float(mode)) {
385 set_irn_mode(proj, mode_E);
390 else if (mode_is_float(mode))
391 set_irn_mode(n, mode_E);
393 } /* x87_patch_insn */
396 * Returns the first Proj of a mode_T node having a given mode.
398 * @param n the mode_T node
399 * @param m the desired mode of the Proj
400 * @return The first Proj of mode @p m found or NULL.
402 static ir_node *get_irn_Proj_for_mode(ir_node *n, ir_mode *m) {
403 const ir_edge_t *edge;
405 assert(get_irn_mode(n) == mode_T && "Need mode_T node");
407 foreach_out_edge(n, edge) {
408 ir_node *proj = get_edge_src_irn(edge);
409 if (get_irn_mode(proj) == m)
414 } /* get_irn_Proj_for_mode */
416 /* -------------- x87 perm --------------- */
419 * Creates a fxch for shuffle.
421 * @param state the x87 state
422 * @param pos parameter for fxch
423 * @param dst_block the block of the user
425 * Creates a new fxch node and reroute the user of the old node
428 * @return the fxch node
430 static ir_node *x87_fxch_shuffle(x87_state *state, int pos, ir_node *block, ir_node *dst_block)
432 const ir_edge_t *edge;
433 ir_node *n = x87_get_st_node(state, pos);
434 ir_node *user = NULL;
439 if (block == get_nodes_block(n)) {
440 /* this is a node from out block: change it's user */
441 foreach_out_edge(n, edge) {
442 ir_node *succ = get_edge_src_irn(edge);
444 if (is_Phi(succ) && get_nodes_block(succ) == dst_block) {
446 node_idx = get_edge_src_pos(edge);
453 fxch = new_rd_ia32_fxch(NULL, get_irn_irg(block), block, n, get_irn_mode(n));
454 attr = get_ia32_attr(fxch);
455 attr->x87[0] = &ia32_st_regs[pos];
456 attr->x87[2] = &ia32_st_regs[0];
459 DB((dbg, LEVEL_2, "%+F replaced input %d of %+F\n", fxch, node_idx, user));
460 set_irn_n(user, node_idx, fxch);
464 * This is a node from a dominator block. Changing it's user might be wrong,
465 * so just keep it alive.
466 * The "right" solution would require a new Phi, but we don't care here.
471 x87_fxch(state, pos);
473 } /* x87_fxch_shuffle */
476 * Calculate the necessary permutations to reach dst_state.
478 * These permutations are done with fxch instructions and placed
479 * at the end of the block.
481 * Note that critical edges are removed here, so we need only
482 * a shuffle if the current block has only one successor.
484 * @param sim the simulator handle
485 * @param block the current block
486 * @param state the current x87 stack state, might be modified
487 * @param dst_block the destination block
488 * @param dst_state destination state
/* NOTE(review): this extraction is garbled -- the leading integer on each
   line is the original file's line number, and gaps in that numbering show
   dropped lines (declarations, braces, break/return statements). The code
   is kept byte-identical here; only comments were added. Restore the
   missing lines from upstream libfirm before compiling. */
492 static x87_state *x87_shuffle(x87_simulator *sim, ir_node *block, x87_state *state, ir_node *dst_block, const x87_state *dst_state) {
493 int i, n_cycles, k, ri;
494 unsigned cycles[4], all_mask;
495 char cycle_idx[4][8];
497 ir_node *before, *after;
499 assert(state->depth == dst_state->depth);
501 /* Some mathematics here:
502 If we have a cycle of length n that includes the tos,
503 we need n-1 exchange operations.
504 We can always add the tos and restore it, so we need
505 n+1 exchange operations for a cycle not containing the tos.
506 So, the maximum of needed operations is for a cycle of 7
507 not including the tos == 8.
508 This is the same number of ops we would need for using stores,
509 so exchange is cheaper (we save the loads).
510 On the other hand, we might need an additional exchange
511 in the next block to bring one operand on top, so the
512 number of ops in the first case is identical.
513 Further, no more than 4 cycles can exists (4 x 2).
515 all_mask = (1 << (state->depth)) - 1;
/* Decompose the difference between state and dst_state into disjoint
   exchange cycles; all_mask tracks stack slots not yet assigned to one. */
517 for (n_cycles = 0; all_mask; ++n_cycles) {
518 int src_idx, dst_idx;
520 /* find the first free slot */
521 for (i = 0; i < state->depth; ++i) {
522 if (all_mask & (1 << i)) {
523 all_mask &= ~(1 << i);
525 /* check if there are differences here */
526 if (x87_get_st_reg(state, i) != x87_get_st_reg(dst_state, i))
532 /* no more cycles found */
537 cycles[n_cycles] = (1 << i);
538 cycle_idx[n_cycles][k++] = i;
/* follow the cycle: locate where the value at src_idx must end up */
539 for (src_idx = i; ; src_idx = dst_idx) {
540 dst_idx = x87_on_stack(dst_state, x87_get_st_reg(state, src_idx));
542 if ((all_mask & (1 << dst_idx)) == 0)
545 cycle_idx[n_cycles][k++] = dst_idx;
546 cycles[n_cycles] |= (1 << dst_idx);
547 all_mask &= ~(1 << dst_idx);
549 cycle_idx[n_cycles][k] = -1;
553 /* no permutation needed */
557 /* Hmm: permutation needed */
558 DB((dbg, LEVEL_2, "\n%+F needs permutation: from\n", block));
559 DEBUG_ONLY(x87_dump_stack(state));
560 DB((dbg, LEVEL_2, " to\n"));
561 DEBUG_ONLY(x87_dump_stack(dst_state));
565 DB((dbg, LEVEL_2, "Need %d cycles\n", n_cycles));
566 for (ri = 0; ri < n_cycles; ++ri) {
567 DB((dbg, LEVEL_2, " Ring %d:\n ", ri));
568 for (k = 0; cycle_idx[ri][k] != -1; ++k)
569 DB((dbg, LEVEL_2, " st%d ->", cycle_idx[ri][k]));
570 DB((dbg, LEVEL_2, "\n"));
577 * Find the place node must be insert.
578 * We have only one successor block, so the last instruction should
581 before = sched_last(block);
582 assert(is_cfop(before));
584 /* now do the permutations */
585 for (ri = 0; ri < n_cycles; ++ri) {
/* a cycle not containing the tos (bit 0) needs one extra fxch before and
   one after, to temporarily bring a cycle member to the tos */
586 if ((cycles[ri] & 1) == 0) {
587 /* this cycle does not include the tos */
588 fxch = x87_fxch_shuffle(state, cycle_idx[ri][0], block, dst_block);
590 sched_add_after(after, fxch);
592 sched_add_before(before, fxch);
595 for (k = 1; cycle_idx[ri][k] != -1; ++k) {
596 fxch = x87_fxch_shuffle(state, cycle_idx[ri][k], block, dst_block);
598 sched_add_after(after, fxch);
600 sched_add_before(before, fxch);
603 if ((cycles[ri] & 1) == 0) {
604 /* this cycle does not include the tos */
605 fxch = x87_fxch_shuffle(state, cycle_idx[ri][0], block, dst_block);
606 sched_add_after(after, fxch);
613 * Create a fxch node before another node.
615 * @param state the x87 state
616 * @param n the node before the fxch
617 * @param pos exchange st(pos) with st(0)
618 * @param op_idx if >= 0, replace input op_idx of n with the fxch result
622 static ir_node *x87_create_fxch(x87_state *state, ir_node *n, int pos, int op_idx) {
623 ir_node *fxch, *pred;
626 x87_fxch(state, pos);
629 pred = get_irn_n(n, op_idx);
631 pred = x87_get_st_node(state, pos);
633 fxch = new_rd_ia32_fxch(NULL, get_irn_irg(n), get_nodes_block(n), pred, get_irn_mode(pred));
634 attr = get_ia32_attr(fxch);
635 attr->x87[0] = &ia32_st_regs[pos];
636 attr->x87[2] = &ia32_st_regs[0];
639 set_irn_n(n, op_idx, fxch);
641 sched_add_before(n, fxch);
642 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fxch), attr->x87[0]->name, attr->x87[2]->name));
644 } /* x87_create_fxch */
647 * Create a fpush before node n.
649 * @param state the x87 state
650 * @param n the node before the fpush
651 * @param pos push st(pos) on stack
652 * @param op_idx if >= 0, replace input op_idx of n with the fpush result
654 static void x87_create_fpush(const arch_env_t *env, x87_state *state, ir_node *n, int pos, int op_idx) {
655 ir_node *fpush, *pred = get_irn_n(n, op_idx);
657 const arch_register_t *out = arch_get_irn_register(env, pred);
659 x87_push_dbl(state, arch_register_get_index(out), pred);
661 fpush = new_rd_ia32_fpush(NULL, get_irn_irg(n), get_nodes_block(n), pred, get_irn_mode(pred));
662 attr = get_ia32_attr(fpush);
663 attr->x87[0] = &ia32_st_regs[pos];
664 attr->x87[2] = &ia32_st_regs[0];
666 set_irn_n(n, op_idx, fpush);
668 sched_add_before(n, fpush);
669 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fpush), attr->x87[0]->name, attr->x87[2]->name));
670 } /* x87_create_fpush */
/**
 * Create a fpop before node n.
 *
 * @param env    the architecture environment
 * @param state  the x87 state
 * @param n      the node before the fpop
 * @param num    pop 1 or 2 values
 * @param pred   node to use as predecessor of the fpop
 *
 * @return the fpop node
 *
 * NOTE(review): this extraction dropped several lines (the embedded numbers
 * jump 682 -> 688 and 695 -> 701): local declarations, the x87_pop() call
 * and the handling of num/pred before the final return are missing.
 * Restore from upstream before compiling.
 */
682 static ir_node *x87_create_fpop(const arch_env_t *env, x87_state *state, ir_node *n, int num, ir_node *pred) {
688 fpop = new_rd_ia32_fpop(NULL, get_irn_irg(n), get_nodes_block(n), mode_E);
689 attr = get_ia32_attr(fpop);
/* an fpop always operates on st(0) */
690 attr->x87[0] = &ia32_st_regs[0];
691 attr->x87[1] = &ia32_st_regs[0];
692 attr->x87[2] = &ia32_st_regs[0];
694 sched_add_before(n, fpop);
695 DB((dbg, LEVEL_1, "<<< %s %s\n", get_irn_opname(fpop), attr->x87[0]->name));
701 } /* x87_create_fpop */
703 /* --------------------------------- liveness ------------------------------------------ */
706 * The liveness transfer function.
707 * Updates a live set over a single step from a given node to its predecessor.
708 * Everything defined at the node is removed from the set, the uses of the node get inserted.
710 * @param arch_env The architecture environment.
711 * @param irn The node at which liveness should be computed.
712 * @param live The bitset of registers live before @p irn. This set gets modified by updating it to
713 * the registers live after irn.
715 * @return The live bitset.
717 static unsigned vfp_liveness_transfer(const arch_env_t *arch_env, ir_node *irn, unsigned live)
720 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
722 if (arch_irn_consider_in_reg_alloc(arch_env, cls, irn)) {
723 const arch_register_t *reg = arch_get_irn_register(arch_env, irn);
724 live &= ~(1 << reg->index);
727 for (i = 0, n = get_irn_arity(irn); i < n; ++i) {
728 ir_node *op = get_irn_n(irn, i);
730 if (mode_is_float(get_irn_mode(op)) && arch_irn_consider_in_reg_alloc(arch_env, cls, op)) {
731 const arch_register_t *reg = arch_get_irn_register(arch_env, op);
732 live |= 1 << reg->index;
736 } /* vfp_liveness_transfer */
739 * Put all live virtual registers at the end of a block into a bitset.
741 * @param env the architecture environment
742 * @param lv the liveness information
743 * @param bl the block
745 * @return The live bitset at the end of this block
747 static unsigned vfp_liveness_end_of_block(const arch_env_t *env, be_lv_t *lv, const ir_node *bl)
751 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
753 be_lv_foreach(lv, bl, be_lv_state_end, i) {
754 ir_node *irn = be_lv_get_irn(lv, bl, i);
755 if (arch_irn_consider_in_reg_alloc(env, cls, irn)) {
756 const arch_register_t *reg = arch_get_irn_register(env, irn);
757 live |= 1 << reg->index;
762 } /* vfp_liveness_end_of_block */
764 /** get the register mask from an arch_register */
765 #define REGMASK(reg) (1 << (reg)->index)
768 * Return a bitset of argument registers which are live at the end of a node.
770 * @param sim the simulator handle
771 * @param pos the node
772 * @param kill kill mask for the output registers
774 * @return The live bitset.
776 static unsigned vfp_live_args_after(x87_simulator *sim, const ir_node *pos, unsigned kill)
778 unsigned idx = get_irn_idx(pos);
780 assert(idx < sim->n_idx);
781 return sim->live[idx] & ~kill;
782 } /* vfp_live_args_after */
785 * Calculate the liveness for a whole block and cache it.
787 * @param sim the simulator handle
788 * @param lv the liveness handle
789 * @param blk the block
791 static void update_liveness(x87_simulator *sim, be_lv_t *lv, ir_node *blk) {
792 unsigned live = vfp_liveness_end_of_block(sim->env, lv, blk);
796 /* now iterate through the block backward and cache the results */
797 sched_foreach_reverse(blk, irn) {
798 /* stop at the first Phi: this produces the live-in */
802 idx = get_irn_idx(irn);
803 sim->live[idx] = live;
805 live = vfp_liveness_transfer(sim->env, irn, live);
807 idx = get_irn_idx(blk);
808 sim->live[idx] = live;
809 } /* update_liveness */
/**
 * Returns true if a register is live in a set.
 *
 * @param reg_idx  the vfp register index
 * @param live     a live bitset
 */
#define is_vfp_live(reg_idx, live)  ((live) & (1 << (reg_idx)))
#ifdef DEBUG_libfirm
/**
 * Dump liveness info.
 *
 * @param live  the live bitset
 */
static void vfp_dump_live(unsigned live) {
	int i;

	DB((dbg, LEVEL_2, "Live registers here: \n"));
	for (i = 0; i < 8; ++i) {
		if (live & (1 << i)) {
			DB((dbg, LEVEL_2, " vf%d", i));
		}
	}
	DB((dbg, LEVEL_2, "\n"));
} /* vfp_dump_live */
#endif /* DEBUG_libfirm */
/* --------------------------------- simulators ---------------------------------------- */

/** swap two integers */
#define XCHG(a, b) do { int t = (a); (a) = (b); (b) = t; } while (0)
/**
 * Simulate a virtual binop.
 *
 * @param state  the x87 state
 * @param n      the node that should be simulated (and patched)
 * @param env    the architecture environment
 * @param tmpl   the template containing the 4 possible x87 opcodes
 *
 * NOTE(review): this extraction is garbled -- the leading integer on each
 * line is the original file's line number; gaps in the numbering mark
 * dropped lines (decls of dst/attr, else/closing braces, fxch/pop calls).
 * Code kept byte-identical; only comments added. Restore from upstream.
 */
850 static int sim_binop(x87_state *state, ir_node *n, const arch_env_t *env, const exchange_tmpl *tmpl) {
851 int op2_idx, op1_idx = -1;
852 int out_idx, do_pop =0;
855 const arch_register_t *op1 = arch_get_irn_register(env, get_irn_n(n, BINOP_IDX_1));
856 const arch_register_t *op2 = arch_get_irn_register(env, get_irn_n(n, BINOP_IDX_2));
857 const arch_register_t *out = arch_get_irn_register(env, n);
858 unsigned live = vfp_live_args_after(state->sim, n, REGMASK(out));
860 DB((dbg, LEVEL_1, ">>> %+F %s, %s -> %s\n", n,
861 arch_register_get_name(op1), arch_register_get_name(op2),
862 arch_register_get_name(out)));
863 DEBUG_ONLY(vfp_dump_live(live));
865 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
866 op2_idx = x87_on_stack(state, arch_register_get_index(op2));
/* case 1: the second operand is a vfp register (not an address mode) */
868 if (op2->index != REG_VFP_NOREG) {
869 /* second operand is a vfp register */
871 if (is_vfp_live(op2->index, live)) {
872 /* Second operand is live. */
874 if (is_vfp_live(op1->index, live)) {
875 /* Both operands are live: push the first one.
876 This works even for op1 == op2. */
877 x87_create_fpush(env, state, n, op2_idx, BINOP_IDX_2);
878 out_idx = op2_idx = 0;
880 dst = tmpl->normal_op;
884 /* Second live, first operand is dead here, bring it to tos. */
886 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
890 op1_idx = out_idx = 0;
891 dst = tmpl->normal_op;
896 /* Second operand is dead. */
897 if (is_vfp_live(op1->index, live)) {
898 /* First operand is live: bring second to tos. */
900 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
904 op2_idx = out_idx = 0;
905 dst = tmpl->reverse_op;
909 /* Both operands are dead here, pop them from the stack. */
912 if (op1_idx == op2_idx) {
913 /* Both are identically, no pop needed. */
914 dst = tmpl->normal_op;
918 dst = tmpl->normal_pop_op;
922 else if (op1_idx == 0) {
924 XCHG(op2_idx, op1_idx);
925 if (op1_idx == op2_idx) {
926 /* Both are identically, no pop needed. */
927 dst = tmpl->reverse_op;
931 dst = tmpl->reverse_pop_op;
936 /* Bring the second on top. */
937 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
938 if (op1_idx == op2_idx) {
939 /* Both are identically, no pop needed. */
940 out_idx = op1_idx = op2_idx = 0;
941 dst = tmpl->normal_op;
947 dst = tmpl->normal_pop_op;
/* case 2: the second operand comes from memory (address mode) */
955 /* second operand is an address mode */
956 if (is_vfp_live(op1->index, live)) {
957 /* first operand is live: push it here */
958 x87_create_fpush(env, state, n, op1_idx, BINOP_IDX_1);
961 /* first operand is dead: bring it to tos */
963 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
965 op1_idx = out_idx = 0;
966 dst = tmpl->normal_op;
/* record the result register and patch the virtual op into the x87 one */
970 x87_set_st(state, out->index, x87_patch_insn(n, dst), out_idx);
974 /* patch the operation */
975 attr = get_ia32_attr(n);
976 attr->x87[0] = op1 = &ia32_st_regs[op1_idx];
978 attr->x87[1] = op2 = &ia32_st_regs[op2_idx];
979 attr->x87[2] = out = &ia32_st_regs[out_idx];
982 DB((dbg, LEVEL_1, "<<< %s %s, %s -> %s\n", get_irn_opname(n),
983 arch_register_get_name(op1), arch_register_get_name(op2),
984 arch_register_get_name(out)));
986 DB((dbg, LEVEL_1, "<<< %s %s, [AM] -> %s\n", get_irn_opname(n),
987 arch_register_get_name(op1),
988 arch_register_get_name(out)));
994 * Simulate a virtual Unop.
996 * @param state the x87 state
997 * @param n the node that should be simulated (and patched)
998 * @param env the architecture environment
999 * @param op the x87 opcode that will replace n's opcode
1001 static int sim_unop(x87_state *state, ir_node *n, const arch_env_t *env, ir_op *op) {
1002 int op1_idx, out_idx;
1003 const arch_register_t *op1 = arch_get_irn_register(env, get_irn_n(n, UNOP_IDX));
1004 const arch_register_t *out = arch_get_irn_register(env, n);
1006 unsigned live = vfp_live_args_after(state->sim, n, REGMASK(out));
1008 DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, out->name));
1009 DEBUG_ONLY(vfp_dump_live(live));
1011 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1013 if (is_vfp_live(op1->index, live)) {
1014 /* push the operand here */
1015 x87_create_fpush(env, state, n, op1_idx, UNOP_IDX);
1018 /* operand is dead, bring it to tos */
1020 x87_create_fxch(state, n, op1_idx, UNOP_IDX);
1023 x87_set_tos(state, arch_register_get_index(out), x87_patch_insn(n, op));
1024 op1_idx = out_idx = 0;
1025 attr = get_ia32_attr(n);
1026 attr->x87[0] = op1 = &ia32_st_regs[0];
1027 attr->x87[2] = out = &ia32_st_regs[0];
1028 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), out->name));
1034 * Simulate a virtual Load instruction.
1036 * @param state the x87 state
1037 * @param n the node that should be simulated (and patched)
1038 * @param env the architecture environment
1039 * @param op the x87 opcode that will replace n's opcode
1041 static int sim_load(x87_state *state, ir_node *n, const arch_env_t *env, ir_op *op) {
1042 const arch_register_t *out = arch_get_irn_register(env, n);
1045 DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, arch_register_get_name(out)));
1046 x87_push(state, arch_register_get_index(out), x87_patch_insn(n, op));
1047 attr = get_ia32_attr(n);
1048 attr->x87[2] = out = &ia32_st_regs[0];
1049 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), arch_register_get_name(out)));
1055 * Rewire all users of @p old_val to @new_val iff they are scheduled after @p store.
1057 * @param store The store
1058 * @param old_val The former value
1059 * @param new_val The new value
1061 static void collect_and_rewire_users(ir_node *store, ir_node *old_val, ir_node *new_val) {
1062 const ir_edge_t *edge, *ne;
1064 foreach_out_edge_safe(old_val, edge, ne) {
1065 ir_node *user = get_edge_src_irn(edge);
1067 if (! user || user == store)
1070 /* if the user is scheduled after the store: rewire */
1071 if (sched_is_scheduled(user) && sched_comes_after(store, user)) {
1073 /* find the input of the user pointing to the old value */
1074 for (i = get_irn_arity(user) - 1; i >= 0; i--) {
1075 if (get_irn_n(user, i) == old_val)
1076 set_irn_n(user, i, new_val);
1080 } /* collect_and_rewire_users */
/**
 * Simulate a virtual Store.
 *
 * @param state  the x87 state
 * @param n      the node that should be simulated (and patched)
 * @param env    the architecture environment
 * @param op     the x87 store opcode
 * @param op_p   the x87 store and pop opcode
 *
 * NOTE(review): this extraction is garbled -- the leading integer on each
 * line is the original file's line number; gaps in the numbering mark
 * dropped lines (decls of op2_idx/mode/depth/attr/irg, x87_pop()/push calls,
 * else/closing braces, the final return). Code kept byte-identical; only
 * comments added. Restore from upstream before compiling.
 */
1091 static int sim_store(x87_state *state, ir_node *n, const arch_env_t *env, ir_op *op, ir_op *op_p) {
1092 ir_node *val = get_irn_n(n, STORE_VAL_IDX);
1093 const arch_register_t *op2 = arch_get_irn_register(env, val);
1094 unsigned live = vfp_live_args_after(state->sim, n, 0);
1100 op2_idx = x87_on_stack(state, arch_register_get_index(op2));
1101 assert(op2_idx >= 0);
1103 DB((dbg, LEVEL_1, ">>> %+F %s ->\n", n, arch_register_get_name(op2)));
1105 mode = get_ia32_ls_mode(n);
1106 depth = x87_get_depth(state);
1109 We can only store the tos to memory.
1110 A store of mode_E with free registers
1111 pushes value to tos, so skip it here.
1113 if (! (mode == mode_E && depth < N_x87_REGS) && op2_idx != 0)
1114 x87_create_fxch(state, n, op2_idx, STORE_VAL_IDX);
/* value still live after the store: we may not simply pop it */
1116 if (is_vfp_live(op2->index, live)) {
1118 Problem: fst doesn't support mode_E (spills), only fstp does
1120 - stack not full: push value and fstp
1121 - stack full: fstp value and load again
1123 if (mode == mode_E) {
1124 if (depth < N_x87_REGS) {
1125 /* ok, we have a free register: push + fstp */
1126 x87_create_fpush(env, state, n, op2_idx, STORE_VAL_IDX);
1128 x87_patch_insn(n, op_p);
1131 ir_node *vfld, *mem, *block, *rproj, *mproj;
1134 /* stack full here: need fstp + load */
1136 x87_patch_insn(n, op_p);
1138 block = get_nodes_block(n);
1139 irg = get_irn_irg(n);
1140 vfld = new_rd_ia32_vfld(NULL, irg, block, get_irn_n(n, 0), get_irn_n(n, 1), new_rd_NoMem(irg));
1142 /* copy all attributes */
1143 set_ia32_frame_ent(vfld, get_ia32_frame_ent(n));
1144 if (is_ia32_use_frame(n))
1145 set_ia32_use_frame(vfld);
1146 set_ia32_am_flavour(vfld, get_ia32_am_flavour(n));
1147 set_ia32_op_type(vfld, ia32_am_Source);
1148 add_ia32_am_offs(vfld, get_ia32_am_offs(n));
1149 set_ia32_am_sc(vfld, get_ia32_am_sc(n));
1150 set_ia32_ls_mode(vfld, get_ia32_ls_mode(n));
1152 rproj = new_r_Proj(irg, block, vfld, get_ia32_ls_mode(vfld), pn_ia32_vfld_res);
1153 mproj = new_r_Proj(irg, block, vfld, mode_M, pn_ia32_vfld_M);
1154 mem = get_irn_Proj_for_mode(n, mode_M);
1156 assert(mem && "Store memory not found");
1158 arch_set_irn_register(env, rproj, op2);
1160 /* reroute all former users of the store memory to the load memory */
1161 edges_reroute(mem, mproj, irg);
1162 /* set the memory input of the load to the store memory */
1163 set_irn_n(vfld, 2, mem);
1165 sched_add_after(n, vfld);
1166 sched_add_after(vfld, rproj);
1168 /* rewire all users, scheduled after the store, to the loaded value */
1169 collect_and_rewire_users(n, val, rproj);
1175 /* mode != mode_E -> use normal fst */
1176 x87_patch_insn(n, op);
/* value dead after the store: use the store-and-pop form */
1181 x87_patch_insn(n, op_p);
1184 attr = get_ia32_attr(n);
1185 attr->x87[1] = op2 = &ia32_st_regs[0];
1186 DB((dbg, LEVEL_1, "<<< %s %s ->\n", get_irn_opname(n), arch_register_get_name(op2)));
1192 * Simulate a virtual Phi.
1193 * Just for cosmetic reasons change the mode of Phi nodes to mode_E.
1195 * @param state the x87 state
1196 * @param n the node that should be simulated (and patched)
1197 * @param env the architecture environment
1199 static int sim_Phi(x87_state *state, ir_node *n, const arch_env_t *env) {
1200 ir_mode *mode = get_irn_mode(n);
1202 if (mode_is_float(mode))
1203 set_irn_mode(n, mode_E);
1209 #define _GEN_BINOP(op, rev) \
1210 static int sim_##op(x87_state *state, ir_node *n, const arch_env_t *env) { \
1211 exchange_tmpl tmpl = { op_ia32_##op, op_ia32_##rev, op_ia32_##op##p, op_ia32_##rev##p }; \
1212 return sim_binop(state, n, env, &tmpl); \
1215 #define GEN_BINOP(op) _GEN_BINOP(op, op)
1216 #define GEN_BINOPR(op) _GEN_BINOP(op, op##r)
1218 #define GEN_LOAD2(op, nop) \
1219 static int sim_##op(x87_state *state, ir_node *n, const arch_env_t *env) { \
1220 return sim_load(state, n, env, op_ia32_##nop); \
1223 #define GEN_LOAD(op) GEN_LOAD2(op, op)
1225 #define GEN_UNOP(op) \
1226 static int sim_##op(x87_state *state, ir_node *n, const arch_env_t *env) { \
1227 return sim_unop(state, n, env, op_ia32_##op); \
1230 #define GEN_STORE(op) \
1231 static int sim_##op(x87_state *state, ir_node *n, const arch_env_t *env) { \
1232 return sim_store(state, n, env, op_ia32_##op, op_ia32_##op##p); \
1251 GEN_LOAD2(fConst, fldConst)
1257 * Simulate a fCondJmp.
1259 * @param state the x87 state
1260 * @param n the node that should be simulated (and patched)
1261 * @param env the architecture environment
1263 static int sim_fCondJmp(x87_state *state, ir_node *n, const arch_env_t *env) {
1264 	int op2_idx, op1_idx = -1, pop_cnt = 0;
1267 	const arch_register_t *op1 = arch_get_irn_register(env, get_irn_n(n, BINOP_IDX_1));
1268 	const arch_register_t *op2 = arch_get_irn_register(env, get_irn_n(n, BINOP_IDX_2));
1269 	unsigned live = vfp_live_args_after(state->sim, n, 0);
1271 	DB((dbg, LEVEL_1, ">>> %+F %s, %s\n", n,
1272 		arch_register_get_name(op1), arch_register_get_name(op2)));
1273 	DEBUG_ONLY(vfp_dump_live(live));
/* translate virtual fp registers to current x87 stack slots */
1275 	op1_idx = x87_on_stack(state, op1->index);
1276 	op2_idx = x87_on_stack(state, op2->index);
1278 	/* BEWARE: check for comp a,a cases, they might happen */
1279 	if (op2->index != REG_VFP_NOREG) {
1280 		/* second operand is a vfp register */
1282 		if (is_vfp_live(op2->index, live)) {
1283 			/* second operand is live */
1285 			if (is_vfp_live(op1->index, live)) {
1286 				/* both operands are live: move one of them to tos */
/* neither operand may be popped; swap the roles so the one already
   nearer the top becomes the tos operand, using the reversed compare */
1288 					XCHG(op2_idx, op1_idx);
1289 					dst = op_ia32_fcomrJmp;
1291 				else if (op1_idx == 0) {
1292 					dst = op_ia32_fcomJmp;
1295 					/* bring the first on top */
1296 					x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1297 					if (op1_idx == op2_idx)
1300 					dst = op_ia32_fcomJmp;
1304 				/* second live, first operand is dead here, bring it to tos.
1305 				   This means further, op1_idx != op2_idx. */
1307 					x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
/* compare-and-pop kills the dead first operand from the stack */
1312 				dst = op_ia32_fcompJmp;
1317 			/* second operand is dead */
1318 			if (is_vfp_live(op1->index, live)) {
1319 				/* first operand is live: bring second to tos.
1320 				   This means further, op1_idx != op2_idx. */
1322 					x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
/* reversed compare-and-pop: pops the dead second operand */
1327 				dst = op_ia32_fcomrpJmp;
1331 				/* both operands are dead here, check first for identity. */
1332 				if (op1_idx == op2_idx) {
1333 					/* identically, one one needed */
1335 						x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1336 					op1_idx = op2_idx = 0;
1338 					dst = op_ia32_fcompJmp;
1341 				/* different, move them to st and st(1) and pop both.
1342 				   The tricky part is to get one into st(1).*/
1343 				else if (op2_idx == 1) {
1344 					/* good, second operand is already in the right place, move the first */
1346 						/* bring the first on top */
1347 						x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
/* fcompp pops both st(0) and st(1) */
1350 					dst = op_ia32_fcomppJmp;
1353 				else if (op1_idx == 1) {
1354 					/* good, first operand is already in the right place, move the second */
1356 						/* bring the first on top */
1357 						x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1360 					dst = op_ia32_fcomrppJmp;
1364 					/* if one is already the TOS, we need two fxch */
1366 						/* first one is TOS, move to st(1) */
1367 						x87_create_fxch(state, n, 1, BINOP_IDX_1);
1369 						x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1371 						dst = op_ia32_fcomrppJmp;
1374 					else if (op2_idx == 0) {
1375 						/* second one is TOS, move to st(1) */
1376 						x87_create_fxch(state, n, 1, BINOP_IDX_2);
1378 						x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1380 						dst = op_ia32_fcomrppJmp;
1384 						/* none of them is either TOS or st(1), 3 fxch needed */
1385 						x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1386 						x87_create_fxch(state, n, 1, BINOP_IDX_2);
1387 						x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1390 						dst = op_ia32_fcomppJmp;
1398 		/* second operand is an address mode */
1399 		if (is_vfp_live(op1->index, live)) {
1400 			/* first operand is live: bring it to TOS */
1402 				x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1405 			dst = op_ia32_fcomJmp;
1408 			/* first operand is dead: bring it to tos */
1410 				x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1414 			dst = op_ia32_fcompJmp;
/* replace the virtual compare by the concrete x87 opcode chosen above */
1418 	x87_patch_insn(n, dst);
1424 	/* patch the operation */
1425 	attr = get_ia32_attr(n);
/* NOTE(review): x87[1]/x87[2] hold the physical st() registers of the
   two operands; slot layout presumably matches the binop convention
   used elsewhere in this file -- confirm against get_ia32_attr docs */
1426 	attr->x87[1] = op1 = &ia32_st_regs[op1_idx];
1428 		attr->x87[2] = op2 = &ia32_st_regs[op2_idx];
1431 		DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(n),
1432 			arch_register_get_name(op1), arch_register_get_name(op2)));
1434 		DB((dbg, LEVEL_1, "<<< %s %s, [AM]\n", get_irn_opname(n),
1435 			arch_register_get_name(op1)));
1438 } /* sim_fCondJmp */
1441 * Simulate a be_Copy.
1443 * @param state the x87 state
1444 * @param n the node that should be simulated (and patched)
1445 * @param env the architecture environment
1447 static int sim_Copy(x87_state *state, ir_node *n, const arch_env_t *env) {
1448 	ir_mode *mode = get_irn_mode(n);
1450 	if (mode_is_float(mode)) {
1451 		const arch_register_t *op1 = arch_get_irn_register(env, get_irn_n(n, 0));
1452 		const arch_register_t *out = arch_get_irn_register(env, n);
1453 		ir_node *node, *next;
1455 		int op1_idx, out_idx;
1456 		unsigned live = vfp_live_args_after(state->sim, n, REGMASK(out));
1458 		/* handle the infamous unknown value */
1459 		if (op1->index == REG_VFP_UKNWN) {
1460 			/* This happens before Phi nodes */
1461 			if (x87_state_is_empty(state)) {
1462 				/* create some value */
/* stack is empty: materialize a dummy 0.0 via fldz in place of the copy */
1463 				x87_patch_insn(n, op_ia32_fldz);
1464 				attr = get_ia32_attr(n);
1465 				attr->x87[2] = out = &ia32_st_regs[0];
1466 				DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, out->name));
/* stack not empty: duplicate the tos with an fpush instead */
1469 				node = new_rd_ia32_fpush(get_irn_dbg_info(n), get_irn_irg(n), get_nodes_block(n), get_irn_n(n, 0), mode);
1470 				arch_set_irn_register(env, node, out);
1472 				x87_push(state, arch_register_get_index(out), node);
1474 				attr = get_ia32_attr(node);
1475 				attr->x87[0] = op1 =
1476 				attr->x87[2] = out = &ia32_st_regs[0];
1478 				next = sched_next(n);
1481 				sched_add_before(next, node);
1482 				DB((dbg, LEVEL_1, ">>> %+F %s -> %s\n", node, op1->name, out->name));
1487 		op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1489 		DB((dbg, LEVEL_1, ">>> %+F %s -> %s\n", n,
1490 			arch_register_get_name(op1), arch_register_get_name(out)));
1491 		DEBUG_ONLY(vfp_dump_live(live));
1493 		if (is_vfp_live(op1->index, live)) {
1494 			/* operand is still live,a real copy */
/* source stays live, so a true duplicate is needed: push a copy on tos */
1495 			node = new_rd_ia32_fpush(get_irn_dbg_info(n), get_irn_irg(n), get_nodes_block(n), get_irn_n(n, 0), mode);
1496 			arch_set_irn_register(env, node, out);
1498 			x87_push(state, arch_register_get_index(out), node);
1500 			attr = get_ia32_attr(node);
1501 			attr->x87[0] = op1 = &ia32_st_regs[op1_idx];
1502 			attr->x87[2] = out = &ia32_st_regs[0];
1504 			next = sched_next(n);
1507 			sched_add_before(next, node);
1508 			DB((dbg, LEVEL_1, ">>> %+F %s -> %s\n", node, op1->name, out->name));
1511 			out_idx = x87_on_stack(state, arch_register_get_index(out));
1513 			if (out_idx >= 0 && out_idx != op1_idx) {
1514 				/* op1 must be killed and placed where out is */
1516 					/* best case, simple remove and rename */
1517 					x87_patch_insn(n, op_ia32_Pop);
1518 					attr = get_ia32_attr(n);
1519 					attr->x87[0] = op1 = &ia32_st_regs[0];
/* NOTE(review): pop shifts the stack by one, hence the -1 slot index --
   confirm against x87_set_st/x87_pop semantics */
1522 				x87_set_st(state, arch_register_get_index(out), n, op1_idx - 1);
1525 					/* move op1 to tos, store and pop it */
1527 						x87_create_fxch(state, n, op1_idx, 0);
1530 					x87_patch_insn(n, op_ia32_Pop);
1531 					attr = get_ia32_attr(n);
1532 					attr->x87[0] = op1 = &ia32_st_regs[out_idx];
1535 					x87_set_st(state, arch_register_get_index(out), n, out_idx - 1);
1537 				DB((dbg, LEVEL_1, ">>> %+F %s\n", n, op1->name));
1540 				/* just a virtual copy */
/* source is dead and out not on stack: rename the slot, drop the Copy node */
1541 				x87_set_st(state, arch_register_get_index(out), get_unop_op(n), op1_idx);
1543 				DB((dbg, LEVEL_1, ">>> KILLED %s\n", get_irn_opname(n)));
1544 				exchange(n, get_unop_op(n));
1553 * Simulate a be_Call.
1555 * @param state the x87 state
1556 * @param n the node that should be simulated
1557 * @param env the architecture environment
1559 static int sim_Call(x87_state *state, ir_node *n, const arch_env_t *env) {
1560 	ir_type *call_tp = be_Call_get_type(n);
1562 	/* at the begin of a call the x87 state should be empty */
1563 	assert(state->depth == 0 && "stack not empty before call");
1566 	 * If the called function returns a float, it is returned in st(0).
1567 	 * This even happens if the return value is NOT used.
1568 	 * Moreover, only one return result is supported.
1570 	if (get_method_n_ress(call_tp) > 0) {
1571 		ir_type *res_type = get_method_res_type(call_tp, 0);
1572 		ir_mode *mode = get_type_mode(res_type);
1574 		if (mode && mode_is_float(mode)) {
1576 			 * TODO: what to push here? The result might be unused and currently
1577 			 * we have no possibility to detect this :-(
/* model the ABI: callee leaves its fp result in st(0); register index 0
   is used as a placeholder since the real vfp index is unknown here */
1579 			x87_push(state, 0, n);
1587 * Simulate a be_Spill.
1589 * @param state the x87 state
1590 * @param n the node that should be simulated (and patched)
1591 * @param env the architecture environment
1593 * Should not happen, spills are lowered before x87 simulator see them.
/* Spills must have been lowered to fst before the simulator runs; if one
   survives, fall back to simulating it as a store after asserting. */
1595 static int sim_Spill(x87_state *state, ir_node *n, const arch_env_t *env) {
1596 	assert(0 && "Spill not lowered");
1597 	return sim_fst(state, n, env);
1601 * Simulate a be_Reload.
1603 * @param state the x87 state
1604 * @param n the node that should be simulated (and patched)
1605 * @param env the architecture environment
1607 * Should not happen, reloads are lowered before x87 simulator see them.
/* Reloads must have been lowered to fld before the simulator runs; if one
   survives, fall back to simulating it as a load after asserting. */
1609 static int sim_Reload(x87_state *state, ir_node *n, const arch_env_t *env) {
1610 	assert(0 && "Reload not lowered");
1611 	return sim_fld(state, n, env);
1615 * Simulate a be_Return.
1617 * @param state the x87 state
1618 * @param n the node that should be simulated (and patched)
1619 * @param env the architecture environment
1621 static int sim_Return(x87_state *state, ir_node *n, const arch_env_t *env) {
1622 	int n_res = be_Return_get_n_rets(n);
1623 	int i, n_float_res = 0;
1625 	/* only floating point return values must resist on stack */
1626 	for (i = 0; i < n_res; ++i) {
1627 		ir_node *res = get_irn_n(n, be_pos_Return_val + i);
1629 		if (mode_is_float(get_irn_mode(res)))
/* by ABI, exactly the fp results remain on the x87 stack at a return */
1632 	assert(x87_get_depth(state) == n_float_res);
1634 	/* pop them virtually */
/* no real pop instruction is emitted; the callee's results are consumed
   by the caller, so only the simulated state is unwound */
1635 	for (i = n_float_res - 1; i >= 0; --i)
/** Helper pairing an input register with its permuted output register. */
1641 typedef struct _perm_data_t {
1642 	const arch_register_t *in;   /**< the input register of the Perm */
1643 	const arch_register_t *out;  /**< the assigned output register */
1647 * Simulate a be_Perm.
1649 * @param state the x87 state
1650 * @param irn the node that should be simulated (and patched)
1651 * @param env the architecture environment
1653 static int sim_Perm(x87_state *state, ir_node *irn, const arch_env_t *env) {
1655 	ir_node *pred = get_irn_n(irn, 0);
1657 	const ir_edge_t *edge;
1659 	/* handle only floating point Perms */
1660 	if (! mode_is_float(get_irn_mode(pred)))
1663 	DB((dbg, LEVEL_1, ">>> %+F\n", irn));
1665 	/* Perm is a pure virtual instruction on x87.
1666 	   All inputs must be on the FPU stack and are pairwise
1667 	   different from each other.
1668 	   So, all we need to do is to permutate the stack state. */
1669 	n = get_irn_arity(irn);
/* stack_pos[i] records the x87 slot of the i-th Perm input (obstack array) */
1670 	NEW_ARR_A(int, stack_pos, n);
1672 	/* collect old stack positions */
1673 	for (i = 0; i < n; ++i) {
1674 		const arch_register_t *inreg = arch_get_irn_register(env, get_irn_n(irn, i));
1675 		int idx = x87_on_stack(state, inreg->index);
1677 		assert(idx >= 0 && "Perm argument not on x87 stack");
1681 	/* now do the permutation */
/* each Proj's number selects which input slot its output register takes
   over -- no instruction is emitted, only the simulated state changes */
1682 	foreach_out_edge(irn, edge) {
1683 		ir_node *proj = get_edge_src_irn(edge);
1684 		const arch_register_t *out = arch_get_irn_register(env, proj);
1685 		long num = get_Proj_proj(proj);
1687 		assert(0 <= num && num < n && "More Proj's than Perm inputs");
1688 		x87_set_st(state, out->index, proj, stack_pos[(unsigned)num]);
1690 	DB((dbg, LEVEL_1, "<<< %+F\n", irn));
1696 * Kill any dead registers at block start by popping them from the stack.
1698 * @param sim the simulator handle
1699 * @param block the current block
1700 * @param start_state the x87 state at the begin of the block
1702 static x87_state *x87_kill_deads(x87_simulator *sim, ir_node *block, x87_state *start_state) {
1703 	x87_state *state = start_state;
1704 	ir_node *first_insn = sched_first(block);
1705 	ir_node *keep = NULL;
1706 	unsigned live = vfp_live_args_after(sim, block, 0);
1708 	int i, depth, num_pop;
/* build a bitmask of stack slots whose vfp register is dead at block entry */
1711 	depth = x87_get_depth(state);
1712 	for (i = depth - 1; i >= 0; --i) {
1713 		int reg = x87_get_st_reg(state, i);
1715 		if (! is_vfp_live(reg, live))
1716 			kill_mask |= (1 << i);
1720 		/* create a new state, will be changed */
1721 		state = x87_clone_state(sim, state);
1723 		DB((dbg, LEVEL_1, "Killing deads:\n"));
1724 		DEBUG_ONLY(vfp_dump_live(live));
1725 		DEBUG_ONLY(x87_dump_stack(state));
1727 		/* now kill registers */
1729 			/* we can only kill from TOS, so bring them up */
1730 			if (! (kill_mask & 1)) {
1731 				/* search from behind, because we can to a double-pop */
1732 				for (i = depth - 1; i >= 0; --i) {
1733 					if (kill_mask & (1 << i)) {
1734 						kill_mask &= ~(1 << i);
/* mark slot i as "don't care" (-1) and fxch the dead value to the tos */
1741 				x87_set_st(state, -1, keep, i);
1742 				keep = x87_create_fxch(state, first_insn, i, -1);
1745 				keep = x87_get_st_node(state, 0);
1747 			if ((kill_mask & 3) == 3) {
1748 				/* we can do a double-pop */
1752 				/* only a single pop */
1757 			kill_mask >>= num_pop;
1758 			keep = x87_create_fpop(sim->env, state, first_insn, num_pop, keep);
/* keep-alive the last inserted node so it is not removed as dead code */
1760 			add_End_keepalive(get_irg_end(get_irn_irg(block)), keep);
1763 } /* x87_kill_deads */
1766 * Run a simulation and fix all virtual instructions for a block.
1768 * @param sim the simulator handle
1769 * @param block the current block
1771 * @return non-zero if simulation is complete,
1772 * zero if the simulation must be rerun
1774 static int x87_simulate_block(x87_simulator *sim, ir_node *block) {
1776 	blk_state *bl_state = x87_get_bl_state(sim, block);
1777 	x87_state *state = bl_state->begin;
1778 	const ir_edge_t *edge;
1779 	ir_node *start_block;
1781 	/* if we have no assigned start state, we must wait ... */
1785 	assert(bl_state->end == NULL);
1787 	DB((dbg, LEVEL_1, "Simulate %+F\n", block));
1788 	DB((dbg, LEVEL_2, "State at Block begin:\n ")); DEBUG_ONLY(x87_dump_stack(state));
1790 	/* at block begin, kill all dead registers */
1791 	state = x87_kill_deads(sim, block, state);
1793 	/* beware, n might changed */
1794 	for (n = sched_first(block); !sched_is_end(n); n = next) {
1795 		ir_op *op = get_irn_op(n);
1797 		next = sched_next(n);
1798 		if (op->ops.generic) {
/* the per-opcode sim_* handler was registered in x87_init_simulator */
1800 			sim_func func = (sim_func)op->ops.generic;
1802 			/* have work to do */
1803 			if (state == bl_state->begin) {
1804 				/* create a new state, will be changed */
/* copy-on-write: never mutate a state shared with a predecessor block */
1805 				state = x87_clone_state(sim, state);
1809 			node_inserted = (*func)(state, n, sim->env);
1812 				sim_func might have added additional nodes after n,
1814 				beware: n must not be changed by sim_func
1815 				(i.e. removed from schedule) in this case
1818 				next = sched_next(n);
1822 	start_block = get_irg_start_block(get_irn_irg(block));
1824 	/* check if the state must be shuffled */
1825 	foreach_block_succ(block, edge) {
1826 		ir_node *succ = get_edge_src_irn(edge);
1827 		blk_state *succ_state = x87_get_bl_state(sim, succ);
1829 		if (succ_state->begin && succ != start_block) {
1830 			/* There is already a begin state for this block, bad.
1831 			   Do the necessary permutations.
1832 			   Note that critical edges are removed, so this is always possible. */
1833 			x87_shuffle(sim, block, state, succ, succ_state->begin);
1835 			/* Note further, that there can be only one such situation,
1836 			   so we can break here. */
1840 	bl_state->end = state;
1842 	/* now propagate the state to all successor blocks */
1843 	foreach_block_succ(block, edge) {
1844 		ir_node *succ = get_edge_src_irn(edge);
1845 		blk_state *succ_state = x87_get_bl_state(sim, succ);
1847 		if (! succ_state->begin)
1848 			succ_state->begin = state;
1851 	DB((dbg, LEVEL_2, "State at Block end:\n ")); DEBUG_ONLY(x87_dump_stack(state));
1854 } /* x87_simulate_block */
1857 * Create a new x87 simulator.
1859 * @param sim a simulator handle, will be initialized
1860 * @param irg the current graph
1861 * @param env the architecture environment
1863 static void x87_init_simulator(x87_simulator *sim, ir_graph *irg, const arch_env_t *env)
1865 	obstack_init(&sim->obst);
1866 	sim->blk_states = pmap_create();
/* liveness cache: one entry per node index (see update_liveness) */
1868 	sim->n_idx = get_irg_last_idx(irg);
1869 	sim->live = obstack_alloc(&sim->obst, sizeof(*sim->live) * sim->n_idx);
1871 	FIRM_DBG_REGISTER(dbg, "firm.be.ia32.x87");
1873 	DB((dbg, LEVEL_1, "--------------------------------\n"
1874 		"x87 Simulator started for %+F\n", irg));
1876 	/* set the generic function pointer of instruction we must simulate */
1877 	clear_irp_opcodes_generic_func();
/* these hooks are what x87_simulate_block dispatches on via op->ops.generic */
1879 #define ASSOC(op)       (op_ ## op)->ops.generic = (op_func)(sim_##op)
1880 #define ASSOC_IA32(op)  (op_ia32_v ## op)->ops.generic = (op_func)(sim_##op)
1881 #define ASSOC_BE(op)    (op_be_ ## op)->ops.generic = (op_func)(sim_##op)
1899 	ASSOC_IA32(fCondJmp);
1910 } /* x87_init_simulator */
1913 * Destroy a x87 simulator.
1915 * @param sim the simulator handle
1917 static void x87_destroy_simulator(x87_simulator *sim) {
1918 pmap_destroy(sim->blk_states);
1919 obstack_free(&sim->obst, NULL);
1920 DB((dbg, LEVEL_1, "x87 Simulator stopped\n\n"));
1921 } /* x87_destroy_simulator */
1924 * Run a simulation and fix all virtual instructions for a graph.
1926 * @param env the architecture environment
1927 * @param irg the current graph
1928 * @param blk_list the block schedule list
1930 * Needs a block-schedule.
1932 void x87_simulate_graph(const arch_env_t *env, ir_graph *irg, ir_node **blk_list) {
1933 	ir_node *block, *start_block;
1935 	blk_state *bl_state;
1940 	/* create the simulator */
1941 	x87_init_simulator(&sim, irg, env);
1943 	start_block = get_irg_start_block(irg);
1944 	bl_state = x87_get_bl_state(&sim, start_block);
1946 	/* start with the empty state */
1947 	bl_state->begin = empty;
1950 	worklist = new_waitq();
1952 	/* Create the worklist for the schedule and calculate the liveness
1953 	   for all nodes. We must precalculate this info, because the
1954 	   simulator adds new nodes (and possible before Phi nodes) which
1955 	   let fail the old lazy calculation.
1956 	   On the other hand we reduce the computation amount due to
1957 	   precaching from O(n^2) to O(n) at the expense of O(n) cache memory.
1959 	lv = be_liveness(irg);
1960 	for (i = 0, n = ARR_LEN(blk_list); i < n; ++i) {
1961 		update_liveness(&sim, lv, blk_list[i]);
1962 		waitq_put(worklist, blk_list[i]);
1964 	be_liveness_free(lv);
/* fixpoint loop: a block whose predecessor state is not yet known is
   re-queued until x87_simulate_block can process it */
1968 		block = waitq_get(worklist);
1969 		if (! x87_simulate_block(&sim, block)) {
1970 			waitq_put(worklist, block);
1973 	} while (! pdeq_empty(worklist));
1976 	del_waitq(worklist);
1977 	x87_destroy_simulator(&sim);
1978 } /* x87_simulate_graph */