2 * This file implements the x87 support and virtual to stack
3 * register translation for the ia32 backend.
5 * @author: Michael Beck
12 #endif /* HAVE_CONFIG_H */
19 #include "iredges_t.h"
28 #include "../belive_t.h"
29 #include "../besched_t.h"
30 #include "../benode_t.h"
31 #include "ia32_new_nodes.h"
32 #include "gen_ia32_new_nodes.h"
33 #include "gen_ia32_regalloc_if.h"
38 /* first and second binop index */
45 /* the store val index */
46 #define STORE_VAL_IDX 2
48 #define MASK_TOS(x) ((x) & (N_x87_REGS - 1))
50 /** the debug handle */
51 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
53 /* Forward declaration. */
54 typedef struct _x87_simulator x87_simulator;
57 * An exchange template.
58 * Note that our virtual functions have the same inputs
59 * and attributes as the real ones, so we can simply exchange
61 * Further, x87 supports inverse instructions, so we can handle them.
63 typedef struct _exchange_tmpl {
64 ir_op *normal_op; /**< the normal one */
65 ir_op *reverse_op; /**< the reverse one if exists */
66 ir_op *normal_pop_op; /**< the normal one with tos pop */
67 ir_op *reverse_pop_op; /**< the reverse one with tos pop */
71 * An entry on the simulated x87 stack.
73 typedef struct _st_entry {
74 int reg_idx; /**< the virtual register index of this stack value */
75 ir_node *node; /**< the node that produced this value */
81 typedef struct _x87_state {
82 st_entry st[N_x87_REGS]; /**< the register stack */
83 int depth; /**< the current stack depth */
84 int tos; /**< position of the tos */
85 x87_simulator *sim; /**< The simulator. */
88 /** An empty state, used for blocks without fp instructions. */
89 static x87_state _empty = { { {0, NULL}, }, 0, 0 };
90 static x87_state *empty = (x87_state *)&_empty;
92 /** The type of an instruction simulator function. */
93 typedef int (*sim_func)(x87_state *state, ir_node *n, const arch_env_t *env);
96 * A block state: Every block has a x87 state at the beginning and at the end.
98 typedef struct _blk_state {
99 x87_state *begin; /**< state at the begin or NULL if not assigned */
100 x87_state *end; /**< state at the end or NULL if not assigned */
103 #define PTR_TO_BLKSTATE(p) ((blk_state *)(p))
108 struct _x87_simulator {
109 struct obstack obst; /**< an obstack for fast allocating */
110 pmap *blk_states; /**< map blocks to states */
111 const arch_env_t *env; /**< architecture environment */
112 unsigned char *live; /**< Liveness information. */
113 unsigned n_idx; /**< cached get_irg_last_idx() result */
117 * Returns the current stack depth.
119 * @param state the x87 state
121 * @return the x87 stack depth
123 static int x87_get_depth(const x87_state *state) {
128 * Check if the state is empty.
130 * @param state the x87 state
132 * returns non-zero if the x87 stack is empty
/**
 * Returns non-zero iff the simulated x87 register stack holds no values
 * (the depth counter is zero).
 * NOTE(review): this listing is a sampled excerpt; the closing brace of
 * this function lies outside the visible lines.
 */
134 static int x87_state_is_empty(const x87_state *state) {
135 return state->depth == 0;
139 * Return the virtual register index at st(pos).
141 * @param state the x87 state
142 * @param pos a stack position
144 * @return the vfp register index that produced the value at st(pos)
/**
 * Return the vfp register index stored at stack slot st(pos).
 * pos must be less than the current stack depth; the slot is addressed
 * relative to tos, wrapped by MASK_TOS.
 */
146 static int x87_get_st_reg(const x87_state *state, int pos) {
147 assert(pos < state->depth);
148 return state->st[MASK_TOS(state->tos + pos)].reg_idx;
152 * Return the node at st(pos).
154 * @param state the x87 state
155 * @param pos a stack position
157 * @return the IR node that produced the value at st(pos)
/**
 * Return the IR node that produced the value currently at st(pos).
 * pos must be less than the current stack depth.
 */
159 static ir_node *x87_get_st_node(const x87_state *state, int pos) {
160 assert(pos < state->depth);
161 return state->st[MASK_TOS(state->tos + pos)].node;
162 } /* x87_get_st_node */
166 * Dump the stack for debugging.
168 * @param state the x87 state
/**
 * Debug helper: print the simulated stack from the deepest slot down to
 * the tos (printed last, marked "<-- TOS").  Compiled only when
 * DEBUG_libfirm is defined (see the #endif below).
 */
170 static void x87_dump_stack(const x87_state *state) {
173 for (i = state->depth - 1; i >= 0; --i) {
174 DB((dbg, LEVEL_2, "vf%d ", x87_get_st_reg(state, i)));
176 DB((dbg, LEVEL_2, "<-- TOS\n"));
177 } /* x87_dump_stack */
178 #endif /* DEBUG_libfirm */
181 * Set a virtual register to st(pos).
183 * @param state the x87 state
184 * @param reg_idx the vfp register index that should be set
185 * @param node the IR node that produces the value of the vfp register
186 * @param pos the stack position where the new value should be entered
/**
 * Overwrite the entry at st(pos) with a vfp register index and its
 * producing node.  The stack must be non-empty; the slot contents are
 * replaced, the depth is unchanged.
 */
188 static void x87_set_st(x87_state *state, int reg_idx, ir_node *node, int pos) {
189 assert(0 < state->depth);
190 state->st[MASK_TOS(state->tos + pos)].reg_idx = reg_idx;
191 state->st[MASK_TOS(state->tos + pos)].node = node;
193 DB((dbg, LEVEL_2, "After SET_REG:\n ")); DEBUG_ONLY(x87_dump_stack(state));
197 * Set the tos virtual register.
199 * @param state the x87 state
200 * @param reg_idx the vfp register index that should be set
201 * @param node the IR node that produces the value of the vfp register
/**
 * Convenience wrapper: set the tos slot (st(0)) to the given vfp
 * register index and producing node.
 */
203 static void x87_set_tos(x87_state *state, int reg_idx, ir_node *node) {
204 x87_set_st(state, reg_idx, node, 0);
208 * Flush the x87 stack.
210 * @param state the x87 state
212 static void x87_flush(x87_state *state) {
218 * Swap st(0) with st(pos).
220 * @param state the x87 state
221 * @param pos the stack position to change the tos with
/**
 * Simulate an fxch: swap the entries of st(0) and st(pos) in the
 * simulated stack.  pos must be within the current depth.
 */
223 static void x87_fxch(x87_state *state, int pos) {
225 assert(pos < state->depth);
227 entry = state->st[MASK_TOS(state->tos + pos)];
228 state->st[MASK_TOS(state->tos + pos)] = state->st[MASK_TOS(state->tos)];
229 state->st[MASK_TOS(state->tos)] = entry;
231 DB((dbg, LEVEL_2, "After FXCH:\n ")); DEBUG_ONLY(x87_dump_stack(state));
235 * Convert a virtual register to the stack index.
237 * @param state the x87 state
238 * @param reg_idx the register vfp index
240 * @return the stack position where the register is stacked
241 * or -1 if the virtual register was not found
/**
 * Linear search for a vfp register on the simulated stack.
 * Per the doc comment above: returns the stack position of reg_idx, or
 * -1 when the register is not on the stack (the return statements are
 * among the elided lines of this listing).
 */
243 static int x87_on_stack(const x87_state *state, int reg_idx) {
244 int i, tos = state->tos;
246 for (i = 0; i < state->depth; ++i)
247 if (state->st[MASK_TOS(tos + i)].reg_idx == reg_idx)
255 * Push a virtual register onto the stack; double pushes are allowed.
255 * @param state the x87 state
256 * @param reg_idx the register vfp index
257 * @param node the node that produces the value of the vfp register
/**
 * Push a vfp register onto the simulated stack; the same register may
 * already be on the stack (double push allowed).  Decrements tos
 * (modulo N_x87_REGS) and writes the new entry there.
 */
259 static void x87_push_dbl(x87_state *state, int reg_idx, ir_node *node) {
260 assert(state->depth < N_x87_REGS && "stack overrun");
263 state->tos = MASK_TOS(state->tos - 1);
264 state->st[state->tos].reg_idx = reg_idx;
265 state->st[state->tos].node = node;
267 DB((dbg, LEVEL_2, "After PUSH:\n ")); DEBUG_ONLY(x87_dump_stack(state));
271 * Push a virtual register onto the stack; double pushes are NOT allowed.
273 * @param state the x87 state
274 * @param reg_idx the register vfp index
275 * @param node the node that produces the value of the vfp register
276 * @param dbl_push if != 0 double pushes are allowed
/**
 * Push a vfp register onto the simulated stack, asserting that it is
 * not already present (no double push); delegates to x87_push_dbl.
 */
278 static void x87_push(x87_state *state, int reg_idx, ir_node *node) {
279 assert(x87_on_stack(state, reg_idx) == -1 && "double push");
281 x87_push_dbl(state, reg_idx, node);
285 * Pop a virtual Register from the stack.
/**
 * Pop one value from the simulated stack by advancing tos (modulo
 * N_x87_REGS).  The stack must be non-empty; the depth decrement is
 * among the elided lines of this listing.
 */
287 static void x87_pop(x87_state *state) {
288 assert(state->depth > 0 && "stack underrun");
291 state->tos = MASK_TOS(state->tos + 1);
293 DB((dbg, LEVEL_2, "After POP:\n ")); DEBUG_ONLY(x87_dump_stack(state));
297 * Returns the block state of a block.
299 * @param sim the x87 simulator handle
300 * @param block the current block
302 * @return the block state
/**
 * Look up (or lazily create) the blk_state for a block.
 * On a pmap miss a fresh blk_state with NULL begin/end is obstack-
 * allocated and inserted; the visible return hands back the pmap entry
 * value cast via PTR_TO_BLKSTATE.
 */
304 static blk_state *x87_get_bl_state(x87_simulator *sim, ir_node *block) {
305 pmap_entry *entry = pmap_find(sim->blk_states, block);
308 blk_state *bl_state = obstack_alloc(&sim->obst, sizeof(*bl_state));
309 bl_state->begin = NULL;
310 bl_state->end = NULL;
312 pmap_insert(sim->blk_states, block, bl_state);
316 return PTR_TO_BLKSTATE(entry->value);
317 } /* x87_get_bl_state */
320 * Creates a new x87 state.
322 * @param sim the x87 simulator handle
324 * @return a new x87 state
/**
 * Obstack-allocate a new (uninitialized) x87_state owned by the
 * simulator; the field initialization/return is among the elided lines.
 */
326 static x87_state *x87_alloc_state(x87_simulator *sim) {
327 x87_state *res = obstack_alloc(&sim->obst, sizeof(*res));
331 } /* x87_alloc_state */
334 * Create a new empty x87 state.
336 * @param sim the x87 simulator handle
338 * @return a new empty x87 state
/**
 * Allocate a new x87 state and reset it to empty (the reset lines are
 * elided in this listing).
 */
340 static x87_state *x87_alloc_empty_state(x87_simulator *sim) {
341 x87_state *res = x87_alloc_state(sim);
345 } /* x87_alloc_empty_state */
350 * @param sim the x87 simulator handle
351 * @param src the x87 state that will be cloned
353 * @return a cloned copy of the src state
/**
 * Clone an x87 state: allocate a new one from the simulator obstack and
 * copy the source state into it wholesale with memcpy.
 */
355 static x87_state *x87_clone_state(x87_simulator *sim, const x87_state *src) {
356 x87_state *res = x87_alloc_state(sim);
358 memcpy(res, src, sizeof(*res));
360 } /* x87_clone_state */
363 * Patch a virtual instruction into a x87 one and return
366 * @param n the IR node to patch
367 * @param op the x87 opcode to patch in
/**
 * Patch a virtual fp instruction into the concrete x87 op `op` and fix
 * the result modes: for a mode_T node every float Proj is switched to
 * mode_E; a plain float-mode node is switched to mode_E directly.
 * (The set_irn_op call itself is among the elided lines.)
 */
369 static ir_node *x87_patch_insn(ir_node *n, ir_op *op) {
370 ir_mode *mode = get_irn_mode(n);
375 if (mode == mode_T) {
376 /* patch all Proj's */
377 const ir_edge_t *edge;
379 foreach_out_edge(n, edge) {
380 ir_node *proj = get_edge_src_irn(edge);
382 mode = get_irn_mode(proj);
383 if (mode_is_float(mode)) {
385 set_irn_mode(proj, mode_E);
390 else if (mode_is_float(mode))
391 set_irn_mode(n, mode_E);
393 } /* x87_patch_insn */
396 * Returns the first Proj of a mode_T node having a given mode.
398 * @param n the mode_T node
399 * @param m the desired mode of the Proj
400 * @return The first Proj of mode @p m found or NULL.
/**
 * Find the first Proj of the mode_T node n that has mode m; per the doc
 * comment above, NULL is returned when none exists (the return lines
 * are elided in this listing).
 */
402 static ir_node *get_irn_Proj_for_mode(ir_node *n, ir_mode *m) {
403 const ir_edge_t *edge;
405 assert(get_irn_mode(n) == mode_T && "Need mode_T node");
407 foreach_out_edge(n, edge) {
408 ir_node *proj = get_edge_src_irn(edge);
409 if (get_irn_mode(proj) == m)
414 } /* get_irn_Proj_for_mode */
416 /* -------------- x87 perm --------------- */
419 * Creates a fxch for shuffle.
421 * @param state the x87 state
422 * @param pos parameter for fxch
423 * @param dst_block the block of the user
425 * Creates a new fxch node and reroute the user of the old node
428 * @return the fxch node
/**
 * Create an fxch node for the end-of-block shuffle that exchanges
 * st(pos) with st(0).  If the exchanged value was produced in this
 * block, the Phi user in dst_block is rerouted to the fxch result;
 * values from dominator blocks are only kept alive (see comment below).
 * Finally the simulated state is updated via x87_fxch.
 */
430 static ir_node *x87_fxch_shuffle(x87_state *state, int pos, ir_node *block, ir_node *dst_block)
432 const ir_edge_t *edge;
433 ir_node *n = x87_get_st_node(state, pos);
434 ir_node *user = NULL;
439 if (block == get_nodes_block(n)) {
440 /* this is a node from our block: change its user */
441 foreach_out_edge(n, edge) {
442 ir_node *succ = get_edge_src_irn(edge);
444 if (is_Phi(succ) && get_nodes_block(succ) == dst_block) {
446 node_idx = get_edge_src_pos(edge);
453 fxch = new_rd_ia32_fxch(NULL, get_irn_irg(block), block, n, get_irn_mode(n));
454 attr = get_ia32_attr(fxch);
455 attr->x87[0] = &ia32_st_regs[pos];
456 attr->x87[2] = &ia32_st_regs[0];
459 DB((dbg, LEVEL_2, "%+F replaced input %d of %+F\n", fxch, node_idx, user));
460 set_irn_n(user, node_idx, fxch);
464 * This is a node from a dominator block. Changing its user might be wrong,
465 * so just keep it alive.
466 * The "right" solution would require a new Phi, but we don't care here.
471 x87_fxch(state, pos);
473 } /* x87_fxch_shuffle */
476 * Calculate the necessary permutations to reach dst_state.
478 * These permutations are done with fxch instructions and placed
479 * at the end of the block.
481 * Note that critical edges are removed here, so we need only
482 * a shuffle if the current block has only one successor.
484 * @param sim the simulator handle
485 * @param block the current block
486 * @param state the current x87 stack state, might be modified
487 * @param dst_block the destination block
488 * @param dst_state destination state
/**
 * Permute the current stack state into dst_state by emitting fxch
 * instructions at the end of `block`.
 *
 * Phase 1 decomposes the difference between state and dst_state into at
 * most 4 disjoint cycles (bitmask `cycles[ri]`, member list
 * `cycle_idx[ri][]`, -1 terminated).  Phase 2 emits the fxch sequence
 * per cycle before the final control-flow op; a cycle that does not
 * contain the tos (bit 0 clear) is bracketed by an extra fxch of its
 * first member to bring it to/from the tos.
 * NOTE(review): several interior lines (k reset, early exits, `after`
 * updates) are elided in this listing.
 */
492 static x87_state *x87_shuffle(x87_simulator *sim, ir_node *block, x87_state *state, ir_node *dst_block, const x87_state *dst_state) {
493 int i, n_cycles, k, ri;
494 unsigned cycles[4], all_mask;
495 char cycle_idx[4][8];
497 ir_node *before, *after;
499 assert(state->depth == dst_state->depth);
501 /* Some mathematics here:
502 If we have a cycle of length n that includes the tos,
503 we need n-1 exchange operations.
504 We can always add the tos and restore it, so we need
505 n+1 exchange operations for a cycle not containing the tos.
506 So, the maximum of needed operations is for a cycle of 7
507 not including the tos == 8.
508 This is the same number of ops we would need for using stores,
509 so exchange is cheaper (we save the loads).
510 On the other hand, we might need an additional exchange
511 in the next block to bring one operand on top, so the
512 number of ops in the first case is identical.
513 Further, no more than 4 cycles can exists (4 x 2).
515 all_mask = (1 << (state->depth)) - 1;
517 for (n_cycles = 0; all_mask; ++n_cycles) {
518 int src_idx, dst_idx;
520 /* find the first free slot */
521 for (i = 0; i < state->depth; ++i) {
522 if (all_mask & (1 << i)) {
523 all_mask &= ~(1 << i);
525 /* check if there are differences here */
526 if (x87_get_st_reg(state, i) != x87_get_st_reg(dst_state, i))
532 /* no more cycles found */
537 cycles[n_cycles] = (1 << i);
538 cycle_idx[n_cycles][k++] = i;
539 for (src_idx = i; ; src_idx = dst_idx) {
540 dst_idx = x87_on_stack(dst_state, x87_get_st_reg(state, src_idx));
542 if ((all_mask & (1 << dst_idx)) == 0)
545 cycle_idx[n_cycles][k++] = dst_idx;
546 cycles[n_cycles] |= (1 << dst_idx);
547 all_mask &= ~(1 << dst_idx);
549 cycle_idx[n_cycles][k] = -1;
553 /* no permutation needed */
557 /* Hmm: permutation needed */
558 DB((dbg, LEVEL_2, "\n%+F needs permutation: from\n", block));
559 DEBUG_ONLY(x87_dump_stack(state));
560 DB((dbg, LEVEL_2, " to\n"));
561 DEBUG_ONLY(x87_dump_stack(dst_state));
565 DB((dbg, LEVEL_2, "Need %d cycles\n", n_cycles));
566 for (ri = 0; ri < n_cycles; ++ri) {
567 DB((dbg, LEVEL_2, " Ring %d:\n ", ri));
568 for (k = 0; cycle_idx[ri][k] != -1; ++k)
569 DB((dbg, LEVEL_2, " st%d ->", cycle_idx[ri][k]));
570 DB((dbg, LEVEL_2, "\n"));
577 * Find the place node must be insert.
578 * We have only one successor block, so the last instruction should
581 before = sched_last(block);
582 assert(is_cfop(before));
584 /* now do the permutations */
585 for (ri = 0; ri < n_cycles; ++ri) {
586 if ((cycles[ri] & 1) == 0) {
587 /* this cycle does not include the tos */
588 fxch = x87_fxch_shuffle(state, cycle_idx[ri][0], block, dst_block);
590 sched_add_after(after, fxch);
592 sched_add_before(before, fxch);
595 for (k = 1; cycle_idx[ri][k] != -1; ++k) {
596 fxch = x87_fxch_shuffle(state, cycle_idx[ri][k], block, dst_block);
598 sched_add_after(after, fxch);
600 sched_add_before(before, fxch);
603 if ((cycles[ri] & 1) == 0) {
604 /* this cycle does not include the tos: restore the tos with a final fxch */
605 fxch = x87_fxch_shuffle(state, cycle_idx[ri][0], block, dst_block);
606 sched_add_after(after, fxch);
613 * Create a fxch node before another node.
615 * @param state the x87 state
616 * @param n the node before the fxch
617 * @param pos exchange st(pos) with st(0)
618 * @param op_idx if >= 0, replace input op_idx of n with the fxch result
/**
 * Create an fxch node exchanging st(pos) with st(0), update the
 * simulated state, schedule the fxch before n, and (when op_idx >= 0)
 * reroute input op_idx of n to the fxch result; otherwise the operand
 * is taken from the stack entry at pos.
 */
622 static ir_node *x87_create_fxch(x87_state *state, ir_node *n, int pos, int op_idx) {
623 ir_node *fxch, *pred;
626 x87_fxch(state, pos);
629 pred = get_irn_n(n, op_idx);
631 pred = x87_get_st_node(state, pos);
633 fxch = new_rd_ia32_fxch(NULL, get_irn_irg(n), get_nodes_block(n), pred, get_irn_mode(pred));
634 attr = get_ia32_attr(fxch);
635 attr->x87[0] = &ia32_st_regs[pos];
636 attr->x87[2] = &ia32_st_regs[0];
639 set_irn_n(n, op_idx, fxch);
641 sched_add_before(n, fxch);
642 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fxch), attr->x87[0]->name, attr->x87[2]->name));
644 } /* x87_create_fxch */
647 * Create a fpush before node n.
649 * @param state the x87 state
650 * @param n the node before the fpush
651 * @param pos push st(pos) on stack
652 * @param op_idx if >= 0, replace input op_idx of n with the fpush result
/**
 * Create an fpush that duplicates st(pos) onto the stack top before
 * node n.  The simulated state gets a (possibly duplicate) push of the
 * operand's vfp register, input op_idx of n is rerouted to the fpush
 * result, and the fpush is scheduled before n.
 */
654 static void x87_create_fpush(const arch_env_t *env, x87_state *state, ir_node *n, int pos, int op_idx) {
655 ir_node *fpush, *pred = get_irn_n(n, op_idx);
657 const arch_register_t *out = arch_get_irn_register(env, pred);
659 x87_push_dbl(state, arch_register_get_index(out), pred);
661 fpush = new_rd_ia32_fpush(NULL, get_irn_irg(n), get_nodes_block(n), pred, get_irn_mode(pred));
662 attr = get_ia32_attr(fpush);
663 attr->x87[0] = &ia32_st_regs[pos];
664 attr->x87[2] = &ia32_st_regs[0];
666 set_irn_n(n, op_idx, fpush);
668 sched_add_before(n, fpush);
669 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fpush), attr->x87[0]->name, attr->x87[2]->name));
670 } /* x87_create_fpush */
673 * Create a fpop before node n.
675 * @param state the x87 state
676 * @param n the node before the fpop
677 * @param num pop 1 or 2 values
678 * @param pred node to use as predecessor of the fpop
680 * @return the fpop node
/**
 * Create an fpop node (all three x87 attr slots set to st(0)) and
 * schedule it before n.  The doc comment above says `num` selects
 * popping 1 or 2 values; the simulated x87_pop call(s) and the return
 * are among the elided lines of this listing.
 */
682 static ir_node *x87_create_fpop(const arch_env_t *env, x87_state *state, ir_node *n, int num, ir_node *pred) {
688 fpop = new_rd_ia32_fpop(NULL, get_irn_irg(n), get_nodes_block(n), pred, mode_E);
689 attr = get_ia32_attr(fpop);
690 attr->x87[0] = &ia32_st_regs[0];
691 attr->x87[1] = &ia32_st_regs[0];
692 attr->x87[2] = &ia32_st_regs[0];
694 sched_add_before(n, fpop);
695 DB((dbg, LEVEL_1, "<<< %s %s\n", get_irn_opname(fpop), attr->x87[0]->name));
701 } /* x87_create_fpop */
703 /* --------------------------------- liveness ------------------------------------------ */
706 * The liveness transfer function.
707 * Updates a live set over a single step from a given node to its predecessor.
708 * Everything defined at the node is removed from the set, the uses of the node get inserted.
710 * @param arch_env The architecture environment.
711 * @param irn The node at which liveness should be computed.
712 * @param live The bitset of registers live before @p irn. This set gets modified by updating it to
713 * the registers live after irn.
715 * @return The live bitset.
/**
 * Backward liveness transfer over one node for the vfp class:
 * the register defined by irn (if register-allocated in this class) is
 * cleared from the bitset, then the registers of all float operands
 * considered for allocation are set.  Returns the updated bitset.
 */
717 static unsigned vfp_liveness_transfer(const arch_env_t *arch_env, ir_node *irn, unsigned live)
720 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
722 if (arch_irn_consider_in_reg_alloc(arch_env, cls, irn)) {
723 const arch_register_t *reg = arch_get_irn_register(arch_env, irn);
724 live &= ~(1 << reg->index);
727 for (i = 0, n = get_irn_arity(irn); i < n; ++i) {
728 ir_node *op = get_irn_n(irn, i);
730 if (mode_is_float(get_irn_mode(op)) && arch_irn_consider_in_reg_alloc(arch_env, cls, op)) {
731 const arch_register_t *reg = arch_get_irn_register(arch_env, op);
732 live |= 1 << reg->index;
736 } /* vfp_liveness_transfer */
739 * Put all live virtual registers at the end of a block into a bitset.
741 * @param env the architecture environment
742 * @param lv the liveness information
743 * @param bl the block
745 * @return The live bitset at the end of this block
/**
 * Build the bitset of vfp registers live at the end of block bl by
 * iterating the block's live-end set and OR-ing in the register index
 * of every node the allocator considers in the vfp class.
 */
747 static unsigned vfp_liveness_end_of_block(const arch_env_t *env, be_lv_t *lv, const ir_node *bl)
751 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
753 be_lv_foreach(lv, bl, be_lv_state_end, i) {
754 ir_node *irn = be_lv_get_irn(lv, bl, i);
755 if (arch_irn_consider_in_reg_alloc(env, cls, irn)) {
756 const arch_register_t *reg = arch_get_irn_register(env, irn);
757 live |= 1 << reg->index;
762 } /* vfp_liveness_end_of_block */
765 * Return a bitset of registers which are live at a node.
767 * @param sim the simulator handle
768 * @param pos the node
770 * @return The live bitset.
/**
 * Return the cached vfp liveness bitset for node pos, indexed by the
 * node's irn index into the simulator's `live` array (filled by
 * update_liveness).
 */
772 static unsigned vfp_liveness_nodes_live_at(x87_simulator *sim, const ir_node *pos)
774 unsigned idx = get_irn_idx(pos);
776 assert(idx < sim->n_idx);
777 return sim->live[idx];
778 } /* vfp_liveness_nodes_live_at */
781 * Calculate the liveness for a whole block and cache it.
783 * @param sim the simulator handle
784 * @param lv the liveness handle
785 * @param blk the block
/**
 * Compute and cache per-node vfp liveness for a whole block: start from
 * the live-end bitset, walk the schedule backwards storing the bitset
 * valid *after* each node, apply the transfer function, and finally
 * store the block-entry bitset under the block's own index.
 */
787 static void update_liveness(x87_simulator *sim, be_lv_t *lv, ir_node *blk) {
788 unsigned live = vfp_liveness_end_of_block(sim->env, lv, blk);
792 /* now iterate through the block backward and cache the results */
793 sched_foreach_reverse(blk, irn) {
794 idx = get_irn_idx(irn);
795 sim->live[idx] = live;
797 live = vfp_liveness_transfer(sim->env, irn, live);
799 idx = get_irn_idx(blk);
800 sim->live[idx] = live;
801 } /* update_liveness */
804 * Returns true if a register is live in a set.
806 * @param reg_idx the vfp register index
807 * @param live a live bitset
809 #define is_vfp_live(reg_idx, live) ((live) & (1 << (reg_idx)))
813 * Dump liveness info.
815 * @param live the live bitset
/**
 * Debug helper: print the vf registers (vf0..vf7) whose bits are set in
 * the liveness bitset.  Compiled only under DEBUG_libfirm (see #endif).
 */
817 static void vfp_dump_live(unsigned live) {
820 DB((dbg, LEVEL_2, "Live registers here: \n"));
821 for (i = 0; i < 8; ++i) {
822 if (live & (1 << i)) {
823 DB((dbg, LEVEL_2, " vf%d", i));
826 DB((dbg, LEVEL_2, "\n"));
827 } /* vfp_dump_live */
828 #endif /* DEBUG_libfirm */
830 /* --------------------------------- simulators ---------------------------------------- */
832 #define XCHG(a, b) do { int t = (a); (a) = (b); (b) = t; } while (0)
835 * Simulate a virtual binop.
837 * @param state the x87 state
838 * @param n the node that should be simulated (and patched)
839 * @param env the architecture environment
840 * @param tmpl the template containing the 4 possible x87 opcodes
/**
 * Simulate a virtual binop and patch it into one of the four x87
 * opcodes of tmpl (normal / reverse, each with or without pop).
 *
 * The case analysis keys on (a) whether the second operand is a vfp
 * register or an address-mode operand (REG_VFP_NOREG), and (b) which
 * operands are still live after n: live operands must be preserved
 * (fpush a copy), dead operands may be consumed (pop variants) and are
 * brought to the tos with fxch as needed.  Finally the result register
 * is entered into the simulated stack and the node's x87 attributes are
 * set from op1_idx/op2_idx/out_idx.
 * NOTE(review): many branch-closing lines and the final return are
 * elided in this listing.
 */
842 static int sim_binop(x87_state *state, ir_node *n, const arch_env_t *env, const exchange_tmpl *tmpl) {
843 int op2_idx, op1_idx = -1;
844 int out_idx, do_pop =0;
847 const arch_register_t *op1 = arch_get_irn_register(env, get_irn_n(n, BINOP_IDX_1));
848 const arch_register_t *op2 = arch_get_irn_register(env, get_irn_n(n, BINOP_IDX_2));
849 const arch_register_t *out = arch_get_irn_register(env, n);
850 unsigned live = vfp_liveness_nodes_live_at(state->sim, n);
852 DB((dbg, LEVEL_1, ">>> %s %s, %s -> %s\n", get_irn_opname(n),
853 arch_register_get_name(op1), arch_register_get_name(op2),
854 arch_register_get_name(out)));
855 DEBUG_ONLY(vfp_dump_live(live));
857 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
858 op2_idx = x87_on_stack(state, arch_register_get_index(op2));
860 if (op2->index != REG_VFP_NOREG) {
861 /* second operand is a vfp register */
863 if (is_vfp_live(op2->index, live)) {
864 /* Second operand is live. */
866 if (is_vfp_live(op1->index, live)) {
867 /* Both operands are live: push the first one.
868 This works even for op1 == op2. */
869 x87_create_fpush(env, state, n, op2_idx, BINOP_IDX_2);
870 out_idx = op2_idx = 0;
872 dst = tmpl->normal_op;
876 /* Second live, first operand is dead here, bring it to tos. */
878 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
882 op1_idx = out_idx = 0;
883 dst = tmpl->normal_op;
888 /* Second operand is dead. */
889 if (is_vfp_live(op1->index, live)) {
890 /* First operand is live: bring second to tos. */
892 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
896 op2_idx = out_idx = 0;
897 dst = tmpl->reverse_op;
901 /* Both operands are dead here, pop them from the stack. */
904 if (op1_idx == op2_idx) {
905 /* Both are identical, no pop needed. */
906 dst = tmpl->normal_op;
910 dst = tmpl->normal_pop_op;
914 else if (op1_idx == 0) {
916 XCHG(op2_idx, op1_idx);
917 if (op1_idx == op2_idx) {
918 /* Both are identical, no pop needed. */
919 dst = tmpl->reverse_op;
923 dst = tmpl->reverse_pop_op;
928 /* Bring the second on top. */
929 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
930 if (op1_idx == op2_idx) {
931 /* Both are identical, no pop needed. */
932 out_idx = op1_idx = op2_idx = 0;
933 dst = tmpl->normal_op;
939 dst = tmpl->normal_pop_op;
947 /* second operand is an address mode */
948 if (is_vfp_live(op1->index, live)) {
949 /* first operand is live: push it here */
950 x87_create_fpush(env, state, n, op1_idx, BINOP_IDX_1);
953 /* first operand is dead: bring it to tos */
955 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
957 op1_idx = out_idx = 0;
958 dst = tmpl->normal_op;
962 x87_set_st(state, arch_register_get_index(out), x87_patch_insn(n, dst), out_idx);
966 /* patch the operation */
967 attr = get_ia32_attr(n);
968 attr->x87[0] = op1 = &ia32_st_regs[op1_idx];
970 attr->x87[1] = op2 = &ia32_st_regs[op2_idx];
971 attr->x87[2] = out = &ia32_st_regs[out_idx];
974 DB((dbg, LEVEL_1, "<<< %s %s, %s -> %s\n", get_irn_opname(n),
975 arch_register_get_name(op1), arch_register_get_name(op2),
976 arch_register_get_name(out)));
978 DB((dbg, LEVEL_1, "<<< %s %s, [AM] -> %s\n", get_irn_opname(n),
979 arch_register_get_name(op1),
980 arch_register_get_name(out)));
986 * Simulate a virtual Unop.
988 * @param state the x87 state
989 * @param n the node that should be simulated (and patched)
990 * @param env the architecture environment
991 * @param op the x87 opcode that will replace n's opcode
/**
 * Simulate a virtual unop and patch it to x87 opcode `op`.
 * If the operand is still live it is duplicated with fpush (so the
 * original survives); if dead it is brought to the tos with fxch.  The
 * result then replaces st(0) in the simulated state and the node's x87
 * attributes are set to st(0) for both input and output.
 */
993 static int sim_unop(x87_state *state, ir_node *n, const arch_env_t *env, ir_op *op) {
994 int op1_idx, out_idx;
995 const arch_register_t *op1 = arch_get_irn_register(env, get_irn_n(n, UNOP_IDX));
996 const arch_register_t *out = arch_get_irn_register(env, n);
998 unsigned live = vfp_liveness_nodes_live_at(state->sim, n);
1000 DB((dbg, LEVEL_1, ">>> %s -> %s\n", get_irn_opname(n), out->name));
1001 DEBUG_ONLY(vfp_dump_live(live));
1003 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1005 if (is_vfp_live(op1->index, live)) {
1006 /* push the operand here */
1007 x87_create_fpush(env, state, n, op1_idx, UNOP_IDX);
1010 /* operand is dead, bring it to tos */
1012 x87_create_fxch(state, n, op1_idx, UNOP_IDX);
1015 x87_set_tos(state, arch_register_get_index(out), x87_patch_insn(n, op));
1016 op1_idx = out_idx = 0;
1017 attr = get_ia32_attr(n);
1018 attr->x87[0] = op1 = &ia32_st_regs[0];
1019 attr->x87[2] = out = &ia32_st_regs[0];
1020 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), out->name));
1026 * Simulate a virtual Load instruction.
1028 * @param state the x87 state
1029 * @param n the node that should be simulated (and patched)
1030 * @param env the architecture environment
1031 * @param op the x87 opcode that will replace n's opcode
/**
 * Simulate a virtual Load: patch n to x87 opcode `op`, push its result
 * register onto the simulated stack (no double push expected), and set
 * the output x87 attribute to st(0).
 */
1033 static int sim_load(x87_state *state, ir_node *n, const arch_env_t *env, ir_op *op) {
1034 const arch_register_t *out = arch_get_irn_register(env, n);
1037 DB((dbg, LEVEL_1, ">>> %s -> %s\n", get_irn_opname(n), arch_register_get_name(out)));
1038 x87_push(state, arch_register_get_index(out), x87_patch_insn(n, op));
1039 attr = get_ia32_attr(n);
1040 attr->x87[2] = out = &ia32_st_regs[0];
1041 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), arch_register_get_name(out)));
1047 * Rewire all users of @p old_val to @new_val iff they are scheduled after @p store.
1049 * @param store The store
1050 * @param old_val The former value
1051 * @param new_val The new value
1053 static void collect_and_rewire_users(ir_node *store, ir_node *old_val, ir_node *new_val) {
1054 const ir_edge_t *edge, *ne;
1056 foreach_out_edge_safe(old_val, edge, ne) {
1057 ir_node *user = get_edge_src_irn(edge);
1059 if (! user || user == store)
1062 /* if the user is scheduled after the store: rewire */
1063 if (sched_is_scheduled(user) && sched_comes_after(store, user)) {
1065 /* find the input of the user pointing to the old value */
1066 for (i = get_irn_arity(user) - 1; i >= 0; i--) {
1067 if (get_irn_n(user, i) == old_val)
1068 set_irn_n(user, i, new_val);
1072 } /* collect_and_rewire_users */
1075 * Simulate a virtual Store.
1077 * @param state the x87 state
1078 * @param n the node that should be simulated (and patched)
1079 * @param env the architecture environment
1080 * @param op the x87 store opcode
1081 * @param op_p the x87 store and pop opcode
/**
 * Simulate a virtual Store, patching it to `op` (fst) or `op_p` (fstp).
 *
 * Only st(0) can be stored, so the value is fxch'd to the tos first
 * (except in the mode_E + free-register case, where an fpush is about
 * to create a tos copy anyway).  If the stored value is still live:
 *  - mode_E with a free register: fpush a copy, then fstp it;
 *  - mode_E with a full stack: fstp the value and immediately reload it
 *    with a synthesized vfld, rerouting the store's memory Proj and all
 *    later users of the value to the reload (see collect_and_rewire_users);
 *  - other modes: plain fst keeps the value on the stack.
 * A dead value is simply consumed with fstp.
 * NOTE(review): closing braces and the x87_pop bookkeeping lines are
 * elided in this listing.
 */
1083 static int sim_store(x87_state *state, ir_node *n, const arch_env_t *env, ir_op *op, ir_op *op_p) {
1084 ir_node *val = get_irn_n(n, STORE_VAL_IDX);
1085 const arch_register_t *op2 = arch_get_irn_register(env, val);
1086 unsigned live = vfp_liveness_nodes_live_at(state->sim, n);
1092 op2_idx = x87_on_stack(state, arch_register_get_index(op2));
1093 assert(op2_idx >= 0);
1095 DB((dbg, LEVEL_1, ">>> %s %s ->\n", get_irn_opname(n), arch_register_get_name(op2)));
1097 mode = get_ia32_ls_mode(n);
1098 depth = x87_get_depth(state);
1101 We can only store the tos to memory.
1102 A store of mode_E with free registers
1103 pushes value to tos, so skip it here.
1105 if (! (mode == mode_E && depth < N_x87_REGS) && op2_idx != 0)
1106 x87_create_fxch(state, n, op2_idx, STORE_VAL_IDX);
1108 if (is_vfp_live(op2->index, live)) {
1110 Problem: fst doesn't support mode_E (spills), only fstp does
1112 - stack not full: push value and fstp
1113 - stack full: fstp value and load again
1115 if (mode == mode_E) {
1116 if (depth < N_x87_REGS) {
1117 /* ok, we have a free register: push + fstp */
1118 x87_create_fpush(env, state, n, op2_idx, STORE_VAL_IDX);
1120 x87_patch_insn(n, op_p);
1123 ir_node *vfld, *mem, *block, *rproj, *mproj;
1126 /* stack full here: need fstp + load */
1128 x87_patch_insn(n, op_p);
1130 block = get_nodes_block(n);
1131 irg = get_irn_irg(n);
1132 vfld = new_rd_ia32_vfld(NULL, irg, block, get_irn_n(n, 0), get_irn_n(n, 1), new_rd_NoMem(irg));
1134 /* copy all attributes */
1135 set_ia32_frame_ent(vfld, get_ia32_frame_ent(n));
1136 if (is_ia32_use_frame(n))
1137 set_ia32_use_frame(vfld);
1138 set_ia32_am_flavour(vfld, get_ia32_am_flavour(n));
1139 set_ia32_op_type(vfld, ia32_am_Source);
1140 add_ia32_am_offs(vfld, get_ia32_am_offs(n));
1141 set_ia32_am_sc(vfld, get_ia32_am_sc(n));
1142 set_ia32_ls_mode(vfld, get_ia32_ls_mode(n));
1144 rproj = new_r_Proj(irg, block, vfld, get_ia32_ls_mode(vfld), pn_ia32_vfld_res);
1145 mproj = new_r_Proj(irg, block, vfld, mode_M, pn_ia32_vfld_M);
1146 mem = get_irn_Proj_for_mode(n, mode_M);
1148 assert(mem && "Store memory not found");
1150 arch_set_irn_register(env, rproj, op2);
1152 /* reroute all former users of the store memory to the load memory */
1153 edges_reroute(mem, mproj, irg);
1154 /* set the memory input of the load to the store memory */
1155 set_irn_n(vfld, 2, mem);
1157 sched_add_after(n, vfld);
1158 sched_add_after(vfld, rproj);
1160 /* rewire all users, scheduled after the store, to the loaded value */
1161 collect_and_rewire_users(n, val, rproj);
1167 /* mode != mode_E -> use normal fst */
1168 x87_patch_insn(n, op);
1173 x87_patch_insn(n, op_p);
1176 attr = get_ia32_attr(n);
1177 attr->x87[1] = op2 = &ia32_st_regs[0];
1178 DB((dbg, LEVEL_1, "<<< %s %s ->\n", get_irn_opname(n), arch_register_get_name(op2)));
1184 * Simulate a virtual Phi.
1185 * Just for cosmetic reasons change the mode of Phi nodes to mode_E.
1187 * @param state the x87 state
1188 * @param n the node that should be simulated (and patched)
1189 * @param env the architecture environment
/**
 * Simulate a Phi: no stack manipulation, just normalize float Phis to
 * mode_E for cosmetic consistency with the patched x87 nodes.
 */
1191 static int sim_Phi(x87_state *state, ir_node *n, const arch_env_t *env) {
1192 ir_mode *mode = get_irn_mode(n);
1194 if (mode_is_float(mode))
1195 set_irn_mode(n, mode_E);
/*
 * Generator macros: each expands to a sim_<op> wrapper that forwards to
 * the generic sim_binop/sim_load/sim_unop/sim_store with the matching
 * op_ia32_* opcodes (normal, reverse and pop variants where relevant).
 * NOTE(review): continuation lines are partially elided in this listing.
 */
1201 #define _GEN_BINOP(op, rev) \
1202 static int sim_##op(x87_state *state, ir_node *n, const arch_env_t *env) { \
1203 exchange_tmpl tmpl = { op_ia32_##op, op_ia32_##rev, op_ia32_##op##p, op_ia32_##rev##p }; \
1204 return sim_binop(state, n, env, &tmpl); \
1207 #define GEN_BINOP(op) _GEN_BINOP(op, op)
1208 #define GEN_BINOPR(op) _GEN_BINOP(op, op##r)
1210 #define GEN_LOAD2(op, nop) \
1211 static int sim_##op(x87_state *state, ir_node *n, const arch_env_t *env) { \
1212 return sim_load(state, n, env, op_ia32_##nop); \
1215 #define GEN_LOAD(op) GEN_LOAD2(op, op)
1217 #define GEN_UNOP(op) \
1218 static int sim_##op(x87_state *state, ir_node *n, const arch_env_t *env) { \
1219 return sim_unop(state, n, env, op_ia32_##op); \
1222 #define GEN_STORE(op) \
1223 static int sim_##op(x87_state *state, ir_node *n, const arch_env_t *env) { \
1224 return sim_store(state, n, env, op_ia32_##op, op_ia32_##op##p); \
1243 GEN_LOAD2(fConst, fldConst)
1249 * Simulate a fCondJmp.
1251 * @param state the x87 state
1252 * @param n the node that should be simulated (and patched)
1253 * @param env the architecture environment
/*
 * Selects one of the fcom/fcomp/fcompp (and reversed) jump variants
 * depending on where the two operands sit on the simulated stack and
 * whether they are still live after the compare. Dead operands are
 * popped by choosing a popping variant; fxch nodes are inserted to
 * move operands into the positions the chosen opcode requires.
 */
1255 static int sim_fCondJmp(x87_state *state, ir_node *n, const arch_env_t *env) {
1256 int op2_idx, op1_idx = -1, pop_cnt = 0;
1259 const arch_register_t *op1 = arch_get_irn_register(env, get_irn_n(n, BINOP_IDX_1));
1260 const arch_register_t *op2 = arch_get_irn_register(env, get_irn_n(n, BINOP_IDX_2));
1261 unsigned live = vfp_liveness_nodes_live_at(state->sim, n);
1263 DB((dbg, LEVEL_1, ">>> %s %s, %s\n", get_irn_opname(n),
1264 arch_register_get_name(op1), arch_register_get_name(op2)));
1265 DEBUG_ONLY(vfp_dump_live(live));
1267 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1268 op2_idx = x87_on_stack(state, arch_register_get_index(op2));
1270 /* BEWARE: check for comp a,a cases, they might happen */
1271 if (op2->index != REG_VFP_NOREG) {
1272 /* second operand is a vfp register */
1274 if (is_vfp_live(op2->index, live)) {
1275 /* second operand is live */
1277 if (is_vfp_live(op1->index, live)) {
1278 /* both operands are live: move one of them to tos */
1280 XCHG(op2_idx, op1_idx);
1281 dst = op_ia32_fcomrJmp;
1283 else if (op1_idx == 0) {
1284 dst = op_ia32_fcomJmp;
1287 /* bring the first on top */
1288 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1289 if (op1_idx == op2_idx)
1292 dst = op_ia32_fcomJmp;
1296 /* second live, first operand is dead here, bring it to tos.
1297 This means further, op1_idx != op2_idx. */
1299 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1304 dst = op_ia32_fcompJmp;
1309 /* second operand is dead */
1310 if (is_vfp_live(op1->index, live)) {
1311 /* first operand is live: bring second to tos.
1312 This means further, op1_idx != op2_idx. */
1314 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1319 dst = op_ia32_fcomrpJmp;
1323 /* both operands are dead here, check first for identity. */
1324 if (op1_idx == op2_idx) {
1325 /* identical, only one exchange needed */
1327 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1328 op1_idx = op2_idx = 0;
1330 dst = op_ia32_fcompJmp;
1333 /* different, move them to st and st(1) and pop both.
1334 The tricky part is to get one into st(1).*/
1335 else if (op2_idx == 1) {
1336 /* good, second operand is already in the right place, move the first */
1338 /* bring the first on top */
1339 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1342 dst = op_ia32_fcomppJmp;
1345 else if (op1_idx == 1) {
1346 /* good, first operand is already in the right place, move the second */
1348 /* bring the first on top */
1349 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1352 dst = op_ia32_fcomrppJmp;
1356 /* if one is already the TOS, we need two fxch */
1358 /* first one is TOS, move to st(1) */
1359 x87_create_fxch(state, n, 1, BINOP_IDX_1);
1361 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1363 dst = op_ia32_fcomrppJmp;
1366 else if (op2_idx == 0) {
1367 /* second one is TOS, move to st(1) */
1368 x87_create_fxch(state, n, 1, BINOP_IDX_2);
1370 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1372 dst = op_ia32_fcomrppJmp;
1376 /* none of them is either TOS or st(1), 3 fxch needed */
1377 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1378 x87_create_fxch(state, n, 1, BINOP_IDX_2);
1379 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1382 dst = op_ia32_fcomppJmp;
1390 /* second operand is an address mode */
1391 if (is_vfp_live(op1->index, live)) {
1392 /* first operand is live: bring it to TOS */
1394 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1397 dst = op_ia32_fcomJmp;
1400 /* first operand is dead: bring it to tos */
1402 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1406 dst = op_ia32_fcompJmp;
/* rewrite the virtual node into the selected concrete x87 opcode */
1410 x87_patch_insn(n, dst);
1416 /* patch the operation */
1417 attr = get_ia32_attr(n);
1418 attr->x87[0] = op1 = &ia32_st_regs[op1_idx];
1420 attr->x87[1] = op2 = &ia32_st_regs[op2_idx];
1423 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(n),
1424 arch_register_get_name(op1), arch_register_get_name(op2)));
1426 DB((dbg, LEVEL_1, "<<< %s %s, [AM]\n", get_irn_opname(n),
1427 arch_register_get_name(op1)));
1430 } /* sim_fCondJmp */
1433 * Simulate a be_Copy.
1435 * @param state the x87 state
1436 * @param n the node that should be simulated (and patched)
1437 * @param env the architecture environment
/*
 * Three cases for a float copy:
 *  - source still live afterwards: materialize a real duplicate via fpush;
 *  - source dead and destination occupies another stack slot: turn the
 *    Copy into a Pop that kills the source value;
 *  - otherwise: pure renaming in the simulated state, the Copy node is
 *    removed entirely.
 * Non-float copies are untouched.
 */
1439 static int sim_Copy(x87_state *state, ir_node *n, const arch_env_t *env) {
1440 ir_mode *mode = get_irn_mode(n);
1442 if (mode_is_float(mode)) {
1443 const arch_register_t *op1 = arch_get_irn_register(env, get_irn_n(n, 0));
1444 const arch_register_t *out = arch_get_irn_register(env, n);
1445 ir_node *node, *next;
1447 int op1_idx, out_idx;
1448 unsigned live = vfp_liveness_nodes_live_at(state->sim, n);
1450 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1452 DB((dbg, LEVEL_1, ">>> %s %s -> %s\n", get_irn_opname(n),
1453 arch_register_get_name(op1), arch_register_get_name(out)));
1454 DEBUG_ONLY(vfp_dump_live(live));
1456 if (is_vfp_live(op1->index, live)) {
1457 /* operand is still live, a real copy */
1458 node = new_rd_ia32_fpush(get_irn_dbg_info(n), get_irn_irg(n), get_nodes_block(n), get_irn_n(n, 0), mode);
1459 arch_set_irn_register(env, node, out);
/* the fpush result becomes the new TOS */
1461 x87_push(state, arch_register_get_index(out), node);
1463 attr = get_ia32_attr(node);
1464 attr->x87[0] = op1 = &ia32_st_regs[op1_idx];
1465 attr->x87[2] = out = &ia32_st_regs[0];
1467 next = sched_next(n);
1470 sched_add_before(next, node);
1471 DB((dbg, LEVEL_1, ">>> %s %s -> %s\n", get_irn_opname(node), op1->name, out->name));
1474 out_idx = x87_on_stack(state, arch_register_get_index(out));
1476 if (out_idx >= 0 && out_idx != op1_idx) {
1477 /* op1 must be killed and placed where out is */
1479 /* best case, simple remove and rename */
1480 x87_patch_insn(n, op_ia32_Pop);
1481 attr = get_ia32_attr(n);
1482 attr->x87[0] = op1 = &ia32_st_regs[0];
1485 x87_set_st(state, arch_register_get_index(out), n, op1_idx - 1);
1488 /* move op1 to tos, store and pop it */
1490 x87_create_fxch(state, n, op1_idx, 0);
1493 x87_patch_insn(n, op_ia32_Pop);
1494 attr = get_ia32_attr(n);
1495 attr->x87[0] = op1 = &ia32_st_regs[out_idx];
1498 x87_set_st(state, arch_register_get_index(out), n, out_idx - 1);
1500 DB((dbg, LEVEL_1, ">>> %s %s\n", get_irn_opname(n), op1->name));
1503 /* just a virtual copy: rename the slot, drop the Copy node */
1504 x87_set_st(state, arch_register_get_index(out), get_unop_op(n), op1_idx);
1506 DB((dbg, LEVEL_1, ">>> KILLED %s\n", get_irn_opname(n)));
1507 exchange(n, get_unop_op(n));
1516 * Simulate a be_Call.
1518 * @param state the x87 state
1519 * @param n the node that should be simulated
1520 * @param env the architecture environment
1522 static int sim_Call(x87_state *state, ir_node *n, const arch_env_t *env) {
1523 ir_type *call_tp = be_Call_get_type(n);
1525 /* at the begin of a call the x87 state should be empty */
1526 assert(state->depth == 0 && "stack not empty before call");
1529 * If the called function returns a float, it is returned in st(0).
1530 * This even happens if the return value is NOT used.
1531 * Moreover, only one return result is supported.
1533 if (get_method_n_ress(call_tp) > 0) {
1534 ir_type *res_type = get_method_res_type(call_tp, 0);
1535 ir_mode *mode = get_type_mode(res_type);
/* get_type_mode() may return NULL for compound result types, hence the check */
1537 if (mode && mode_is_float(mode)) {
1539 * TODO: what to push here? The result might be unused and currently
1540 * we have no possibility to detect this :-(
1542 x87_push(state, 0, n);
1550 * Simulate a be_Spill.
1552 * @param state the x87 state
1553 * @param n the node that should be simulated (and patched)
1554 * @param env the architecture environment
1556 * Should not happen, spills are lowered before x87 simulator see them.
1558 static int sim_Spill(x87_state *state, ir_node *n, const arch_env_t *env) {
/* dead path: kept only so a stray Spill degrades to an fst in release builds */
1559 assert(0 && "Spill not lowered");
1560 return sim_fst(state, n, env);
1564 * Simulate a be_Reload.
1566 * @param state the x87 state
1567 * @param n the node that should be simulated (and patched)
1568 * @param env the architecture environment
1570 * Should not happen, reloads are lowered before x87 simulator see them.
1572 static int sim_Reload(x87_state *state, ir_node *n, const arch_env_t *env) {
/* dead path: kept only so a stray Reload degrades to an fld in release builds */
1573 assert(0 && "Reload not lowered");
1574 return sim_fld(state, n, env);
1578 * Simulate a be_Return.
1580 * @param state the x87 state
1581 * @param n the node that should be simulated (and patched)
1582 * @param env the architecture environment
1584 static int sim_Return(x87_state *state, ir_node *n, const arch_env_t *env) {
1585 int n_res = be_Return_get_n_rets(n);
1586 int i, n_float_res = 0;
1588 /* only floating point return values must reside on the stack */
1589 for (i = 0; i < n_res; ++i) {
1590 ir_node *res = get_irn_n(n, be_pos_Return_val + i);
1592 if (mode_is_float(get_irn_mode(res)))
/* at the Return, exactly the float results may still occupy the stack */
1595 assert(x87_get_depth(state) == n_float_res);
1597 /* pop them virtually */
1598 for (i = n_float_res - 1; i >= 0; --i)
1605 * Kill any dead registers at block start by popping them from the stack.
1607 * @param sim the simulator handle
1608 * @param block the current block
1609 * @param start_state the x87 state at the begin of the block
/*
 * Builds a bit mask of stack slots whose values are not live into the
 * block, then emits fxch/fpop nodes before the first instruction until
 * all dead slots are removed. Returns the (possibly cloned) state.
 */
1611 static x87_state *x87_kill_deads(x87_simulator *sim, ir_node *block, x87_state *start_state) {
1612 x87_state *state = start_state;
1613 ir_node *first_insn = sched_first(block);
1614 ir_node *keep = NULL;
1615 unsigned live = vfp_liveness_nodes_live_at(sim, block);
1617 int i, depth, num_pop;
/* collect dead slots: bit i of kill_mask = st(i) holds a dead value */
1620 depth = x87_get_depth(state);
1621 for (i = depth - 1; i >= 0; --i) {
1622 int reg = x87_get_st_reg(state, i);
1624 if (! is_vfp_live(reg, live))
1625 kill_mask |= (1 << i);
1629 /* create a new state, will be changed */
1630 state = x87_clone_state(sim, state);
1632 DB((dbg, LEVEL_1, "Killing deads:\n"));
1633 DEBUG_ONLY(vfp_dump_live(live));
1634 DEBUG_ONLY(x87_dump_stack(state));
1636 /* now kill registers */
1638 /* we can only kill from TOS, so bring them up */
1639 if (! (kill_mask & 1)) {
1640 /* search from behind, because we may be able to do a double-pop */
1641 for (i = depth - 1; i >= 0; --i) {
1642 if (kill_mask & (1 << i)) {
1643 kill_mask &= ~(1 << i);
/* -1 marks the slot as containing a dead (don't-care) value */
1650 x87_set_st(state, -1, keep, i);
1651 keep = x87_create_fxch(state, first_insn, i, -1);
1654 keep = x87_get_st_node(state, 0);
1656 if ((kill_mask & 3) == 3) {
1657 /* we can do a double-pop */
1661 /* only a single pop */
1666 kill_mask >>= num_pop;
1667 keep = x87_create_fpop(sim->env, state, first_insn, num_pop, keep);
/* keep-alive the last pop so the dead chain is not removed by DCE */
1669 add_End_keepalive(get_irg_end(get_irn_irg(block)), keep);
1672 } /* x87_kill_deads */
1675 * Run a simulation and fix all virtual instructions for a block.
1677 * @param sim the simulator handle
1678 * @param block the current block
1680 * @return non-zero if simulation is complete,
1681 * zero if the simulation must be rerun
1683 static int x87_simulate_block(x87_simulator *sim, ir_node *block) {
1685 blk_state *bl_state = x87_get_bl_state(sim, block);
1686 x87_state *state = bl_state->begin;
1687 const ir_edge_t *edge;
1688 ir_node *start_block;
1690 /* if we have no assigned start state, we must wait ... */
1694 assert(bl_state->end == NULL);
1696 DB((dbg, LEVEL_1, "Simulate %+F\n", block));
1697 DB((dbg, LEVEL_2, "State at Block begin:\n ")); DEBUG_ONLY(x87_dump_stack(state));
1699 /* at block begin, kill all dead registers */
1700 state = x87_kill_deads(sim, block, state);
1702 /* beware, n might be changed */
1703 for (n = sched_first(block); !sched_is_end(n); n = next) {
1704 ir_op *op = get_irn_op(n);
/* fetch the successor first: the sim_func may rewrite the schedule */
1706 next = sched_next(n);
1707 if (op->ops.generic) {
/* the generic op pointer was set to the opcode's sim_* handler
   in x87_init_simulator */
1709 sim_func func = (sim_func)op->ops.generic;
1711 /* have work to do */
1712 if (state == bl_state->begin) {
/* copy-on-write: never mutate the shared begin state */
1713 /* create a new state, will be changed */
1714 state = x87_clone_state(sim, state);
1718 node_inserted = (*func)(state, n, sim->env);
1721 sim_func might have added additional nodes after n,
1723 beware: n must not be changed by sim_func
1724 (i.e. removed from schedule) in this case
1727 next = sched_next(n);
1731 start_block = get_irg_start_block(get_irn_irg(block));
1733 /* check if the state must be shuffled */
1734 foreach_block_succ(block, edge) {
1735 ir_node *succ = get_edge_src_irn(edge);
1736 blk_state *succ_state = x87_get_bl_state(sim, succ);
1738 if (succ_state->begin && succ != start_block) {
1739 /* There is already a begin state for this block, bad.
1740 Do the necessary permutations.
1741 Note that critical edges are removed, so this is always possible. */
1742 x87_shuffle(sim, block, state, succ, succ_state->begin);
1744 /* Note further, that there can be only one such situation,
1745 so we can break here. */
1749 bl_state->end = state;
1751 /* now propagate the state to all successor blocks */
1752 foreach_block_succ(block, edge) {
1753 ir_node *succ = get_edge_src_irn(edge);
1754 blk_state *succ_state = x87_get_bl_state(sim, succ);
1756 if (! succ_state->begin)
1757 succ_state->begin = state;
1760 DB((dbg, LEVEL_2, "State at Block end:\n ")); DEBUG_ONLY(x87_dump_stack(state));
1763 } /* x87_simulate_block */
1766 * Create a new x87 simulator.
1768 * @param sim a simulator handle, will be initialized
1769 * @param irg the current graph
1770 * @param env the architecture environment
1772 static void x87_init_simulator(x87_simulator *sim, ir_graph *irg, const arch_env_t *env)
1774 obstack_init(&sim->obst);
1775 sim->blk_states = pmap_create();
/* liveness cache is indexed by node index, so size it by the last index */
1777 sim->n_idx = get_irg_last_idx(irg);
1778 sim->live = obstack_alloc(&sim->obst, sizeof(*sim->live) * sim->n_idx);
1780 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.x87");
1782 DB((dbg, LEVEL_1, "--------------------------------\n"
1783 "x87 Simulator started for %+F\n", irg));
1785 /* set the generic function pointer of instruction we must simulate */
1786 clear_irp_opcodes_generic_func();
/* the ASSOC* macros bind each opcode's ops.generic to its sim_* handler,
   which x87_simulate_block dispatches through */
1788 #define ASSOC(op) (op_ ## op)->ops.generic = (op_func)(sim_##op)
1789 #define ASSOC_IA32(op) (op_ia32_v ## op)->ops.generic = (op_func)(sim_##op)
1790 #define ASSOC_BE(op) (op_be_ ## op)->ops.generic = (op_func)(sim_##op)
1808 ASSOC_IA32(fCondJmp);
1818 } /* x87_init_simulator */
1821 * Destroy a x87 simulator.
1823 * @param sim the simulator handle
/* Frees the block-state map and the obstack (which also owns sim->live
   and all cloned x87 states). */
1825 static void x87_destroy_simulator(x87_simulator *sim) {
1826 pmap_destroy(sim->blk_states);
1827 obstack_free(&sim->obst, NULL);
1828 DB((dbg, LEVEL_1, "x87 Simulator stopped\n\n"));
1829 } /* x87_destroy_simulator */
1832 * Run a simulation and fix all virtual instructions for a graph.
1834 * @param env the architecture environment
1835 * @param irg the current graph
1836 * @param blk_list the block schedule list
1838 * Needs a block-schedule.
1840 void x87_simulate_graph(const arch_env_t *env, ir_graph *irg, ir_node **blk_list) {
1841 ir_node *block, *start_block;
1843 blk_state *bl_state;
1848 /* create the simulator */
1849 x87_init_simulator(&sim, irg, env);
1851 start_block = get_irg_start_block(irg);
1852 bl_state = x87_get_bl_state(&sim, start_block);
1854 /* start with the empty state */
1855 bl_state->begin = empty;
1858 worklist = new_waitq();
1860 /* Create the worklist for the schedule and calculate the liveness
1861 for all nodes. We must precalculate this info, because the
1862 simulator adds new nodes (and possible before Phi nodes) which
1863 let fail the old lazy calculation.
1864 On the other hand we reduce the computation amount due to
1865 precaching from O(n^2) to O(n) at the expense of O(n) cache memory.
1867 lv = be_liveness(irg);
1868 for (i = 0, n = ARR_LEN(blk_list); i < n; ++i) {
1869 update_liveness(&sim, lv, blk_list[i]);
1870 waitq_put(worklist, blk_list[i]);
1872 be_liveness_free(lv);
/* fixed-point iteration: a block whose predecessor state is not yet
   known reports failure and is re-queued until it can be simulated */
1876 block = waitq_get(worklist);
1877 if (! x87_simulate_block(&sim, block)) {
1878 waitq_put(worklist, block);
1881 } while (! pdeq_empty(worklist));
1884 del_waitq(worklist);
1885 x87_destroy_simulator(&sim);
1886 } /* x87_simulate_graph */