2 * This file implements the x87 support and virtual to stack
3 * register translation for the ia32 backend.
5 * @author: Michael Beck
12 #endif /* HAVE_CONFIG_H */
19 #include "iredges_t.h"
28 #include "../belive_t.h"
29 #include "../besched_t.h"
30 #include "../benode_t.h"
31 #include "ia32_new_nodes.h"
32 #include "gen_ia32_new_nodes.h"
33 #include "gen_ia32_regalloc_if.h"
38 /* first and second binop index */
45 /* the store val index */
46 #define STORE_VAL_IDX 2
48 #define MASK_TOS(x) ((x) & (N_x87_REGS - 1))
50 /** the debug handle */
51 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
53 /* Forward declaration. */
54 typedef struct _x87_simulator x87_simulator;
57 * An exchange template.
58 * Note that our virtual functions have the same inputs
59 * and attributes as the real ones, so we can simple exchange
61 * Further, x87 supports inverse instructions, so we can handle them.
63 typedef struct _exchange_tmpl {
64 ir_op *normal_op; /**< the normal one */
65 ir_op *reverse_op; /**< the reverse one if exists */
66 ir_op *normal_pop_op; /**< the normal one with tos pop */
67 ir_op *reverse_pop_op; /**< the reverse one with tos pop */
71 * An entry on the simulated x87 stack.
73 typedef struct _st_entry {
74 int reg_idx; /**< the virtual register index of this stack value */
75 ir_node *node; /**< the node that produced this value */
81 typedef struct _x87_state {
82 st_entry st[N_x87_REGS]; /**< the register stack */
83 int depth; /**< the current stack depth */
84 int tos; /**< position of the tos */
85 x87_simulator *sim; /**< The simulator. */
88 /** An empty state, used for blocks without fp instructions. */
89 static x87_state _empty = { { {0, NULL}, }, 0, 0 };
90 static x87_state *empty = (x87_state *)&_empty;
92 /** The type of an instruction simulator function. */
93 typedef int (*sim_func)(x87_state *state, ir_node *n, const arch_env_t *env);
96 * A block state: Every block has a x87 state at the beginning and at the end.
98 typedef struct _blk_state {
99 x87_state *begin; /**< state at the begin or NULL if not assigned */
100 x87_state *end; /**< state at the end or NULL if not assigned */
103 #define PTR_TO_BLKSTATE(p) ((blk_state *)(p))
108 struct _x87_simulator {
109 struct obstack obst; /**< an obstack for fast allocating */
110 pmap *blk_states; /**< map blocks to states */
111 const arch_env_t *env; /**< architecture environment */
112 unsigned char *live; /**< Liveness information. */
113 unsigned n_idx; /**< cached get_irg_last_idx() result */
117 * Returns the current stack depth.
119 * @param state the x87 state
121 * @return the x87 stack depth
123 static int x87_get_depth(const x87_state *state) {
128 * Check if the state is empty.
130 * @param state the x87 state
132 * returns non-zero if the x87 stack is empty
134 static int x87_state_is_empty(const x87_state *state) {
135 return state->depth == 0;
139 * Return the virtual register index at st(pos).
141 * @param state the x87 state
142 * @param pos a stack position
144 * @return the vfp register index that produced the value at st(pos)
146 static int x87_get_st_reg(const x87_state *state, int pos) {
147 assert(pos < state->depth);
148 return state->st[MASK_TOS(state->tos + pos)].reg_idx;
152 * Return the node at st(pos).
154 * @param state the x87 state
155 * @param pos a stack position
157 * @return the IR node that produced the value at st(pos)
159 static ir_node *x87_get_st_node(const x87_state *state, int pos) {
160 assert(pos < state->depth);
161 return state->st[MASK_TOS(state->tos + pos)].node;
162 } /* x87_get_st_node */
166 * Dump the stack for debugging.
168 * @param state the x87 state
170 static void x87_dump_stack(const x87_state *state) {
173 for (i = state->depth - 1; i >= 0; --i) {
174 DB((dbg, LEVEL_2, "vf%d ", x87_get_st_reg(state, i)));
176 DB((dbg, LEVEL_2, "<-- TOS\n"));
177 } /* x87_dump_stack */
178 #endif /* DEBUG_libfirm */
181 * Set a virtual register to st(pos).
183 * @param state the x87 state
184 * @param reg_idx the vfp register index that should be set
185 * @param node the IR node that produces the value of the vfp register
186 * @param pos the stack position where the new value should be entered
188 static void x87_set_st(x87_state *state, int reg_idx, ir_node *node, int pos) {
189 assert(0 < state->depth);
190 state->st[MASK_TOS(state->tos + pos)].reg_idx = reg_idx;
191 state->st[MASK_TOS(state->tos + pos)].node = node;
193 DB((dbg, LEVEL_2, "After SET_REG:\n ")); DEBUG_ONLY(x87_dump_stack(state));
197 * Set the tos virtual register.
199 * @param state the x87 state
200 * @param reg_idx the vfp register index that should be set
201 * @param node the IR node that produces the value of the vfp register
203 static void x87_set_tos(x87_state *state, int reg_idx, ir_node *node) {
204 x87_set_st(state, reg_idx, node, 0);
209 * Flush the x87 stack.
211 * @param state the x87 state
213 static void x87_flush(x87_state *state) {
220 * Swap st(0) with st(pos).
222 * @param state the x87 state
223 * @param pos the stack position to change the tos with
225 static void x87_fxch(x87_state *state, int pos) {
227 assert(pos < state->depth);
229 entry = state->st[MASK_TOS(state->tos + pos)];
230 state->st[MASK_TOS(state->tos + pos)] = state->st[MASK_TOS(state->tos)];
231 state->st[MASK_TOS(state->tos)] = entry;
233 DB((dbg, LEVEL_2, "After FXCH:\n ")); DEBUG_ONLY(x87_dump_stack(state));
237 * Convert a virtual register to the stack index.
239 * @param state the x87 state
240 * @param reg_idx the register vfp index
242 * @return the stack position where the register is stacked
243 * or -1 if the virtual register was not found
245 static int x87_on_stack(const x87_state *state, int reg_idx) {
246 int i, tos = state->tos;
248 for (i = 0; i < state->depth; ++i)
249 if (state->st[MASK_TOS(tos + i)].reg_idx == reg_idx)
255 * Push a virtual Register onto the stack, double pushed allowed.
257 * @param state the x87 state
258 * @param reg_idx the register vfp index
259 * @param node the node that produces the value of the vfp register
261 static void x87_push_dbl(x87_state *state, int reg_idx, ir_node *node) {
262 assert(state->depth < N_x87_REGS && "stack overrun");
265 state->tos = MASK_TOS(state->tos - 1);
266 state->st[state->tos].reg_idx = reg_idx;
267 state->st[state->tos].node = node;
269 DB((dbg, LEVEL_2, "After PUSH:\n ")); DEBUG_ONLY(x87_dump_stack(state));
273 * Push a virtual Register onto the stack, double pushes are NOT allowed..
275 * @param state the x87 state
276 * @param reg_idx the register vfp index
277 * @param node the node that produces the value of the vfp register
278 * @param dbl_push if != 0 double pushes are allowed
280 static void x87_push(x87_state *state, int reg_idx, ir_node *node) {
281 assert(x87_on_stack(state, reg_idx) == -1 && "double push");
283 x87_push_dbl(state, reg_idx, node);
287 * Pop a virtual Register from the stack.
289 static void x87_pop(x87_state *state) {
290 assert(state->depth > 0 && "stack underrun");
293 state->tos = MASK_TOS(state->tos + 1);
295 DB((dbg, LEVEL_2, "After POP:\n ")); DEBUG_ONLY(x87_dump_stack(state));
299 * Returns the block state of a block.
301 * @param sim the x87 simulator handle
302 * @param block the current block
304 * @return the block state
306 static blk_state *x87_get_bl_state(x87_simulator *sim, ir_node *block) {
307 pmap_entry *entry = pmap_find(sim->blk_states, block);
310 blk_state *bl_state = obstack_alloc(&sim->obst, sizeof(*bl_state));
311 bl_state->begin = NULL;
312 bl_state->end = NULL;
314 pmap_insert(sim->blk_states, block, bl_state);
318 return PTR_TO_BLKSTATE(entry->value);
319 } /* x87_get_bl_state */
322 * Creates a new x87 state.
324 * @param sim the x87 simulator handle
326 * @return a new x87 state
328 static x87_state *x87_alloc_state(x87_simulator *sim) {
329 x87_state *res = obstack_alloc(&sim->obst, sizeof(*res));
333 } /* x87_alloc_state */
337 * Create a new empty x87 state.
339 * @param sim the x87 simulator handle
341 * @return a new empty x87 state
343 static x87_state *x87_alloc_empty_state(x87_simulator *sim) {
344 x87_state *res = x87_alloc_state(sim);
348 } /* x87_alloc_empty_state */
354 * @param sim the x87 simulator handle
355 * @param src the x87 state that will be cloned
357 * @return a cloned copy of the src state
359 static x87_state *x87_clone_state(x87_simulator *sim, const x87_state *src) {
360 x87_state *res = x87_alloc_state(sim);
362 memcpy(res, src, sizeof(*res));
364 } /* x87_clone_state */
367 * Patch a virtual instruction into a x87 one and return
370 * @param n the IR node to patch
371 * @param op the x87 opcode to patch in
/*
 * Patch a virtual instruction into a real x87 one: for mode_T nodes the
 * float Proj is switched to mode_E, otherwise the node itself is.
 * NOTE(review): several lines are missing from this excerpt (presumably the
 * set_irn_op() call that installs `op`, the result bookkeeping, loop/brace
 * closers and the return) — verify against the full upstream file.
 */
static ir_node *x87_patch_insn(ir_node *n, ir_op *op) {
	ir_mode *mode = get_irn_mode(n);
	if (mode == mode_T) {
		/* patch all Proj's */
		const ir_edge_t *edge;
		foreach_out_edge(n, edge) {
			ir_node *proj = get_edge_src_irn(edge);
			mode = get_irn_mode(proj);
			if (mode_is_float(mode)) {
				/* float results are represented in mode_E on the x87 */
				set_irn_mode(proj, mode_E);
	else if (mode_is_float(mode))
		set_irn_mode(n, mode_E);
} /* x87_patch_insn */
400 * Returns the first Proj of a mode_T node having a given mode.
402 * @param n the mode_T node
403 * @param m the desired mode of the Proj
404 * @return The first Proj of mode @p m found or NULL.
406 static ir_node *get_irn_Proj_for_mode(ir_node *n, ir_mode *m) {
407 const ir_edge_t *edge;
409 assert(get_irn_mode(n) == mode_T && "Need mode_T node");
411 foreach_out_edge(n, edge) {
412 ir_node *proj = get_edge_src_irn(edge);
413 if (get_irn_mode(proj) == m)
418 } /* get_irn_Proj_for_mode */
420 /* -------------- x87 perm --------------- */
423 * Creates a fxch for shuffle.
425 * @param state the x87 state
426 * @param pos parameter for fxch
427 * @param dst_block the block of the user
429 * Creates a new fxch node and reroute the user of the old node
432 * @return the fxch node
/*
 * Creates an fxch for the shuffle phase and reroutes the Phi user in the
 * destination block to the fxch result; also updates the simulated state.
 * NOTE(review): this excerpt has gaps — declarations of fxch/attr/node_idx,
 * several brace closers, the else-branch keeping dominator-block nodes
 * alive, and the return of the fxch node appear to be missing. Verify
 * against the full upstream file before editing logic.
 */
static ir_node *x87_fxch_shuffle(x87_state *state, int pos, ir_node *block, ir_node *dst_block)
{
	const ir_edge_t *edge;
	ir_node *n = x87_get_st_node(state, pos);
	ir_node *user = NULL;
	if (block == get_nodes_block(n)) {
		/* this is a node from our block: change its user */
		foreach_out_edge(n, edge) {
			ir_node *succ = get_edge_src_irn(edge);
			if (is_Phi(succ) && get_nodes_block(succ) == dst_block) {
				node_idx = get_edge_src_pos(edge);
	fxch = new_rd_ia32_fxch(NULL, get_irn_irg(block), block, n, get_irn_mode(n));
	attr = get_ia32_attr(fxch);
	/* fxch exchanges st(pos) with st(0) */
	attr->x87[0] = &ia32_st_regs[pos];
	attr->x87[2] = &ia32_st_regs[0];
	DB((dbg, LEVEL_2, "%+F replaced input %d of %+F\n", fxch, node_idx, user));
	set_irn_n(user, node_idx, fxch);
	 * This is a node from a dominator block. Changing its user might be wrong,
	 * so just keep it alive.
	 * The "right" solution would require a new Phi, but we don't care here.
	x87_fxch(state, pos);
} /* x87_fxch_shuffle */
480 * Calculate the necessary permutations to reach dst_state.
482 * These permutations are done with fxch instructions and placed
483 * at the end of the block.
485 * Note that critical edges are removed here, so we need only
486 * a shuffle if the current block has only one successor.
488 * @param sim the simulator handle
489 * @param block the current block
490 * @param state the current x87 stack state, might be modified
491 * @param dst_block the destination block
492 * @param dst_state destination state
/*
 * Calculate the fxch permutations needed to transform `state` into
 * `dst_state` at the end of `block`: decompose the difference into cycles,
 * then emit fxch instructions per cycle (an extra leading/trailing fxch
 * brings/restores the tos for cycles not containing it).
 * NOTE(review): this excerpt has gaps — cycle-loop break statements, brace
 * closers, the early "no permutation needed" return, the ir_node *fxch
 * declaration, the after-pointer maintenance and the final return appear
 * to be missing. Verify against the full upstream file before editing.
 */
static x87_state *x87_shuffle(x87_simulator *sim, ir_node *block, x87_state *state, ir_node *dst_block, const x87_state *dst_state) {
	int i, n_cycles, k, ri;
	unsigned cycles[4], all_mask;
	char cycle_idx[4][8];
	ir_node *before, *after;
	assert(state->depth == dst_state->depth);
	/* Some mathematics here:
	   If we have a cycle of length n that includes the tos,
	   we need n-1 exchange operations.
	   We can always add the tos and restore it, so we need
	   n+1 exchange operations for a cycle not containing the tos.
	   So, the maximum of needed operations is for a cycle of 7
	   not including the tos == 8.
	   This is the same number of ops we would need for using stores,
	   so exchange is cheaper (we save the loads).
	   On the other hand, we might need an additional exchange
	   in the next block to bring one operand on top, so the
	   number of ops in the first case is identical.
	   Further, no more than 4 cycles can exists (4 x 2).
	all_mask = (1 << (state->depth)) - 1;
	/* find all cycles in the permutation; all_mask tracks unvisited slots */
	for (n_cycles = 0; all_mask; ++n_cycles) {
		int src_idx, dst_idx;
		/* find the first free slot */
		for (i = 0; i < state->depth; ++i) {
			if (all_mask & (1 << i)) {
				all_mask &= ~(1 << i);
				/* check if there are differences here */
				if (x87_get_st_reg(state, i) != x87_get_st_reg(dst_state, i))
		/* no more cycles found */
		cycles[n_cycles] = (1 << i);
		cycle_idx[n_cycles][k++] = i;
		/* follow the cycle: where must the value at src_idx end up? */
		for (src_idx = i; ; src_idx = dst_idx) {
			dst_idx = x87_on_stack(dst_state, x87_get_st_reg(state, src_idx));
			if ((all_mask & (1 << dst_idx)) == 0)
			cycle_idx[n_cycles][k++] = dst_idx;
			cycles[n_cycles] |= (1 << dst_idx);
			all_mask &= ~(1 << dst_idx);
		/* -1 terminates each cycle's index list */
		cycle_idx[n_cycles][k] = -1;
	/* no permutation needed */
	/* Hmm: permutation needed */
	DB((dbg, LEVEL_2, "\n%+F needs permutation: from\n", block));
	DEBUG_ONLY(x87_dump_stack(state));
	DB((dbg, LEVEL_2, " to\n"));
	DEBUG_ONLY(x87_dump_stack(dst_state));
	DB((dbg, LEVEL_2, "Need %d cycles\n", n_cycles));
	for (ri = 0; ri < n_cycles; ++ri) {
		DB((dbg, LEVEL_2, " Ring %d:\n ", ri));
		for (k = 0; cycle_idx[ri][k] != -1; ++k)
			DB((dbg, LEVEL_2, " st%d ->", cycle_idx[ri][k]));
		DB((dbg, LEVEL_2, "\n"));
	 * Find the place node must be insert.
	 * We have only one successor block, so the last instruction should
	before = sched_last(block);
	assert(is_cfop(before));
	/* now do the permutations */
	for (ri = 0; ri < n_cycles; ++ri) {
		if ((cycles[ri] & 1) == 0) {
			/* this cycle does not include the tos */
			fxch = x87_fxch_shuffle(state, cycle_idx[ri][0], block, dst_block);
			sched_add_after(after, fxch);
			sched_add_before(before, fxch);
		for (k = 1; cycle_idx[ri][k] != -1; ++k) {
			fxch = x87_fxch_shuffle(state, cycle_idx[ri][k], block, dst_block);
			sched_add_after(after, fxch);
			sched_add_before(before, fxch);
		if ((cycles[ri] & 1) == 0) {
			/* this cycle does not include the tos */
			fxch = x87_fxch_shuffle(state, cycle_idx[ri][0], block, dst_block);
			sched_add_after(after, fxch);
617 * Create a fxch node before another node.
619 * @param state the x87 state
620 * @param n the node before the fxch
621 * @param pos exchange st(pos) with st(0)
622 * @param op_idx if >= 0, replace input op_idx of n with the fxch result
/*
 * Create an fxch node scheduled before n, exchanging st(pos) with st(0),
 * and update the simulated state accordingly.
 * NOTE(review): this excerpt has gaps — the attr declaration, the
 * "if (op_idx >= 0) ... else ..." selection between the two pred
 * assignments, the guarded set_irn_n and the return of the fxch node
 * appear to be missing. Verify against the full upstream file.
 */
static ir_node *x87_create_fxch(x87_state *state, ir_node *n, int pos, int op_idx) {
	ir_node *fxch, *pred;
	/* keep the simulated state in sync with the emitted instruction */
	x87_fxch(state, pos);
	pred = get_irn_n(n, op_idx);
	pred = x87_get_st_node(state, pos);
	fxch = new_rd_ia32_fxch(NULL, get_irn_irg(n), get_nodes_block(n), pred, get_irn_mode(pred));
	attr = get_ia32_attr(fxch);
	attr->x87[0] = &ia32_st_regs[pos];
	attr->x87[2] = &ia32_st_regs[0];
	set_irn_n(n, op_idx, fxch);
	sched_add_before(n, fxch);
	DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fxch), attr->x87[0]->name, attr->x87[2]->name));
} /* x87_create_fxch */
651 * Create a fpush before node n.
653 * @param state the x87 state
654 * @param n the node before the fpush
655 * @param pos push st(pos) on stack
656 * @param op_idx if >= 0, replace input op_idx of n with the fpush result
/*
 * Create an fpush (duplicate st(pos) onto the stack top) scheduled before n
 * and make input op_idx of n use the pushed value.
 * NOTE(review): this excerpt has gaps — the ia32_attr_t *attr declaration
 * and possibly further lines are missing. Verify against upstream.
 */
static void x87_create_fpush(const arch_env_t *env, x87_state *state, ir_node *n, int pos, int op_idx) {
	ir_node *fpush, *pred = get_irn_n(n, op_idx);
	const arch_register_t *out = arch_get_irn_register(env, pred);
	/* double-push is explicitly allowed here: the same vfp register may
	 * legitimately occupy two stack slots after an fpush */
	x87_push_dbl(state, arch_register_get_index(out), pred);
	fpush = new_rd_ia32_fpush(NULL, get_irn_irg(n), get_nodes_block(n), pred, get_irn_mode(pred));
	attr = get_ia32_attr(fpush);
	attr->x87[0] = &ia32_st_regs[pos];
	attr->x87[2] = &ia32_st_regs[0];
	set_irn_n(n, op_idx, fpush);
	sched_add_before(n, fpush);
	DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fpush), attr->x87[0]->name, attr->x87[2]->name));
} /* x87_create_fpush */
677 * Create a fpop before node n.
679 * @param state the x87 state
680 * @param n the node before the fpop
681 * @param num pop 1 or 2 values
682 * @param pred node to use as predecessor of the fpop
684 * @return the fpop node
/*
 * Create one (or, per the num parameter, two) fpop node(s) scheduled
 * before n and pop the simulated stack accordingly.
 * NOTE(review): this excerpt has gaps — local declarations (fpop, attr),
 * the x87_pop() state update, the handling of num == 2 and the return of
 * the fpop node appear to be missing. Verify against upstream.
 */
static ir_node *x87_create_fpop(const arch_env_t *env, x87_state *state, ir_node *n, int num, ir_node *pred) {
	fpop = new_rd_ia32_fpop(NULL, get_irn_irg(n), get_nodes_block(n), mode_E);
	attr = get_ia32_attr(fpop);
	/* fpop always operates on st(0) */
	attr->x87[0] = &ia32_st_regs[0];
	attr->x87[1] = &ia32_st_regs[0];
	attr->x87[2] = &ia32_st_regs[0];
	sched_add_before(n, fpop);
	DB((dbg, LEVEL_1, "<<< %s %s\n", get_irn_opname(fpop), attr->x87[0]->name));
} /* x87_create_fpop */
707 /* --------------------------------- liveness ------------------------------------------ */
710 * The liveness transfer function.
711 * Updates a live set over a single step from a given node to its predecessor.
712 * Everything defined at the node is removed from the set, the uses of the node get inserted.
714 * @param arch_env The architecture environment.
715 * @param irn The node at which liveness should be computed.
716 * @param live The bitset of registers live before @p irn. This set gets modified by updating it to
717 * the registers live after irn.
719 * @return The live bitset.
/*
 * Liveness transfer function: one backward step over irn. The register
 * defined by irn is killed from the set, the registers of its float
 * operands are added.
 * NOTE(review): this excerpt has gaps — the opening brace, the loop-index
 * declarations (i, n), brace closers and "return live;" appear to be
 * missing. Verify against upstream.
 */
static unsigned vfp_liveness_transfer(const arch_env_t *arch_env, ir_node *irn, unsigned live)
	const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
	if (arch_irn_consider_in_reg_alloc(arch_env, cls, irn)) {
		/* the value defined here is dead before irn */
		const arch_register_t *reg = arch_get_irn_register(arch_env, irn);
		live &= ~(1 << arch_register_get_index(reg));
	for (i = 0, n = get_irn_arity(irn); i < n; ++i) {
		ir_node *op = get_irn_n(irn, i);
		/* every float operand is live before irn */
		if (mode_is_float(get_irn_mode(op)) && arch_irn_consider_in_reg_alloc(arch_env, cls, op)) {
			const arch_register_t *reg = arch_get_irn_register(arch_env, op);
			live |= 1 << arch_register_get_index(reg);
} /* vfp_liveness_transfer */
743 * Put all live virtual registers at the end of a block into a bitset.
745 * @param env the architecture environment
746 * @param lv the liveness information
747 * @param bl the block
749 * @return The live bitset at the end of this block
/*
 * Collect all vfp registers live at the end of block bl into a bitmask.
 * NOTE(review): this excerpt has gaps — the opening brace, the
 * declarations of i and the (presumably zero-initialized) live
 * accumulator, brace closers and "return live;" appear to be missing.
 * Verify against upstream.
 */
static unsigned vfp_liveness_end_of_block(const arch_env_t *env, be_lv_t *lv, const ir_node *bl)
	const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
	be_lv_foreach(lv, bl, be_lv_state_end, i) {
		ir_node *irn = be_lv_get_irn(lv, bl, i);
		if (arch_irn_consider_in_reg_alloc(env, cls, irn)) {
			const arch_register_t *reg = arch_get_irn_register(env, irn);
			live |= 1 << arch_register_get_index(reg);
} /* vfp_liveness_end_of_block */
768 /** get the register mask from an arch_register */
769 #define REGMASK(reg) (1 << (arch_register_get_index(reg)))
772 * Return a bitset of argument registers which are live at the end of a node.
774 * @param sim the simulator handle
775 * @param pos the node
776 * @param kill kill mask for the output registers
778 * @return The live bitset.
780 static unsigned vfp_live_args_after(x87_simulator *sim, const ir_node *pos, unsigned kill)
782 unsigned idx = get_irn_idx(pos);
784 assert(idx < sim->n_idx);
785 return sim->live[idx] & ~kill;
786 } /* vfp_live_args_after */
789 * Calculate the liveness for a whole block and cache it.
791 * @param sim the simulator handle
792 * @param lv the liveness handle
793 * @param blk the block
/*
 * Calculate the vfp liveness for every node of a block (backward walk)
 * and cache each node's live-after set in sim->live, keyed by node index;
 * the block's own index caches the live-in set.
 * NOTE(review): this excerpt has gaps — the declarations of irn/idx, the
 * Phi early-exit ("stop at the first Phi") and loop closers appear to be
 * missing. Verify against upstream.
 */
static void update_liveness(x87_simulator *sim, be_lv_t *lv, ir_node *blk) {
	unsigned live = vfp_liveness_end_of_block(sim->env, lv, blk);
	/* now iterate through the block backward and cache the results */
	sched_foreach_reverse(blk, irn) {
		/* stop at the first Phi: this produces the live-in */
		idx = get_irn_idx(irn);
		sim->live[idx] = live;
		live = vfp_liveness_transfer(sim->env, irn, live);
	idx = get_irn_idx(blk);
	sim->live[idx] = live;
} /* update_liveness */
816 * Returns true if a register is live in a set.
818 * @param reg_idx the vfp register index
819 * @param live a live bitset
821 #define is_vfp_live(reg_idx, live) ((live) & (1 << (reg_idx)))
825 * Dump liveness info.
827 * @param live the live bitset
829 static void vfp_dump_live(unsigned live) {
832 DB((dbg, LEVEL_2, "Live registers here: \n"));
833 for (i = 0; i < 8; ++i) {
834 if (live & (1 << i)) {
835 DB((dbg, LEVEL_2, " vf%d", i));
838 DB((dbg, LEVEL_2, "\n"));
839 } /* vfp_dump_live */
840 #endif /* DEBUG_libfirm */
842 /* --------------------------------- simulators ---------------------------------------- */
844 #define XCHG(a, b) do { int t = (a); (a) = (b); (b) = t; } while (0)
847 * Simulate a virtual binop.
849 * @param state the x87 state
850 * @param n the node that should be simulated (and patched)
851 * @param env the architecture environment
852 * @param tmpl the template containing the 4 possible x87 opcodes
/*
 * Simulate a virtual binop: decide, based on the liveness of both operands
 * and their stack positions, which of the four opcode variants
 * (normal/reverse, with/without pop) to patch in, inserting fxch/fpush as
 * needed, then update the simulated stack and the instruction's x87
 * register attributes.
 * NOTE(review): this excerpt has substantial gaps — declarations (dst,
 * attr), several else branches, do_pop handling, pop emission, brace
 * closers and the return value are missing. Treat the structure below as
 * fragmentary; verify against the full upstream file before editing.
 */
static int sim_binop(x87_state *state, ir_node *n, const arch_env_t *env, const exchange_tmpl *tmpl) {
	int op2_idx, op1_idx = -1;
	int out_idx, do_pop =0;
	const arch_register_t *op1 = arch_get_irn_register(env, get_irn_n(n, BINOP_IDX_1));
	const arch_register_t *op2 = arch_get_irn_register(env, get_irn_n(n, BINOP_IDX_2));
	const arch_register_t *out = arch_get_irn_register(env, n);
	unsigned live = vfp_live_args_after(state->sim, n, REGMASK(out));
	DB((dbg, LEVEL_1, ">>> %+F %s, %s -> %s\n", n,
		arch_register_get_name(op1), arch_register_get_name(op2),
		arch_register_get_name(out)));
	DEBUG_ONLY(vfp_dump_live(live));
	op1_idx = x87_on_stack(state, arch_register_get_index(op1));
	op2_idx = x87_on_stack(state, arch_register_get_index(op2));
	if (arch_register_get_index(op2) != REG_VFP_NOREG) {
		/* second operand is a vfp register */
		if (is_vfp_live(arch_register_get_index(op2), live)) {
			/* Second operand is live. */
			if (is_vfp_live(arch_register_get_index(op1), live)) {
				/* Both operands are live: push the first one.
				   This works even for op1 == op2. */
				x87_create_fpush(env, state, n, op2_idx, BINOP_IDX_2);
				out_idx = op2_idx = 0;
				dst = tmpl->normal_op;
				/* Second live, first operand is dead here, bring it to tos. */
				x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
				op1_idx = out_idx = 0;
				dst = tmpl->normal_op;
			/* Second operand is dead. */
			if (is_vfp_live(arch_register_get_index(op1), live)) {
				/* First operand is live: bring second to tos. */
				x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
				op2_idx = out_idx = 0;
				dst = tmpl->reverse_op;
				/* Both operands are dead here, pop them from the stack. */
				if (op1_idx == op2_idx) {
					/* Both are identically, no pop needed. */
					dst = tmpl->normal_op;
					dst = tmpl->normal_pop_op;
				else if (op1_idx == 0) {
					XCHG(op2_idx, op1_idx);
					if (op1_idx == op2_idx) {
						/* Both are identically, no pop needed. */
						dst = tmpl->reverse_op;
						dst = tmpl->reverse_pop_op;
					/* Bring the second on top. */
					x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
					if (op1_idx == op2_idx) {
						/* Both are identically, no pop needed. */
						out_idx = op1_idx = op2_idx = 0;
						dst = tmpl->normal_op;
						dst = tmpl->normal_pop_op;
		/* second operand is an address mode */
		if (is_vfp_live(arch_register_get_index(op1), live)) {
			/* first operand is live: push it here */
			x87_create_fpush(env, state, n, op1_idx, BINOP_IDX_1);
			/* first operand is dead: bring it to tos */
			x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
		op1_idx = out_idx = 0;
		dst = tmpl->normal_op;
	/* record the result register and patch in the chosen x87 opcode */
	x87_set_st(state, arch_register_get_index(out), x87_patch_insn(n, dst), out_idx);
	/* patch the operation */
	attr = get_ia32_attr(n);
	attr->x87[0] = op1 = &ia32_st_regs[op1_idx];
	attr->x87[1] = op2 = &ia32_st_regs[op2_idx];
	attr->x87[2] = out = &ia32_st_regs[out_idx];
	DB((dbg, LEVEL_1, "<<< %s %s, %s -> %s\n", get_irn_opname(n),
		arch_register_get_name(op1), arch_register_get_name(op2),
		arch_register_get_name(out)));
	DB((dbg, LEVEL_1, "<<< %s %s, [AM] -> %s\n", get_irn_opname(n),
		arch_register_get_name(op1),
		arch_register_get_name(out)));
998 * Simulate a virtual Unop.
1000 * @param state the x87 state
1001 * @param n the node that should be simulated (and patched)
1002 * @param env the architecture environment
1003 * @param op the x87 opcode that will replace n's opcode
/*
 * Simulate a virtual unop: the operand must end up at tos (pushed if still
 * live afterwards, exchanged to tos otherwise), then the x87 opcode is
 * patched in and the result recorded at tos.
 * NOTE(review): this excerpt has gaps — the attr declaration, the else
 * keyword between the live/dead branches, guarding conditions, brace
 * closers and the return value appear to be missing. Verify against
 * upstream.
 */
static int sim_unop(x87_state *state, ir_node *n, const arch_env_t *env, ir_op *op) {
	int op1_idx, out_idx;
	const arch_register_t *op1 = arch_get_irn_register(env, get_irn_n(n, UNOP_IDX));
	const arch_register_t *out = arch_get_irn_register(env, n);
	unsigned live = vfp_live_args_after(state->sim, n, REGMASK(out));
	DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, out->name));
	DEBUG_ONLY(vfp_dump_live(live));
	op1_idx = x87_on_stack(state, arch_register_get_index(op1));
	if (is_vfp_live(arch_register_get_index(op1), live)) {
		/* push the operand here */
		x87_create_fpush(env, state, n, op1_idx, UNOP_IDX);
		/* operand is dead, bring it to tos */
		x87_create_fxch(state, n, op1_idx, UNOP_IDX);
	/* the unop consumes and produces st(0) */
	x87_set_tos(state, arch_register_get_index(out), x87_patch_insn(n, op));
	op1_idx = out_idx = 0;
	attr = get_ia32_attr(n);
	attr->x87[0] = op1 = &ia32_st_regs[0];
	attr->x87[2] = out = &ia32_st_regs[0];
	DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), out->name));
1038 * Simulate a virtual Load instruction.
1040 * @param state the x87 state
1041 * @param n the node that should be simulated (and patched)
1042 * @param env the architecture environment
1043 * @param op the x87 opcode that will replace n's opcode
/*
 * Simulate a virtual Load: a load pushes its result onto the x87 stack,
 * so push the output register and patch in the real x87 opcode.
 * NOTE(review): this excerpt has gaps — the ia32_attr_t *attr declaration,
 * the return value and the closing brace appear to be missing. Verify
 * against upstream.
 */
static int sim_load(x87_state *state, ir_node *n, const arch_env_t *env, ir_op *op) {
	const arch_register_t *out = arch_get_irn_register(env, n);
	DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, arch_register_get_name(out)));
	x87_push(state, arch_register_get_index(out), x87_patch_insn(n, op));
	attr = get_ia32_attr(n);
	/* the loaded value lands in st(0) */
	attr->x87[2] = out = &ia32_st_regs[0];
	DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), arch_register_get_name(out)));
1059 * Rewire all users of @p old_val to @new_val iff they are scheduled after @p store.
1061 * @param store The store
1062 * @param old_val The former value
1063 * @param new_val The new value
/*
 * Rewire all users of old_val that are scheduled after the store to use
 * new_val instead (used when a spilled value must be reloaded).
 * NOTE(review): this excerpt has gaps — the continue after the
 * user-filter, the int i declaration and brace closers appear to be
 * missing. Verify against upstream.
 */
static void collect_and_rewire_users(ir_node *store, ir_node *old_val, ir_node *new_val) {
	const ir_edge_t *edge, *ne;
	/* _safe variant: set_irn_n below mutates the edge list while iterating */
	foreach_out_edge_safe(old_val, edge, ne) {
		ir_node *user = get_edge_src_irn(edge);
		if (! user || user == store)
		/* if the user is scheduled after the store: rewire */
		if (sched_is_scheduled(user) && sched_comes_after(store, user)) {
			/* find the input of the user pointing to the old value */
			for (i = get_irn_arity(user) - 1; i >= 0; i--) {
				if (get_irn_n(user, i) == old_val)
					set_irn_n(user, i, new_val);
} /* collect_and_rewire_users */
1087 * Simulate a virtual Store.
1089 * @param state the x87 state
1090 * @param n the node that should be simulated (and patched)
1091 * @param env the architecture environment
1092 * @param op the x87 store opcode
1093 * @param op_p the x87 store and pop opcode
/*
 * Simulate a virtual Store. Only st(0) can be stored, so the value is
 * brought to tos first. If the stored value stays live, mode_E needs
 * special handling because fst does not support it: either fpush + fstp
 * (free register available) or fstp followed by re-loading the value via a
 * new vfld whose result replaces all later uses.
 * NOTE(review): this excerpt has substantial gaps — declarations (op2_idx,
 * depth, mode, attr, irg), else branches, comment closers, brace closers
 * and the return value are missing. Verify against the full upstream file
 * before editing logic.
 */
static int sim_store(x87_state *state, ir_node *n, const arch_env_t *env, ir_op *op, ir_op *op_p) {
	ir_node *val = get_irn_n(n, STORE_VAL_IDX);
	const arch_register_t *op2 = arch_get_irn_register(env, val);
	unsigned live = vfp_live_args_after(state->sim, n, 0);
	op2_idx = x87_on_stack(state, arch_register_get_index(op2));
	assert(op2_idx >= 0);
	DB((dbg, LEVEL_1, ">>> %+F %s ->\n", n, arch_register_get_name(op2)));
	mode = get_ia32_ls_mode(n);
	depth = x87_get_depth(state);
	/*
	   We can only store the tos to memory.
	   A store of mode_E with free registers
	   pushes value to tos, so skip it here.
	if (! (mode == mode_E && depth < N_x87_REGS) && op2_idx != 0)
		x87_create_fxch(state, n, op2_idx, STORE_VAL_IDX);
	if (is_vfp_live(arch_register_get_index(op2), live)) {
		/*
		   Problem: fst doesn't support mode_E (spills), only fstp does
		   - stack not full: push value and fstp
		   - stack full: fstp value and load again
		if (mode == mode_E) {
			if (depth < N_x87_REGS) {
				/* ok, we have a free register: push + fstp */
				x87_create_fpush(env, state, n, op2_idx, STORE_VAL_IDX);
				x87_patch_insn(n, op_p);
				ir_node *vfld, *mem, *block, *rproj, *mproj;
				/* stack full here: need fstp + load */
				x87_patch_insn(n, op_p);
				block = get_nodes_block(n);
				irg = get_irn_irg(n);
				vfld = new_rd_ia32_vfld(NULL, irg, block, get_irn_n(n, 0), get_irn_n(n, 1), new_rd_NoMem(irg));
				/* copy all attributes */
				set_ia32_frame_ent(vfld, get_ia32_frame_ent(n));
				if (is_ia32_use_frame(n))
					set_ia32_use_frame(vfld);
				set_ia32_am_flavour(vfld, get_ia32_am_flavour(n));
				set_ia32_op_type(vfld, ia32_am_Source);
				add_ia32_am_offs(vfld, get_ia32_am_offs(n));
				set_ia32_am_sc(vfld, get_ia32_am_sc(n));
				set_ia32_ls_mode(vfld, get_ia32_ls_mode(n));
				rproj = new_r_Proj(irg, block, vfld, get_ia32_ls_mode(vfld), pn_ia32_vfld_res);
				mproj = new_r_Proj(irg, block, vfld, mode_M, pn_ia32_vfld_M);
				mem = get_irn_Proj_for_mode(n, mode_M);
				assert(mem && "Store memory not found");
				/* the reloaded value lives in the same vfp register */
				arch_set_irn_register(env, rproj, op2);
				/* reroute all former users of the store memory to the load memory */
				edges_reroute(mem, mproj, irg);
				/* set the memory input of the load to the store memory */
				set_irn_n(vfld, 2, mem);
				sched_add_after(n, vfld);
				sched_add_after(vfld, rproj);
				/* rewire all users, scheduled after the store, to the loaded value */
				collect_and_rewire_users(n, val, rproj);
		/* mode != mode_E -> use normal fst */
		x87_patch_insn(n, op);
		x87_patch_insn(n, op_p);
	attr = get_ia32_attr(n);
	attr->x87[1] = op2 = &ia32_st_regs[0];
	DB((dbg, LEVEL_1, "<<< %s %s ->\n", get_irn_opname(n), arch_register_get_name(op2)));
1196 * Simulate a virtual Phi.
1197 * Just for cosmetic reasons change the mode of Phi nodes to mode_E.
1199 * @param state the x87 state
1200 * @param n the node that should be simulated (and patched)
1201 * @param env the architecture environment
1203 static int sim_Phi(x87_state *state, ir_node *n, const arch_env_t *env) {
1204 ir_mode *mode = get_irn_mode(n);
1206 if (mode_is_float(mode))
1207 set_irn_mode(n, mode_E);
/* Generator macros: each expands to a sim_<op>() wrapper that forwards to
 * the generic simulator (sim_binop / sim_load / sim_unop / sim_store) with
 * the matching op_ia32_* opcodes.
 * NOTE(review): the closing "}" continuation line of each function-body
 * macro is missing from this excerpt — verify against upstream. */
#define _GEN_BINOP(op, rev) \
static int sim_##op(x87_state *state, ir_node *n, const arch_env_t *env) { \
	exchange_tmpl tmpl = { op_ia32_##op, op_ia32_##rev, op_ia32_##op##p, op_ia32_##rev##p }; \
	return sim_binop(state, n, env, &tmpl); \
#define GEN_BINOP(op) _GEN_BINOP(op, op)
#define GEN_BINOPR(op) _GEN_BINOP(op, op##r)
#define GEN_LOAD2(op, nop) \
static int sim_##op(x87_state *state, ir_node *n, const arch_env_t *env) { \
	return sim_load(state, n, env, op_ia32_##nop); \
#define GEN_LOAD(op) GEN_LOAD2(op, op)
#define GEN_UNOP(op) \
static int sim_##op(x87_state *state, ir_node *n, const arch_env_t *env) { \
	return sim_unop(state, n, env, op_ia32_##op); \
#define GEN_STORE(op) \
static int sim_##op(x87_state *state, ir_node *n, const arch_env_t *env) { \
	return sim_store(state, n, env, op_ia32_##op, op_ia32_##op##p); \
GEN_LOAD2(fConst, fldConst)
1261 * Simulate a fCondJmp.
1263 * @param state the x87 state
1264 * @param n the node that should be simulated (and patched)
1265 * @param env the architecture environment
/* Chooses among the fcom/fcomp/fcompp (and their reverse) jump variants
   depending on where the two operands sit on the fp stack and whether
   they are still live after the compare: dead operands are popped by
   the compare itself, live ones are kept.  fxch instructions are
   inserted as needed to bring operands to st(0)/st(1). */
1267 static int sim_fCondJmp(x87_state *state, ir_node *n, const arch_env_t *env) {
1268 int op2_idx, op1_idx = -1, pop_cnt = 0;
1271 const arch_register_t *op1 = arch_get_irn_register(env, get_irn_n(n, BINOP_IDX_1));
1272 const arch_register_t *op2 = arch_get_irn_register(env, get_irn_n(n, BINOP_IDX_2));
1273 unsigned live = vfp_live_args_after(state->sim, n, 0);
1275 DB((dbg, LEVEL_1, ">>> %+F %s, %s\n", n,
1276 arch_register_get_name(op1), arch_register_get_name(op2)));
1277 DEBUG_ONLY(vfp_dump_live(live));
/* translate virtual register indexes into current stack positions */
1279 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1280 op2_idx = x87_on_stack(state, arch_register_get_index(op2));
1282 /* BEWARE: check for comp a,a cases, they might happen */
1283 if (arch_register_get_index(op2) != REG_VFP_NOREG) {
1284 /* second operand is a vfp register */
1286 if (is_vfp_live(arch_register_get_index(op2), live)) {
1287 /* second operand is live */
1289 if (is_vfp_live(arch_register_get_index(op1), live)) {
1290 /* both operands are live: move one of them to tos */
1292 XCHG(op2_idx, op1_idx);
1293 dst = op_ia32_fcomrJmp;
1295 else if (op1_idx == 0) {
1296 dst = op_ia32_fcomJmp;
1299 /* bring the first on top */
1300 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1301 if (op1_idx == op2_idx)
1304 dst = op_ia32_fcomJmp;
1308 /* second live, first operand is dead here, bring it to tos.
1309 This means further, op1_idx != op2_idx. */
1311 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
/* pop variant: the compare kills the dead first operand */
1316 dst = op_ia32_fcompJmp;
1321 /* second operand is dead */
1322 if (is_vfp_live(arch_register_get_index(op1), live)) {
1323 /* first operand is live: bring second to tos.
1324 This means further, op1_idx != op2_idx. */
1326 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1331 dst = op_ia32_fcomrpJmp;
1335 /* both operands are dead here, check first for identity. */
1336 if (op1_idx == op2_idx) {
1337 /* identical operands, only one exchange/pop needed */
1339 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1340 op1_idx = op2_idx = 0;
1342 dst = op_ia32_fcompJmp;
1345 /* different, move them to st and st(1) and pop both.
1346 The tricky part is to get one into st(1).*/
1347 else if (op2_idx == 1) {
1348 /* good, second operand is already in the right place, move the first */
1350 /* bring the first on top */
1351 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
/* double-pop variant: both dead operands are consumed */
1354 dst = op_ia32_fcomppJmp;
1357 else if (op1_idx == 1) {
1358 /* good, first operand is already in the right place, move the second */
1360 /* bring the first on top */
1361 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1364 dst = op_ia32_fcomrppJmp;
1368 /* if one is already the TOS, we need two fxch */
1370 /* first one is TOS, move to st(1) */
1371 x87_create_fxch(state, n, 1, BINOP_IDX_1);
1373 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1375 dst = op_ia32_fcomrppJmp;
1378 else if (op2_idx == 0) {
1379 /* second one is TOS, move to st(1) */
1380 x87_create_fxch(state, n, 1, BINOP_IDX_2);
1382 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1384 dst = op_ia32_fcomrppJmp;
1388 /* none of them is either TOS or st(1), 3 fxch needed */
1389 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1390 x87_create_fxch(state, n, 1, BINOP_IDX_2);
1391 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1394 dst = op_ia32_fcomppJmp;
1402 /* second operand is an address mode */
1403 if (is_vfp_live(arch_register_get_index(op1), live)) {
1404 /* first operand is live: bring it to TOS */
1406 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1409 dst = op_ia32_fcomJmp;
1412 /* first operand is dead: bring it to tos */
1414 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1418 dst = op_ia32_fcompJmp;
/* replace the virtual op by the chosen real compare-and-jump op */
1422 x87_patch_insn(n, dst);
1428 /* patch the operation */
1429 attr = get_ia32_attr(n);
1430 attr->x87[1] = op1 = &ia32_st_regs[op1_idx];
1432 attr->x87[2] = op2 = &ia32_st_regs[op2_idx];
1435 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(n),
1436 arch_register_get_name(op1), arch_register_get_name(op2)));
1438 DB((dbg, LEVEL_1, "<<< %s %s, [AM]\n", get_irn_opname(n),
1439 arch_register_get_name(op1)));
1442 } /* sim_fCondJmp */
1445 * Simulate a be_Copy.
1447 * @param state the x87 state
1448 * @param n the node that should be simulated (and patched)
1449 * @param env the architecture environment
/* Non-float copies are untouched.  Float copies are handled in four
   ways: a copy of a constant re-materializes the constant load; a copy
   of the "unknown" register synthesizes a value; a copy whose source is
   still live becomes a real fpush (duplicating the stack slot); a copy
   of a dead source degenerates to a rename/pop of the stack state. */
1451 static int sim_Copy(x87_state *state, ir_node *n, const arch_env_t *env) {
1452 ir_mode *mode = get_irn_mode(n);
1454 if (mode_is_float(mode)) {
1455 ir_node *pred = get_irn_n(n, 0);
1456 ir_op *op = get_irn_op(pred);
1457 const arch_register_t *out = arch_get_irn_register(env, n);
1458 const arch_register_t *op1 = arch_get_irn_register(env, pred);
1459 ir_node *node, *next;
1461 int op1_idx, out_idx;
1462 unsigned live = vfp_live_args_after(state->sim, n, REGMASK(out));
1464 DB((dbg, LEVEL_1, ">>> %+F %s -> %s\n", n,
1465 arch_register_get_name(op1), arch_register_get_name(out)));
1466 DEBUG_ONLY(vfp_dump_live(live));
1468 /* FIXME: check here for all possible constants */
1469 if (op == op_ia32_fldz || op == op_ia32_fld1) {
1470 /* copy a constant */
/* cheaper to re-load the constant than to duplicate the stack slot */
1471 node = new_rd_ia32_fldz(get_irn_dbg_info(n), get_irn_irg(n), get_nodes_block(n), mode);
1472 set_irn_op(node, op);
1473 arch_set_irn_register(env, node, out);
1475 x87_push(state, arch_register_get_index(out), node);
1477 attr = get_ia32_attr(node);
1478 attr->x87[2] = out = &ia32_st_regs[0];
1480 next = sched_next(n);
1483 sched_add_before(next, node);
1484 DB((dbg, LEVEL_1, ">>> %+F -> %s\n", node, arch_register_get_name(out)));
1488 /* handle the infamous unknown value */
1489 if (arch_register_get_index(op1) == REG_VFP_UKNWN) {
1490 /* This happens before Phi nodes */
1491 if (x87_state_is_empty(state)) {
1492 /* create some value */
1493 x87_patch_insn(n, op_ia32_fldz);
1494 attr = get_ia32_attr(n);
1495 attr->x87[2] = out = &ia32_st_regs[0];
1496 DB((dbg, LEVEL_1, "<<< %+F -> %s\n", n,
1497 arch_register_get_name(out)));
/* stack not empty: duplicate the tos as the "unknown" value */
1500 node = new_rd_ia32_fpush(get_irn_dbg_info(n), get_irn_irg(n), get_nodes_block(n), get_irn_n(n, 0), mode);
1501 arch_set_irn_register(env, node, out);
1503 x87_push(state, arch_register_get_index(out), node);
1505 attr = get_ia32_attr(node);
1506 attr->x87[0] = op1 =
1507 attr->x87[2] = out = &ia32_st_regs[0];
1509 next = sched_next(n);
1512 sched_add_before(next, node);
1513 DB((dbg, LEVEL_1, "<<< %+F %s -> %s\n", node,
1514 arch_register_get_name(op1),
1515 arch_register_get_name(out)));
1520 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1522 if (is_vfp_live(arch_register_get_index(op1), live)) {
1523 /* operand is still live, a real copy (fpush) is needed */
1524 node = new_rd_ia32_fpush(get_irn_dbg_info(n), get_irn_irg(n), get_nodes_block(n), get_irn_n(n, 0), mode);
1525 arch_set_irn_register(env, node, out);
1527 x87_push(state, arch_register_get_index(out), node);
1529 attr = get_ia32_attr(node);
1530 attr->x87[0] = op1 = &ia32_st_regs[op1_idx];
1531 attr->x87[2] = out = &ia32_st_regs[0];
1533 next = sched_next(n);
1536 sched_add_before(next, node);
1537 DB((dbg, LEVEL_1, ">>> %+F %s -> %s\n", node, op1->name, out->name));
/* source is dead: no code needed unless out already occupies a slot */
1540 out_idx = x87_on_stack(state, arch_register_get_index(out));
1542 if (out_idx >= 0 && out_idx != op1_idx) {
1543 /* op1 must be killed and placed where out is */
1545 /* best case, simple remove and rename */
1546 x87_patch_insn(n, op_ia32_Pop);
1547 attr = get_ia32_attr(n);
1548 attr->x87[0] = op1 = &ia32_st_regs[0];
1551 x87_set_st(state, arch_register_get_index(out), n, op1_idx - 1);
1554 /* move op1 to tos, store and pop it */
1556 x87_create_fxch(state, n, op1_idx, 0);
1559 x87_patch_insn(n, op_ia32_Pop);
1560 attr = get_ia32_attr(n);
1561 attr->x87[0] = op1 = &ia32_st_regs[out_idx];
1564 x87_set_st(state, arch_register_get_index(out), n, out_idx - 1);
1566 DB((dbg, LEVEL_1, ">>> %+F %s\n", n, op1->name));
1569 /* just a virtual copy */
/* rename the stack slot to the out register and remove the Copy node */
1570 x87_set_st(state, arch_register_get_index(out), get_unop_op(n), op1_idx);
1572 DB((dbg, LEVEL_1, ">>> KILLED %s\n", get_irn_opname(n)));
1573 exchange(n, get_unop_op(n));
1582 * Simulate a be_Call.
1584 * @param state the x87 state
1585 * @param n the node that should be simulated
1586 * @param env the architecture environment
/* The x87 calling convention requires an empty fp stack at a call site;
   a float result comes back in st(0) and is modeled by a push here. */
1588 static int sim_Call(x87_state *state, ir_node *n, const arch_env_t *env) {
1589 ir_type *call_tp = be_Call_get_type(n);
1591 /* at the begin of a call the x87 state should be empty */
1592 assert(state->depth == 0 && "stack not empty before call");
1595 * If the called function returns a float, it is returned in st(0).
1596 * This even happens if the return value is NOT used.
1597 * Moreover, only one return result is supported.
1599 if (get_method_n_ress(call_tp) > 0) {
1600 ir_type *res_type = get_method_res_type(call_tp, 0);
1601 ir_mode *mode = get_type_mode(res_type);
1603 if (mode && mode_is_float(mode)) {
1605 * TODO: what to push here? The result might be unused and currently
1606 * we have no possibility to detect this :-(
/* push virtual register 0 as a placeholder for the call result */
1608 x87_push(state, 0, n);
1616 * Simulate a be_Spill.
1618 * @param state the x87 state
1619 * @param n the node that should be simulated (and patched)
1620 * @param env the architecture environment
1622 * Should not happen, spills are lowered before the x87 simulator sees them.
/* Defensive fallback only: asserts in debug builds, then treats the
   spill like a float store (fst) so release builds stay functional. */
1624 static int sim_Spill(x87_state *state, ir_node *n, const arch_env_t *env) {
1625 assert(0 && "Spill not lowered");
1626 return sim_fst(state, n, env);
1630 * Simulate a be_Reload.
1632 * @param state the x87 state
1633 * @param n the node that should be simulated (and patched)
1634 * @param env the architecture environment
1636 * Should not happen, reloads are lowered before the x87 simulator sees them.
/* Defensive fallback only: asserts in debug builds, then treats the
   reload like a float load (fld) so release builds stay functional. */
1638 static int sim_Reload(x87_state *state, ir_node *n, const arch_env_t *env) {
1639 assert(0 && "Reload not lowered");
1640 return sim_fld(state, n, env);
1644 * Simulate a be_Return.
1646 * @param state the x87 state
1647 * @param n the node that should be simulated (and patched)
1648 * @param env the architecture environment
/* At a return only the float results may remain on the fp stack; they
   are popped virtually here so the simulated state ends empty. */
1650 static int sim_Return(x87_state *state, ir_node *n, const arch_env_t *env) {
1651 int n_res = be_Return_get_n_rets(n);
1652 int i, n_float_res = 0;
1654 /* only floating point return values must reside on the stack */
1655 for (i = 0; i < n_res; ++i) {
1656 ir_node *res = get_irn_n(n, be_pos_Return_val + i);
1658 if (mode_is_float(get_irn_mode(res)))
/* sanity: stack depth must equal the number of float results */
1661 assert(x87_get_depth(state) == n_float_res);
1663 /* pop them virtually */
1664 for (i = n_float_res - 1; i >= 0; --i)
/* Pairing of an input and output register for a be_Perm permutation. */
1670 typedef struct _perm_data_t {
1671 const arch_register_t *in; /**< the input (pre-permutation) register */
1672 const arch_register_t *out; /**< the output (post-permutation) register */
1676 * Simulate a be_Perm.
1678 * @param state the x87 state
1679 * @param irn the node that should be simulated (and patched)
1680 * @param env the architecture environment
/* A float Perm emits no code at all: the permutation is applied to the
   simulated stack state only, by re-binding each Proj's output register
   to the stack slot its corresponding input occupies. */
1682 static int sim_Perm(x87_state *state, ir_node *irn, const arch_env_t *env) {
1684 ir_node *pred = get_irn_n(irn, 0);
1686 const ir_edge_t *edge;
1688 /* handle only floating point Perms */
1689 if (! mode_is_float(get_irn_mode(pred)))
1692 DB((dbg, LEVEL_1, ">>> %+F\n", irn));
1694 /* Perm is a pure virtual instruction on x87.
1695 All inputs must be on the FPU stack and are pairwise
1696 different from each other.
1697 So, all we need to do is to permutate the stack state. */
1698 n = get_irn_arity(irn);
1699 NEW_ARR_A(int, stack_pos, n);
1701 /* collect old stack positions */
1702 for (i = 0; i < n; ++i) {
1703 const arch_register_t *inreg = arch_get_irn_register(env, get_irn_n(irn, i));
1704 int idx = x87_on_stack(state, arch_register_get_index(inreg));
1706 assert(idx >= 0 && "Perm argument not on x87 stack");
1710 /* now do the permutation */
1711 foreach_out_edge(irn, edge) {
1712 ir_node *proj = get_edge_src_irn(edge);
1713 const arch_register_t *out = arch_get_irn_register(env, proj);
1714 long num = get_Proj_proj(proj);
1716 assert(0 <= num && num < n && "More Proj's than Perm inputs");
1717 x87_set_st(state, arch_register_get_index(out), proj, stack_pos[(unsigned)num]);
1719 DB((dbg, LEVEL_1, "<<< %+F\n", irn));
1725 * Kill any dead registers at block start by popping them from the stack.
1727 * @param sim the simulator handle
1728 * @param block the current block
1729 * @param start_state the x87 state at the begin of the block
/* Builds a bitmask of stack slots whose virtual registers are not live
   into the block, then pops them (fxch to tos first when necessary,
   using double-pops where two adjacent dead slots allow it).  Works on
   a clone of the incoming state; the popped nodes are kept alive via
   an End keep-alive edge. */
1731 static x87_state *x87_kill_deads(x87_simulator *sim, ir_node *block, x87_state *start_state) {
1732 x87_state *state = start_state;
1733 ir_node *first_insn = sched_first(block);
1734 ir_node *keep = NULL;
1735 unsigned live = vfp_live_args_after(sim, block, 0);
1737 int i, depth, num_pop;
1740 depth = x87_get_depth(state);
1741 for (i = depth - 1; i >= 0; --i) {
1742 int reg = x87_get_st_reg(state, i);
1744 if (! is_vfp_live(reg, live))
1745 kill_mask |= (1 << i);
1749 /* create a new state, will be changed */
1750 state = x87_clone_state(sim, state);
1752 DB((dbg, LEVEL_1, "Killing deads:\n"));
1753 DEBUG_ONLY(vfp_dump_live(live));
1754 DEBUG_ONLY(x87_dump_stack(state));
1756 /* now kill registers */
1758 /* we can only kill from TOS, so bring them up */
1759 if (! (kill_mask & 1)) {
1760 /* search from behind, because we can do a double-pop */
1761 for (i = depth - 1; i >= 0; --i) {
1762 if (kill_mask & (1 << i)) {
1763 kill_mask &= ~(1 << i);
/* mark the slot as a don't-care (-1) and exchange it to the tos */
1770 x87_set_st(state, -1, keep, i);
1771 keep = x87_create_fxch(state, first_insn, i, -1);
1774 keep = x87_get_st_node(state, 0);
1776 if ((kill_mask & 3) == 3) {
1777 /* we can do a double-pop */
1781 /* only a single pop */
1786 kill_mask >>= num_pop;
1787 keep = x87_create_fpop(sim->env, state, first_insn, num_pop, keep);
/* keep the last pop alive so it is not removed as dead code */
1789 add_End_keepalive(get_irg_end(get_irn_irg(block)), keep);
1792 } /* x87_kill_deads */
1795 * Run a simulation and fix all virtual instructions for a block.
1797 * @param sim the simulator handle
1798 * @param block the current block
1800 * @return non-zero if simulation is complete,
1801 * zero if the simulation must be rerun
/* Walks the block's schedule, dispatching each node to its registered
   sim_* handler (stored in op->ops.generic), then propagates the
   resulting end state to all successor blocks.  If a successor already
   has a begin state, an explicit shuffle is emitted on this edge. */
1803 static int x87_simulate_block(x87_simulator *sim, ir_node *block) {
1805 blk_state *bl_state = x87_get_bl_state(sim, block);
1806 x87_state *state = bl_state->begin;
1807 const ir_edge_t *edge;
1808 ir_node *start_block;
1810 /* if we have no assigned start state, we must wait ... */
1814 assert(bl_state->end == NULL);
1816 DB((dbg, LEVEL_1, "Simulate %+F\n", block));
1817 DB((dbg, LEVEL_2, "State at Block begin:\n ")); DEBUG_ONLY(x87_dump_stack(state));
1819 /* at block begin, kill all dead registers */
1820 state = x87_kill_deads(sim, block, state);
1822 /* beware, n might be changed */
1823 for (n = sched_first(block); !sched_is_end(n); n = next) {
1824 ir_op *op = get_irn_op(n);
1826 next = sched_next(n);
1827 if (op->ops.generic) {
1829 sim_func func = (sim_func)op->ops.generic;
1831 /* have work to do */
1832 if (state == bl_state->begin) {
1833 /* create a new state, will be changed */
/* copy-on-write: never mutate the shared begin state */
1834 state = x87_clone_state(sim, state);
1838 node_inserted = (*func)(state, n, sim->env);
1841 sim_func might have added additional nodes after n,
1843 beware: n must not be changed by sim_func
1844 (i.e. removed from schedule) in this case
1847 next = sched_next(n);
1851 start_block = get_irg_start_block(get_irn_irg(block));
1853 /* check if the state must be shuffled */
1854 foreach_block_succ(block, edge) {
1855 ir_node *succ = get_edge_src_irn(edge);
1856 blk_state *succ_state = x87_get_bl_state(sim, succ);
1858 if (succ_state->begin && succ != start_block) {
1859 /* There is already a begin state for this block, bad.
1860 Do the necessary permutations.
1861 Note that critical edges are removed, so this is always possible. */
1862 x87_shuffle(sim, block, state, succ, succ_state->begin);
1864 /* Note further, that there can be only one such situation,
1865 so we can break here. */
1869 bl_state->end = state;
1871 /* now propagate the state to all successor blocks */
1872 foreach_block_succ(block, edge) {
1873 ir_node *succ = get_edge_src_irn(edge);
1874 blk_state *succ_state = x87_get_bl_state(sim, succ);
1876 if (! succ_state->begin)
1877 succ_state->begin = state;
1880 DB((dbg, LEVEL_2, "State at Block end:\n ")); DEBUG_ONLY(x87_dump_stack(state));
1883 } /* x87_simulate_block */
1886 * Create a new x87 simulator.
1888 * @param sim a simulator handle, will be initialized
1889 * @param irg the current graph
1890 * @param env the architecture environment
/* Allocates the simulator's obstack, block-state map and liveness cache,
   and registers all sim_* handlers on the opcode generic function
   pointers so x87_simulate_block() can dispatch on them. */
1892 static void x87_init_simulator(x87_simulator *sim, ir_graph *irg, const arch_env_t *env)
1894 obstack_init(&sim->obst);
1895 sim->blk_states = pmap_create();
1897 sim->n_idx = get_irg_last_idx(irg);
1898 sim->live = obstack_alloc(&sim->obst, sizeof(*sim->live) * sim->n_idx);
1900 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.x87");
1902 DB((dbg, LEVEL_1, "--------------------------------\n"
1903 "x87 Simulator started for %+F\n", irg));
1905 /* set the generic function pointer of instruction we must simulate */
1906 clear_irp_opcodes_generic_func();
/* ASSOC* bind a simulator function to a firm / ia32-virtual / be opcode */
1908 #define ASSOC(op) (op_ ## op)->ops.generic = (op_func)(sim_##op)
1909 #define ASSOC_IA32(op) (op_ia32_v ## op)->ops.generic = (op_func)(sim_##op)
1910 #define ASSOC_BE(op) (op_be_ ## op)->ops.generic = (op_func)(sim_##op)
1927 ASSOC_IA32(fCondJmp);
1938 } /* x87_init_simulator */
1941 * Destroy an x87 simulator and release all resources it owns.
1943 * @param sim the simulator handle
1945 static void x87_destroy_simulator(x87_simulator *sim) {
1946 pmap_destroy(sim->blk_states);
1947 obstack_free(&sim->obst, NULL);
1948 DB((dbg, LEVEL_1, "x87 Simulator stopped\n\n"));
1949 } /* x87_destroy_simulator */
1952 * Run a simulation and fix all virtual instructions for a graph.
1954 * @param env the architecture environment
1955 * @param irg the current graph
1956 * @param blk_list the block schedule list
1958 * Needs a block-schedule.
/* Driver: sets up the simulator, precomputes liveness for every block,
   then iterates a worklist of blocks until each one could be simulated
   (a block is re-queued when its begin state is not yet known). */
1960 void x87_simulate_graph(const arch_env_t *env, ir_graph *irg, ir_node **blk_list) {
1961 ir_node *block, *start_block;
1963 blk_state *bl_state;
1968 /* create the simulator */
1969 x87_init_simulator(&sim, irg, env);
1971 start_block = get_irg_start_block(irg);
1972 bl_state = x87_get_bl_state(&sim, start_block);
1974 /* start with the empty state */
1975 bl_state->begin = empty;
1978 worklist = new_waitq();
1980 /* Create the worklist for the schedule and calculate the liveness
1981 for all nodes. We must precalculate this info, because the
1982 simulator adds new nodes (and possible before Phi nodes) which
1983 let fail the old lazy calculation.
1984 On the other hand we reduce the computation amount due to
1985 precaching from O(n^2) to O(n) at the expense of O(n) cache memory.
1987 lv = be_liveness(irg);
1988 for (i = 0, n = ARR_LEN(blk_list); i < n; ++i) {
1989 update_liveness(&sim, lv, blk_list[i]);
1990 waitq_put(worklist, blk_list[i]);
1992 be_liveness_free(lv);
/* re-queue blocks whose predecessors were not simulated yet */
1996 block = waitq_get(worklist);
1997 if (! x87_simulate_block(&sim, block)) {
1998 waitq_put(worklist, block);
2001 } while (! pdeq_empty(worklist));
2004 del_waitq(worklist);
2005 x87_destroy_simulator(&sim);
2006 } /* x87_simulate_graph */