2 * This file implements the x87 support and virtual to stack
3 * register translation for the ia32 backend.
5 * @author: Michael Beck
18 #include "iredges_t.h"
29 #include "../belive_t.h"
30 #include "../besched_t.h"
31 #include "../benode_t.h"
32 #include "ia32_new_nodes.h"
33 #include "gen_ia32_new_nodes.h"
34 #include "gen_ia32_regalloc_if.h"
39 /* first and second binop index */
46 /* the store val index */
47 #define STORE_VAL_IDX 2
49 #define MASK_TOS(x) ((x) & (N_x87_REGS - 1))
51 /** the debug handle */
52 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
54 /* Forward declaration. */
55 typedef struct _x87_simulator x87_simulator;
58 * An exchange template.
59 * Note that our virtual functions have the same inputs
60 * and attributes as the real ones, so we can simple exchange
62 * Further, x87 supports inverse instructions, so we can handle them.
64 typedef struct _exchange_tmpl {
65 ir_op *normal_op; /**< the normal one */
66 ir_op *reverse_op; /**< the reverse one if exists */
67 ir_op *normal_pop_op; /**< the normal one with tos pop */
68 ir_op *reverse_pop_op; /**< the reverse one with tos pop */
72 * An entry on the simulated x87 stack.
74 typedef struct _st_entry {
75 int reg_idx; /**< the virtual register index of this stack value */
76 ir_node *node; /**< the node that produced this value */
82 typedef struct _x87_state {
83 st_entry st[N_x87_REGS]; /**< the register stack */
84 int depth; /**< the current stack depth */
85 int tos; /**< position of the tos */
86 x87_simulator *sim; /**< The simulator. */
89 /** An empty state, used for blocks without fp instructions. */
90 static x87_state _empty = { { {0, NULL}, }, 0, 0 };
91 static x87_state *empty = (x87_state *)&_empty;
93 /** The type of an instruction simulator function. */
94 typedef int (*sim_func)(x87_state *state, ir_node *n);
97 * A block state: Every block has a x87 state at the beginning and at the end.
99 typedef struct _blk_state {
100 x87_state *begin; /**< state at the begin or NULL if not assigned */
101 x87_state *end; /**< state at the end or NULL if not assigned */
104 #define PTR_TO_BLKSTATE(p) ((blk_state *)(p))
106 /** liveness bitset for vfp registers. */
107 typedef unsigned char vfp_liveness;
112 struct _x87_simulator {
113 struct obstack obst; /**< An obstack for fast allocating. */
114 pmap *blk_states; /**< Map blocks to states. */
115 const arch_env_t *arch_env; /**< The architecture environment. */
116 be_lv_t *lv; /**< intrablock liveness. */
117 vfp_liveness *live; /**< Liveness information. */
118 unsigned n_idx; /**< The cached get_irg_last_idx() result. */
119 waitq *worklist; /**< list of blocks to process. */
123 * Returns the current stack depth.
125 * @param state the x87 state
127 * @return the x87 stack depth
/* NOTE(review): this region is a line-numbered partial dump; gaps in the
 * leading numbers mark elided source lines. Bodies below are incomplete —
 * confirm against the full file before relying on them. */
129 static int x87_get_depth(const x87_state *state) {
/* NOTE(review): body elided (orig 130-133); presumably returns state->depth — confirm. */
135  * Check if the state is empty.
137  * @param state  the x87 state
139  * returns non-zero if the x87 stack is empty
141 static int x87_state_is_empty(const x87_state *state) {
142 	return state->depth == 0;
/* NOTE(review): closing brace of x87_state_is_empty elided (orig 143). */
147  * Return the virtual register index at st(pos).
149  * @param state  the x87 state
150  * @param pos    a stack position
152  * @return the vfp register index that produced the value at st(pos)
154 static int x87_get_st_reg(const x87_state *state, int pos) {
155 	assert(pos < state->depth);
	/* MASK_TOS wraps the circular-buffer index into the N_x87_REGS window */
156 	return state->st[MASK_TOS(state->tos + pos)].reg_idx;
160  * Return the node at st(pos).
162  * @param state  the x87 state
163  * @param pos    a stack position
165  * @return the IR node that produced the value at st(pos)
167 static ir_node *x87_get_st_node(const x87_state *state, int pos) {
168 	assert(pos < state->depth);
169 	return state->st[MASK_TOS(state->tos + pos)].node;
170 } /* x87_get_st_node */
174  * Dump the stack for debugging.
176  * @param state  the x87 state
178 static void x87_dump_stack(const x87_state *state) {
/* NOTE(review): declaration of loop variable i elided (orig 179-180). */
181 	for (i = state->depth - 1; i >= 0; --i) {
182 		DB((dbg, LEVEL_2, "vf%d(%+F) ", x87_get_st_reg(state, i),
183 			x87_get_st_node(state, i)));
185 	DB((dbg, LEVEL_2, "<-- TOS\n"));
186 } /* x87_dump_stack */
187 #endif /* DEBUG_libfirm */
190  * Set a virtual register to st(pos).
192  * @param state    the x87 state
193  * @param reg_idx  the vfp register index that should be set
194  * @param node     the IR node that produces the value of the vfp register
195  * @param pos      the stack position where the new value should be entered
197 static void x87_set_st(x87_state *state, int reg_idx, ir_node *node, int pos) {
198 	assert(0 < state->depth);
199 	state->st[MASK_TOS(state->tos + pos)].reg_idx = reg_idx;
200 	state->st[MASK_TOS(state->tos + pos)].node    = node;
202 	DB((dbg, LEVEL_2, "After SET_REG: "));
203 	DEBUG_ONLY(x87_dump_stack(state));
207  * Set the tos virtual register.
209  * @param state    the x87 state
210  * @param reg_idx  the vfp register index that should be set
211  * @param node     the IR node that produces the value of the vfp register
213 static void x87_set_tos(x87_state *state, int reg_idx, ir_node *node) {
	/* convenience wrapper: st(0) is position 0 */
214 	x87_set_st(state, reg_idx, node, 0);
219  * Flush the x87 stack.
221  * @param state  the x87 state
223 static void x87_flush(x87_state *state) {
/* NOTE(review): body of x87_flush elided (orig 224-228) — presumably resets depth/tos; confirm. */
230  * Swap st(0) with st(pos).
232  * @param state  the x87 state
233  * @param pos    the stack position to change the tos with
235 static void x87_fxch(x87_state *state, int pos) {
/* NOTE(review): declaration of local st_entry 'entry' elided (orig 236). */
237 	assert(pos < state->depth);
239 	entry = state->st[MASK_TOS(state->tos + pos)];
240 	state->st[MASK_TOS(state->tos + pos)] = state->st[MASK_TOS(state->tos)];
241 	state->st[MASK_TOS(state->tos)] = entry;
243 	DB((dbg, LEVEL_2, "After FXCH: ")); DEBUG_ONLY(x87_dump_stack(state));
247 * Convert a virtual register to the stack index.
249 * @param state the x87 state
250 * @param reg_idx the register vfp index
252 * @return the stack position where the register is stacked
253 * or -1 if the virtual register was not found
255 static int x87_on_stack(const x87_state *state, int reg_idx) {
256 	int i, tos = state->tos;
	/* linear scan of the live window; returns the stack position or -1 (the
	   -1 return path is elided from this dump, orig 260-262) */
258 	for (i = 0; i < state->depth; ++i)
259 		if (state->st[MASK_TOS(tos + i)].reg_idx == reg_idx)
265  * Push a virtual Register onto the stack, double pushed allowed.
267  * @param state    the x87 state
268  * @param reg_idx  the register vfp index
269  * @param node     the node that produces the value of the vfp register
271 static void x87_push_dbl(x87_state *state, int reg_idx, ir_node *node) {
272 	assert(state->depth < N_x87_REGS && "stack overrun");
	/* tos moves downward (wraps via MASK_TOS); new value becomes st(0) */
275 	state->tos = MASK_TOS(state->tos - 1);
276 	state->st[state->tos].reg_idx = reg_idx;
277 	state->st[state->tos].node    = node;
279 	DB((dbg, LEVEL_2, "After PUSH: ")); DEBUG_ONLY(x87_dump_stack(state));
283  * Push a virtual Register onto the stack, double pushes are NOT allowed..
285  * @param state    the x87 state
286  * @param reg_idx  the register vfp index
287  * @param node     the node that produces the value of the vfp register
288  * (note: a former @param dbl_push was documented here, but the signature
288     below takes no such parameter — stale doc; double pushes are rejected
288     by the assert.)
290 static void x87_push(x87_state *state, int reg_idx, ir_node *node) {
291 	assert(x87_on_stack(state, reg_idx) == -1 && "double push");
293 	x87_push_dbl(state, reg_idx, node);
297  * Pop a virtual Register from the stack.
299 static void x87_pop(x87_state *state) {
300 	assert(state->depth > 0 && "stack underrun");
	/* popping just advances tos; the slot contents are left stale */
303 	state->tos = MASK_TOS(state->tos + 1);
305 	DB((dbg, LEVEL_2, "After POP: ")); DEBUG_ONLY(x87_dump_stack(state));
309 * Returns the block state of a block.
311 * @param sim the x87 simulator handle
312 * @param block the current block
314 * @return the block state
316 static blk_state *x87_get_bl_state(x87_simulator *sim, ir_node *block) {
317 	pmap_entry *entry = pmap_find(sim->blk_states, block);
	/* lazily create a fresh (begin=end=NULL) state on first lookup;
	   the surrounding if/else lines are elided from this dump */
320 		blk_state *bl_state = obstack_alloc(&sim->obst, sizeof(*bl_state));
321 		bl_state->begin = NULL;
322 		bl_state->end   = NULL;
324 		pmap_insert(sim->blk_states, block, bl_state);
328 	return PTR_TO_BLKSTATE(entry->value);
329 } /* x87_get_bl_state */
332  * Creates a new x87 state.
334  * @param sim  the x87 simulator handle
336  * @return a new x87 state
338 static x87_state *x87_alloc_state(x87_simulator *sim) {
	/* obstack allocation: freed wholesale with the simulator's obstack */
339 	x87_state *res = obstack_alloc(&sim->obst, sizeof(*res));
343 } /* x87_alloc_state */
347  * Create a new empty x87 state.
349  * @param sim  the x87 simulator handle
351  * @return a new empty x87 state
353 static x87_state *x87_alloc_empty_state(x87_simulator *sim) {
354 	x87_state *res = x87_alloc_state(sim);
/* NOTE(review): initialization of res (orig 355-357) elided — presumably zeroes depth/tos. */
358 } /* x87_alloc_empty_state */
364  * @param sim  the x87 simulator handle
365  * @param src  the x87 state that will be cloned
367  * @return a cloned copy of the src state
369 static x87_state *x87_clone_state(x87_simulator *sim, const x87_state *src) {
370 	x87_state *res = x87_alloc_state(sim);
372 	memcpy(res, src, sizeof(*res));
374 } /* x87_clone_state */
377 * Patch a virtual instruction into a x87 one and return
380 * @param n the IR node to patch
381 * @param op the x87 opcode to patch in
383 static ir_node *x87_patch_insn(ir_node *n, ir_op *op) {
384 	ir_mode *mode = get_irn_mode(n);
/* NOTE(review): the set_irn_op(n, op) call implied by the function's doc
 * ("patch a virtual instruction into a x87 one") is in an elided line
 * (orig 385-388) — confirm against the full source. */
389 	if (mode == mode_T) {
390 		/* patch all Proj's */
391 		const ir_edge_t *edge;
393 		foreach_out_edge(n, edge) {
394 			ir_node *proj = get_edge_src_irn(edge);
396 			mode = get_irn_mode(proj);
397 			if (mode_is_float(mode)) {
				/* all float results are widened to 80-bit x87 mode_E */
399 				set_irn_mode(proj, mode_E);
404 	else if (mode_is_float(mode))
405 		set_irn_mode(n, mode_E);
407 } /* x87_patch_insn */
410  * Returns the first Proj of a mode_T node having a given mode.
412  * @param n  the mode_T node
413  * @param m  the desired mode of the Proj
414  * @return The first Proj of mode @p m found or NULL.
416 static ir_node *get_irn_Proj_for_mode(ir_node *n, ir_mode *m) {
417 	const ir_edge_t *edge;
419 	assert(get_irn_mode(n) == mode_T && "Need mode_T node");
421 	foreach_out_edge(n, edge) {
422 		ir_node *proj = get_edge_src_irn(edge);
423 		if (get_irn_mode(proj) == m)
	/* the 'return proj;' / 'return NULL;' lines are elided (orig 424-427) */
428 } /* get_irn_Proj_for_mode */
431  * Wrap the arch_* function here so we can check for errors.
433 static INLINE const arch_register_t *x87_get_irn_register(x87_simulator *sim, const ir_node *irn) {
434 	const arch_register_t *res;
436 	res = arch_get_irn_register(sim->arch_env, irn);
	/* sanity: every node handled here must carry a vfp-class register */
437 	assert(res->reg_class->regs == ia32_vfp_regs);
441 /* -------------- x87 perm --------------- */
444 * Creates a fxch for shuffle.
446 * @param state the x87 state
447 * @param pos parameter for fxch
448 * @param block the block were fxch is inserted
450 * Creates a new fxch node and reroute the user of the old node
453 * @return the fxch node
455 static ir_node *x87_fxch_shuffle(x87_state *state, int pos, ir_node *block)
	/* build an fxch node at block end; locals fxch/attr declared in elided lines */
460 	fxch = new_rd_ia32_fxch(NULL, get_irn_irg(block), block, mode_E);
461 	attr = get_ia32_attr(fxch);
462 	attr->x87[0] = &ia32_st_regs[pos];
463 	attr->x87[2] = &ia32_st_regs[0];
	/* mirror the exchange in the simulated state */
467 	x87_fxch(state, pos);
469 } /* x87_fxch_shuffle */
472  * Calculate the necessary permutations to reach dst_state.
474  * These permutations are done with fxch instructions and placed
475  * at the end of the block.
477  * Note that critical edges are removed here, so we need only
478  * a shuffle if the current block has only one successor.
480  * @param sim        the simulator handle
481  * @param block      the current block
482  * @param state      the current x87 stack state, might be modified
483  * @param dst_block  the destination block
484  * @param dst_state  destination state
488 static x87_state *x87_shuffle(x87_simulator *sim, ir_node *block, x87_state *state, ir_node *dst_block, const x87_state *dst_state) {
489 	int i, n_cycles, k, ri;
490 	unsigned cycles[4], all_mask;
	/* NOTE(review): cycle_idx stores -1 as end marker and is compared with
	   "!= -1" below; plain 'char' has implementation-defined signedness, so
	   on unsigned-char targets the sentinel compare never matches — should
	   be 'signed char' or 'int'. Flagging only; lines here are elided so no
	   safe in-place fix is possible from this dump. */
491 	char cycle_idx[4][8];
492 	ir_node *fxch, *before, *after;
494 	assert(state->depth == dst_state->depth);
496 	/* Some mathematics here:
497 	   If we have a cycle of length n that includes the tos,
498 	   we need n-1 exchange operations.
499 	   We can always add the tos and restore it, so we need
500 	   n+1 exchange operations for a cycle not containing the tos.
501 	   So, the maximum of needed operations is for a cycle of 7
502 	   not including the tos == 8.
503 	   This is the same number of ops we would need for using stores,
504 	   so exchange is cheaper (we save the loads).
505 	   On the other hand, we might need an additional exchange
506 	   in the next block to bring one operand on top, so the
507 	   number of ops in the first case is identical.
508 	   Further, no more than 4 cycles can exists (4 x 2).
510 	all_mask = (1 << (state->depth)) - 1;
	/* decompose the permutation state -> dst_state into disjoint cycles */
512 	for (n_cycles = 0; all_mask; ++n_cycles) {
513 		int src_idx, dst_idx;
515 		/* find the first free slot */
516 		for (i = 0; i < state->depth; ++i) {
517 			if (all_mask & (1 << i)) {
518 				all_mask &= ~(1 << i);
520 				/* check if there are differences here */
521 				if (x87_get_st_reg(state, i) != x87_get_st_reg(dst_state, i))
527 		/* no more cycles found */
	/* NOTE(review): reset of k (orig 528-531) elided — presumably k = 0 per cycle. */
532 		cycles[n_cycles] = (1 << i);
533 		cycle_idx[n_cycles][k++] = i;
534 		for (src_idx = i; ; src_idx = dst_idx) {
535 			dst_idx = x87_on_stack(dst_state, x87_get_st_reg(state, src_idx));
537 			if ((all_mask & (1 << dst_idx)) == 0)
540 			cycle_idx[n_cycles][k++] = dst_idx;
541 			cycles[n_cycles] |= (1 << dst_idx);
542 			all_mask &= ~(1 << dst_idx);
544 		cycle_idx[n_cycles][k] = -1;
548 		/* no permutation needed */
552 	/* Hmm: permutation needed */
553 	DB((dbg, LEVEL_2, "\n%+F needs permutation: from\n", block));
554 	DEBUG_ONLY(x87_dump_stack(state));
555 	DB((dbg, LEVEL_2, " to\n"));
556 	DEBUG_ONLY(x87_dump_stack(dst_state));
560 	DB((dbg, LEVEL_2, "Need %d cycles\n", n_cycles));
561 	for (ri = 0; ri < n_cycles; ++ri) {
562 		DB((dbg, LEVEL_2, " Ring %d:\n ", ri));
563 		for (k = 0; cycle_idx[ri][k] != -1; ++k)
564 			DB((dbg, LEVEL_2, " st%d ->", cycle_idx[ri][k]));
565 		DB((dbg, LEVEL_2, "\n"));
572 	 * Find the place node must be insert.
573 	 * We have only one successor block, so the last instruction should
576 	before = sched_last(block);
577 	assert(is_cfop(before));
579 	/* now do the permutations */
580 	for (ri = 0; ri < n_cycles; ++ri) {
581 		if ((cycles[ri] & 1) == 0) {
582 			/* this cycle does not include the tos */
			/* bring a cycle member to tos first (extra fxch), see the
			   cost analysis comment above */
583 			fxch = x87_fxch_shuffle(state, cycle_idx[ri][0], block);
585 				sched_add_after(after, fxch);
587 				sched_add_before(before, fxch);
590 		for (k = 1; cycle_idx[ri][k] != -1; ++k) {
591 			fxch = x87_fxch_shuffle(state, cycle_idx[ri][k], block);
593 				sched_add_after(after, fxch);
595 				sched_add_before(before, fxch);
598 		if ((cycles[ri] & 1) == 0) {
599 			/* this cycle does not include the tos */
			/* restore tos after processing the cycle (the n+1'th fxch) */
600 			fxch = x87_fxch_shuffle(state, cycle_idx[ri][0], block);
601 			sched_add_after(after, fxch);
608 * Create a fxch node before another node.
610 * @param state the x87 state
611 * @param n the node after the fxch
612 * @param pos exchange st(pos) with st(0)
613 * @param op_idx if >= 0, replace input op_idx of n with the fxch result
617 static ir_node *x87_create_fxch(x87_state *state, ir_node *n, int pos, int op_idx) {
	/* locals fxch/attr are declared in elided lines (orig 618-619) */
620 	ir_graph *irg = get_irn_irg(n);
621 	ir_node *block = get_nodes_block(n);
	/* update the simulated stack first, then materialize the instruction */
623 	x87_fxch(state, pos);
625 	fxch = new_rd_ia32_fxch(NULL, irg, block, mode_E);
626 	attr = get_ia32_attr(fxch);
627 	attr->x87[0] = &ia32_st_regs[pos];
628 	attr->x87[2] = &ia32_st_regs[0];
	/* rewiring of input op_idx (orig 629-631) elided from this dump */
632 	sched_add_before(n, fxch);
633 	DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fxch), attr->x87[0]->name, attr->x87[2]->name));
635 } /* x87_create_fxch */
638  * Create a fpush before node n.
640  * @param state   the x87 state
641  * @param n       the node after the fpush
642  * @param pos     push st(pos) on stack
643  * @param op_idx  replace input op_idx of n with the fpush result
645 static void x87_create_fpush(x87_state *state, ir_node *n, int pos, int op_idx) {
646 	ir_node *fpush, *pred = get_irn_n(n, op_idx);
648 	const arch_register_t *out = x87_get_irn_register(state->sim, pred);
	/* duplicate push: the same vfp value now lives in two stack slots */
650 	x87_push_dbl(state, arch_register_get_index(out), pred);
652 	fpush = new_rd_ia32_fpush(NULL, get_irn_irg(n), get_nodes_block(n), mode_E);
653 	attr  = get_ia32_attr(fpush);
654 	attr->x87[0] = &ia32_st_regs[pos];
655 	attr->x87[2] = &ia32_st_regs[0];
658 	sched_add_before(n, fpush);
660 	DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(fpush), attr->x87[0]->name, attr->x87[2]->name));
661 } /* x87_create_fpush */
664  * Create a fpop before node n.
666  * @param state  the x87 state
667  * @param n      the node after the fpop
668  * @param num    pop 1 or 2 values
669  * @param pred   node to use as predecessor of the fpop
671  * @return the fpop node
673 static ir_node *x87_create_fpop(x87_state *state, ir_node *n, int num, ir_node *pred) {
674 	ir_node *fpop = pred;
	/* loop over num and the x87_pop() calls are in elided lines (orig 675-680) */
681 		fpop = new_rd_ia32_fpop(NULL, get_irn_irg(n), get_nodes_block(n), mode_E);
682 		attr = get_ia32_attr(fpop);
683 		attr->x87[0] = &ia32_st_regs[0];
684 		attr->x87[1] = &ia32_st_regs[0];
685 		attr->x87[2] = &ia32_st_regs[0];
688 		sched_add_before(n, fpop);
689 		DB((dbg, LEVEL_1, "<<< %s %s\n", get_irn_opname(fpop), attr->x87[0]->name));
695 } /* x87_create_fpop */
698  * Creates an fldz before node n
700  * @param state  the x87 state
701  * @param n      the node after the fldz
703  * @return the fldz node
705 static ir_node *x87_create_fldz(x87_state *state, ir_node *n, int regidx) {
706 	ir_graph *irg = get_irn_irg(n);
707 	ir_node *block = get_nodes_block(n);
	/* local 'fldz' declared in an elided line (orig 708-709) */
710 	fldz = new_rd_ia32_fldz(NULL, irg, block, mode_E);
712 	sched_add_before(n, fldz);
713 	DB((dbg, LEVEL_1, "<<< %s\n", get_irn_opname(fldz)));
	/* record the constant 0.0 as the value of vfp register regidx */
716 	x87_push(state, regidx, fldz);
721 /* --------------------------------- liveness ------------------------------------------ */
724 * The liveness transfer function.
725 * Updates a live set over a single step from a given node to its predecessor.
726 * Everything defined at the node is removed from the set, the uses of the node get inserted.
728 * @param sim The simulator handle.
729 * @param irn The node at which liveness should be computed.
730 * @param live The bitset of registers live before @p irn. This set gets modified by updating it to
731 * the registers live after irn.
733 * @return The live bitset.
735 static vfp_liveness vfp_liveness_transfer(x87_simulator *sim, ir_node *irn, vfp_liveness live)
	/* backward transfer: kill the def of irn, then gen its float uses */
738 	const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
739 	const arch_env_t *arch_env = sim->arch_env;
741 	if (arch_irn_consider_in_reg_alloc(arch_env, cls, irn)) {
742 		const arch_register_t *reg = x87_get_irn_register(sim, irn);
743 		live &= ~(1 << arch_register_get_index(reg));
746 	for (i = 0, n = get_irn_arity(irn); i < n; ++i) {
747 		ir_node *op = get_irn_n(irn, i);
749 		if (mode_is_float(get_irn_mode(op)) && arch_irn_consider_in_reg_alloc(arch_env, cls, op)) {
750 			const arch_register_t *reg = x87_get_irn_register(sim, op);
751 			live |= 1 << arch_register_get_index(reg);
755 } /* vfp_liveness_transfer */
758  * Put all live virtual registers at the end of a block into a bitset.
760  * @param sim  the simulator handle
761  * @param lv   the liveness information
762  * @param bl   the block
764  * @return The live bitset at the end of this block
766 static vfp_liveness vfp_liveness_end_of_block(x87_simulator *sim, const ir_node *block)
769 	vfp_liveness live = 0;
770 	const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
771 	const arch_env_t *arch_env = sim->arch_env;
772 	const be_lv_t *lv = sim->lv;
	/* fold every vfp node live-at-end of 'block' into the bitset */
774 	be_lv_foreach(lv, block, be_lv_state_end, i) {
775 		const arch_register_t *reg;
776 		const ir_node *node = be_lv_get_irn(lv, block, i);
777 		if (!arch_irn_consider_in_reg_alloc(arch_env, cls, node))
780 		reg = x87_get_irn_register(sim, node);
781 		live |= 1 << arch_register_get_index(reg);
785 } /* vfp_liveness_end_of_block */
787 /** get the register mask from an arch_register */
788 #define REGMASK(reg) (1 << (arch_register_get_index(reg)))
791  * Return a bitset of argument registers which are live at the end of a node.
793  * @param sim   the simulator handle
794  * @param pos   the node
795  * @param kill  kill mask for the output registers
797  * @return The live bitset.
799 static unsigned vfp_live_args_after(x87_simulator *sim, const ir_node *pos, unsigned kill)
	/* reads the per-node cache filled by update_liveness() below */
801 	unsigned idx = get_irn_idx(pos);
803 	assert(idx < sim->n_idx);
804 	return sim->live[idx] & ~kill;
805 } /* vfp_live_args_after */
808  * Calculate the liveness for a whole block and cache it.
810  * @param sim    the simulator handle
811  * @param lv     the liveness handle
812  * @param block  the block
814 static void update_liveness(x87_simulator *sim, ir_node *block) {
815 	vfp_liveness live = vfp_liveness_end_of_block(sim, block);
819 	/* now iterate through the block backward and cache the results */
820 	sched_foreach_reverse(block, irn) {
821 		/* stop at the first Phi: this produces the live-in */
825 		idx = get_irn_idx(irn);
826 		sim->live[idx] = live;
828 		live = vfp_liveness_transfer(sim, irn, live);
	/* the block's own index caches the live-in set */
830 	idx = get_irn_idx(block);
831 	sim->live[idx] = live;
832 } /* update_liveness */
835  * Returns true if a register is live in a set.
837  * @param reg_idx  the vfp register index
838  * @param live     a live bitset
840 #define is_vfp_live(reg_idx, live) ((live) & (1 << (reg_idx)))
844  * Dump liveness info.
846  * @param live  the live bitset
848 static void vfp_dump_live(vfp_liveness live) {
851 	DB((dbg, LEVEL_2, "Live after: "));
852 	for (i = 0; i < 8; ++i) {
853 		if (live & (1 << i)) {
854 			DB((dbg, LEVEL_2, "vf%d ", i));
857 	DB((dbg, LEVEL_2, "\n"));
858 } /* vfp_dump_live */
859 #endif /* DEBUG_libfirm */
861 /* --------------------------------- simulators ---------------------------------------- */
863 #define XCHG(a, b) do { int t = (a); (a) = (b); (b) = t; } while (0)
866 * Simulate a virtual binop.
868 * @param state the x87 state
869 * @param n the node that should be simulated (and patched)
870 * @param tmpl the template containing the 4 possible x87 opcodes
872 static int sim_binop(x87_state *state, ir_node *n, const exchange_tmpl *tmpl) {
873 	int op2_idx = 0, op1_idx;
874 	int out_idx, do_pop = 0;
876 	ir_node *patched_insn;
878 	x87_simulator *sim = state->sim;
879 	const arch_register_t *op1 = x87_get_irn_register(sim, get_irn_n(n, BINOP_IDX_1));
880 	const arch_register_t *op2 = x87_get_irn_register(sim, get_irn_n(n, BINOP_IDX_2));
881 	const arch_register_t *out = x87_get_irn_register(sim, n);
882 	int reg_index_1 = arch_register_get_index(op1);
883 	int reg_index_2 = arch_register_get_index(op2);
	/* out is killed by this instruction, so exclude it from the live set */
884 	vfp_liveness live = vfp_live_args_after(sim, n, REGMASK(out));
886 	DB((dbg, LEVEL_1, ">>> %+F %s, %s -> %s\n", n,
887 		arch_register_get_name(op1), arch_register_get_name(op2),
888 		arch_register_get_name(out)));
889 	DEBUG_ONLY(vfp_dump_live(live));
890 	DB((dbg, LEVEL_1, "Stack before: "));
891 	DEBUG_ONLY(x87_dump_stack(state));
893 	op1_idx = x87_on_stack(state, reg_index_1);
894 	assert(op1_idx >= 0);
896 	if (reg_index_2 != REG_VFP_NOREG) {
897 		/* second operand is a vfp register */
898 		op2_idx = x87_on_stack(state, reg_index_2);
899 		assert(op2_idx >= 0);
		/* case analysis below selects one of the 4 opcode variants from
		   tmpl depending on which operands die at this instruction;
		   several branch/assignment lines are elided in this dump */
901 		if (is_vfp_live(arch_register_get_index(op2), live)) {
902 			/* Second operand is live. */
904 			if (is_vfp_live(arch_register_get_index(op1), live)) {
905 				/* Both operands are live: push the first one.
906 				   This works even for op1 == op2. */
907 				x87_create_fpush(state, n, op1_idx, BINOP_IDX_2);
908 				/* now do fxxx (tos=tos X op) */
912 				dst = tmpl->normal_op;
914 				/* Second live, first operand is dead here, bring it to tos. */
916 					x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
921 				/* now do fxxx (tos=tos X op) */
923 				dst = tmpl->normal_op;
926 			/* Second operand is dead. */
927 			if (is_vfp_live(arch_register_get_index(op1), live)) {
928 				/* First operand is live: bring second to tos. */
930 					x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
935 				/* now do fxxxr (tos = op X tos) */
937 				dst = tmpl->reverse_op;
939 				/* Both operands are dead here, pop them from the stack. */
942 					/* Both are identically and on tos, no pop needed. */
943 					/* here fxxx (tos = tos X tos) */
944 					dst = tmpl->normal_op;
947 						/* now do fxxxp (op = op X tos, pop) */
948 						dst = tmpl->normal_pop_op;
952 					} else if (op1_idx == 0) {
953 						assert(op1_idx != op2_idx);
954 						/* now do fxxxrp (op = tos X op, pop) */
955 						dst = tmpl->reverse_pop_op;
959 						/* Bring the second on top. */
960 						x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
961 						if (op1_idx == op2_idx) {
962 							/* Both are identically and on tos now, no pop needed. */
965 							/* use fxxx (tos = tos X tos) */
966 							dst = tmpl->normal_op;
969 							/* op2 is on tos now */
971 							/* use fxxxp (op = op X tos, pop) */
972 							dst = tmpl->normal_pop_op;
980 		/* second operand is an address mode */
981 		if (is_vfp_live(arch_register_get_index(op1), live)) {
982 			/* first operand is live: push it here */
983 			x87_create_fpush(state, n, op1_idx, BINOP_IDX_1);
985 			/* use fxxx (tos = tos X mem) */
986 			dst = tmpl->normal_op;
989 			/* first operand is dead: bring it to tos */
991 				x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
995 			/* use fxxxp (tos = tos X mem) */
			/* NOTE(review): the comment above says fxxxp but the code selects
			   normal_op (no pop) — one of the two is wrong; confirm against
			   the full source before changing either. */
996 			dst = tmpl->normal_op;
1001 	patched_insn = x87_patch_insn(n, dst);
1002 	x87_set_st(state, arch_register_get_index(out), patched_insn, out_idx);
1007 	/* patch the operation */
1008 	attr = get_ia32_attr(n);
1009 	attr->x87[0] = op1 = &ia32_st_regs[op1_idx];
1010 	if (reg_index_2 != REG_VFP_NOREG) {
1011 		attr->x87[1] = op2 = &ia32_st_regs[op2_idx];
1013 	attr->x87[2] = out = &ia32_st_regs[out_idx];
1015 	if (reg_index_2 != REG_VFP_NOREG) {
1016 		DB((dbg, LEVEL_1, "<<< %s %s, %s -> %s\n", get_irn_opname(n),
1017 			arch_register_get_name(op1), arch_register_get_name(op2),
1018 			arch_register_get_name(out)));
1020 		DB((dbg, LEVEL_1, "<<< %s %s, [AM] -> %s\n", get_irn_opname(n),
1021 			arch_register_get_name(op1),
1022 			arch_register_get_name(out)));
1029 * Simulate a virtual Unop.
1031 * @param state the x87 state
1032 * @param n the node that should be simulated (and patched)
1033 * @param op the x87 opcode that will replace n's opcode
1035 static int sim_unop(x87_state *state, ir_node *n, ir_op *op) {
1036 	int op1_idx, out_idx;
1037 	x87_simulator *sim = state->sim;
1038 	const arch_register_t *op1 = x87_get_irn_register(sim, get_irn_n(n, UNOP_IDX));
1039 	const arch_register_t *out = x87_get_irn_register(sim, n);
1041 	unsigned live = vfp_live_args_after(sim, n, REGMASK(out));
1043 	DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, out->name));
1044 	DEBUG_ONLY(vfp_dump_live(live));
1046 	op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1048 	if (is_vfp_live(arch_register_get_index(op1), live)) {
1049 		/* push the operand here */
		/* operand survives: duplicate it so the unop consumes the copy */
1050 		x87_create_fpush(state, n, op1_idx, UNOP_IDX);
1054 		/* operand is dead, bring it to tos */
1056 			x87_create_fxch(state, n, op1_idx, UNOP_IDX);
	/* x87 unops always operate on and write st(0) */
1061 	x87_set_tos(state, arch_register_get_index(out), x87_patch_insn(n, op));
1063 	attr = get_ia32_attr(n);
1064 	attr->x87[0] = op1 = &ia32_st_regs[0];
1065 	attr->x87[2] = out = &ia32_st_regs[0];
1066 	DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), out->name));
1072  * Simulate a virtual Load instruction.
1074  * @param state  the x87 state
1075  * @param n      the node that should be simulated (and patched)
1076  * @param op     the x87 opcode that will replace n's opcode
1078 static int sim_load(x87_state *state, ir_node *n, ir_op *op) {
1079 	const arch_register_t *out = x87_get_irn_register(state->sim, n);
1082 	DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, arch_register_get_name(out)));
	/* a load always pushes its result onto the x87 stack (becomes st(0)) */
1083 	x87_push(state, arch_register_get_index(out), x87_patch_insn(n, op));
1084 	assert(out == x87_get_irn_register(state->sim, n));
1085 	attr = get_ia32_attr(n);
1086 	attr->x87[2] = out = &ia32_st_regs[0];
1087 	DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), arch_register_get_name(out)));
1093 * Rewire all users of @p old_val to @new_val iff they are scheduled after @p store.
1095 * @param store The store
1096 * @param old_val The former value
1097 * @param new_val The new value
1099 static void collect_and_rewire_users(ir_node *store, ir_node *old_val, ir_node *new_val) {
1100 	const ir_edge_t *edge, *ne;
	/* safe iteration: set_irn_n below mutates the edge set we walk */
1102 	foreach_out_edge_safe(old_val, edge, ne) {
1103 		ir_node *user = get_edge_src_irn(edge);
1105 		if (! user || user == store)
1108 		/* if the user is scheduled after the store: rewire */
1109 		if (sched_is_scheduled(user) && sched_comes_after(store, user)) {
1111 			/* find the input of the user pointing to the old value */
1112 			for (i = get_irn_arity(user) - 1; i >= 0; i--) {
1113 				if (get_irn_n(user, i) == old_val)
1114 					set_irn_n(user, i, new_val);
1118 } /* collect_and_rewire_users */
1121 * Simulate a virtual Store.
1123 * @param state the x87 state
1124 * @param n the node that should be simulated (and patched)
1125 * @param op the x87 store opcode
1126 * @param op_p the x87 store and pop opcode
1128 static int sim_store(x87_state *state, ir_node *n, ir_op *op, ir_op *op_p) {
1129 	x87_simulator *sim = state->sim;
1130 	ir_node *val = get_irn_n(n, STORE_VAL_IDX);
1131 	const arch_register_t *op2 = x87_get_irn_register(sim, val);
1132 	unsigned live = vfp_live_args_after(sim, n, 0);
1135 	int op2_reg_idx, op2_idx, depth;
1136 	int live_after_node;
1139 	op2_reg_idx = arch_register_get_index(op2);
	/* special case: the stored value is an "unknown" vfp register */
1140 	if (op2_reg_idx == REG_VFP_UKNWN) {
1141 		// just take any value from stack
1142 		if(state->depth > 0) {
1144 			DEBUG_ONLY(op2 = NULL);
1145 			live_after_node = 1;
1147 			// produce a new value which we will consume immediately
1148 			x87_create_fldz(state, n, op2_reg_idx);
1149 			live_after_node = 0;
1150 			op2_idx = x87_on_stack(state, op2_reg_idx);
1151 			assert(op2_idx >= 0);
1154 		op2_idx = x87_on_stack(state, op2_reg_idx);
1155 		live_after_node = is_vfp_live(arch_register_get_index(op2), live);
1156 		assert(op2_idx >= 0);
1157 		DB((dbg, LEVEL_1, ">>> %+F %s ->\n", n, arch_register_get_name(op2)));
1160 	mode  = get_ia32_ls_mode(n);
1161 	depth = x87_get_depth(state);
1163 	if (live_after_node) {
1165 		   Problem: fst doesn't support mode_E (spills), only fstp does
1167 		   - stack not full: push value and fstp
1168 		   - stack full: fstp value and load again
1170 		if (mode == mode_E) {
1171 			if (depth < N_x87_REGS) {
1172 				/* ok, we have a free register: push + fstp */
1173 				x87_create_fpush(state, n, op2_idx, STORE_VAL_IDX);
1175 				x87_patch_insn(n, op_p);
1178 				ir_node *vfld, *mem, *block, *rproj, *mproj;
1181 				/* stack full here: need fstp + load */
1183 				x87_patch_insn(n, op_p);
1185 				block = get_nodes_block(n);
1186 				irg   = get_irn_irg(n);
				/* reload the just-stored value; the real memory input is
				   wired up below once the store's M-Proj is found */
1187 				vfld  = new_rd_ia32_vfld(NULL, irg, block, get_irn_n(n, 0), get_irn_n(n, 1), new_rd_NoMem(irg));
1189 				/* copy all attributes */
1190 				set_ia32_frame_ent(vfld, get_ia32_frame_ent(n));
1191 				if (is_ia32_use_frame(n))
1192 					set_ia32_use_frame(vfld);
1193 				set_ia32_am_flavour(vfld, get_ia32_am_flavour(n));
1194 				set_ia32_op_type(vfld, ia32_am_Source);
1195 				add_ia32_am_offs_int(vfld, get_ia32_am_offs_int(n));
1196 				set_ia32_am_sc(vfld, get_ia32_am_sc(n));
1197 				set_ia32_ls_mode(vfld, get_ia32_ls_mode(n));
1199 				rproj = new_r_Proj(irg, block, vfld, get_ia32_ls_mode(vfld), pn_ia32_vfld_res);
1200 				mproj = new_r_Proj(irg, block, vfld, mode_M, pn_ia32_vfld_M);
1201 				mem   = get_irn_Proj_for_mode(n, mode_M);
1203 				assert(mem && "Store memory not found");
1205 				arch_set_irn_register(sim->arch_env, rproj, op2);
1207 				/* reroute all former users of the store memory to the load memory */
1208 				edges_reroute(mem, mproj, irg);
1209 				/* set the memory input of the load to the store memory */
1210 				set_irn_n(vfld, 2, mem);
1212 				sched_add_after(n, vfld);
1213 				sched_add_after(vfld, rproj);
1215 				/* rewire all users, scheduled after the store, to the loaded value */
1216 				collect_and_rewire_users(n, val, rproj);
1222 			/* we can only store the tos to memory */
1224 				x87_create_fxch(state, n, op2_idx, STORE_VAL_IDX);
1226 			/* mode != mode_E -> use normal fst */
1227 			x87_patch_insn(n, op);
1231 		/* we can only store the tos to memory */
1233 			x87_create_fxch(state, n, op2_idx, STORE_VAL_IDX);
		/* value is dead after the store: use the popping variant */
1236 		x87_patch_insn(n, op_p);
1239 	attr = get_ia32_attr(n);
1240 	attr->x87[1] = op2 = &ia32_st_regs[0];
1241 	DB((dbg, LEVEL_1, "<<< %s %s ->\n", get_irn_opname(n), arch_register_get_name(op2)));
1247 * Simulate a virtual Phi.
1248 * Just for cosmetic reasons change the mode of Phi nodes to mode_E.
1250 * @param state the x87 state
1251 * @param n the node that should be simulated (and patched)
1252 * @param arch_env the architecture environment
1254 static int sim_Phi(x87_state *state, ir_node *n, const arch_env_t *arch_env) {
1255 	ir_mode *mode = get_irn_mode(n);
	/* cosmetic only: float Phis are widened to the 80-bit x87 mode */
1257 	if (mode_is_float(mode))
1258 		set_irn_mode(n, mode_E);
/* Generator macros: each expands to a sim_<op> wrapper dispatching to the
 * generic simulator with the matching op_ia32_* opcode(s). Note that some
 * continuation lines (trailing backslash) are elided in this dump. */
1263 #define _GEN_BINOP(op, rev) \
1264 static int sim_##op(x87_state *state, ir_node *n) { \
1265 	exchange_tmpl tmpl = { op_ia32_##op, op_ia32_##rev, op_ia32_##op##p, op_ia32_##rev##p }; \
1266 	return sim_binop(state, n, &tmpl); \
1269 #define GEN_BINOP(op) _GEN_BINOP(op, op)
1270 #define GEN_BINOPR(op) _GEN_BINOP(op, op##r)
1272 #define GEN_LOAD2(op, nop) \
1273 static int sim_##op(x87_state *state, ir_node *n) { \
1274 	return sim_load(state, n, op_ia32_##nop); \
1277 #define GEN_LOAD(op) GEN_LOAD2(op, op)
1279 #define GEN_UNOP(op) \
1280 static int sim_##op(x87_state *state, ir_node *n) { \
1281 	return sim_unop(state, n, op_ia32_##op); \
1284 #define GEN_STORE(op) \
1285 static int sim_##op(x87_state *state, ir_node *n) { \
1286 	return sim_store(state, n, op_ia32_##op, op_ia32_##op##p); \
/* NOTE(review): numbered listing with elided lines — the declarations of
 * op1_idx/op2_idx/dst/attr/pop_cnt, several else/closing braces, and the
 * fpop calls implied by the pop-variants are not visible here. Comments
 * below describe only the visible logic. */
1311 * Simulate a fCondJmp.
1313 * @param state the x87 state
1314 * @param n the node that should be simulated (and patched)
1316 static int sim_fCondJmp(x87_state *state, ir_node *n) {
1322 x87_simulator *sim = state->sim;
1323 const arch_register_t *op1 = x87_get_irn_register(sim, get_irn_n(n, BINOP_IDX_1));
1324 const arch_register_t *op2 = x87_get_irn_register(sim, get_irn_n(n, BINOP_IDX_2));
1325 int reg_index_1 = arch_register_get_index(op1);
1326 int reg_index_2 = arch_register_get_index(op2);
/* liveness after n decides whether operands may be popped by the compare */
1327 unsigned live = vfp_live_args_after(sim, n, 0);
1329 DB((dbg, LEVEL_1, ">>> %+F %s, %s\n", n,
1330 arch_register_get_name(op1), arch_register_get_name(op2)));
1331 DEBUG_ONLY(vfp_dump_live(live));
1332 DB((dbg, LEVEL_1, "Stack before: "));
1333 DEBUG_ONLY(x87_dump_stack(state));
/* first operand must already reside on the simulated x87 stack */
1335 op1_idx = x87_on_stack(state, reg_index_1);
1336 assert(op1_idx >= 0);
1338 /* BEWARE: check for comp a,a cases, they might happen */
1339 if (reg_index_2 != REG_VFP_NOREG) {
1340 /* second operand is a vfp register */
1341 op2_idx = x87_on_stack(state, reg_index_2);
1342 assert(op2_idx >= 0);
/* Case analysis: (op2 live?, op1 live?) selects the fcom variant
   (plain / reversed / single-pop / double-pop) and which fxch
   exchanges are needed to bring operands to st(0)/st(1). */
1344 if (is_vfp_live(arch_register_get_index(op2), live)) {
1345 /* second operand is live */
1347 if (is_vfp_live(arch_register_get_index(op1), live)) {
1348 /* both operands are live: no pop allowed */
1351 /* res = tos X op */
1352 dst = op_ia32_fcomJmp;
1353 } else if (op2_idx == 0) {
1354 /* res = op X tos */
1355 dst = op_ia32_fcomrJmp;
1357 /* bring the first one to tos */
1358 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1362 /* res = tos X op */
1363 dst = op_ia32_fcomJmp;
1366 /* second live, first operand is dead here, bring it to tos.
1367 This means further, op1_idx != op2_idx. */
1368 assert(op1_idx != op2_idx);
1370 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1375 /* res = tos X op, pop */
1376 dst = op_ia32_fcompJmp;
1380 /* second operand is dead */
1381 if (is_vfp_live(arch_register_get_index(op1), live)) {
1382 /* first operand is live: bring second to tos.
1383 This means further, op1_idx != op2_idx. */
1384 assert(op1_idx != op2_idx);
1386 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1391 /* res = op X tos, pop */
1392 dst = op_ia32_fcomrpJmp;
1395 /* both operands are dead here, check first for identity. */
1396 if (op1_idx == op2_idx) {
1397 /* identically, one pop needed */
1399 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1403 /* res = tos X op, pop */
1404 dst = op_ia32_fcompJmp;
1407 /* different, move them to st and st(1) and pop both.
1408 The tricky part is to get one into st(1).*/
1409 else if (op2_idx == 1) {
1410 /* good, second operand is already in the right place, move the first */
1412 /* bring the first on top */
1413 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1414 assert(op2_idx != 0);
1417 /* res = tos X op, pop, pop */
1418 dst = op_ia32_fcomppJmp;
1420 } else if (op1_idx == 1) {
1421 /* good, first operand is already in the right place, move the second */
1423 /* bring the first on top */
1424 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1425 assert(op1_idx != 0);
/* reversed compare, since op2 ends up on TOS */
1428 dst = op_ia32_fcomrppJmp;
1431 /* if one is already the TOS, we need two fxch */
1433 /* first one is TOS, move to st(1) */
1434 x87_create_fxch(state, n, 1, BINOP_IDX_1);
1435 assert(op2_idx != 1);
1437 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1439 /* res = op X tos, pop, pop */
1440 dst = op_ia32_fcomrppJmp;
1442 } else if (op2_idx == 0) {
1443 /* second one is TOS, move to st(1) */
1444 x87_create_fxch(state, n, 1, BINOP_IDX_2);
1445 assert(op1_idx != 1);
1447 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1449 /* res = tos X op, pop, pop */
1450 dst = op_ia32_fcomppJmp;
1453 /* none of them is either TOS or st(1), 3 fxch needed */
1454 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
1455 assert(op1_idx != 0);
1456 x87_create_fxch(state, n, 1, BINOP_IDX_2);
1458 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1460 /* res = tos X op, pop, pop */
1461 dst = op_ia32_fcomppJmp;
1468 /* second operand is an address mode: only op1 needs positioning */
1469 if (is_vfp_live(arch_register_get_index(op1), live)) {
1470 /* first operand is live: bring it to TOS */
1472 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
1475 dst = op_ia32_fcomJmp;
1477 /* first operand is dead: bring it to tos */
1479 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
/* dead operand: use the popping variant to free the slot */
1482 dst = op_ia32_fcompJmp;
/* replace the virtual op with the selected real fcom variant */
1487 x87_patch_insn(n, dst);
1488 assert(pop_cnt < 3);
1494 /* patch the operation: write the physical st(i) registers into the attr */
1495 attr = get_ia32_attr(n);
1496 op1 = &ia32_st_regs[op1_idx];
1499 op2 = &ia32_st_regs[op2_idx];
/* a compare produces no x87 stack result */
1502 attr->x87[2] = NULL;
1505 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(n),
1506 arch_register_get_name(op1), arch_register_get_name(op2)));
1508 DB((dbg, LEVEL_1, "<<< %s %s, [AM]\n", get_irn_opname(n),
1509 arch_register_get_name(op1)));
1512 } /* sim_fCondJmp */
/**
 * Create the real copy for a be_Copy of a float value: constants are
 * recreated via their fld* instruction, anything else becomes an
 * fpushCopy that duplicates the stack slot.
 *
 * NOTE(review): numbered listing with elided lines — the declarations of
 * res/attr, several case/break lines, the default branch of the switch
 * and the final return are not visible here.
 *
 * @param state  the x87 state (the result is pushed onto it)
 * @param n      the be_Copy node being replaced
 * @return presumably the newly created node — tail elided, TODO confirm
 */
1514 static ir_node *create_Copy(x87_state *state, ir_node *n) {
1515 x87_simulator *sim = state->sim;
1516 ir_graph *irg = get_irn_irg(n);
1517 dbg_info *n_dbg = get_irn_dbg_info(n);
1518 ir_mode *mode = get_irn_mode(n);
1519 ir_node *block = get_nodes_block(n);
1520 ir_node *pred = get_irn_n(n, 0);
/* constructor for the constant-load replacement; stays NULL for
   non-constant predecessors */
1521 ir_node *(*cnstr)(dbg_info *, ir_graph *, ir_node *, ir_mode *) = NULL;
1523 const arch_register_t *out;
1524 const arch_register_t *op1;
1527 /* Do not copy constants, recreate them. */
1528 switch (get_ia32_irn_opcode(pred)) {
1529 case iro_ia32_Unknown_VFP:
/* Unknown values are materialized as fldz as well */
1531 cnstr = new_rd_ia32_fldz;
1534 cnstr = new_rd_ia32_fld1;
1536 case iro_ia32_fldpi:
1537 cnstr = new_rd_ia32_fldpi;
1539 case iro_ia32_fldl2e:
1540 cnstr = new_rd_ia32_fldl2e;
1542 case iro_ia32_fldl2t:
1543 cnstr = new_rd_ia32_fldl2t;
1545 case iro_ia32_fldlg2:
1546 cnstr = new_rd_ia32_fldlg2;
1548 case iro_ia32_fldln2:
1549 cnstr = new_rd_ia32_fldln2;
1553 out = x87_get_irn_register(sim, n);
1554 op1 = x87_get_irn_register(sim, pred);
1557 /* copy a constant */
1558 res = (*cnstr)(n_dbg, irg, block, mode);
/* new value lands on the TOS of the simulated stack */
1560 x87_push(state, arch_register_get_index(out), res);
1562 attr = get_ia32_attr(res);
1563 attr->x87[2] = &ia32_st_regs[0];
/* non-constant: duplicate the source slot with fpushCopy */
1565 int op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1567 res = new_rd_ia32_fpushCopy(n_dbg, irg, block, pred, mode);
1569 x87_push(state, arch_register_get_index(out), res);
1571 attr = get_ia32_attr(res);
1572 attr->x87[0] = &ia32_st_regs[op1_idx];
1573 attr->x87[2] = &ia32_st_regs[0];
1575 arch_set_irn_register(sim->arch_env, res, out);
/* NOTE(review): numbered listing with elided lines — declarations of
 * sim/pred/attr/live, various else branches, exchange() calls and the
 * return statements are not visible here. */
1581 * Simulate a be_Copy.
1583 * @param state the x87 state
1584 * @param n the node that should be simulated (and patched)
1586 static int sim_Copy(x87_state *state, ir_node *n) {
1589 const arch_register_t *out;
1590 const arch_register_t *op1;
1591 ir_node *node, *next;
1593 int op1_idx, out_idx;
1596 ir_mode *mode = get_irn_mode(n);
/* non-float copies are not the x87 simulator's business */
1598 if (!mode_is_float(mode))
1602 pred = get_irn_n(n, 0);
1603 out = x87_get_irn_register(sim, n);
1604 op1 = x87_get_irn_register(sim, pred);
1605 live = vfp_live_args_after(sim, n, REGMASK(out));
1607 DB((dbg, LEVEL_1, ">>> %+F %s -> %s\n", n,
1608 arch_register_get_name(op1), arch_register_get_name(out)));
1609 DEBUG_ONLY(vfp_dump_live(live));
1611 /* handle the infamous unknown value */
1612 if (arch_register_get_index(op1) == REG_VFP_UKNWN) {
1613 /* Operand is still live, a real copy. We need here an fpush that can
1614 hold a a register, so use the fpushCopy or recreate constants */
1615 node = create_Copy(state, n);
1617 assert(is_ia32_fldz(node));
1618 next = sched_next(n);
/* keep schedule order: replacement goes where n was */
1621 sched_add_before(next, node);
1623 DB((dbg, LEVEL_1, "<<< %+F %s -> %s\n", node, op1->name,
1624 arch_get_irn_register(sim->arch_env, node)->name));
1628 op1_idx = x87_on_stack(state, arch_register_get_index(op1));
1630 if (is_vfp_live(arch_register_get_index(op1), live)) {
1631 /* Operand is still live, a real copy. We need here an fpush that can
1632 hold a a register, so use the fpushCopy or recreate constants */
1633 node = create_Copy(state, n);
1635 next = sched_next(n);
1638 sched_add_before(next, node);
1639 DB((dbg, LEVEL_1, "<<< %+F %s -> %s\n", node, op1->name,
1640 arch_get_irn_register(sim->arch_env, node)->name));
/* operand is dead: try to simply rename op1's slot to out */
1642 out_idx = x87_on_stack(state, arch_register_get_index(out));
1644 if (out_idx >= 0 && out_idx != op1_idx) {
1645 /* Matze: out already on stack? how can this happen? */
1648 /* op1 must be killed and placed where out is */
1650 /* best case, simple remove and rename */
1651 x87_patch_insn(n, op_ia32_Pop);
1652 attr = get_ia32_attr(n);
1653 attr->x87[0] = op1 = &ia32_st_regs[0];
/* stack shrank by one: out's slot shifts up accordingly */
1656 x87_set_st(state, arch_register_get_index(out), n, op1_idx - 1);
1658 /* move op1 to tos, store and pop it */
1660 x87_create_fxch(state, n, op1_idx, 0);
1663 x87_patch_insn(n, op_ia32_Pop);
1664 attr = get_ia32_attr(n);
1665 attr->x87[0] = op1 = &ia32_st_regs[out_idx];
1668 x87_set_st(state, arch_register_get_index(out), n, out_idx - 1);
1670 DB((dbg, LEVEL_1, "<<< %+F %s\n", n, op1->name));
1672 /* just a virtual copy */
1673 x87_set_st(state, arch_register_get_index(out), get_unop_op(n), op1_idx);
1674 /* don't remove the node to keep the verifier quiet :),
1675 the emitter won't emit any code for the node */
1678 DB((dbg, LEVEL_1, "<<< KILLED %s\n", get_irn_opname(n)));
1679 exchange(n, get_unop_op(n));
/* NOTE(review): numbered listing with elided lines — the assignment of
 * resproj inside the first loop, the return statements and closing
 * braces are not visible here. */
1688 * Returns the result proj of the call, or NULL if the result is not used
1690 static ir_node *get_call_result_proj(ir_node *call)
1692 const ir_edge_t *edge;
1693 ir_node *resproj = NULL;
1695 /* search the result proj */
1696 foreach_out_edge(call, edge) {
1697 ir_node *proj = get_edge_src_irn(edge);
1698 long pn = get_Proj_proj(proj);
1700 if(pn == pn_be_Call_first_res) {
1705 if(resproj == NULL) {
1709 /* the result proj is connected to a Keep and maybe other nodes */
/* any non-Keep user means the result is actually consumed */
1710 foreach_out_edge(resproj, edge) {
1711 ir_node *pred = get_edge_src_irn(edge);
1712 if(!be_is_Keep(pred)) {
1717 /* only be_Keep found, so result is not used */
/* NOTE(review): numbered listing with elided lines — the declarations of
 * res_type/mode/resproj and the return statements are not visible. */
1722 * Simulate a be_Call.
1724 * @param state the x87 state
1725 * @param n the node that should be simulated
1726 * @param arch_env the architecture environment
1728 static int sim_Call(x87_state *state, ir_node *n, const arch_env_t *arch_env) {
1729 ir_type *call_tp = be_Call_get_type(n);
1733 const arch_register_t *reg;
1735 /* at the begin of a call the x87 state should be empty */
1736 assert(state->depth == 0 && "stack not empty before call");
/* calls without results leave the (empty) stack untouched */
1738 if (get_method_n_ress(call_tp) <= 0)
1742 * If the called function returns a float, it is returned in st(0).
1743 * This even happens if the return value is NOT used.
1744 * Moreover, only one return result is supported.
1746 res_type = get_method_res_type(call_tp, 0);
1747 mode = get_type_mode(res_type);
1749 if (mode == NULL || !mode_is_float(mode))
1752 resproj = get_call_result_proj(n);
1753 if (resproj == NULL)
/* model the callee's push of st(0): put the result Proj on the stack */
1756 reg = x87_get_irn_register(state->sim, resproj);
1757 x87_push(state, arch_register_get_index(reg), resproj);
1763 * Simulate a be_Spill.
1765 * @param state the x87 state
1766 * @param n the node that should be simulated (and patched)
1768 * Should not happen, spills are lowered before x87 simulator see them.
1770 static int sim_Spill(x87_state *state, ir_node *n) {
/* dead code guard: falls back to fst handling only if the assert is compiled out */
1771 assert(0 && "Spill not lowered");
1772 return sim_fst(state, n);
1776 * Simulate a be_Reload.
1778 * @param state the x87 state
1779 * @param n the node that should be simulated (and patched)
1781 * Should not happen, reloads are lowered before x87 simulator see them.
1783 static int sim_Reload(x87_state *state, ir_node *n) {
/* dead code guard: falls back to fld handling only if the assert is compiled out */
1784 assert(0 && "Reload not lowered");
1785 return sim_fld(state, n);
/* NOTE(review): numbered listing with elided lines — the increment of
 * n_float_res, the pop call inside the final loop and the return are
 * not visible here. */
1789 * Simulate a be_Return.
1791 * @param state the x87 state
1792 * @param n the node that should be simulated (and patched)
1794 static int sim_Return(x87_state *state, ir_node *n) {
1795 int n_res = be_Return_get_n_rets(n);
1796 int i, n_float_res = 0;
1798 /* only floating point return values must resist on stack */
1799 for (i = 0; i < n_res; ++i) {
1800 ir_node *res = get_irn_n(n, be_pos_Return_val + i);
1802 if (mode_is_float(get_irn_mode(res)))
/* at a return, exactly the float results may remain on the stack */
1805 assert(x87_get_depth(state) == n_float_res);
1807 /* pop them virtually */
1808 for (i = n_float_res - 1; i >= 0; --i)
/* Pairing of an input register with its output register for a be_Perm.
 * NOTE(review): the closing of this typedef is elided in this listing. */
1814 typedef struct _perm_data_t {
1815 const arch_register_t *in;
1816 const arch_register_t *out;
/* NOTE(review): numbered listing with elided lines — declarations of
 * i/n/stack_pos, the store into stack_pos and the return are not
 * visible here. */
1820 * Simulate a be_Perm.
1822 * @param state the x87 state
1823 * @param irn the node that should be simulated (and patched)
1825 static int sim_Perm(x87_state *state, ir_node *irn) {
1827 x87_simulator *sim = state->sim;
1828 ir_node *pred = get_irn_n(irn, 0);
1830 const ir_edge_t *edge;
1832 /* handle only floating point Perms */
1833 if (! mode_is_float(get_irn_mode(pred)))
1836 DB((dbg, LEVEL_1, ">>> %+F\n", irn));
1838 /* Perm is a pure virtual instruction on x87.
1839 All inputs must be on the FPU stack and are pairwise
1840 different from each other.
1841 So, all we need to do is to permutate the stack state. */
1842 n = get_irn_arity(irn);
/* stack_pos[i] records where input i lives on the simulated stack */
1843 NEW_ARR_A(int, stack_pos, n);
1845 /* collect old stack positions */
1846 for (i = 0; i < n; ++i) {
1847 const arch_register_t *inreg = x87_get_irn_register(sim, get_irn_n(irn, i));
1848 int idx = x87_on_stack(state, arch_register_get_index(inreg));
1850 assert(idx >= 0 && "Perm argument not on x87 stack");
1854 /* now do the permutation */
/* each output Proj takes over the stack slot of its same-numbered input */
1855 foreach_out_edge(irn, edge) {
1856 ir_node *proj = get_edge_src_irn(edge);
1857 const arch_register_t *out = x87_get_irn_register(sim, proj);
1858 long num = get_Proj_proj(proj);
1860 assert(0 <= num && num < n && "More Proj's than Perm inputs");
1861 x87_set_st(state, arch_register_get_index(out), proj, stack_pos[(unsigned)num]);
1863 DB((dbg, LEVEL_1, "<<< %+F\n", irn));
/* NOTE(review): numbered listing with elided lines — the declaration of
 * kill_mask, the early-return for an empty kill mask, loop braces and
 * the final return are not visible here. */
1869 * Kill any dead registers at block start by popping them from the stack.
1871 * @param sim the simulator handle
1872 * @param block the current block
1873 * @param start_state the x87 state at the begin of the block
1875 static x87_state *x87_kill_deads(x87_simulator *sim, ir_node *block, x87_state *start_state) {
1876 x87_state *state = start_state;
/* pops/fxchs are inserted before the block's first real instruction */
1877 ir_node *first_insn = sched_first(block);
1878 ir_node *keep = NULL;
1879 unsigned live = vfp_live_args_after(sim, block, 0);
1881 int i, depth, num_pop;
/* build a bitmask of dead stack slots (bit i <-> st(i)) */
1884 depth = x87_get_depth(state);
1885 for (i = depth - 1; i >= 0; --i) {
1886 int reg = x87_get_st_reg(state, i);
1888 if (! is_vfp_live(reg, live))
1889 kill_mask |= (1 << i);
1893 /* create a new state, will be changed */
1894 state = x87_clone_state(sim, state);
1896 DB((dbg, LEVEL_1, "Killing deads:\n"));
1897 DEBUG_ONLY(vfp_dump_live(live));
1898 DEBUG_ONLY(x87_dump_stack(state));
1900 /* now kill registers */
1902 /* we can only kill from TOS, so bring them up */
1903 if (! (kill_mask & 1)) {
1904 /* search from behind, because we can to a double-pop */
1905 for (i = depth - 1; i >= 0; --i) {
1906 if (kill_mask & (1 << i)) {
1907 kill_mask &= ~(1 << i);
/* mark slot i as free (-1) and swap the dead value to the TOS */
1914 x87_set_st(state, -1, keep, i);
1915 keep = x87_create_fxch(state, first_insn, i, -1);
1918 keep = x87_get_st_node(state, 0);
1920 if ((kill_mask & 3) == 3) {
1921 /* we can do a double-pop */
1925 /* only a single pop */
/* consume the popped bits and emit the fpop(s) */
1930 kill_mask >>= num_pop;
1931 keep = x87_create_fpop(state, first_insn, num_pop, keep);
1936 } /* x87_kill_deads */
/* NOTE(review): numbered listing with elided lines — declarations of
 * n/next/func/node_inserted, some early returns/continues and loop
 * braces are not visible here. The doc comment's @return contradicts
 * the void signature; presumably stale — TODO confirm. */
1939 * Run a simulation and fix all virtual instructions for a block.
1941 * @param sim the simulator handle
1942 * @param block the current block
1944 * @return non-zero if simulation is complete,
1945 * zero if the simulation must be rerun
1947 static void x87_simulate_block(x87_simulator *sim, ir_node *block) {
1949 blk_state *bl_state = x87_get_bl_state(sim, block);
1950 x87_state *state = bl_state->begin;
1951 const ir_edge_t *edge;
1952 ir_node *start_block;
1954 assert(state != NULL);
1955 // already processed?
1956 if(bl_state->end != NULL)
1959 //update_liveness(sim, block);
1961 DB((dbg, LEVEL_1, "Simulate %+F\n", block));
1962 DB((dbg, LEVEL_2, "State at Block begin:\n "));
1963 DEBUG_ONLY(x87_dump_stack(state));
1965 /* at block begin, kill all dead registers */
1966 state = x87_kill_deads(sim, block, state);
1968 /* beware, n might change */
1969 for (n = sched_first(block); !sched_is_end(n); n = next) {
1972 ir_op *op = get_irn_op(n);
1974 next = sched_next(n);
/* ops without a registered sim_func need no x87 treatment */
1975 if (op->ops.generic == NULL)
1978 func = (sim_func)op->ops.generic;
1980 /* have work to do */
1981 if (state == bl_state->begin) {
1982 /* create a new state, will be changed */
1983 state = x87_clone_state(sim, state);
/* dispatch to the per-opcode simulator registered in x87_init_simulator */
1987 node_inserted = (*func)(state, n);
1990 sim_func might have added additional nodes after n,
1992 beware: n must not be changed by sim_func
1993 (i.e. removed from schedule) in this case
1996 next = sched_next(n);
1999 start_block = get_irg_start_block(get_irn_irg(block));
2001 /* check if the state must be shuffled */
2002 foreach_block_succ(block, edge) {
2003 ir_node *succ = get_edge_src_irn(edge);
2004 blk_state *succ_state;
2006 if(succ == start_block)
2009 succ_state = x87_get_bl_state(sim, succ);
2011 if (succ_state->begin == NULL) {
/* first predecessor fixes the successor's entry state */
2012 succ_state->begin = state;
2013 waitq_put(sim->worklist, succ);
2015 /* There is already a begin state for the successor, bad.
2016 Do the necessary permutations.
2017 Note that critical edges are removed, so this is always possible. */
2018 x87_shuffle(sim, block, state, succ, succ_state->begin);
2021 bl_state->end = state;
2023 DB((dbg, LEVEL_2, "State at Block end:\n ")); DEBUG_ONLY(x87_dump_stack(state));
2024 } /* x87_simulate_block */
/* NOTE(review): numbered listing with elided lines — most ASSOC*(...)
 * registrations (orig. lines 2051-2077) and the #undef's are not
 * visible; only the fCondJmp registration is shown. */
2027 * Create a new x87 simulator.
2029 * @param sim a simulator handle, will be initialized
2030 * @param irg the current graph
2031 * @param arch_env the architecture environment
2033 static void x87_init_simulator(x87_simulator *sim, ir_graph *irg,
2034 const arch_env_t *arch_env)
2036 obstack_init(&sim->obst);
2037 sim->blk_states = pmap_create();
2038 sim->arch_env = arch_env;
/* liveness cache is indexed by node index, so size it to the last index */
2039 sim->n_idx = get_irg_last_idx(irg);
2040 sim->live = obstack_alloc(&sim->obst, sizeof(*sim->live) * sim->n_idx);
2042 DB((dbg, LEVEL_1, "--------------------------------\n"
2043 "x87 Simulator started for %+F\n", irg));
2045 /* set the generic function pointer of instruction we must simulate */
2046 clear_irp_opcodes_generic_func();
/* the three helpers hook sim_<op> into op->ops.generic for plain,
   ia32-virtual (v-prefixed) and backend (be_) opcodes respectively */
2048 #define ASSOC(op) (op_ ## op)->ops.generic = (op_func)(sim_##op)
2049 #define ASSOC_IA32(op) (op_ia32_v ## op)->ops.generic = (op_func)(sim_##op)
2050 #define ASSOC_BE(op) (op_be_ ## op)->ops.generic = (op_func)(sim_##op)
2067 ASSOC_IA32(fCondJmp);
2078 } /* x87_init_simulator */
2081 * Destroy a x87 simulator.
2083 * @param sim the simulator handle
2085 static void x87_destroy_simulator(x87_simulator *sim) {
2086 pmap_destroy(sim->blk_states);
/* frees everything allocated on the simulator's obstack, incl. sim->live */
2087 obstack_free(&sim->obst, NULL);
2088 DB((dbg, LEVEL_1, "x87 Simulator stopped\n\n"));
2089 } /* x87_destroy_simulator */
/** Block-walker callback: precompute the vfp liveness for one block. */
2091 static void update_liveness_walker(ir_node *block, void *data)
2093 x87_simulator *sim = data;
2094 update_liveness(sim, block);
/* NOTE(review): numbered listing with elided lines — the declaration of
 * sim and the do-loop's opening brace are not visible here. */
2098 * Run a simulation and fix all virtual instructions for a graph.
2100 * @param env the architecture environment
2101 * @param irg the current graph
2103 * Needs a block-schedule.
2105 void x87_simulate_graph(const arch_env_t *arch_env, be_irg_t *birg) {
2106 ir_node *block, *start_block;
2107 blk_state *bl_state;
2109 ir_graph *irg = be_get_birg_irg(birg);
2111 /* create the simulator */
2112 x87_init_simulator(&sim, irg, arch_env);
2114 start_block = get_irg_start_block(irg);
2115 bl_state = x87_get_bl_state(&sim, start_block);
2117 /* start with the empty state */
2118 bl_state->begin = empty;
/* worklist-driven traversal: successors are queued by x87_simulate_block */
2121 sim.worklist = new_waitq();
2122 waitq_put(sim.worklist, start_block);
2124 be_invalidate_liveness(birg);
2125 be_assure_liveness(birg);
2126 sim.lv = be_get_birg_liveness(birg);
2128 /* Calculate the liveness for all nodes. We must precalculate this info,
2129 * because the simulator adds new nodes (possible before Phi nodes) which
2130 * would let a lazy calculation fail.
2131 * On the other hand we reduce the computation amount due to
2132 * precaching from O(n^2) to O(n) at the expense of O(n) cache memory.
2134 irg_block_walk_graph(irg, update_liveness_walker, NULL, &sim);
2138 block = waitq_get(sim.worklist);
2139 x87_simulate_block(&sim, block);
2140 } while (! pdeq_empty(sim.worklist));
2143 del_waitq(sim.worklist);
2144 x87_destroy_simulator(&sim);
2145 } /* x87_simulate_graph */
2147 void ia32_init_x87(void)
2149 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.x87");