2 * This file is part of libFirm.
3 * Copyright (C) 2012 University of Karlsruhe.
8 * @brief This file implements the x87 support and virtual to stack
9 * register translation for the ia32 backend.
10 * @author Michael Beck
19 #include "iredges_t.h"
34 #include "bearch_ia32_t.h"
35 #include "ia32_new_nodes.h"
36 #include "gen_ia32_new_nodes.h"
37 #include "gen_ia32_regalloc_if.h"
39 #include "ia32_architecture.h"
41 #define N_FLOAT_REGS (N_ia32_fp_REGS-1) // exclude NOREG
43 /** the debug handle */
44 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
46 /* Forward declaration. */
47 typedef struct x87_simulator x87_simulator;
50 * An entry on the simulated x87 stack.
52 typedef struct st_entry {
53 int reg_idx; /**< the virtual register index of this stack value */
54 ir_node *node; /**< the node that produced this value */
60 typedef struct x87_state {
61 st_entry st[N_FLOAT_REGS]; /**< the register stack */
62 int depth; /**< the current stack depth */
63 x87_simulator *sim; /**< The simulator. */
66 /** An empty state, used for blocks without fp instructions. */
67 static x87_state empty = { { {0, NULL}, }, 0, NULL };
70 * Return values of the instruction simulator functions.
73 NO_NODE_ADDED = 0, /**< No node that needs simulation was added. */
74 NODE_ADDED = 1 /**< A node that must be simulated was added by the simulator
75 in the schedule AFTER the current node. */
79 * The type of an instruction simulator function.
81 * @param state the x87 state
82 * @param n the node to be simulated
84 * @return NODE_ADDED if a node was added AFTER n in schedule that MUST be
86 * NO_NODE_ADDED otherwise
88 typedef int (*sim_func)(x87_state *state, ir_node *n);
91 * A block state: Every block has a x87 state at the beginning and at the end.
93 typedef struct blk_state {
94 x87_state *begin; /**< state at the begin or NULL if not assigned */
95 x87_state *end; /**< state at the end or NULL if not assigned */
98 /** liveness bitset for fp registers. */
99 typedef unsigned char fp_liveness;
104 struct x87_simulator {
105 struct obstack obst; /**< An obstack for fast allocating. */
106 pmap *blk_states; /**< Map blocks to states. */
107 be_lv_t *lv; /**< intrablock liveness. */
108 fp_liveness *live; /**< Liveness information. */
109 unsigned n_idx; /**< The cached get_irg_last_idx() result. */
110 waitq *worklist; /**< Worklist of blocks that must be processed. */
114 * Returns the current stack depth.
116 * @param state the x87 state
118 * @return the x87 stack depth
120 static int x87_get_depth(const x87_state *state)
125 static st_entry *x87_get_entry(x87_state *const state, int const pos)
127 assert(0 <= pos && pos < state->depth);
128 return &state->st[N_FLOAT_REGS - state->depth + pos];
132 * Return the virtual register index at st(pos).
134 * @param state the x87 state
135 * @param pos a stack position
137 * @return the fp register index that produced the value at st(pos)
139 static int x87_get_st_reg(const x87_state *state, int pos)
141 return x87_get_entry((x87_state*)state, pos)->reg_idx;
146 * Dump the stack for debugging.
148 * @param state the x87 state
150 static void x87_dump_stack(const x87_state *state)
152 for (int i = state->depth; i-- != 0;) {
153 st_entry const *const entry = x87_get_entry((x87_state*)state, i);
154 DB((dbg, LEVEL_2, "vf%d(%+F) ", entry->reg_idx, entry->node));
156 DB((dbg, LEVEL_2, "<-- TOS\n"));
158 #endif /* DEBUG_libfirm */
161 * Set a virtual register to st(pos).
163 * @param state the x87 state
164 * @param reg_idx the fp register index that should be set
165 * @param node the IR node that produces the value of the fp register
166 * @param pos the stack position where the new value should be entered
168 static void x87_set_st(x87_state *state, int reg_idx, ir_node *node, int pos)
170 st_entry *const entry = x87_get_entry(state, pos);
171 entry->reg_idx = reg_idx;
174 DB((dbg, LEVEL_2, "After SET_REG: "));
175 DEBUG_ONLY(x87_dump_stack(state);)
179 * Swap st(0) with st(pos).
181 * @param state the x87 state
182 * @param pos the stack position to change the tos with
184 static void x87_fxch(x87_state *state, int pos)
186 st_entry *const a = x87_get_entry(state, pos);
187 st_entry *const b = x87_get_entry(state, 0);
188 st_entry const t = *a;
192 DB((dbg, LEVEL_2, "After FXCH: "));
193 DEBUG_ONLY(x87_dump_stack(state);)
197 * Convert a virtual register to the stack index.
199 * @param state the x87 state
200 * @param reg_idx the register fp index
202 * @return the stack position where the register is stacked
203 * or -1 if the virtual register was not found
205 static int x87_on_stack(const x87_state *state, int reg_idx)
207 for (int i = 0; i < state->depth; ++i) {
208 if (x87_get_st_reg(state, i) == reg_idx)
215 * Push a virtual Register onto the stack, double pushes are NOT allowed.
217 * @param state the x87 state
218 * @param reg_idx the register fp index
219 * @param node the node that produces the value of the fp register
221 static void x87_push(x87_state *state, int reg_idx, ir_node *node)
223 assert(x87_on_stack(state, reg_idx) == -1 && "double push");
224 assert(state->depth < N_FLOAT_REGS && "stack overrun");
227 st_entry *const entry = x87_get_entry(state, 0);
228 entry->reg_idx = reg_idx;
231 DB((dbg, LEVEL_2, "After PUSH: ")); DEBUG_ONLY(x87_dump_stack(state);)
235 * Pop a virtual Register from the stack.
237 * @param state the x87 state
239 static void x87_pop(x87_state *state)
241 assert(state->depth > 0 && "stack underrun");
245 DB((dbg, LEVEL_2, "After POP: ")); DEBUG_ONLY(x87_dump_stack(state);)
249 * Empty the fpu stack
251 * @param state the x87 state
253 static void x87_emms(x87_state *state)
259 * Returns the block state of a block.
261 * @param sim the x87 simulator handle
262 * @param block the current block
264 * @return the block state
266 static blk_state *x87_get_bl_state(x87_simulator *sim, ir_node *block)
268 blk_state *res = pmap_get(blk_state, sim->blk_states, block);
271 res = OALLOC(&sim->obst, blk_state);
275 pmap_insert(sim->blk_states, block, res);
284 * @param sim the x87 simulator handle
285 * @param src the x87 state that will be cloned
287 * @return a cloned copy of the src state
289 static x87_state *x87_clone_state(x87_simulator *sim, const x87_state *src)
291 x87_state *const res = OALLOC(&sim->obst, x87_state);
297 * Returns the first Proj of a mode_T node having a given mode.
299 * @param n the mode_T node
300 * @param m the desired mode of the Proj
301 * @return The first Proj of mode @p m found.
303 static ir_node *get_irn_Proj_for_mode(ir_node *n, ir_mode *m)
305 assert(get_irn_mode(n) == mode_T && "Need mode_T node");
307 foreach_out_edge(n, edge) {
308 ir_node *proj = get_edge_src_irn(edge);
309 if (get_irn_mode(proj) == m)
313 panic("Proj not found");
317 * Wrap the arch_* function here so we can check for errors.
319 static inline const arch_register_t *x87_get_irn_register(const ir_node *irn)
321 const arch_register_t *res = arch_get_irn_register(irn);
323 assert(res->reg_class == &ia32_reg_classes[CLASS_ia32_fp]);
327 static inline const arch_register_t *x87_irn_get_register(const ir_node *irn,
330 const arch_register_t *res = arch_get_irn_register_out(irn, pos);
332 assert(res->reg_class == &ia32_reg_classes[CLASS_ia32_fp]);
336 static inline const arch_register_t *get_st_reg(int index)
338 return &ia32_registers[REG_ST0 + index];
342 * Create a fxch node before another node.
344 * @param state the x87 state
345 * @param n the node after the fxch
346 * @param pos exchange st(pos) with st(0)
348 static void x87_create_fxch(x87_state *state, ir_node *n, int pos)
350 x87_fxch(state, pos);
352 ir_node *const block = get_nodes_block(n);
353 ir_node *const fxch = new_bd_ia32_fxch(NULL, block);
354 ia32_x87_attr_t *const attr = get_ia32_x87_attr(fxch);
355 attr->reg = get_st_reg(pos);
359 sched_add_before(n, fxch);
360 DB((dbg, LEVEL_1, "<<< %s %s\n", get_irn_opname(fxch), attr->reg->name));
363 /* -------------- x87 perm --------------- */
366 * Calculate the necessary permutations to reach dst_state.
368 * These permutations are done with fxch instructions and placed
369 * at the end of the block.
371 * Note that critical edges are removed here, so we need only
372 * a shuffle if the current block has only one successor.
374 * @param block the current block
375 * @param state the current x87 stack state, might be modified
376 * @param dst_state destination state
380 static x87_state *x87_shuffle(ir_node *block, x87_state *state, const x87_state *dst_state)
382 int i, n_cycles, k, ri;
383 unsigned cycles[4], all_mask;
384 char cycle_idx[4][8];
386 assert(state->depth == dst_state->depth);
388 /* Some mathematics here:
389 * If we have a cycle of length n that includes the tos,
390 * we need n-1 exchange operations.
391 * We can always add the tos and restore it, so we need
392 * n+1 exchange operations for a cycle not containing the tos.
393 * So, the maximum of needed operations is for a cycle of 7
394 * not including the tos == 8.
395 * This is the same number of ops we would need for using stores,
396 * so exchange is cheaper (we save the loads).
397 * On the other hand, we might need an additional exchange
398 * in the next block to bring one operand on top, so the
399 * number of ops in the first case is identical.
400 * Further, no more than 4 cycles can exists (4 x 2). */
401 all_mask = (1 << (state->depth)) - 1;
403 for (n_cycles = 0; all_mask; ++n_cycles) {
404 int src_idx, dst_idx;
406 /* find the first free slot */
407 for (i = 0; i < state->depth; ++i) {
408 if (all_mask & (1 << i)) {
409 all_mask &= ~(1 << i);
411 /* check if there are differences here */
412 if (x87_get_st_reg(state, i) != x87_get_st_reg(dst_state, i))
418 /* no more cycles found */
423 cycles[n_cycles] = (1 << i);
424 cycle_idx[n_cycles][k++] = i;
425 for (src_idx = i; ; src_idx = dst_idx) {
426 dst_idx = x87_on_stack(dst_state, x87_get_st_reg(state, src_idx));
428 if ((all_mask & (1 << dst_idx)) == 0)
431 cycle_idx[n_cycles][k++] = dst_idx;
432 cycles[n_cycles] |= (1 << dst_idx);
433 all_mask &= ~(1 << dst_idx);
435 cycle_idx[n_cycles][k] = -1;
439 /* no permutation needed */
443 /* Hmm: permutation needed */
444 DB((dbg, LEVEL_2, "\n%+F needs permutation: from\n", block));
445 DEBUG_ONLY(x87_dump_stack(state);)
446 DB((dbg, LEVEL_2, " to\n"));
447 DEBUG_ONLY(x87_dump_stack(dst_state);)
451 DB((dbg, LEVEL_2, "Need %d cycles\n", n_cycles));
452 for (ri = 0; ri < n_cycles; ++ri) {
453 DB((dbg, LEVEL_2, " Ring %d:\n ", ri));
454 for (k = 0; cycle_idx[ri][k] != -1; ++k)
455 DB((dbg, LEVEL_2, " st%d ->", cycle_idx[ri][k]));
456 DB((dbg, LEVEL_2, "\n"));
461 * Find the place node must be insert.
462 * We have only one successor block, so the last instruction should
465 ir_node *const before = sched_last(block);
466 assert(is_cfop(before));
468 /* now do the permutations */
469 for (ri = 0; ri < n_cycles; ++ri) {
470 if ((cycles[ri] & 1) == 0) {
471 /* this cycle does not include the tos */
472 x87_create_fxch(state, before, cycle_idx[ri][0]);
474 for (k = 1; cycle_idx[ri][k] != -1; ++k) {
475 x87_create_fxch(state, before, cycle_idx[ri][k]);
477 if ((cycles[ri] & 1) == 0) {
478 /* this cycle does not include the tos */
479 x87_create_fxch(state, before, cycle_idx[ri][0]);
486 * Create a fpush before node n.
488 * @param state the x87 state
489 * @param n the node after the fpush
490 * @param pos push st(pos) on stack
491 * @param val the value to push
493 static void x87_create_fpush(x87_state *state, ir_node *n, int pos, int const out_reg_idx, ir_node *const val)
495 x87_push(state, out_reg_idx, val);
497 ir_node *const fpush = new_bd_ia32_fpush(NULL, get_nodes_block(n));
498 ia32_x87_attr_t *const attr = get_ia32_x87_attr(fpush);
499 attr->reg = get_st_reg(pos);
502 sched_add_before(n, fpush);
504 DB((dbg, LEVEL_1, "<<< %s %s\n", get_irn_opname(fpush), attr->reg->name));
508 * Create a fpop before node n.
509 * This overwrites st(pos) with st(0) and pops st(0).
511 * @param state the x87 state
512 * @param n the node after the fpop
513 * @param pos the index of the entry to remove the register stack
515 * @return the fpop node
517 static ir_node *x87_create_fpop(x87_state *const state, ir_node *const n, int const pos)
520 st_entry *const dst = x87_get_entry(state, pos);
521 st_entry *const src = x87_get_entry(state, 0);
525 ir_node *const block = get_nodes_block(n);
526 ir_node *const fpop = pos == 0 && ia32_cg_config.use_ffreep ?
527 new_bd_ia32_ffreep(NULL, block) :
528 new_bd_ia32_fpop( NULL, block);
529 ia32_x87_attr_t *const attr = get_ia32_x87_attr(fpop);
530 attr->reg = get_st_reg(pos);
533 sched_add_before(n, fpop);
534 DB((dbg, LEVEL_1, "<<< %s %s\n", get_irn_opname(fpop), attr->reg->name));
538 /* --------------------------------- liveness ------------------------------------------ */
541 * The liveness transfer function.
542 * Updates a live set over a single step from a given node to its predecessor.
543 * Everything defined at the node is removed from the set, the uses of the node get inserted.
545 * @param irn The node at which liveness should be computed.
546 * @param live The bitset of registers live before @p irn. This set gets modified by updating it to
547 * the registers live after irn.
549 * @return The live bitset.
551 static fp_liveness fp_liveness_transfer(ir_node *irn, fp_liveness live)
553 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_fp];
555 be_foreach_definition(irn, cls, def, req,
556 const arch_register_t *reg = x87_get_irn_register(def);
557 live &= ~(1 << reg->index);
559 be_foreach_use(irn, cls, in_req_, op, op_req_,
560 const arch_register_t *reg = x87_get_irn_register(op);
561 live |= 1 << reg->index;
567 * Put all live virtual registers at the end of a block into a bitset.
569 * @param sim the simulator handle
570 * @param bl the block
572 * @return The live bitset at the end of this block
574 static fp_liveness fp_liveness_end_of_block(x87_simulator *sim, const ir_node *block)
576 fp_liveness live = 0;
577 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_fp];
578 const be_lv_t *lv = sim->lv;
580 be_lv_foreach_cls(lv, block, be_lv_state_end, cls, node) {
581 const arch_register_t *reg = x87_get_irn_register(node);
582 live |= 1 << reg->index;
588 /** get the register mask from an arch_register */
589 #define REGMASK(reg) (1 << (reg->index))
592 * Return a bitset of argument registers which are live at the end of a node.
594 * @param sim the simulator handle
595 * @param pos the node
596 * @param kill kill mask for the output registers
598 * @return The live bitset.
600 static unsigned fp_live_args_after(x87_simulator *sim, const ir_node *pos, unsigned kill)
602 unsigned idx = get_irn_idx(pos);
604 assert(idx < sim->n_idx);
605 return sim->live[idx] & ~kill;
609 * Calculate the liveness for a whole block and cache it.
611 * @param sim the simulator handle
612 * @param block the block
614 static void update_liveness(x87_simulator *sim, ir_node *block)
616 fp_liveness live = fp_liveness_end_of_block(sim, block);
619 /* now iterate through the block backward and cache the results */
620 sched_foreach_reverse(block, irn) {
621 /* stop at the first Phi: this produces the live-in */
625 idx = get_irn_idx(irn);
626 sim->live[idx] = live;
628 live = fp_liveness_transfer(irn, live);
630 idx = get_irn_idx(block);
631 sim->live[idx] = live;
635 * Returns true if a register is live in a set.
637 * @param reg_idx the fp register index
638 * @param live a live bitset
640 #define is_fp_live(reg_idx, live) ((live) & (1 << (reg_idx)))
644 * Dump liveness info.
646 * @param live the live bitset
648 static void fp_dump_live(fp_liveness live)
652 DB((dbg, LEVEL_2, "Live after: "));
653 for (i = 0; i < 8; ++i) {
654 if (live & (1 << i)) {
655 DB((dbg, LEVEL_2, "vf%d ", i));
658 DB((dbg, LEVEL_2, "\n"));
660 #endif /* DEBUG_libfirm */
662 /* --------------------------------- simulators ---------------------------------------- */
665 * Simulate a virtual binop.
667 * @param state the x87 state
668 * @param n the node that should be simulated (and patched)
670 * @return NO_NODE_ADDED
672 static int sim_binop(x87_state *const state, ir_node *const n)
674 x87_simulator *sim = state->sim;
675 ir_node *op1 = get_irn_n(n, n_ia32_binary_left);
676 ir_node *op2 = get_irn_n(n, n_ia32_binary_right);
677 const arch_register_t *op1_reg = x87_get_irn_register(op1);
678 const arch_register_t *op2_reg = x87_get_irn_register(op2);
679 const arch_register_t *out = x87_irn_get_register(n, pn_ia32_res);
680 int reg_index_1 = op1_reg->index;
681 int reg_index_2 = op2_reg->index;
682 fp_liveness live = fp_live_args_after(sim, n, REGMASK(out));
686 DB((dbg, LEVEL_1, ">>> %+F %s, %s -> %s\n", n, op1_reg->name, op2_reg->name, out->name));
687 DEBUG_ONLY(fp_dump_live(live);)
688 DB((dbg, LEVEL_1, "Stack before: "));
689 DEBUG_ONLY(x87_dump_stack(state);)
691 int op1_idx = x87_on_stack(state, reg_index_1);
692 assert(op1_idx >= 0);
693 op1_live_after = is_fp_live(reg_index_1, live);
698 int const out_reg_idx = out->index;
699 ia32_x87_attr_t *const attr = get_ia32_x87_attr(n);
700 if (reg_index_2 != REG_FP_FP_NOREG) {
701 /* second operand is a fp register */
702 op2_idx = x87_on_stack(state, reg_index_2);
703 assert(op2_idx >= 0);
704 op2_live_after = is_fp_live(reg_index_2, live);
706 if (op2_live_after) {
707 /* Second operand is live. */
709 if (op1_live_after) {
710 /* Both operands are live: push the first one.
711 * This works even for op1 == op2. */
712 x87_create_fpush(state, n, op1_idx, out_reg_idx, op1);
713 /* now do fxxx (tos=tos X op) */
718 /* Second live, first operand is dead: Overwrite first. */
719 if (op1_idx != 0 && op2_idx != 0) {
720 /* Bring one operand to tos. */
721 x87_create_fxch(state, n, op1_idx);
727 /* Second operand is dead. */
728 if (op1_live_after) {
729 /* First operand is live, second is dead: Overwrite second. */
730 if (op1_idx != 0 && op2_idx != 0) {
731 /* Bring one operand to tos. */
732 x87_create_fxch(state, n, op2_idx);
737 /* Both operands are dead. */
738 if (op1_idx == op2_idx) {
739 /* Operands are identical: no pop. */
741 x87_create_fxch(state, n, op1_idx);
746 if (op1_idx != 0 && op2_idx != 0) {
747 /* Bring one operand to tos. Heuristically swap the operand not at
748 * st(1) to tos. This way, if any operand was at st(1), the result
749 * will end up in the new st(0) after the implicit pop. If the next
750 * operation uses the result, then no fxch will be necessary. */
752 x87_create_fxch(state, n, op1_idx);
755 x87_create_fxch(state, n, op2_idx);
761 out_idx = op1_idx != 0 ? op1_idx : op2_idx;
765 /* second operand is an address mode */
766 if (op1_live_after) {
767 /* first operand is live: push it here */
768 x87_create_fpush(state, n, op1_idx, out_reg_idx, op1);
770 /* first operand is dead: bring it to tos */
772 x87_create_fxch(state, n, op1_idx);
775 op1_idx = attr->attr.data.ins_permuted ? -1 : 0;
776 op2_idx = attr->attr.data.ins_permuted ? 0 : -1;
779 assert(op1_idx == 0 || op2_idx == 0);
780 assert(out_idx == op1_idx || out_idx == op2_idx);
782 x87_set_st(state, out_reg_idx, n, out_idx);
786 /* patch the operation */
787 int const reg_idx = op1_idx != 0 ? op1_idx : op2_idx;
788 attr->reg = reg_idx >= 0 ? get_st_reg(reg_idx) : NULL;
789 attr->attr.data.ins_permuted = op1_idx != 0;
790 attr->res_in_reg = out_idx != 0;
794 char const *const l = op1_idx >= 0 ? get_st_reg(op1_idx)->name : "[AM]";
795 char const *const r = op2_idx >= 0 ? get_st_reg(op2_idx)->name : "[AM]";
796 char const *const o = get_st_reg(out_idx)->name;
797 DB((dbg, LEVEL_1, "<<< %s %s, %s -> %s\n", get_irn_opname(n), l, r, o));
800 return NO_NODE_ADDED;
804 * Simulate a virtual Unop.
806 * @param state the x87 state
807 * @param n the node that should be simulated (and patched)
809 * @return NO_NODE_ADDED
811 static int sim_unop(x87_state *state, ir_node *n)
813 arch_register_t const *const out = x87_get_irn_register(n);
814 unsigned const live = fp_live_args_after(state->sim, n, REGMASK(out));
815 DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, out->name));
816 DEBUG_ONLY(fp_dump_live(live);)
818 ir_node *const op1 = get_irn_n(n, 0);
819 arch_register_t const *const op1_reg = x87_get_irn_register(op1);
820 int const op1_reg_idx = op1_reg->index;
821 int const op1_idx = x87_on_stack(state, op1_reg_idx);
822 int const out_reg_idx = out->index;
823 if (is_fp_live(op1_reg_idx, live)) {
824 /* push the operand here */
825 x87_create_fpush(state, n, op1_idx, out_reg_idx, op1);
827 /* operand is dead, bring it to tos */
829 x87_create_fxch(state, n, op1_idx);
833 x87_set_st(state, out_reg_idx, n, 0);
834 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), get_st_reg(0)->name));
836 return NO_NODE_ADDED;
840 * Simulate a virtual Load instruction.
842 * @param state the x87 state
843 * @param n the node that should be simulated (and patched)
845 * @return NO_NODE_ADDED
847 static int sim_load(x87_state *state, ir_node *n)
849 assert((int)pn_ia32_fld_res == (int)pn_ia32_fild_res
850 && (int)pn_ia32_fld_res == (int)pn_ia32_fld1_res
851 && (int)pn_ia32_fld_res == (int)pn_ia32_fldz_res);
852 const arch_register_t *out = x87_irn_get_register(n, pn_ia32_fld_res);
854 DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, out->name));
855 x87_push(state, out->index, n);
856 assert(out == x87_irn_get_register(n, pn_ia32_fld_res));
857 DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), get_st_reg(0)->name));
859 return NO_NODE_ADDED;
863 * Rewire all users of @p old_val to @new_val iff they are scheduled after @p store.
865 * @param store The store
866 * @param old_val The former value
867 * @param new_val The new value
869 static void collect_and_rewire_users(ir_node *store, ir_node *old_val, ir_node *new_val)
871 foreach_out_edge_safe(old_val, edge) {
872 ir_node *user = get_edge_src_irn(edge);
873 /* if the user is scheduled after the store: rewire */
874 if (sched_is_scheduled(user) && sched_comes_after(store, user)) {
875 set_irn_n(user, get_edge_src_pos(edge), new_val);
881 * Simulate a virtual Store.
883 * @param state the x87 state
884 * @param n the node that should be simulated (and patched)
886 static int sim_store(x87_state *state, ir_node *n)
888 ir_node *const val = get_irn_n(n, n_ia32_fst_val);
889 arch_register_t const *const op2 = x87_get_irn_register(val);
890 DB((dbg, LEVEL_1, ">>> %+F %s ->\n", n, op2->name));
893 int insn = NO_NODE_ADDED;
894 int const op2_reg_idx = op2->index;
895 int const op2_idx = x87_on_stack(state, op2_reg_idx);
896 unsigned const live = fp_live_args_after(state->sim, n, 0);
897 int const live_after_node = is_fp_live(op2_reg_idx, live);
898 assert(op2_idx >= 0);
899 if (live_after_node) {
900 /* Problem: fst doesn't support 80bit modes (spills), only fstp does
901 * fist doesn't support 64bit mode, only fistp
903 * - stack not full: push value and fstp
904 * - stack full: fstp value and load again
905 * Note that we cannot test on mode_E, because floats might be 80bit ... */
906 ir_mode *const mode = get_ia32_ls_mode(n);
907 if (get_mode_size_bits(mode) > (mode_is_int(mode) ? 32U : 64U)) {
908 if (x87_get_depth(state) < N_FLOAT_REGS) {
909 /* ok, we have a free register: push + fstp */
910 x87_create_fpush(state, n, op2_idx, REG_FP_FP_NOREG, val);
913 /* stack full here: need fstp + load */
916 ir_node *const block = get_nodes_block(n);
917 ir_node *const mem = get_irn_Proj_for_mode(n, mode_M);
918 ir_node *const vfld = new_bd_ia32_fld(NULL, block, get_irn_n(n, 0), get_irn_n(n, 1), mem, mode);
920 /* copy all attributes */
921 set_ia32_frame_ent(vfld, get_ia32_frame_ent(n));
922 if (is_ia32_use_frame(n))
923 set_ia32_use_frame(vfld);
924 set_ia32_op_type(vfld, ia32_AddrModeS);
925 add_ia32_am_offs_int(vfld, get_ia32_am_offs_int(n));
926 set_ia32_am_sc(vfld, get_ia32_am_sc(n));
927 set_ia32_ls_mode(vfld, mode);
929 ir_node *const rproj = new_r_Proj(vfld, mode, pn_ia32_fld_res);
930 ir_node *const mproj = new_r_Proj(vfld, mode_M, pn_ia32_fld_M);
932 arch_set_irn_register(rproj, op2);
934 /* reroute all former users of the store memory to the load memory */
935 edges_reroute_except(mem, mproj, vfld);
937 sched_add_after(n, vfld);
939 /* rewire all users, scheduled after the store, to the loaded value */
940 collect_and_rewire_users(n, val, rproj);
945 /* we can only store the tos to memory */
947 x87_create_fxch(state, n, op2_idx);
950 /* we can only store the tos to memory */
952 x87_create_fxch(state, n, op2_idx);
960 ia32_x87_attr_t *const attr = get_ia32_x87_attr(n);
962 DB((dbg, LEVEL_1, "<<< %s %s ->\n", get_irn_opname(n), get_st_reg(0)->name));
967 static int sim_fprem(x87_state *const state, ir_node *const n)
971 panic("TODO implement");
975 * Simulate a virtual fisttp.
977 * @param state the x87 state
978 * @param n the node that should be simulated (and patched)
980 * @return NO_NODE_ADDED
982 static int sim_fisttp(x87_state *state, ir_node *n)
984 ir_node *val = get_irn_n(n, n_ia32_fst_val);
985 const arch_register_t *op2 = x87_get_irn_register(val);
987 int const op2_idx = x87_on_stack(state, op2->index);
988 DB((dbg, LEVEL_1, ">>> %+F %s ->\n", n, op2->name));
989 assert(op2_idx >= 0);
991 /* Note: although the value is still live here, it is destroyed because
992 of the pop. The register allocator is aware of that and introduced a copy
993 if the value must be alive. */
995 /* we can only store the tos to memory */
997 x87_create_fxch(state, n, op2_idx);
1001 DB((dbg, LEVEL_1, "<<< %s %s ->\n", get_irn_opname(n), get_st_reg(0)->name));
1003 return NO_NODE_ADDED;
1007 * Simulate a virtual FtstFnstsw.
1009 * @param state the x87 state
1010 * @param n the node that should be simulated (and patched)
1012 * @return NO_NODE_ADDED
1014 static int sim_FtstFnstsw(x87_state *state, ir_node *n)
1016 x87_simulator *sim = state->sim;
1017 ir_node *op1_node = get_irn_n(n, n_ia32_FtstFnstsw_left);
1018 const arch_register_t *reg1 = x87_get_irn_register(op1_node);
1019 int reg_index_1 = reg1->index;
1020 int op1_idx = x87_on_stack(state, reg_index_1);
1021 unsigned live = fp_live_args_after(sim, n, 0);
1023 DB((dbg, LEVEL_1, ">>> %+F %s\n", n, reg1->name));
1024 DEBUG_ONLY(fp_dump_live(live);)
1025 DB((dbg, LEVEL_1, "Stack before: "));
1026 DEBUG_ONLY(x87_dump_stack(state);)
1027 assert(op1_idx >= 0);
1030 /* bring the value to tos */
1031 x87_create_fxch(state, n, op1_idx);
1034 if (!is_fp_live(reg_index_1, live))
1035 x87_create_fpop(state, sched_next(n), 0);
1037 return NO_NODE_ADDED;
1043 * @param state the x87 state
1044 * @param n the node that should be simulated (and patched)
1046 * @return NO_NODE_ADDED
1048 static int sim_Fucom(x87_state *state, ir_node *n)
1050 ia32_x87_attr_t *attr = get_ia32_x87_attr(n);
1051 x87_simulator *sim = state->sim;
1052 ir_node *op1_node = get_irn_n(n, n_ia32_FucomFnstsw_left);
1053 ir_node *op2_node = get_irn_n(n, n_ia32_FucomFnstsw_right);
1054 const arch_register_t *op1 = x87_get_irn_register(op1_node);
1055 const arch_register_t *op2 = x87_get_irn_register(op2_node);
1056 int reg_index_1 = op1->index;
1057 int reg_index_2 = op2->index;
1058 unsigned live = fp_live_args_after(sim, n, 0);
1060 DB((dbg, LEVEL_1, ">>> %+F %s, %s\n", n, op1->name, op2->name));
1061 DEBUG_ONLY(fp_dump_live(live);)
1062 DB((dbg, LEVEL_1, "Stack before: "));
1063 DEBUG_ONLY(x87_dump_stack(state);)
1065 int op1_idx = x87_on_stack(state, reg_index_1);
1066 assert(op1_idx >= 0);
1070 /* BEWARE: check for comp a,a cases, they might happen */
1071 if (reg_index_2 != REG_FP_FP_NOREG) {
1072 /* second operand is a fp register */
1073 op2_idx = x87_on_stack(state, reg_index_2);
1074 assert(op2_idx >= 0);
1076 if (is_fp_live(reg_index_2, live)) {
1077 /* second operand is live */
1079 if (is_fp_live(reg_index_1, live)) {
1080 /* both operands are live */
1081 if (op1_idx != 0 && op2_idx != 0) {
1082 /* bring the first one to tos */
1083 x87_create_fxch(state, n, op1_idx);
1084 if (op1_idx == op2_idx)
1087 /* res = tos X op */
1090 /* second live, first operand is dead here, bring it to tos.
1091 This means further, op1_idx != op2_idx. */
1092 assert(op1_idx != op2_idx);
1094 x87_create_fxch(state, n, op1_idx);
1099 /* res = tos X op, pop */
1103 /* second operand is dead */
1104 if (is_fp_live(reg_index_1, live)) {
1105 /* first operand is live: bring second to tos.
1106 This means further, op1_idx != op2_idx. */
1107 assert(op1_idx != op2_idx);
1109 x87_create_fxch(state, n, op2_idx);
1114 /* res = op X tos, pop */
1117 /* both operands are dead here, check first for identity. */
1118 if (op1_idx == op2_idx) {
1119 /* identically, one pop needed */
1121 x87_create_fxch(state, n, op1_idx);
1125 /* res = tos X op, pop */
1128 if (op1_idx != 0 && op2_idx != 0) {
1129 /* Both not at tos: Move one operand to tos. Move the one not at
1130 * pos 1, so we get a chance to use fucompp. */
1132 x87_create_fxch(state, n, op1_idx);
1135 x87_create_fxch(state, n, op2_idx);
1144 /* second operand is an address mode */
1146 x87_create_fxch(state, n, op1_idx);
1147 /* Pop first operand, if it is dead. */
1148 if (!is_fp_live(reg_index_1, live))
1151 op1_idx = attr->attr.data.ins_permuted ? -1 : 0;
1152 op2_idx = attr->attr.data.ins_permuted ? 0 : -1;
1154 assert(op1_idx == 0 || op2_idx == 0);
1156 /* patch the operation */
1157 if (is_ia32_FucomFnstsw(n) && pops == 2
1158 && (op1_idx == 1 || op2_idx == 1)) {
1159 set_irn_op(n, op_ia32_FucomppFnstsw);
1166 int const idx = (op1_idx != 0 ? op1_idx : op2_idx) - 1 /* Due to prior pop. */;
1167 x87_create_fpop(state, sched_next(n), idx);
1171 int const reg_idx = op1_idx != 0 ? op1_idx : op2_idx;
1172 attr->reg = reg_idx >= 0 ? get_st_reg(reg_idx) : NULL;
1173 attr->attr.data.ins_permuted = op1_idx != 0;
1174 attr->pop = pops != 0;
1177 char const *const l = op1_idx >= 0 ? get_st_reg(op1_idx)->name : "[AM]";
1178 char const *const r = op2_idx >= 0 ? get_st_reg(op2_idx)->name : "[AM]";
1179 DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(n), l, r));
1182 return NO_NODE_ADDED;
1188 * @param state the x87 state
1189 * @param n the node that should be simulated (and patched)
1191 * @return NO_NODE_ADDED
1193 static int sim_Keep(x87_state *state, ir_node *node)
1196 const arch_register_t *op_reg;
1202 DB((dbg, LEVEL_1, ">>> %+F\n", node));
1204 arity = get_irn_arity(node);
1205 for (i = 0; i < arity; ++i) {
1206 op = get_irn_n(node, i);
1207 op_reg = arch_get_irn_register(op);
1208 if (op_reg->reg_class != &ia32_reg_classes[CLASS_ia32_fp])
1211 reg_id = op_reg->index;
1212 live = fp_live_args_after(state->sim, node, 0);
1214 op_stack_idx = x87_on_stack(state, reg_id);
1215 if (op_stack_idx >= 0 && !is_fp_live(reg_id, live))
1216 x87_create_fpop(state, sched_next(node), 0);
1219 DB((dbg, LEVEL_1, "Stack after: "));
1220 DEBUG_ONLY(x87_dump_stack(state);)
1222 return NO_NODE_ADDED;
1226 * Keep the given node alive by adding a be_Keep.
1228 * @param node the node to kept alive
1230 static void keep_float_node_alive(ir_node *node)
1232 ir_node *block = get_nodes_block(node);
1233 ir_node *keep = be_new_Keep(block, 1, &node);
1234 sched_add_after(node, keep);
1238 * Create a copy of a node. Recreate the node if it's a constant.
1240 * @param state the x87 state
1241 * @param n the node to be copied
1243 * @return the copy of n
1245 static ir_node *create_Copy(x87_state *state, ir_node *n)
1247 dbg_info *n_dbg = get_irn_dbg_info(n);
1248 ir_mode *mode = get_irn_mode(n);
1249 ir_node *block = get_nodes_block(n);
1250 ir_node *pred = get_irn_n(n, 0);
1251 ir_node *(*cnstr)(dbg_info *, ir_node *) = NULL;
1253 const arch_register_t *out;
1254 const arch_register_t *op1;
1256 /* Do not copy constants, recreate them. */
1257 switch (get_ia32_irn_opcode(pred)) {
1259 cnstr = new_bd_ia32_fldz;
1262 cnstr = new_bd_ia32_fld1;
1264 case iro_ia32_fldpi:
1265 cnstr = new_bd_ia32_fldpi;
1267 case iro_ia32_fldl2e:
1268 cnstr = new_bd_ia32_fldl2e;
1270 case iro_ia32_fldl2t:
1271 cnstr = new_bd_ia32_fldl2t;
1273 case iro_ia32_fldlg2:
1274 cnstr = new_bd_ia32_fldlg2;
1276 case iro_ia32_fldln2:
1277 cnstr = new_bd_ia32_fldln2;
1283 out = x87_get_irn_register(n);
1284 op1 = x87_get_irn_register(pred);
1286 if (cnstr != NULL) {
1287 /* copy a constant */
1288 res = (*cnstr)(n_dbg, block);
1290 x87_push(state, out->index, res);
1292 int op1_idx = x87_on_stack(state, op1->index);
1294 res = new_bd_ia32_fpushCopy(n_dbg, block, pred, mode);
1296 x87_push(state, out->index, res);
1298 ia32_x87_attr_t *const attr = get_ia32_x87_attr(res);
1299 attr->reg = get_st_reg(op1_idx);
1301 arch_set_irn_register(res, out);
1307 * Simulate a be_Copy.
1309 * @param state the x87 state
1310 * @param n the node that should be simulated (and patched)
1312 * @return NO_NODE_ADDED
1314 static int sim_Copy(x87_state *state, ir_node *n)
1316 arch_register_class_t const *const cls = arch_get_irn_reg_class(n);
1317 if (cls != &ia32_reg_classes[CLASS_ia32_fp])
1318 return NO_NODE_ADDED;
1320 ir_node *const pred = be_get_Copy_op(n);
1321 arch_register_t const *const op1 = x87_get_irn_register(pred);
1322 arch_register_t const *const out = x87_get_irn_register(n);
1323 unsigned const live = fp_live_args_after(state->sim, n, REGMASK(out));
1325 DB((dbg, LEVEL_1, ">>> %+F %s -> %s\n", n, op1->name, out->name));
1326 DEBUG_ONLY(fp_dump_live(live);)
1328 if (is_fp_live(op1->index, live)) {
1329 /* Operand is still live, a real copy. We need here an fpush that can
1330 hold a a register, so use the fpushCopy or recreate constants */
1331 ir_node *const node = create_Copy(state, n);
1333 /* We have to make sure the old value doesn't go dead (which can happen
1334 * when we recreate constants). As the simulator expected that value in
1335 * the pred blocks. This is unfortunate as removing it would save us 1
1336 * instruction, but we would have to rerun all the simulation to get
1339 sched_replace(n, node);
1342 if (get_irn_n_edges(pred) == 0) {
1343 keep_float_node_alive(pred);
1346 DB((dbg, LEVEL_1, "<<< %+F %s -> ?\n", node, op1->name));
1348 /* Just a virtual copy. */
1349 int const op1_idx = x87_on_stack(state, op1->index);
1350 x87_set_st(state, out->index, n, op1_idx);
1352 return NO_NODE_ADDED;
1356 * Returns the vf0 result Proj of a Call.
1358 * @para call the Call node
1360 static ir_node *get_call_result_proj(ir_node *call)
1362 /* search the result proj */
1363 foreach_out_edge(call, edge) {
1364 ir_node *proj = get_edge_src_irn(edge);
1365 long pn = get_Proj_proj(proj);
1367 if (pn == pn_ia32_Call_st0)
1371 panic("result Proj missing");
1374 static int sim_Asm(x87_state *const state, ir_node *const n)
1378 be_foreach_use(n, &ia32_reg_classes[CLASS_ia32_fp], in_req, value, value_req,
1379 panic("cannot handle %+F with x87 constraints", n);
1382 be_foreach_out(n, i) {
1383 arch_register_req_t const *const req = arch_get_irn_register_req_out(n, i);
1384 if (req->cls == &ia32_reg_classes[CLASS_ia32_fp])
1385 panic("cannot handle %+F with x87 constraints", n);
1388 return NO_NODE_ADDED;
1392 * Simulate a ia32_Call.
1394 * @param state the x87 state
1395 * @param n the node that should be simulated (and patched)
1397 * @return NO_NODE_ADDED
1399 static int sim_Call(x87_state *state, ir_node *n)
1401 DB((dbg, LEVEL_1, ">>> %+F\n", n));
1403 /* at the begin of a call the x87 state should be empty */
1404 assert(state->depth == 0 && "stack not empty before call");
1406 ir_type *const call_tp = get_ia32_call_attr_const(n)->call_tp;
1407 if (get_method_n_ress(call_tp) != 0) {
1408 /* If the called function returns a float, it is returned in st(0).
1409 * This even happens if the return value is NOT used.
1410 * Moreover, only one return result is supported. */
1411 ir_type *const res_type = get_method_res_type(call_tp, 0);
1412 ir_mode *const mode = get_type_mode(res_type);
1413 if (mode && mode_is_float(mode)) {
1414 ir_node *const resproj = get_call_result_proj(n);
1415 arch_register_t const *const reg = x87_get_irn_register(resproj);
1416 x87_push(state, reg->index, resproj);
1419 DB((dbg, LEVEL_1, "Stack after: "));
1420 DEBUG_ONLY(x87_dump_stack(state);)
1422 return NO_NODE_ADDED;
1426 * Simulate a be_Return.
1428 * @param state the x87 state
1429 * @param n the node that should be simulated (and patched)
1431 * @return NO_NODE_ADDED
1433 static int sim_Return(x87_state *state, ir_node *n)
1435 #ifdef DEBUG_libfirm
1436 /* only floating point return values must reside on stack */
1437 int n_float_res = 0;
1438 int const n_res = be_Return_get_n_rets(n);
1439 for (int i = 0; i < n_res; ++i) {
1440 ir_node *const res = get_irn_n(n, n_be_Return_val + i);
1441 if (mode_is_float(get_irn_mode(res)))
1444 assert(x87_get_depth(state) == n_float_res);
1447 /* pop them virtually */
1449 return NO_NODE_ADDED;
1453 * Simulate a be_Perm.
1455 * @param state the x87 state
1456 * @param irn the node that should be simulated (and patched)
1458 * @return NO_NODE_ADDED
1460 static int sim_Perm(x87_state *state, ir_node *irn)
1463 ir_node *pred = get_irn_n(irn, 0);
1466 /* handle only floating point Perms */
1467 if (! mode_is_float(get_irn_mode(pred)))
1468 return NO_NODE_ADDED;
1470 DB((dbg, LEVEL_1, ">>> %+F\n", irn));
1472 /* Perm is a pure virtual instruction on x87.
1473 All inputs must be on the FPU stack and are pairwise
1474 different from each other.
1475 So, all we need to do is to permutate the stack state. */
1476 n = get_irn_arity(irn);
1477 NEW_ARR_A(int, stack_pos, n);
1479 /* collect old stack positions */
1480 for (i = 0; i < n; ++i) {
1481 const arch_register_t *inreg = x87_get_irn_register(get_irn_n(irn, i));
1482 int idx = x87_on_stack(state, inreg->index);
1484 assert(idx >= 0 && "Perm argument not on x87 stack");
1488 /* now do the permutation */
1489 foreach_out_edge(irn, edge) {
1490 ir_node *proj = get_edge_src_irn(edge);
1491 const arch_register_t *out = x87_get_irn_register(proj);
1492 long num = get_Proj_proj(proj);
1494 assert(0 <= num && num < n && "More Proj's than Perm inputs");
1495 x87_set_st(state, out->index, proj, stack_pos[(unsigned)num]);
1497 DB((dbg, LEVEL_1, "<<< %+F\n", irn));
1499 return NO_NODE_ADDED;
1503 * Kill any dead registers at block start by popping them from the stack.
1505 * @param sim the simulator handle
1506 * @param block the current block
1507 * @param state the x87 state at the begin of the block
1509 static void x87_kill_deads(x87_simulator *const sim, ir_node *const block, x87_state *const state)
1511 ir_node *first_insn = sched_first(block);
1512 ir_node *keep = NULL;
1513 unsigned live = fp_live_args_after(sim, block, 0);
1518 depth = x87_get_depth(state);
1519 for (i = depth - 1; i >= 0; --i) {
1520 int reg = x87_get_st_reg(state, i);
1522 if (! is_fp_live(reg, live))
1523 kill_mask |= (1 << i);
1527 DB((dbg, LEVEL_1, "Killing deads:\n"));
1528 DEBUG_ONLY(fp_dump_live(live);)
1529 DEBUG_ONLY(x87_dump_stack(state);)
1531 if (kill_mask != 0 && live == 0) {
1532 /* special case: kill all registers */
1533 if (ia32_cg_config.use_femms || ia32_cg_config.use_emms) {
1534 if (ia32_cg_config.use_femms) {
1535 /* use FEMMS on AMD processors to clear all */
1536 keep = new_bd_ia32_femms(NULL, block);
1538 /* use EMMS to clear all */
1539 keep = new_bd_ia32_emms(NULL, block);
1541 sched_add_before(first_insn, keep);
1547 /* now kill registers */
1549 /* we can only kill from TOS, so bring them up */
1550 if (! (kill_mask & 1)) {
1551 /* search from behind, because we can to a double-pop */
1552 for (i = depth - 1; i >= 0; --i) {
1553 if (kill_mask & (1 << i)) {
1554 kill_mask &= ~(1 << i);
1561 x87_set_st(state, -1, keep, i);
1562 x87_create_fxch(state, first_insn, i);
1567 keep = x87_create_fpop(state, first_insn, 0);
1574 * Run a simulation and fix all virtual instructions for a block.
1576 * @param sim the simulator handle
1577 * @param block the current block
1579 static void x87_simulate_block(x87_simulator *sim, ir_node *block)
1582 blk_state *bl_state = x87_get_bl_state(sim, block);
1583 x87_state *state = bl_state->begin;
1585 assert(state != NULL);
1586 /* already processed? */
1587 if (bl_state->end != NULL)
1590 DB((dbg, LEVEL_1, "Simulate %+F\n", block));
1591 DB((dbg, LEVEL_2, "State at Block begin:\n "));
1592 DEBUG_ONLY(x87_dump_stack(state);)
1594 /* create a new state, will be changed */
1595 state = x87_clone_state(sim, state);
1596 /* at block begin, kill all dead registers */
1597 x87_kill_deads(sim, block, state);
1599 /* beware, n might change */
1600 for (n = sched_first(block); !sched_is_end(n); n = next) {
1603 ir_op *op = get_irn_op(n);
1606 * get the next node to be simulated here.
1607 * n might be completely removed from the schedule-
1609 next = sched_next(n);
1610 if (op->ops.generic != NULL) {
1611 func = (sim_func)op->ops.generic;
1614 node_inserted = (*func)(state, n);
1617 * sim_func might have added an additional node after n,
1618 * so update next node
1619 * beware: n must not be changed by sim_func
1620 * (i.e. removed from schedule) in this case
1622 if (node_inserted != NO_NODE_ADDED)
1623 next = sched_next(n);
1627 DB((dbg, LEVEL_2, "State at Block end:\n ")); DEBUG_ONLY(x87_dump_stack(state);)
1629 /* check if the state must be shuffled */
1630 foreach_block_succ(block, edge) {
1631 ir_node *succ = get_edge_src_irn(edge);
1632 blk_state *succ_state;
1634 succ_state = x87_get_bl_state(sim, succ);
1636 if (succ_state->begin == NULL) {
1637 DB((dbg, LEVEL_2, "Set begin state for succ %+F:\n", succ));
1638 DEBUG_ONLY(x87_dump_stack(state);)
1639 succ_state->begin = state;
1641 waitq_put(sim->worklist, succ);
1643 DB((dbg, LEVEL_2, "succ %+F already has a state, shuffling\n", succ));
1644 /* There is already a begin state for the successor, bad.
1645 Do the necessary permutations.
1646 Note that critical edges are removed, so this is always possible:
1647 If the successor has more than one possible input, then it must
1650 x87_shuffle(block, state, succ_state->begin);
1653 bl_state->end = state;
1657 * Register a simulator function.
1659 * @param op the opcode to simulate
1660 * @param func the simulator function for the opcode
1662 static void register_sim(ir_op *op, sim_func func)
1664 assert(op->ops.generic == NULL);
1665 op->ops.generic = (op_func) func;
1669 * Create a new x87 simulator.
1671 * @param sim a simulator handle, will be initialized
1672 * @param irg the current graph
1674 static void x87_init_simulator(x87_simulator *sim, ir_graph *irg)
1676 obstack_init(&sim->obst);
1677 sim->blk_states = pmap_create();
1678 sim->n_idx = get_irg_last_idx(irg);
1679 sim->live = OALLOCN(&sim->obst, fp_liveness, sim->n_idx);
1681 DB((dbg, LEVEL_1, "--------------------------------\n"
1682 "x87 Simulator started for %+F\n", irg));
1684 /* set the generic function pointer of instruction we must simulate */
1685 ir_clear_opcodes_generic_func();
1687 register_sim(op_ia32_Asm, sim_Asm);
1688 register_sim(op_ia32_Call, sim_Call);
1689 register_sim(op_ia32_fld, sim_load);
1690 register_sim(op_ia32_fild, sim_load);
1691 register_sim(op_ia32_fld1, sim_load);
1692 register_sim(op_ia32_fldz, sim_load);
1693 register_sim(op_ia32_fadd, sim_binop);
1694 register_sim(op_ia32_fsub, sim_binop);
1695 register_sim(op_ia32_fmul, sim_binop);
1696 register_sim(op_ia32_fdiv, sim_binop);
1697 register_sim(op_ia32_fprem, sim_fprem);
1698 register_sim(op_ia32_fabs, sim_unop);
1699 register_sim(op_ia32_fchs, sim_unop);
1700 register_sim(op_ia32_fist, sim_store);
1701 register_sim(op_ia32_fisttp, sim_fisttp);
1702 register_sim(op_ia32_fst, sim_store);
1703 register_sim(op_ia32_FtstFnstsw, sim_FtstFnstsw);
1704 register_sim(op_ia32_FucomFnstsw, sim_Fucom);
1705 register_sim(op_ia32_Fucomi, sim_Fucom);
1706 register_sim(op_be_Copy, sim_Copy);
1707 register_sim(op_be_Return, sim_Return);
1708 register_sim(op_be_Perm, sim_Perm);
1709 register_sim(op_be_Keep, sim_Keep);
1713 * Destroy a x87 simulator.
1715 * @param sim the simulator handle
1717 static void x87_destroy_simulator(x87_simulator *sim)
1719 pmap_destroy(sim->blk_states);
1720 obstack_free(&sim->obst, NULL);
1721 DB((dbg, LEVEL_1, "x87 Simulator stopped\n\n"));
1725 * Pre-block walker: calculate the liveness information for the block
1726 * and store it into the sim->live cache.
1728 static void update_liveness_walker(ir_node *block, void *data)
1730 x87_simulator *sim = (x87_simulator*)data;
1731 update_liveness(sim, block);
1735 * Run a simulation and fix all virtual instructions for a graph.
1736 * Replaces all virtual floating point instructions and registers
1739 void ia32_x87_simulate_graph(ir_graph *irg)
1741 /* TODO improve code quality (less executed fxch) by using execfreqs */
1743 ir_node *block, *start_block;
1744 blk_state *bl_state;
1747 /* create the simulator */
1748 x87_init_simulator(&sim, irg);
1750 start_block = get_irg_start_block(irg);
1751 bl_state = x87_get_bl_state(&sim, start_block);
1753 /* start with the empty state */
1755 bl_state->begin = ∅
1757 sim.worklist = new_waitq();
1758 waitq_put(sim.worklist, start_block);
1760 be_assure_live_sets(irg);
1761 sim.lv = be_get_irg_liveness(irg);
1763 /* Calculate the liveness for all nodes. We must precalculate this info,
1764 * because the simulator adds new nodes (possible before Phi nodes) which
1765 * would let a lazy calculation fail.
1766 * On the other hand we reduce the computation amount due to
1767 * precaching from O(n^2) to O(n) at the expense of O(n) cache memory.
1769 irg_block_walk_graph(irg, update_liveness_walker, NULL, &sim);
1773 block = (ir_node*)waitq_get(sim.worklist);
1774 x87_simulate_block(&sim, block);
1775 } while (! waitq_empty(sim.worklist));
1778 del_waitq(sim.worklist);
1779 x87_destroy_simulator(&sim);
1782 /* Initializes the x87 simulator. */
1783 void ia32_init_x87(void)
1785 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.x87");