2 * This file implements the x87 support and virtual to stack
3 * register translation for the ia32 backend.
5 * @author: Michael Beck
14 #include "iredges_t.h"
21 #include "..\belive_t.h"
22 #include "..\besched.h"
23 #include "..\benode_t.h"
24 #include "ia32_new_nodes.h"
25 #include "gen_ia32_new_nodes.h"
26 #include "gen_ia32_regalloc_if.h"
31 /* first and second binop index */
38 /* the store val index */
39 #define STORE_VAL_IDX 2
41 #define MASK_TOS(x) ((x) & (N_x87_REGS - 1))
43 /** the virtual floating point flag */
44 #define irop_flag_vp (irop_flag_machine << 1)
47 * An exchange template.
48 * Note that our virtual functions have the same inputs
49 * and attributes as the real ones, so we can simple exchange
51 * Further, x87 supports inverse instructions, so we can handle them.
53 typedef struct _exchange_tmpl {
54 ir_op *normal_op; /**< the normal one */
55 ir_op *reverse_op; /**< the reverse one if exists */
56 ir_op *normal_pop_op; /**< the normal one with tos pop */
57 ir_op *reverse_pop_op; /**< the reverse one with tos pop */
63 typedef struct _x87_state {
64 const arch_register_t *st[N_x87_REGS]; /**< the register stack */
65 int depth; /**< the current stack depth */
66 int tos; /**< position of the tos */
69 /** An empty state, used for blocks without fp instructions. */
70 static const x87_state _empty = { {0}, 0, 0 };
71 static x87_state *empty = (x87_state *)&_empty;
73 /** The type of an instruction simulator */
74 typedef void (*sim_func)(x87_state *state, ir_node *n, const arch_env_t *env);
77 * A block state: Every block has a x87 state at the beginning and at the end.
79 typedef struct _blk_state {
80 x87_state *begin; /**< state at the begin or NULL if not assigned */
81 x87_state *end; /**< state at the end or NULL if not assigned */
84 #define PTR_TO_BLKSTATE(p) ((blk_state *)(p))
89 typedef struct _x87_simulator {
90 struct obstack obst; /**< an obstack for fast allocating */
91 pmap *blk_states; /**< map blocks to states */
92 const arch_env_t *env; /**< architecture environment */
96 * Check if the state is empty.
98 static int x87_state_is_empty(const x87_state *state) {
99 return state->depth == 0;
103 * Return the virtual register at st(pos).
105 static const arch_register_t *x87_get_reg(const x87_state *state, int pos) {
106 assert(pos < state->depth);
107 return state->st[MASK_TOS(state->tos + pos)];
111 * Dump the stack for debugging.
113 static void x87_dump_stack(const x87_state *state) {
116 for (i = state->depth - 1; i >= 0; --i) {
117 const arch_register_t *vreg = x87_get_reg(state, i);
118 ir_printf("%s ", vreg->name);
120 ir_printf("<-- TOS\n");
124 * Set a virtual register to st(pos)
126 static void x87_set_reg(x87_state *state, const arch_register_t *vreg, int pos) {
127 assert(0 < state->depth);
128 state->st[MASK_TOS(state->tos + pos)] = vreg;
130 printf("After SET_REG:\n "); x87_dump_stack(state);
134 * Set the tos virtual register
136 static void x87_set_tos_reg(x87_state *state, const arch_register_t *vreg) {
137 x87_set_reg(state, vreg, 0);
141 * Flush the x87 stack.
143 static void x87_flush(x87_state *state) {
149 * Swap st(0) with st(pos).
151 static void x87_fxch(x87_state *state, int pos) {
152 const arch_register_t *vreg;
153 assert(pos < state->depth);
155 vreg = state->st[MASK_TOS(state->tos + pos)];
156 state->st[MASK_TOS(state->tos + pos)] = state->st[MASK_TOS(state->tos)];
157 state->st[MASK_TOS(state->tos)] = vreg;
159 printf("After FXCH:\n "); x87_dump_stack(state);
163 * Convert a virtual register to the stack index.
164 * Return -1 if the virtual register was not found.
166 static int x87_on_stack(const x87_state *state, const arch_register_t *vreg) {
167 int i, tos = state->tos;
169 for (i = 0; i < state->depth; ++i)
170 if (state->st[MASK_TOS(tos + i)] == vreg)
176 * Push a virtual Register onto the stack.
178 static void x87_push(x87_state *state, const arch_register_t *vreg) {
179 // assert(x87_on_stack(state, vreg) == -1 && "double push");
180 assert(state->depth < N_x87_REGS && "stack overrun");
183 state->tos = MASK_TOS(state->tos - 1);
184 state->st[state->tos] = vreg;
186 printf("After PUSH:\n "); x87_dump_stack(state);
190 * Pop a virtual Register from the stack.
192 static void x87_pop(x87_state *state) {
193 assert(state->depth > 0 && "stack underrun");
196 state->tos = MASK_TOS(state->tos + 1);
198 printf("After POP:\n "); x87_dump_stack(state);
202 * Returns the block state of a block.
204 static blk_state *x87_get_bl_state(x87_simulator *sim, ir_node *block) {
205 pmap_entry *entry = pmap_find(sim->blk_states, block);
208 blk_state *bl_state = obstack_alloc(&sim->obst, sizeof(*bl_state));
209 bl_state->begin = NULL;
210 bl_state->end = NULL;
212 pmap_insert(sim->blk_states, block, bl_state);
216 return entry ? PTR_TO_BLKSTATE(entry->value) : NULL;
220 * Create a new x87 state.
222 static x87_state *x87_alloc_state(x87_simulator *sim) {
223 x87_state *res = obstack_alloc(&sim->obst, sizeof(*res));
228 * Create a new empty x87 state.
230 static x87_state *x87_alloc_empty_state(x87_simulator *sim) {
231 x87_state *res = x87_alloc_state(sim);
240 static x87_state *x87_clone_state(x87_simulator *sim, const x87_state *src) {
241 x87_state *res = x87_alloc_state(sim);
243 memcpy(res, src, sizeof(*res));
248 * Calculate the necessary permutations to reach dst_state.
250 static x87_state *x87_shuffle(x87_simulator *sim, ir_node *block, x87_state *state, const x87_state *dst_state) {
251 assert(state->depth == dst_state->depth);
254 state = x87_clone_state(sim, state);
259 * Create a fxch before node n.
261 static void x87_create_fxch(x87_state *state, ir_node *n, int pos, int op_idx) {
262 ir_node *fxch, *pred;
265 x87_fxch(state, pos);
267 pred = get_irn_n(n, op_idx);
268 fxch = new_rd_ia32_fxch(NULL, get_irn_irg(n), get_nodes_block(n), pred, get_irn_mode(pred));
269 attr = get_ia32_attr(fxch);
270 attr->x87[0] = &ia32_st_regs[pos];
271 attr->x87[2] = &ia32_st_regs[0];
272 set_irn_n(n, op_idx, fxch);
274 sched_add_before(n, fxch);
275 printf("<<< %s %s, %s\n", get_irn_opname(fxch), attr->x87[0]->name, attr->x87[2]->name);
279 * Create a fpush before node n.
281 static void x87_create_fpush(const arch_env_t *env, x87_state *state, ir_node *n, int pos, int op_idx) {
282 ir_node *fpush, *pred;
285 x87_push(state, arch_get_irn_register(env, n));
287 pred = get_irn_n(n, op_idx);
288 fpush = new_rd_ia32_fpush(NULL, get_irn_irg(n), get_nodes_block(n), pred, get_irn_mode(pred));
289 attr = get_ia32_attr(fpush);
290 attr->x87[0] = &ia32_st_regs[pos];
291 attr->x87[2] = &ia32_st_regs[0];
292 set_irn_n(n, op_idx, fpush);
294 sched_add_before(n, fpush);
295 printf("<<< %s %s, %s\n", get_irn_opname(fpush), attr->x87[0]->name, attr->x87[2]->name);
298 /* --------------------------------- liveness ------------------------------------------ */
301 * The liveness transfer function.
302 * Updates a live set over a single step from a given node to its predecessor.
303 * Everything defined at the node is removed from the set, the uses of the node get inserted.
304 * @param arch_env The architecture environment.
305 * @param irn The node at which liveness should be computed.
306 * @param live The bitset of registers live before @p irn. This set gets modified by updating it to
307 * the registers live after irn.
310 static unsigned vfp_liveness_transfer(const arch_env_t *arch_env, ir_node *irn, unsigned live)
313 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
315 if(arch_irn_consider_in_reg_alloc(arch_env, cls, irn)) {
316 const arch_register_t *reg = arch_get_irn_register(arch_env, irn);
317 live &= ~(1 << reg->index);
320 for(i = 0, n = get_irn_arity(irn); i < n; ++i) {
321 ir_node *op = get_irn_n(irn, i);
323 if (arch_irn_consider_in_reg_alloc(arch_env, cls, op)) {
324 const arch_register_t *reg = arch_get_irn_register(arch_env, op);
325 live |= 1 << reg->index;
333 * Put all live virtual registers at the end of a block into a bitset.
334 * @param arch_env The architecture environment.
335 * @param bl The block.
336 * @return The live bitset.
338 static unsigned vfp_liveness_end_of_block(const arch_env_t *arch_env, const ir_node *bl)
342 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
344 live_foreach(bl, li) {
345 ir_node *irn = (ir_node *) li->irn;
346 if (live_is_end(li) && arch_irn_consider_in_reg_alloc(arch_env, cls, irn)) {
347 const arch_register_t *reg = arch_get_irn_register(arch_env, irn);
348 live |= 1 << reg->index;
356 * Compute a bitset of registers which are live at another node.
357 * @param arch_env The architecture environment.
358 * @param pos The node.
359 * @return The live bitset.
361 static unsigned vfp_liveness_nodes_live_at(const arch_env_t *arch_env, const ir_node *pos)
363 const ir_node *bl = is_Block(pos) ? pos : get_nodes_block(pos);
364 const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_vfp];
368 live = vfp_liveness_end_of_block(arch_env, bl);
370 sched_foreach_reverse(bl, irn) {
372 * If we encounter the node we want to insert the Perm after,
373 * exit immediately, so that this node is still live
378 live = vfp_liveness_transfer(arch_env, irn, live);
385 * Returns true if a register is live in a set.
387 static unsigned is_vfp_live(const arch_register_t *reg, unsigned live) {
388 return live & (1 << reg->index);
391 static vfp_dump_live(unsigned live) {
394 printf("Live registers here: \n");
395 for (i = 0; i < 8; ++i) {
396 if (live & (1 << i)) {
403 /* --------------------------------- simulators ---------------------------------------- */
405 #define XCHG(a, b) do { int t =(a); (a) = (b); (b) = t; } while (0)
408 * Simulate a virtual binop
410 static void sim_binop(x87_state *state, ir_node *n, const arch_env_t *env, const exchange_tmpl *tmpl) {
411 int op1_idx, op2_idx = -1;
412 int out_idx, do_pop =0;
415 const arch_register_t *op1 = arch_get_irn_register(env, get_irn_n(n, BINOP_IDX_1));
416 const arch_register_t *op2 = arch_get_irn_register(env, get_irn_n(n, BINOP_IDX_2));
417 const arch_register_t *out = arch_get_irn_register(env, n);
418 unsigned live = vfp_liveness_nodes_live_at(env, n);
420 printf(">>> %s %s, %s -> %s\n", get_irn_opname(n), op1->name, op2->name, out->name);
423 op1_idx = x87_on_stack(state, op1);
425 if (op2->reg_class == &ia32_reg_classes[CLASS_ia32_vfp]) {
426 /* second operand is a vfp register */
427 op2_idx = x87_on_stack(state, op2);
429 if (is_vfp_live(op1, live)) {
430 /* first operand is live */
432 if (is_vfp_live(op2, live)) {
433 /* both operands are live: push the first one */
434 x87_create_fpush(env, state, n, op1_idx, BINOP_IDX_1);
435 out_idx = op1_idx = 0;
437 dst = tmpl->normal_op;
441 /* first live, second operand is dead here, bring it to tos */
443 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
447 op2_idx = out_idx = 0;
448 dst = tmpl->normal_op;
453 /* first operand is dead */
454 if (is_vfp_live(op2, live)) {
455 /* second operand is live: bring first to tos */
457 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
461 op1_idx = out_idx = 0;
462 dst = tmpl->normal_op;
466 /* both operands are dead here, pop them from the stack */
469 XCHG(op1_idx, op2_idx);
470 dst = tmpl->reverse_pop_op;
473 else if (op2_idx == 0) {
475 dst = tmpl->normal_pop_op;
479 /* bring the second on top */
480 x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
483 dst = tmpl->normal_pop_op;
490 /* second operand is an address mode */
491 if (is_vfp_live(op1, live)) {
492 /* first operand is live: push it here */
493 x87_create_fpush(env, state, n, op1_idx, BINOP_IDX_1);
496 /* first operand is dead: bring it to tos */
498 x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
500 op1_idx = out_idx = 0;
501 dst = tmpl->normal_op;
505 x87_set_reg(state, out, out_idx);
509 /* patch the operation */
511 attr = get_ia32_attr(n);
512 attr->x87[0] = op1 = &ia32_st_regs[op1_idx];
513 attr->x87[1] = op2 = &ia32_st_regs[op2_idx];
514 attr->x87[2] = out = &ia32_st_regs[out_idx];
516 printf("<<< %s %s, %s -> %s\n", get_irn_opname(n), op1->name, op2->name, out->name);
520 * Simulate a virtual Unop
522 static void sim_unop(x87_state *state, ir_node *n, const arch_env_t *env, ir_op *op) {
523 int op1_idx, out_idx;
524 const arch_register_t *op1 = arch_get_irn_register(env, get_irn_n(n, UNOP_IDX));
525 const arch_register_t *out = arch_get_irn_register(env, n);
527 unsigned live = vfp_liveness_nodes_live_at(env, n);
529 printf(">>> %s -> %s\n", get_irn_opname(n), out->name);
532 op1_idx = x87_on_stack(state, op1);
534 if (is_vfp_live(op1, live)) {
535 /* push the operand here */
536 x87_create_fpush(env, state, n, op1_idx, UNOP_IDX);
539 /* operand is dead, bring it to tos */
541 x87_create_fxch(state, n, op1_idx, UNOP_IDX);
544 x87_set_tos_reg(state, out);
545 op1_idx = out_idx = 0;
547 attr = get_ia32_attr(n);
548 attr->x87[0] = op1 = &ia32_st_regs[0];
549 attr->x87[2] = out = &ia32_st_regs[0];
550 printf("<<< %s -> %s\n", get_irn_opname(n), out->name);
554 * Simulate a virtual Load instructions
556 static void sim_load(x87_state *state, ir_node *n, const arch_env_t *env, ir_op *op) {
557 const arch_register_t *out = arch_get_irn_register(env, n);
560 printf(">>> %s -> %s\n", get_irn_opname(n), out->name);
561 x87_push(state, out);
563 attr = get_ia32_attr(n);
564 attr->x87[2] = out = &ia32_st_regs[0];
565 printf("<<< %s -> %s\n", get_irn_opname(n), out->name);
569 * Simulate a virtual Store
571 static void sim_fst(x87_state *state, ir_node *n, const arch_env_t *env) {
573 const arch_register_t *op2 = arch_get_irn_register(env, get_irn_n(n, STORE_VAL_IDX));
575 unsigned live = vfp_liveness_nodes_live_at(env, n);
577 op2_idx = x87_on_stack(state, op2);
579 printf(">>> %s %s ->\n", get_irn_opname(n), op2->name);
581 /* we can only store the tos to memory */
583 x87_create_fxch(state, n, op2_idx, STORE_VAL_IDX);
585 if (is_vfp_live(op2, live))
589 n->op = op_ia32_fstp;
592 attr = get_ia32_attr(n);
593 attr->x87[1] = op2 = &ia32_st_regs[0];
594 printf("<<< %s %s ->\n", get_irn_opname(n), op2->name);
597 #define _GEN_BINOP(op, rev) \
598 static void sim_##op(x87_state *state, ir_node *n, const arch_env_t *env) { \
599 exchange_tmpl tmpl = { op_ia32_##op, op_ia32_##rev, op_ia32_##op##p, op_ia32_##rev##p }; \
600 sim_binop(state, n, env, &tmpl); \
603 #define GEN_BINOP(op) _GEN_BINOP(op, op)
604 #define GEN_BINOPR(op) _GEN_BINOP(op, op##r)
606 #define GEN_LOAD2(op, nop) \
607 static void sim_##op(x87_state *state, ir_node *n, const arch_env_t *env) { \
608 sim_load(state, n, env, op_ia32_##nop); \
611 #define GEN_LOAD(op) GEN_LOAD2(op, op)
613 #define GEN_UNOP(op) \
614 static void sim_##op(x87_state *state, ir_node *n, const arch_env_t *env) { \
615 sim_unop(state, n, env, op_ia32_##op); \
627 GEN_LOAD2(fConst, fldConst)
636 * Simulate a virtual Copy
638 static void sim_Copy(x87_state *state, ir_node *n, const arch_env_t *env) {
639 ir_mode *mode = get_irn_mode(n);
641 if (mode_is_float(mode)) {
642 const arch_register_t *op1 = arch_get_irn_register(env, get_irn_n(n, 0));
643 const arch_register_t *out = arch_get_irn_register(env, n);
644 ir_node *node, *next;
647 unsigned live = vfp_liveness_nodes_live_at(env, n);
649 op1_idx = x87_on_stack(state, op1);
651 printf(">>> %s %s -> %s\n", get_irn_opname(n), op1->name, out->name);
654 if (is_vfp_live(op1, live)) {
655 /* operand is still live,a real copy */
656 x87_push(state, out);
658 node = new_rd_ia32_fpush(get_irn_dbg_info(n), get_irn_irg(n), get_nodes_block(n), get_irn_n(n, 0), mode);
659 arch_set_irn_register(env, node, out);
661 attr = get_ia32_attr(node);
662 attr->x87[0] = op1 = &ia32_st_regs[op1_idx];
663 attr->x87[2] = out = &ia32_st_regs[0];
665 next = sched_next(n);
668 sched_add_before(next, node);
669 printf(">>> %s %s -> %s\n", get_irn_opname(node), op1->name, out->name);
672 /* just a virtual copy */
673 x87_set_reg(state, out, op1_idx);
675 printf(">>> KILLED %s\n", get_irn_opname(n));
681 * Run a simulation and fix all virtual instructions for a block.
683 * @return non-zero if simulation is complete,
684 * zero if the simulation must be rerun
686 static int x87_simulate_block(x87_simulator *sim, ir_node *block) {
688 blk_state *bl_state = x87_get_bl_state(sim, block);
689 x87_state *state = bl_state->begin;
690 const ir_edge_t *edge;
691 ir_node *start_block;
693 /* if we have no assigned start state, we must wait ... */
697 assert(bl_state->end == NULL);
699 /* beware, n might changed */
700 for (n = sched_first(block); !sched_is_end(n); n = next) {
701 ir_op *op = get_irn_op(n);
703 next = sched_next(n);
704 if (op->ops.generic) {
705 sim_func func = (sim_func)op->ops.generic;
707 /* have work to do */
708 if (state == bl_state->begin) {
709 /* create a new state, will be changed */
710 state = x87_clone_state(sim, state);
714 (*func)(state, n, sim->env);
718 start_block = get_irg_start_block(get_irn_irg(block));
720 /* check if the state must be shuffled */
721 foreach_block_succ(block, edge) {
722 ir_node *succ = get_edge_src_irn(edge);
723 blk_state *succ_state = x87_get_bl_state(sim, succ);
725 if (succ_state->begin && succ != start_block) {
726 /* There is already a begin state for this block, bad.
727 Do the necessary permutations.
728 Note that critical edges are removed, so this is always possible. */
729 x87_shuffle(sim, block, state, succ_state->begin);
731 /* Note further, that there can be only one such situation,
732 so we can break here. */
736 bl_state->end = state;
738 /* now propagate the state to all successor blocks */
739 foreach_block_succ(block, edge) {
740 ir_node *succ = get_edge_src_irn(edge);
741 blk_state *succ_state = x87_get_bl_state(sim, succ);
743 if (! succ_state->begin)
744 succ_state->begin = state;
751 * Create a new x87 simulator.
753 static void x87_init_simulator(x87_simulator *sim, const arch_env_t *env) {
754 obstack_init(&sim->obst);
755 sim->blk_states = pmap_create();
758 clear_irp_opcodes_generic_func();
760 #define ASSOC(op) (op_ia32_v ## op)->ops.generic = (op_func)(sim_##op)
761 #define ASSOC_BE(op) (op_be_ ## op)->ops.generic = (op_func)(sim_##op)
783 * Destroy a x87 simulator.
785 static void x87_destroy_simulator(x87_simulator *sim) {
786 pmap_destroy(sim->blk_states);
787 obstack_free(&sim->obst, NULL);
791 * Run a simulation and fix all virtual instructions for a graph.
793 * Needs a block-schedule.
795 void x87_simulate_graph(const arch_env_t *env, ir_graph *irg, ir_node **blk_list) {
796 ir_node *block, *start_block;
803 be_liveness_dumpto(irg, "-x87-live");
805 x87_init_simulator(&sim, env);
807 start_block = get_irg_start_block(irg);
808 bl_state = x87_get_bl_state(&sim, start_block);
810 /* start with the empty state */
811 bl_state->begin = empty;
813 worklist = new_pdeq();
815 /* create the worklist for the schedule. */
816 for (i = 0; i < ARR_LEN(blk_list); ++i)
817 pdeq_putr(worklist, blk_list[i]);
821 block = pdeq_getl(worklist);
822 if (! x87_simulate_block(&sim, block)) {
823 pdeq_putr(worklist, block);
826 } while (! pdeq_empty(worklist));
828 x87_destroy_simulator(&sim);