2 * This file implements the x87 support and virtual to stack
3 * register translation.
12 #include "iredges_t.h"
18 #include "..\belive.h"
19 #include "..\besched.h"
20 #include "ia32_new_nodes.h"
21 #include "gen_ia32_new_nodes.h"
22 #include "gen_ia32_regalloc_if.h"
27 /* first and second binop index */
34 #define MASK_TOS(x) ((x) & (N_x87_REGS - 1))
36 /** the virtual floating point flag */
37 #define irop_flag_vp (irop_flag_machine << 1)
40 * An exchange template.
41 * Note that our virtual functions have the same inputs
42 * ant attributes as the real ones, so we can simple exchange
44 * Further, x87 supports inverse instructions, so we can handle them.
46 typedef struct _exchange_tmpl {
47 ir_op *normal_op; /**< the normal one */
48 ir_op *reverse_op; /**< the reverse one if exists */
54 typedef struct _x87_state {
55 const arch_register_t *st[N_x87_REGS]; /**< the register stack */
56 int depth; /**< the current stack depth */
57 int tos; /**< position of the tos */
60 /** An empty state, used for blocks without fp instructions. */
61 static const x87_state _empty = { {0}, 0, 0 };
62 static x87_state *empty = (x87_state *)&_empty;
64 /** The type of an instruction simulator */
65 typedef void (*sim_func)(x87_state *state, ir_node *n, const arch_env_t *env);
68 * A block state: Every block has a x87 state at the beginning and at the end.
70 typedef struct _blk_state {
71 x87_state *begin; /**< state at the begin or NULL if not assigned */
72 x87_state *end; /**< state at the end or NULL if not assigned */
75 #define PTR_TO_BLKSTATE(p) ((blk_state *)(p))
80 typedef struct _x87_simulator {
81 struct obstack obst; /**< an obstack for fast allocating */
82 pmap *blk_states; /**< map blocks to states */
83 const arch_env_t *env; /**< architecture environment */
87 * Check if the state is empty.
89 static int x87_state_is_empty(const x87_state *state) {
90 return state->depth == 0;
94 * Return the virtual register at st(pos).
96 static const arch_register_t *x87_get_reg(const x87_state *state, int pos) {
97 assert(pos < state->depth);
98 return state->st[MASK_TOS(state->tos + pos)];
102 * Dump the stack for debugging.
104 static void x87_dump_stack(const x87_state *state) {
107 for (i = state->depth - 1; i >= 0; --i) {
108 const arch_register_t *vreg = x87_get_reg(state, i);
109 ir_printf("%s ", vreg->name);
111 ir_printf("<-- TOS\n");
115 * Set the tos virtual register
117 static void x87_set_tos_reg(x87_state *state, const arch_register_t *vreg) {
118 assert(0 < state->depth);
119 state->st[MASK_TOS(state->tos)] = vreg;
121 printf("After INSTR:\n "); x87_dump_stack(state);
125 * Flush the x87 stack.
127 static void x87_flush(x87_state *state) {
133 * Swap st(0) with st(pos).
135 static void x87_fxch(x87_state *state, int pos) {
136 const arch_register_t *vreg;
137 assert(pos < state->depth);
139 vreg = state->st[MASK_TOS(state->tos + pos)];
140 state->st[MASK_TOS(state->tos + pos)] = state->st[MASK_TOS(state->tos)];
141 state->st[MASK_TOS(state->tos)] = vreg;
143 printf("After FXCH:\n "); x87_dump_stack(state);
147 * Convert a virtual register to the stack index.
148 * Return -1 if the virtual register was not found.
150 static int x87_on_stack(const x87_state *state, const arch_register_t *vreg) {
151 int i, tos = state->tos;
153 for (i = 0; i < state->depth; ++i)
154 if (state->st[MASK_TOS(tos + i)] == vreg)
160 * Push a virtual Register onto the stack.
162 static void x87_push(x87_state *state, const arch_register_t *vreg) {
163 // assert(x87_on_stack(state, vreg) == -1 && "double push");
164 assert(state->depth < N_x87_REGS && "stack overrun");
167 state->tos = MASK_TOS(state->tos - 1);
168 state->st[state->tos] = vreg;
170 printf("After PUSH:\n "); x87_dump_stack(state);
174 * Pop a virtual Register from the stack.
176 static void x87_pop(x87_state *state) {
177 assert(state->depth > 0 && "stack underrun");
180 state->tos = MASK_TOS(state->tos + 1);
182 printf("After POP:\n "); x87_dump_stack(state);
186 * Returns the block state of a block.
188 static blk_state *x87_get_bl_state(x87_simulator *sim, ir_node *block) {
189 pmap_entry *entry = pmap_find(sim->blk_states, block);
192 blk_state *bl_state = obstack_alloc(&sim->obst, sizeof(*bl_state));
193 bl_state->begin = NULL;
194 bl_state->end = NULL;
196 pmap_insert(sim->blk_states, block, bl_state);
200 return entry ? PTR_TO_BLKSTATE(entry->value) : NULL;
204 * Create a new x87 state.
206 static x87_state *x87_alloc_state(x87_simulator *sim) {
207 x87_state *res = obstack_alloc(&sim->obst, sizeof(*res));
212 * Create a new empty x87 state.
214 static x87_state *x87_alloc_empty_state(x87_simulator *sim) {
215 x87_state *res = x87_alloc_state(sim);
224 static x87_state *x87_clone_state(x87_simulator *sim, const x87_state *src) {
225 x87_state *res = x87_alloc_state(sim);
227 memcpy(res, src, sizeof(*res));
232 * Calculate the necessary permutations to reach dst_state.
234 static x87_state *x87_shuffle(x87_simulator *sim, ir_node *block, x87_state *state, const x87_state *dst_state) {
235 assert(state->depth == dst_state->depth);
238 state = x87_clone_state(sim, state);
243 * Create a fxch before node n.
245 static void x87_create_fxch(x87_state *state, ir_node *n, int pos, int op_idx) {
246 ir_node *fxch, *pred;
249 x87_fxch(state, pos);
251 pred = get_irn_n(n, op_idx);
252 fxch = new_rd_ia32_fxch(NULL, get_irn_irg(n), get_nodes_block(n), pred, get_irn_mode(pred));
253 attr = get_ia32_attr(fxch);
254 attr->x87[0] = &ia32_st_regs[pos];
255 attr->x87[2] = &ia32_st_regs[0];
256 set_irn_n(n, op_idx, fxch);
258 sched_add_before(n, fxch);
259 printf("<<< %s %s, %s\n", get_irn_opname(fxch), attr->x87[0]->name, attr->x87[2]->name);
263 * Create a fpush before node n.
265 static void x87_create_fpush(const arch_env_t *env, x87_state *state, ir_node *n, int pos, int op_idx) {
266 ir_node *fpush, *pred;
269 x87_push(state, arch_get_irn_register(env, n));
271 pred = get_irn_n(n, op_idx);
272 fpush = new_rd_ia32_fpush(NULL, get_irn_irg(n), get_nodes_block(n), pred, get_irn_mode(pred));
273 attr = get_ia32_attr(fpush);
274 attr->x87[0] = &ia32_st_regs[pos];
275 attr->x87[2] = &ia32_st_regs[0];
276 set_irn_n(n, op_idx, fpush);
278 sched_add_before(n, fpush);
279 printf("<<< %s %s, %s\n", get_irn_opname(fpush), attr->x87[0]->name, attr->x87[2]->name);
283 * Simulate a virtual binop
285 static void sim_binop(x87_state *state, ir_node *n, const arch_env_t *env, const exchange_tmpl *tmpl) {
286 int op1_idx, op2_idx = -1;
288 const arch_register_t *op1 = arch_get_irn_register(env, get_irn_n(n, BINOP_IDX_1));
289 const arch_register_t *op2 = arch_get_irn_register(env, get_irn_n(n, BINOP_IDX_2));
290 const arch_register_t *out = arch_get_irn_register(env, n);
291 ia32_attr_t *attr = get_ia32_attr(n);
293 printf(">>> %s %s, %s -> %s\n", get_irn_opname(n), op1->name, op2->name, out->name);
295 out_idx = x87_on_stack(state, out);
296 op1_idx = x87_on_stack(state, op1);
298 if (out_idx == op1_idx) {
299 /* good, first operands match, bring it to top */
301 x87_create_fxch(state, n, out_idx, BINOP_IDX_1);
306 if (op2->index != REG_VFP_NOREG) {
307 /* op2 is no address mode */
308 op2_idx = x87_on_stack(state, op2);
311 op2_idx = REG_ST_NOREG;
313 n->op = tmpl->normal_op;
316 if (op2->index != REG_VFP_NOREG) {
317 /* op2 is no address mode */
318 op2_idx = x87_on_stack(state, op2);
321 if (out_idx == op2_idx) {
322 /* good, second operands match, bring it to top */
324 x87_create_fxch(state, n, out_idx, BINOP_IDX_2);
331 n->op = tmpl->reverse_op;
334 /* bad, both numbers did not match, push op1 */
335 x87_push(state, op1);
336 if (op2->index != REG_VFP_NOREG)
339 op2_idx = REG_ST_NOREG;
343 n->op = tmpl->normal_op;
346 x87_set_tos_reg(state, out);
347 attr->x87[0] = op1 = &ia32_st_regs[op1_idx];
348 attr->x87[1] = op2 = &ia32_st_regs[op2_idx];
349 attr->x87[2] = out = &ia32_st_regs[out_idx];
351 printf("<<< %s %s, %s -> %s\n", get_irn_opname(n), op1->name, op2->name, out->name);
355 * Simulate a virtual Unop
357 static void sim_unop(x87_state *state, ir_node *n, const arch_env_t *env, ir_op *op) {
358 int op1_idx, out_idx;
359 const arch_register_t *op1 = arch_get_irn_register(env, get_irn_n(n, UNOP_IDX));
360 const arch_register_t *out = arch_get_irn_register(env, n);
361 ia32_attr_t *attr = get_ia32_attr(n);
363 out_idx = x87_on_stack(state, out);
364 op1_idx = x87_on_stack(state, op1);
366 if (out_idx == op1_idx) {
367 /* good, operands match, bring it to top */
369 x87_create_fxch(state, n, out_idx, UNOP_IDX);
375 /* create a push or an exchange here */
377 x87_create_fpush(env, state, n, op1_idx, UNOP_IDX);
379 x87_create_fxch(state, n, op1_idx, UNOP_IDX);
381 printf(">>> %s -> %s\n", get_irn_opname(n), out->name);
383 attr->x87[0] = op1 = &ia32_st_regs[0];
384 attr->x87[2] = out = &ia32_st_regs[0];
385 printf("<<< %s -> %s\n", get_irn_opname(n), out->name);
389 * Simulate a virtual Load
391 static void sim_load(x87_state *state, ir_node *n, const arch_env_t *env, ir_op *op) {
392 const arch_register_t *out = arch_get_irn_register(env, n);
393 ia32_attr_t *attr = get_ia32_attr(n);
395 printf(">>> %s -> %s\n", get_irn_opname(n), out->name);
396 x87_push(state, out);
398 attr->x87[2] = out = &ia32_st_regs[0];
399 printf("<<< %s -> %s\n", get_irn_opname(n), out->name);
402 #define _GEN_BINOP(op, rev) \
403 static void sim_##op(x87_state *state, ir_node *n, const arch_env_t *env) { \
404 exchange_tmpl tmpl = { op_ia32_##op, op_ia32_##rev }; \
405 sim_binop(state, n, env, &tmpl); \
408 #define GEN_BINOP(op) _GEN_BINOP(op, op)
409 #define GEN_BINOPR(op) _GEN_BINOP(op, op##r)
411 #define GEN_LOAD2(op, nop) \
412 static void sim_##op(x87_state *state, ir_node *n, const arch_env_t *env) { \
413 sim_load(state, n, env, op_ia32_##nop); \
416 #define GEN_LOAD(op) GEN_LOAD2(op, op)
418 #define GEN_UNOP(op) \
419 static void sim_##op(x87_state *state, ir_node *n, const arch_env_t *env) { \
420 sim_unop(state, n, env, op_ia32_##op); \
432 GEN_LOAD2(fConst, fldConst)
441 * Run a simulation and fix all virtual instructions for a block.
443 * @return non-zero if simulation is complete,
444 * zero if the simulation must be rerun
446 static int x87_simulate_block(x87_simulator *sim, ir_node *block) {
448 blk_state *bl_state = x87_get_bl_state(sim, block);
449 x87_state *state = bl_state->begin;
450 const ir_edge_t *edge;
451 ir_node *start_block;
453 /* if we have no assigned start state, we must wait ... */
457 assert(bl_state->end == NULL);
459 sched_foreach(block, n) {
460 ir_op *op = get_irn_op(n);
462 if (op->ops.generic) {
463 sim_func func = (sim_func)op->ops.generic;
465 /* have work to do */
466 if (state == bl_state->begin) {
467 /* create a new state, will be changed */
468 state = x87_clone_state(sim, state);
472 (*func)(state, n, sim->env);
476 start_block = get_irg_start_block(get_irn_irg(block));
478 /* check if the state must be shuffled */
479 foreach_block_succ(block, edge) {
480 ir_node *succ = get_edge_src_irn(edge);
481 blk_state *succ_state = x87_get_bl_state(sim, succ);
483 if (succ_state->begin && succ != start_block) {
484 /* There is already a begin state for this block, bad.
485 Do the necessary permutations.
486 Note that critical edges are removed, so this is always possible. */
487 x87_shuffle(sim, block, state, succ_state->begin);
489 /* Note further, that there can be only one such situation,
490 so we can break here. */
494 bl_state->end = state;
496 /* now propagate the state to all successor blocks */
497 foreach_block_succ(block, edge) {
498 ir_node *succ = get_edge_src_irn(edge);
499 blk_state *succ_state = x87_get_bl_state(sim, succ);
501 if (! succ_state->begin)
502 succ_state->begin = state;
509 * Create a new x87 simulator.
511 static void x87_init_simulator(x87_simulator *sim, const arch_env_t *env) {
512 obstack_init(&sim->obst);
513 sim->blk_states = pmap_create();
516 clear_irp_opcodes_generic_func();
518 #define ASSOC(op) (op_ia32_v ## op)->ops.generic = (op_func)(sim_##op)
537 * Destroy a x87 simulator.
539 static void x87_destroy_simulator(x87_simulator *sim) {
540 pmap_destroy(sim->blk_states);
541 obstack_free(&sim->obst, NULL);
545 * Run a simulation and fix all virtual instructions for a graph.
547 * Needs a block-schedule.
549 void x87_simulate_graph(const arch_env_t *env, ir_graph *irg, ir_node **blk_list) {
550 ir_node *block, *start_block;
557 be_liveness_dumpto(irg, "-x87-live");
559 x87_init_simulator(&sim, env);
561 start_block = get_irg_start_block(irg);
562 bl_state = x87_get_bl_state(&sim, start_block);
564 /* start with the empty state */
565 bl_state->begin = empty;
567 worklist = new_pdeq();
569 /* create the worklist for the schedule. */
570 for (i = 0; i < ARR_LEN(blk_list); ++i)
571 pdeq_putr(worklist, blk_list[i]);
575 block = pdeq_getl(worklist);
576 if (! x87_simulate_block(&sim, block)) {
577 pdeq_putr(worklist, block);
580 } while (! pdeq_empty(worklist));
582 x87_destroy_simulator(&sim);