X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=ir%2Fbe%2Fia32%2Fia32_x87.c;h=c724764d4b77c9d152e6cbaa39ae1db24e55ec33;hb=1872920c09708b361d06c0dc9f4c1fd0a03544f5;hp=45c2853d0959b15e6a6deacd49fd62f0738bd7a1;hpb=5057c4b3b6c48dc75dee684cd172ce4cfed00da7;p=libfirm diff --git a/ir/be/ia32/ia32_x87.c b/ir/be/ia32/ia32_x87.c index 45c2853d0..c724764d4 100644 --- a/ir/be/ia32/ia32_x87.c +++ b/ir/be/ia32/ia32_x87.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 1995-2007 University of Karlsruhe. All right reserved. + * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved. * * This file is part of libFirm. * @@ -47,23 +47,18 @@ #include "../belive_t.h" #include "../besched_t.h" #include "../benode_t.h" +#include "bearch_ia32_t.h" #include "ia32_new_nodes.h" #include "gen_ia32_new_nodes.h" #include "gen_ia32_regalloc_if.h" #include "ia32_x87.h" +#include "ia32_architecture.h" #define N_x87_REGS 8 -/* first and second binop index */ -#define BINOP_IDX_1 2 -#define BINOP_IDX_2 3 - /* the unop index */ #define UNOP_IDX 0 -/* the store val index */ -#define STORE_VAL_IDX 2 - #define MASK_TOS(x) ((x) & (N_x87_REGS - 1)) /** the debug handle */ @@ -148,6 +143,7 @@ struct _x87_simulator { vfp_liveness *live; /**< Liveness information. */ unsigned n_idx; /**< The cached get_irg_last_idx() result. */ waitq *worklist; /**< Worklist of blocks that must be processed. */ + ia32_isa_t *isa; /**< the ISA object */ }; /** @@ -174,6 +170,7 @@ static int x87_get_st_reg(const x87_state *state, int pos) { return state->st[MASK_TOS(state->tos + pos)].reg_idx; } /* x87_get_st_reg */ +#ifdef DEBUG_libfirm /** * Return the node at st(pos). * @@ -187,7 +184,6 @@ static ir_node *x87_get_st_node(const x87_state *state, int pos) { return state->st[MASK_TOS(state->tos + pos)].node; } /* x87_get_st_node */ -#ifdef DEBUG_libfirm /** * Dump the stack for debugging. * @@ -313,6 +309,16 @@ static void x87_pop(x87_state *state) { DB((dbg, LEVEL_2, "After POP: ")); DEBUG_ONLY(x87_dump_stack(state)); } /* x87_pop */ +/** + * Empty the fpu stack + * + * @param state the x87 state + */ +static void x87_emms(x87_state *state) { + state->depth = 0; + state->tos = 0; +} + /** * Returns the block state of a block. * @@ -447,7 +453,7 @@ static ir_node *x87_fxch_shuffle(x87_state *state, int pos, ir_node *block) { ir_node *fxch; ia32_x87_attr_t *attr; - fxch = new_rd_ia32_fxch(NULL, get_irn_irg(block), block, mode_E); + fxch = new_rd_ia32_fxch(NULL, get_irn_irg(block), block); attr = get_ia32_x87_attr(fxch); attr->x87[0] = &ia32_st_regs[pos]; attr->x87[2] = &ia32_st_regs[0]; @@ -617,7 +623,7 @@ static ir_node *x87_create_fxch(x87_state *state, ir_node *n, int pos) x87_fxch(state, pos); - fxch = new_rd_ia32_fxch(NULL, irg, block, mode_E); + fxch = new_rd_ia32_fxch(NULL, irg, block); attr = get_ia32_x87_attr(fxch); attr->x87[0] = &ia32_st_regs[pos]; attr->x87[2] = &ia32_st_regs[0]; @@ -644,7 +650,7 @@ static void x87_create_fpush(x87_state *state, ir_node *n, int pos, int op_idx) x87_push_dbl(state, arch_register_get_index(out), pred); - fpush = new_rd_ia32_fpush(NULL, get_irn_irg(n), get_nodes_block(n), mode_E); + fpush = new_rd_ia32_fpush(NULL, get_irn_irg(n), get_nodes_block(n)); attr = get_ia32_x87_attr(fpush); attr->x87[0] = &ia32_st_regs[pos]; attr->x87[2] = &ia32_st_regs[0]; @@ -666,12 +672,16 @@ static void x87_create_fpush(x87_state *state, ir_node *n, int pos, int op_idx) */ static ir_node *x87_create_fpop(x87_state *state, ir_node *n, int num) { - ir_node *fpop; + ir_node *fpop = NULL; ia32_x87_attr_t *attr; + assert(num > 0); while (num > 0) { x87_pop(state); - fpop = new_rd_ia32_fpop(NULL, get_irn_irg(n), get_nodes_block(n), mode_E); + if (ia32_cg_config.use_ffreep) + fpop = new_rd_ia32_ffreep(NULL, get_irn_irg(n), get_nodes_block(n)); + else + fpop = new_rd_ia32_fpop(NULL, get_irn_irg(n), get_nodes_block(n)); attr = get_ia32_x87_attr(fpop); attr->x87[0] = &ia32_st_regs[0]; attr->x87[1] = &ia32_st_regs[0]; @@ -867,6 +877,15 @@ static void vfp_dump_live(vfp_liveness live) { #define XCHG(a, b) do { int t = (a); (a) = (b); (b) = t; } while (0) +/* Pseudocode: + + + + + + +*/ + /** * Simulate a virtual binop. * @@ -882,36 +901,53 @@ static int sim_binop(x87_state *state, ir_node *n, const exchange_tmpl *tmpl) { ia32_x87_attr_t *attr; ir_node *patched_insn; ir_op *dst; - x87_simulator *sim = state->sim; - const arch_register_t *op1 = x87_get_irn_register(sim, get_irn_n(n, BINOP_IDX_1)); - const arch_register_t *op2 = x87_get_irn_register(sim, get_irn_n(n, BINOP_IDX_2)); - const arch_register_t *out = x87_get_irn_register(sim, n); - int reg_index_1 = arch_register_get_index(op1); - int reg_index_2 = arch_register_get_index(op2); - vfp_liveness live = vfp_live_args_after(sim, n, REGMASK(out)); + x87_simulator *sim = state->sim; + ir_node *op1 = get_irn_n(n, n_ia32_binary_left); + ir_node *op2 = get_irn_n(n, n_ia32_binary_right); + const arch_register_t *op1_reg = x87_get_irn_register(sim, op1); + const arch_register_t *op2_reg = x87_get_irn_register(sim, op2); + const arch_register_t *out = x87_get_irn_register(sim, n); + int reg_index_1 = arch_register_get_index(op1_reg); + int reg_index_2 = arch_register_get_index(op2_reg); + vfp_liveness live = vfp_live_args_after(sim, n, REGMASK(out)); + int op1_live_after; + int op2_live_after; DB((dbg, LEVEL_1, ">>> %+F %s, %s -> %s\n", n, - arch_register_get_name(op1), arch_register_get_name(op2), + arch_register_get_name(op1_reg), arch_register_get_name(op2_reg), arch_register_get_name(out))); DEBUG_ONLY(vfp_dump_live(live)); DB((dbg, LEVEL_1, "Stack before: ")); DEBUG_ONLY(x87_dump_stack(state)); - op1_idx = x87_on_stack(state, reg_index_1); - assert(op1_idx >= 0); + if(reg_index_1 == REG_VFP_UKNWN) { + op1_idx = 0; + op1_live_after = 1; + } else { + op1_idx = x87_on_stack(state, reg_index_1); + assert(op1_idx >= 0); + op1_live_after = is_vfp_live(arch_register_get_index(op1_reg), live); + } if (reg_index_2 != REG_VFP_NOREG) { - /* second operand is a vfp register */ - op2_idx = x87_on_stack(state, reg_index_2); - assert(op2_idx >= 0); + if(reg_index_2 == REG_VFP_UKNWN) { + op2_idx = 0; + op2_live_after = 1; + } else { + /* second operand is a vfp register */ + op2_idx = x87_on_stack(state, reg_index_2); + assert(op2_idx >= 0); + op2_live_after + = is_vfp_live(arch_register_get_index(op2_reg), live); + } - if (is_vfp_live(arch_register_get_index(op2), live)) { + if (op2_live_after) { /* Second operand is live. */ - if (is_vfp_live(arch_register_get_index(op1), live)) { + if (op1_live_after) { /* Both operands are live: push the first one. This works even for op1 == op2. */ - x87_create_fpush(state, n, op1_idx, BINOP_IDX_2); + x87_create_fpush(state, n, op1_idx, n_ia32_binary_right); /* now do fxxx (tos=tos X op) */ op1_idx = 0; op2_idx += 1; @@ -931,7 +967,7 @@ static int sim_binop(x87_state *state, ir_node *n, const exchange_tmpl *tmpl) { } } else { /* Second operand is dead. */ - if (is_vfp_live(arch_register_get_index(op1), live)) { + if (op1_live_after) { /* First operand is live: bring second to tos. */ if (op2_idx != 0) { x87_create_fxch(state, n, op2_idx); @@ -985,9 +1021,9 @@ static int sim_binop(x87_state *state, ir_node *n, const exchange_tmpl *tmpl) { } } else { /* second operand is an address mode */ - if (is_vfp_live(arch_register_get_index(op1), live)) { + if (op1_live_after) { /* first operand is live: push it here */ - x87_create_fpush(state, n, op1_idx, BINOP_IDX_1); + x87_create_fpush(state, n, op1_idx, n_ia32_binary_left); op1_idx = 0; /* use fxxx (tos = tos X mem) */ dst = tmpl->normal_op; @@ -1013,19 +1049,19 @@ static int sim_binop(x87_state *state, ir_node *n, const exchange_tmpl *tmpl) { /* patch the operation */ attr = get_ia32_x87_attr(n); - attr->x87[0] = op1 = &ia32_st_regs[op1_idx]; + attr->x87[0] = op1_reg = &ia32_st_regs[op1_idx]; if (reg_index_2 != REG_VFP_NOREG) { - attr->x87[1] = op2 = &ia32_st_regs[op2_idx]; + attr->x87[1] = op2_reg = &ia32_st_regs[op2_idx]; } attr->x87[2] = out = &ia32_st_regs[out_idx]; if (reg_index_2 != REG_VFP_NOREG) { DB((dbg, LEVEL_1, "<<< %s %s, %s -> %s\n", get_irn_opname(n), - arch_register_get_name(op1), arch_register_get_name(op2), + arch_register_get_name(op1_reg), arch_register_get_name(op2_reg), arch_register_get_name(out))); } else { DB((dbg, LEVEL_1, "<<< %s %s, [AM] -> %s\n", get_irn_opname(n), - arch_register_get_name(op1), + arch_register_get_name(op1_reg), arch_register_get_name(out))); } @@ -1138,7 +1174,7 @@ static void collect_and_rewire_users(ir_node *store, ir_node *old_val, ir_node * */ static int sim_store(x87_state *state, ir_node *n, ir_op *op, ir_op *op_p) { x87_simulator *sim = state->sim; - ir_node *val = get_irn_n(n, STORE_VAL_IDX); + ir_node *val = get_irn_n(n, n_ia32_vfst_val); const arch_register_t *op2 = x87_get_irn_register(sim, val); unsigned live = vfp_live_args_after(sim, n, 0); int insn = NO_NODE_ADDED; @@ -1177,11 +1213,12 @@ static int sim_store(x87_state *state, ir_node *n, ir_op *op, ir_op *op_p) { Solution: - stack not full: push value and fstp - stack full: fstp value and load again + Note that we cannot test on mode_E, because floats might be 96bit ... */ - if (mode == mode_E) { + if (get_mode_size_bits(mode) > 64 || mode == mode_Ls) { if (depth < N_x87_REGS) { /* ok, we have a free register: push + fstp */ - x87_create_fpush(state, n, op2_idx, STORE_VAL_IDX); + x87_create_fpush(state, n, op2_idx, n_ia32_vfst_val); x87_pop(state); x87_patch_insn(n, op_p); } else { @@ -1200,7 +1237,6 @@ static int sim_store(x87_state *state, ir_node *n, ir_op *op, ir_op *op_p) { set_ia32_frame_ent(vfld, get_ia32_frame_ent(n)); if (is_ia32_use_frame(n)) set_ia32_use_frame(vfld); - set_ia32_am_flavour(vfld, get_ia32_am_flavour(n)); set_ia32_op_type(vfld, ia32_am_Source); add_ia32_am_offs_int(vfld, get_ia32_am_offs_int(n)); set_ia32_am_sc(vfld, get_ia32_am_sc(n)); @@ -1217,7 +1253,7 @@ static int sim_store(x87_state *state, ir_node *n, ir_op *op, ir_op *op_p) { /* reroute all former users of the store memory to the load memory */ edges_reroute(mem, mproj, irg); /* set the memory input of the load to the store memory */ - set_irn_n(vfld, 2, mem); + set_irn_n(vfld, n_ia32_vfld_mem, mem); sched_add_after(n, vfld); sched_add_after(vfld, rproj); @@ -1296,27 +1332,114 @@ GEN_STORE(fst) GEN_STORE(fist) /** - * Simulate a fCondJmp. - * +* Simulate a virtual fisttp. +* +* @param state the x87 state +* @param n the node that should be simulated (and patched) +*/ +static int sim_fisttp(x87_state *state, ir_node *n) { + x87_simulator *sim = state->sim; + ir_node *val = get_irn_n(n, n_ia32_vfst_val); + const arch_register_t *op2 = x87_get_irn_register(sim, val); + int insn = NO_NODE_ADDED; + ia32_x87_attr_t *attr; + int op2_reg_idx, op2_idx, depth; + + op2_reg_idx = arch_register_get_index(op2); + if (op2_reg_idx == REG_VFP_UKNWN) { + /* just take any value from stack */ + if (state->depth > 0) { + op2_idx = 0; + DEBUG_ONLY(op2 = NULL); + } else { + /* produce a new value which we will consume immediately */ + x87_create_fldz(state, n, op2_reg_idx); + op2_idx = x87_on_stack(state, op2_reg_idx); + assert(op2_idx >= 0); + } + } else { + op2_idx = x87_on_stack(state, op2_reg_idx); + DB((dbg, LEVEL_1, ">>> %+F %s ->\n", n, arch_register_get_name(op2))); + assert(op2_idx >= 0); + } + + depth = x87_get_depth(state); + + /* Note: although the value is still live here, it is destroyed because + of the pop. The register allocator is aware of that and introduced a copy + if the value must be alive. */ + + /* we can only store the tos to memory */ + if (op2_idx != 0) + x87_create_fxch(state, n, op2_idx); + + x87_pop(state); + x87_patch_insn(n, op_ia32_fisttp); + + attr = get_ia32_x87_attr(n); + attr->x87[1] = op2 = &ia32_st_regs[0]; + DB((dbg, LEVEL_1, "<<< %s %s ->\n", get_irn_opname(n), arch_register_get_name(op2))); + + return insn; +} /* sim_fisttp */ + +static int sim_FtstFnstsw(x87_state *state, ir_node *n) { + x87_simulator *sim = state->sim; + ia32_x87_attr_t *attr = get_ia32_x87_attr(n); + ir_node *op1_node = get_irn_n(n, n_ia32_vFtstFnstsw_left); + const arch_register_t *reg1 = x87_get_irn_register(sim, op1_node); + int reg_index_1 = arch_register_get_index(reg1); + int op1_idx = x87_on_stack(state, reg_index_1); + unsigned live = vfp_live_args_after(sim, n, 0); + + DB((dbg, LEVEL_1, ">>> %+F %s\n", n, arch_register_get_name(reg1))); + DEBUG_ONLY(vfp_dump_live(live)); + DB((dbg, LEVEL_1, "Stack before: ")); + DEBUG_ONLY(x87_dump_stack(state)); + assert(op1_idx >= 0); + + if (op1_idx != 0) { + /* bring the value to tos */ + x87_create_fxch(state, n, op1_idx); + op1_idx = 0; + } + + /* patch the operation */ + x87_patch_insn(n, op_ia32_FtstFnstsw); + reg1 = &ia32_st_regs[op1_idx]; + attr->x87[0] = reg1; + attr->x87[1] = NULL; + attr->x87[2] = NULL; + + if(!is_vfp_live(reg_index_1, live)) { + x87_create_fpop(state, sched_next(n), 1); + return NODE_ADDED; + } + + return NO_NODE_ADDED; +} + +/** * @param state the x87 state * @param n the node that should be simulated (and patched) - * - * @return NO_NODE_ADDED */ -static int sim_fCondJmp(x87_state *state, ir_node *n) { +static int sim_Fucom(x87_state *state, ir_node *n) { int op1_idx; int op2_idx = -1; - int pop_cnt = 0; - ia32_x87_attr_t *attr; + ia32_x87_attr_t *attr = get_ia32_x87_attr(n); ir_op *dst; x87_simulator *sim = state->sim; - ir_node *op1_node = get_irn_n(n, n_ia32_vfCondJmp_left); - ir_node *op2_node = get_irn_n(n, n_ia32_vfCondJmp_right); + ir_node *op1_node = get_irn_n(n, n_ia32_vFucomFnstsw_left); + ir_node *op2_node = get_irn_n(n, n_ia32_vFucomFnstsw_right); const arch_register_t *op1 = x87_get_irn_register(sim, op1_node); const arch_register_t *op2 = x87_get_irn_register(sim, op2_node); int reg_index_1 = arch_register_get_index(op1); int reg_index_2 = arch_register_get_index(op2); unsigned live = vfp_live_args_after(sim, n, 0); + int permuted = attr->attr.data.ins_permuted; + int xchg = 0; + int pops = 0; + int node_added = NO_NODE_ADDED; DB((dbg, LEVEL_1, ">>> %+F %s, %s\n", n, arch_register_get_name(op1), arch_register_get_name(op2))); @@ -1333,18 +1456,18 @@ static int sim_fCondJmp(x87_state *state, ir_node *n) { op2_idx = x87_on_stack(state, reg_index_2); assert(op2_idx >= 0); - if (is_vfp_live(arch_register_get_index(op2), live)) { + if (is_vfp_live(reg_index_2, live)) { /* second operand is live */ - if (is_vfp_live(arch_register_get_index(op1), live)) { + if (is_vfp_live(reg_index_1, live)) { /* both operands are live */ if (op1_idx == 0) { /* res = tos X op */ - dst = op_ia32_fcomJmp; } else if (op2_idx == 0) { /* res = op X tos */ - dst = op_ia32_fcomrJmp; + permuted = !permuted; + xchg = 1; } else { /* bring the first one to tos */ x87_create_fxch(state, n, op1_idx); @@ -1352,7 +1475,6 @@ static int sim_fCondJmp(x87_state *state, ir_node *n) { op2_idx = op1_idx; op1_idx = 0; /* res = tos X op */ - dst = op_ia32_fcomJmp; } } else { /* second live, first operand is dead here, bring it to tos. @@ -1365,12 +1487,11 @@ static int sim_fCondJmp(x87_state *state, ir_node *n) { op1_idx = 0; } /* res = tos X op, pop */ - dst = op_ia32_fcompJmp; - pop_cnt = 1; + pops = 1; } } else { /* second operand is dead */ - if (is_vfp_live(arch_register_get_index(op1), live)) { + if (is_vfp_live(reg_index_1, live)) { /* first operand is live: bring second to tos. This means further, op1_idx != op2_idx. */ assert(op1_idx != op2_idx); @@ -1381,8 +1502,9 @@ static int sim_fCondJmp(x87_state *state, ir_node *n) { op2_idx = 0; } /* res = op X tos, pop */ - dst = op_ia32_fcomrpJmp; - pop_cnt = 1; + pops = 1; + permuted = !permuted; + xchg = 1; } else { /* both operands are dead here, check first for identity. */ if (op1_idx == op2_idx) { @@ -1393,8 +1515,7 @@ static int sim_fCondJmp(x87_state *state, ir_node *n) { op2_idx = 0; } /* res = tos X op, pop */ - dst = op_ia32_fcompJmp; - pop_cnt = 1; + pops = 1; } /* different, move them to st and st(1) and pop both. The tricky part is to get one into st(1).*/ @@ -1407,8 +1528,7 @@ static int sim_fCondJmp(x87_state *state, ir_node *n) { op1_idx = 0; } /* res = tos X op, pop, pop */ - dst = op_ia32_fcomppJmp; - pop_cnt = 2; + pops = 2; } else if (op1_idx == 1) { /* good, first operand is already in the right place, move the second */ if (op2_idx != 0) { @@ -1417,8 +1537,10 @@ static int sim_fCondJmp(x87_state *state, ir_node *n) { assert(op1_idx != 0); op2_idx = 0; } - dst = op_ia32_fcomrppJmp; - pop_cnt = 2; + /* res = op X tos, pop, pop */ + permuted = !permuted; + xchg = 1; + pops = 2; } else { /* if one is already the TOS, we need two fxch */ if (op1_idx == 0) { @@ -1429,8 +1551,9 @@ static int sim_fCondJmp(x87_state *state, ir_node *n) { x87_create_fxch(state, n, op2_idx); op2_idx = 0; /* res = op X tos, pop, pop */ - dst = op_ia32_fcomrppJmp; - pop_cnt = 2; + pops = 2; + permuted = !permuted; + xchg = 1; } else if (op2_idx == 0) { /* second one is TOS, move to st(1) */ x87_create_fxch(state, n, 1); @@ -1439,8 +1562,7 @@ static int sim_fCondJmp(x87_state *state, ir_node *n) { x87_create_fxch(state, n, op1_idx); op1_idx = 0; /* res = tos X op, pop, pop */ - dst = op_ia32_fcomppJmp; - pop_cnt = 2; + pops = 2; } else { /* none of them is either TOS or st(1), 3 fxch needed */ x87_create_fxch(state, n, op2_idx); @@ -1450,41 +1572,66 @@ static int sim_fCondJmp(x87_state *state, ir_node *n) { x87_create_fxch(state, n, op1_idx); op1_idx = 0; /* res = tos X op, pop, pop */ - dst = op_ia32_fcomppJmp; - pop_cnt = 2; + pops = 2; } } } } } else { /* second operand is an address mode */ - if (is_vfp_live(arch_register_get_index(op1), live)) { + if (is_vfp_live(reg_index_1, live)) { /* first operand is live: bring it to TOS */ if (op1_idx != 0) { x87_create_fxch(state, n, op1_idx); op1_idx = 0; } - dst = op_ia32_fcomJmp; } else { /* first operand is dead: bring it to tos */ if (op1_idx != 0) { x87_create_fxch(state, n, op1_idx); op1_idx = 0; } - dst = op_ia32_fcompJmp; - pop_cnt = 1; + pops = 1; + } + } + + /* patch the operation */ + if (is_ia32_vFucomFnstsw(n)) { + int i; + + switch(pops) { + case 0: dst = op_ia32_FucomFnstsw; break; + case 1: dst = op_ia32_FucompFnstsw; break; + case 2: dst = op_ia32_FucomppFnstsw; break; + default: panic("invalid popcount in sim_Fucom"); } + + for(i = 0; i < pops; ++i) { + x87_pop(state); + } + } else if(is_ia32_vFucomi(n)) { + switch(pops) { + case 0: dst = op_ia32_Fucomi; break; + case 1: dst = op_ia32_Fucompi; x87_pop(state); break; + case 2: + dst = op_ia32_Fucompi; + x87_pop(state); + x87_create_fpop(state, sched_next(n), 1); + node_added = NODE_ADDED; + break; + default: panic("invalid popcount in sim_Fucom"); + } + } else { + panic("invalid operation %+F in sim_FucomFnstsw", n); } x87_patch_insn(n, dst); - assert(pop_cnt < 3); - if (pop_cnt >= 2) - x87_pop(state); - if (pop_cnt >= 1) - x87_pop(state); + if(xchg) { + int tmp = op1_idx; + op1_idx = op2_idx; + op2_idx = tmp; + } - /* patch the operation */ - attr = get_ia32_x87_attr(n); op1 = &ia32_st_regs[op1_idx]; attr->x87[0] = op1; if (op2_idx >= 0) { @@ -1492,41 +1639,52 @@ static int sim_fCondJmp(x87_state *state, ir_node *n) { attr->x87[1] = op2; } attr->x87[2] = NULL; + attr->attr.data.ins_permuted = permuted; - if (op2_idx >= 0) + if (op2_idx >= 0) { DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(n), arch_register_get_name(op1), arch_register_get_name(op2))); - else + } else { DB((dbg, LEVEL_1, "<<< %s %s, [AM]\n", get_irn_opname(n), arch_register_get_name(op1))); + } - return NO_NODE_ADDED; -} /* sim_fCondJmp */ + return node_added; +} -static -int sim_Keep(x87_state *state, ir_node *node) +static int sim_Keep(x87_state *state, ir_node *node) { const ir_node *op; const arch_register_t *op_reg; int reg_id; int op_stack_idx; unsigned live; + int i, arity; + int node_added = NO_NODE_ADDED; - op = get_irn_n(node, 0); - op_reg = arch_get_irn_register(state->sim->arch_env, op); - if(arch_register_get_class(op_reg) != &ia32_reg_classes[CLASS_ia32_vfp]) - return NO_NODE_ADDED; + DB((dbg, LEVEL_1, ">>> %+F\n", node)); - reg_id = arch_register_get_index(op_reg); - live = vfp_live_args_after(state->sim, node, 0); + arity = get_irn_arity(node); + for(i = 0; i < arity; ++i) { + op = get_irn_n(node, i); + op_reg = arch_get_irn_register(state->sim->arch_env, op); + if(arch_register_get_class(op_reg) != &ia32_reg_classes[CLASS_ia32_vfp]) + continue; - op_stack_idx = x87_on_stack(state, reg_id); - if(op_stack_idx >= 0 && !is_vfp_live(reg_id, live)) { - x87_create_fpop(state, sched_next(node), 1); - return NODE_ADDED; + reg_id = arch_register_get_index(op_reg); + live = vfp_live_args_after(state->sim, node, 0); + + op_stack_idx = x87_on_stack(state, reg_id); + if(op_stack_idx >= 0 && !is_vfp_live(reg_id, live)) { + x87_create_fpop(state, sched_next(node), 1); + node_added = NODE_ADDED; + } } - return NO_NODE_ADDED; + DB((dbg, LEVEL_1, "Stack after: ")); + DEBUG_ONLY(x87_dump_stack(state)); + + return node_added; } static @@ -1633,24 +1791,23 @@ static ir_node *create_Copy(x87_state *state, ir_node *n) { * @return NO_NODE_ADDED */ static int sim_Copy(x87_state *state, ir_node *n) { - x87_simulator *sim; - ir_node *pred; - const arch_register_t *out; - const arch_register_t *op1; - ir_node *node, *next; - ia32_x87_attr_t *attr; - int op1_idx, out_idx; - unsigned live; - - ir_mode *mode = get_irn_mode(n); + x87_simulator *sim = state->sim; + ir_node *pred; + const arch_register_t *out; + const arch_register_t *op1; + const arch_register_class_t *cls; + ir_node *node, *next; + ia32_x87_attr_t *attr; + int op1_idx, out_idx; + unsigned live; - if (!mode_is_float(mode)) + cls = arch_get_irn_reg_class(sim->arch_env, n, -1); + if (cls->regs != ia32_vfp_regs) return 0; - sim = state->sim; pred = get_irn_n(n, 0); - out = x87_get_irn_register(sim, n); - op1 = x87_get_irn_register(sim, pred); + out = x87_get_irn_register(sim, n); + op1 = x87_get_irn_register(sim, pred); live = vfp_live_args_after(sim, n, REGMASK(out)); DB((dbg, LEVEL_1, ">>> %+F %s -> %s\n", n, @@ -1698,8 +1855,7 @@ static int sim_Copy(x87_state *state, ir_node *n) { keep_float_node_alive(state, pred); } - DB((dbg, LEVEL_1, "<<< %+F %s -> %s\n", node, op1->name, - arch_get_irn_register(sim->arch_env, node)->name)); + DB((dbg, LEVEL_1, "<<< %+F %s -> ?\n", node, op1->name)); } else { out_idx = x87_on_stack(state, arch_register_get_index(out)); @@ -1746,11 +1902,10 @@ static int sim_Copy(x87_state *state, ir_node *n) { } /* sim_Copy */ /** - * Returns the result proj of the call, or NULL if the result is not used + * Returns the result proj of the call */ static ir_node *get_call_result_proj(ir_node *call) { const ir_edge_t *edge; - ir_node *resproj = NULL; /* search the result proj */ foreach_out_edge(call, edge) { @@ -1758,23 +1913,10 @@ static ir_node *get_call_result_proj(ir_node *call) { long pn = get_Proj_proj(proj); if (pn == pn_be_Call_first_res) { - resproj = proj; - break; - } - } - if (resproj == NULL) { - return NULL; - } - - /* the result proj is connected to a Keep and maybe other nodes */ - foreach_out_edge(resproj, edge) { - ir_node *pred = get_edge_src_irn(edge); - if (!be_is_Keep(pred)) { - return resproj; + return proj; } } - /* only be_Keep found, so result is not used */ return NULL; } /* get_call_result_proj */ @@ -1783,24 +1925,24 @@ static ir_node *get_call_result_proj(ir_node *call) { * * @param state the x87 state * @param n the node that should be simulated - * @param arch_env the architecture environment * * @return NO_NODE_ADDED */ -static int sim_Call(x87_state *state, ir_node *n, const arch_env_t *arch_env) +static int sim_Call(x87_state *state, ir_node *n) { ir_type *call_tp = be_Call_get_type(n); ir_type *res_type; ir_mode *mode; ir_node *resproj; const arch_register_t *reg; - (void) arch_env; + + DB((dbg, LEVEL_1, ">>> %+F\n", n)); /* at the begin of a call the x87 state should be empty */ assert(state->depth == 0 && "stack not empty before call"); if (get_method_n_ress(call_tp) <= 0) - return NO_NODE_ADDED; + goto end_call; /* * If the called function returns a float, it is returned in st(0). @@ -1811,15 +1953,18 @@ static int sim_Call(x87_state *state, ir_node *n, const arch_env_t *arch_env) mode = get_type_mode(res_type); if (mode == NULL || !mode_is_float(mode)) - return NO_NODE_ADDED; + goto end_call; resproj = get_call_result_proj(n); - if (resproj == NULL) - return NO_NODE_ADDED; + assert(resproj != NULL); reg = x87_get_irn_register(state->sim, resproj); x87_push(state, arch_register_get_index(reg), resproj); +end_call: + DB((dbg, LEVEL_1, "Stack after: ")); + DEBUG_ONLY(x87_dump_stack(state)); + return NO_NODE_ADDED; } /* sim_Call */ @@ -1933,6 +2078,42 @@ static int sim_Perm(x87_state *state, ir_node *irn) { return NO_NODE_ADDED; } /* sim_Perm */ +static int sim_Barrier(x87_state *state, ir_node *node) { + //const arch_env_t *arch_env = state->sim->arch_env; + int i, arity; + + /* materialize unknown if needed */ + arity = get_irn_arity(node); + for(i = 0; i < arity; ++i) { + const arch_register_t *reg; + ir_node *zero; + ir_node *block; + ia32_x87_attr_t *attr; + ir_node *in = get_irn_n(node, i); + + if(!is_ia32_Unknown_VFP(in)) + continue; + + /* TODO: not completely correct... */ + reg = &ia32_vfp_regs[REG_VFP_UKNWN]; + + /* create a zero */ + block = get_nodes_block(node); + zero = new_rd_ia32_fldz(NULL, current_ir_graph, block, mode_E); + x87_push(state, arch_register_get_index(reg), zero); + + attr = get_ia32_x87_attr(zero); + attr->x87[2] = &ia32_st_regs[0]; + + sched_add_before(node, zero); + + set_irn_n(node, i, zero); + } + + return NO_NODE_ADDED; +} + + /** * Kill any dead registers at block start by popping them from the stack. * @@ -1967,6 +2148,22 @@ static x87_state *x87_kill_deads(x87_simulator *sim, ir_node *block, x87_state * DEBUG_ONLY(vfp_dump_live(live)); DEBUG_ONLY(x87_dump_stack(state)); + if (kill_mask != 0 && live == 0) { + /* special case: kill all registers */ + if (ia32_cg_config.use_femms || ia32_cg_config.use_emms) { + if (ia32_cg_config.use_femms) { + /* use FEMMS on AMD processors to clear all */ + keep = new_rd_ia32_femms(NULL, get_irn_irg(block), block); + } else { + /* use EMMS to clear all */ + keep = new_rd_ia32_emms(NULL, get_irn_irg(block), block); + } + sched_add_before(first_insn, keep); + keep_alive(keep); + x87_emms(state); + return state; + } + } /* now kill registers */ while (kill_mask) { /* we can only kill from TOS, so bring them up */ @@ -2128,6 +2325,12 @@ static void x87_simulate_block(x87_simulator *sim, ir_node *block) { bl_state->end = state; } /* x87_simulate_block */ +static void register_sim(ir_op *op, sim_func func) +{ + assert(op->ops.generic == NULL); + op->ops.generic = (op_func) func; +} + /** * Create a new x87 simulator. * @@ -2150,33 +2353,31 @@ static void x87_init_simulator(x87_simulator *sim, ir_graph *irg, /* set the generic function pointer of instruction we must simulate */ clear_irp_opcodes_generic_func(); -#define ASSOC(op) (op_ ## op)->ops.generic = (op_func)(sim_##op) -#define ASSOC_IA32(op) (op_ia32_v ## op)->ops.generic = (op_func)(sim_##op) -#define ASSOC_BE(op) (op_be_ ## op)->ops.generic = (op_func)(sim_##op) - ASSOC_IA32(fld); - ASSOC_IA32(fild); - ASSOC_IA32(fld1); - ASSOC_IA32(fldz); - ASSOC_IA32(fadd); - ASSOC_IA32(fsub); - ASSOC_IA32(fmul); - ASSOC_IA32(fdiv); - ASSOC_IA32(fprem); - ASSOC_IA32(fabs); - ASSOC_IA32(fchs); - ASSOC_IA32(fist); - ASSOC_IA32(fst); - ASSOC_IA32(fCondJmp); - ASSOC_BE(Copy); - ASSOC_BE(Call); - ASSOC_BE(Spill); - ASSOC_BE(Reload); - ASSOC_BE(Return); - ASSOC_BE(Perm); - ASSOC_BE(Keep); -#undef ASSOC_BE -#undef ASSOC_IA32 -#undef ASSOC + register_sim(op_ia32_vfld, sim_fld); + register_sim(op_ia32_vfild, sim_fild); + register_sim(op_ia32_vfld1, sim_fld1); + register_sim(op_ia32_vfldz, sim_fldz); + register_sim(op_ia32_vfadd, sim_fadd); + register_sim(op_ia32_vfsub, sim_fsub); + register_sim(op_ia32_vfmul, sim_fmul); + register_sim(op_ia32_vfdiv, sim_fdiv); + register_sim(op_ia32_vfprem, sim_fprem); + register_sim(op_ia32_vfabs, sim_fabs); + register_sim(op_ia32_vfchs, sim_fchs); + register_sim(op_ia32_vfist, sim_fist); + register_sim(op_ia32_vfisttp, sim_fisttp); + register_sim(op_ia32_vfst, sim_fst); + register_sim(op_ia32_vFtstFnstsw, sim_FtstFnstsw); + register_sim(op_ia32_vFucomFnstsw, sim_Fucom); + register_sim(op_ia32_vFucomi, sim_Fucom); + register_sim(op_be_Copy, sim_Copy); + register_sim(op_be_Call, sim_Call); + register_sim(op_be_Spill, sim_Spill); + register_sim(op_be_Reload, sim_Reload); + register_sim(op_be_Return, sim_Return); + register_sim(op_be_Perm, sim_Perm); + register_sim(op_be_Keep, sim_Keep); + register_sim(op_be_Barrier, sim_Barrier); } /* x87_init_simulator */ /** @@ -2217,7 +2418,7 @@ void x87_simulate_graph(const arch_env_t *arch_env, be_irg_t *birg) { x87_init_simulator(&sim, irg, arch_env); start_block = get_irg_start_block(irg); - bl_state = x87_get_bl_state(&sim, start_block); + bl_state = x87_get_bl_state(&sim, start_block); /* start with the empty state */ bl_state->begin = empty;