+/* Instantiate the store simulators sim_fst (float store) and sim_fist
+ (integer store) via the GEN_STORE macro (definition not visible in this
+ excerpt); sim_fst is reused by sim_Spill below. */
+GEN_STORE(fst)
+GEN_STORE(fist)
+
+/**
+ * Simulate a virtual FtstFnstsw node.
+ *
+ * @param state the x87 state
+ * @param n the node that should be simulated (and patched)
+ *
+ * @return NODE_ADDED if an fpop was scheduled after n, NO_NODE_ADDED otherwise
+ */
+static int sim_FtstFnstsw(x87_state *state, ir_node *n) {
+ x87_simulator *sim = state->sim;
+ ia32_x87_attr_t *attr = get_ia32_x87_attr(n);
+ ir_node *op1_node = get_irn_n(n, n_ia32_vFtstFnstsw_left);
+ const arch_register_t *reg1 = x87_get_irn_register(sim, op1_node);
+ int reg_index_1 = arch_register_get_index(reg1);
+ int op1_idx = x87_on_stack(state, reg_index_1);
+ unsigned live = vfp_live_args_after(sim, n, 0);
+
+ DB((dbg, LEVEL_1, ">>> %+F %s\n", n, arch_register_get_name(reg1)));
+ DEBUG_ONLY(vfp_dump_live(live));
+ DB((dbg, LEVEL_1, "Stack before: "));
+ DEBUG_ONLY(x87_dump_stack(state));
+ assert(op1_idx >= 0);
+
+ if (op1_idx != 0) {
+ /* bring the value to tos */
+ x87_create_fxch(state, n, op1_idx);
+ op1_idx = 0;
+ }
+
+ /* patch the operation */
+ x87_patch_insn(n, op_ia32_FtstFnstsw);
+ reg1 = &ia32_st_regs[op1_idx];
+ attr->x87[0] = reg1;
+ attr->x87[1] = NULL;
+ attr->x87[2] = NULL;
+
+ /* operand is dead after this node: schedule an explicit fpop behind it */
+ if(!is_vfp_live(reg_index_1, live)) {
+ x87_create_fpop(state, sched_next(n), 1);
+ return NODE_ADDED;
+ }
+
+ return NO_NODE_ADDED;
+}
+
+/**
+ * Simulate a virtual Fucom-style compare (vFucomFnstsw or vFucomi): bring
+ * the operands into place, pick the popping variant and patch the node.
+ *
+ * @param state the x87 state
+ * @param n the node that should be simulated (and patched)
+ *
+ * @return NODE_ADDED if an fpop was scheduled after n, NO_NODE_ADDED otherwise
+ */
+static int sim_Fucom(x87_state *state, ir_node *n) {
+ int op1_idx;
+ int op2_idx = -1;
+ ia32_x87_attr_t *attr = get_ia32_x87_attr(n);
+ ir_op *dst; /* the opcode the node is patched to */
+ x87_simulator *sim = state->sim;
+ ir_node *op1_node = get_irn_n(n, n_ia32_vFucomFnstsw_left);
+ ir_node *op2_node = get_irn_n(n, n_ia32_vFucomFnstsw_right);
+ const arch_register_t *op1 = x87_get_irn_register(sim, op1_node);
+ const arch_register_t *op2 = x87_get_irn_register(sim, op2_node);
+ int reg_index_1 = arch_register_get_index(op1);
+ int reg_index_2 = arch_register_get_index(op2);
+ unsigned live = vfp_live_args_after(sim, n, 0);
+ int permuted = attr->attr.data.ins_permuted;
+ int xchg = 0; /* operands must be swapped in the x87 attr at the end */
+ int pops = 0; /* number of stack slots to pop (0, 1 or 2) */
+ int node_added = NO_NODE_ADDED;
+
+ DB((dbg, LEVEL_1, ">>> %+F %s, %s\n", n,
+ arch_register_get_name(op1), arch_register_get_name(op2)));
+ DEBUG_ONLY(vfp_dump_live(live));
+ DB((dbg, LEVEL_1, "Stack before: "));
+ DEBUG_ONLY(x87_dump_stack(state));
+
+ op1_idx = x87_on_stack(state, reg_index_1);
+ assert(op1_idx >= 0);
+
+ /* BEWARE: check for comp a,a cases, they might happen */
+ if (reg_index_2 != REG_VFP_NOREG) {
+ /* second operand is a vfp register */
+ op2_idx = x87_on_stack(state, reg_index_2);
+ assert(op2_idx >= 0);
+
+ if (is_vfp_live(reg_index_2, live)) {
+ /* second operand is live */
+
+ if (is_vfp_live(reg_index_1, live)) {
+ /* both operands are live */
+
+ if (op1_idx == 0) {
+ /* res = tos X op */
+ } else if (op2_idx == 0) {
+ /* res = op X tos */
+ permuted = !permuted;
+ xchg = 1;
+ } else {
+ /* bring the first one to tos */
+ x87_create_fxch(state, n, op1_idx);
+ /* NOTE(review): op2_idx == 0 was already handled above, so this
+ check looks unreachable here -- confirm */
+ if (op2_idx == 0)
+ op2_idx = op1_idx;
+ op1_idx = 0;
+ /* res = tos X op */
+ }
+ } else {
+ /* second live, first operand is dead here, bring it to tos.
+ This means further, op1_idx != op2_idx. */
+ assert(op1_idx != op2_idx);
+ if (op1_idx != 0) {
+ x87_create_fxch(state, n, op1_idx);
+ if (op2_idx == 0)
+ op2_idx = op1_idx;
+ op1_idx = 0;
+ }
+ /* res = tos X op, pop */
+ pops = 1;
+ }
+ } else {
+ /* second operand is dead */
+ if (is_vfp_live(reg_index_1, live)) {
+ /* first operand is live: bring second to tos.
+ This means further, op1_idx != op2_idx. */
+ assert(op1_idx != op2_idx);
+ if (op2_idx != 0) {
+ x87_create_fxch(state, n, op2_idx);
+ if (op1_idx == 0)
+ op1_idx = op2_idx;
+ op2_idx = 0;
+ }
+ /* res = op X tos, pop */
+ pops = 1;
+ permuted = !permuted;
+ xchg = 1;
+ } else {
+ /* both operands are dead here, check first for identity. */
+ if (op1_idx == op2_idx) {
+ /* identically, one pop needed */
+ if (op1_idx != 0) {
+ x87_create_fxch(state, n, op1_idx);
+ op1_idx = 0;
+ op2_idx = 0;
+ }
+ /* res = tos X op, pop */
+ pops = 1;
+ }
+ /* different, move them to st and st(1) and pop both.
+ The tricky part is to get one into st(1).*/
+ else if (op2_idx == 1) {
+ /* good, second operand is already in the right place, move the first */
+ if (op1_idx != 0) {
+ /* bring the first on top */
+ x87_create_fxch(state, n, op1_idx);
+ assert(op2_idx != 0);
+ op1_idx = 0;
+ }
+ /* res = tos X op, pop, pop */
+ pops = 2;
+ } else if (op1_idx == 1) {
+ /* good, first operand is already in the right place, move the second */
+ if (op2_idx != 0) {
+ /* bring the first on top */
+ x87_create_fxch(state, n, op2_idx);
+ assert(op1_idx != 0);
+ op2_idx = 0;
+ }
+ /* res = op X tos, pop, pop */
+ permuted = !permuted;
+ xchg = 1;
+ pops = 2;
+ } else {
+ /* if one is already the TOS, we need two fxch */
+ if (op1_idx == 0) {
+ /* first one is TOS, move to st(1) */
+ x87_create_fxch(state, n, 1);
+ assert(op2_idx != 1);
+ op1_idx = 1;
+ x87_create_fxch(state, n, op2_idx);
+ op2_idx = 0;
+ /* res = op X tos, pop, pop */
+ pops = 2;
+ permuted = !permuted;
+ xchg = 1;
+ } else if (op2_idx == 0) {
+ /* second one is TOS, move to st(1) */
+ x87_create_fxch(state, n, 1);
+ assert(op1_idx != 1);
+ op2_idx = 1;
+ x87_create_fxch(state, n, op1_idx);
+ op1_idx = 0;
+ /* res = tos X op, pop, pop */
+ pops = 2;
+ } else {
+ /* none of them is either TOS or st(1), 3 fxch needed */
+ x87_create_fxch(state, n, op2_idx);
+ assert(op1_idx != 0);
+ x87_create_fxch(state, n, 1);
+ op2_idx = 1;
+ x87_create_fxch(state, n, op1_idx);
+ op1_idx = 0;
+ /* res = tos X op, pop, pop */
+ pops = 2;
+ }
+ }
+ }
+ }
+ } else {
+ /* second operand is an address mode */
+ if (is_vfp_live(reg_index_1, live)) {
+ /* first operand is live: bring it to TOS */
+ if (op1_idx != 0) {
+ x87_create_fxch(state, n, op1_idx);
+ op1_idx = 0;
+ }
+ } else {
+ /* first operand is dead: bring it to tos */
+ if (op1_idx != 0) {
+ x87_create_fxch(state, n, op1_idx);
+ op1_idx = 0;
+ }
+ pops = 1;
+ }
+ }
+
+ /* patch the operation */
+ if(is_ia32_vFucomFnstsw(n)) {
+ int i;
+
+ switch(pops) {
+ case 0: dst = op_ia32_FucomFnstsw; break;
+ case 1: dst = op_ia32_FucompFnstsw; break;
+ case 2: dst = op_ia32_FucomppFnstsw; break;
+ default: panic("invalid popcount in sim_Fucom");
+ }
+
+ for(i = 0; i < pops; ++i) {
+ x87_pop(state);
+ }
+ } else if(is_ia32_vFucomi(n)) {
+ switch(pops) {
+ case 0: dst = op_ia32_Fucomi; break;
+ case 1: dst = op_ia32_Fucompi; x87_pop(state); break;
+ case 2:
+ /* fucompi pops only once: the second pop becomes an extra fpop node */
+ dst = op_ia32_Fucompi;
+ x87_pop(state);
+ x87_create_fpop(state, sched_next(n), 1);
+ node_added = NODE_ADDED;
+ break;
+ default: panic("invalid popcount in sim_Fucom");
+ }
+ } else {
+ panic("invalid operation %+F in sim_FucomFnstsw", n);
+ }
+
+ x87_patch_insn(n, dst);
+ if(xchg) {
+ int tmp = op1_idx;
+ op1_idx = op2_idx;
+ op2_idx = tmp;
+ }
+
+ op1 = &ia32_st_regs[op1_idx];
+ attr->x87[0] = op1;
+ if (op2_idx >= 0) {
+ op2 = &ia32_st_regs[op2_idx];
+ attr->x87[1] = op2;
+ }
+ attr->x87[2] = NULL;
+ attr->attr.data.ins_permuted = permuted;
+
+ if (op2_idx >= 0)
+ DB((dbg, LEVEL_1, "<<< %s %s, %s\n", get_irn_opname(n),
+ arch_register_get_name(op1), arch_register_get_name(op2)));
+ else
+ DB((dbg, LEVEL_1, "<<< %s %s, [AM]\n", get_irn_opname(n),
+ arch_register_get_name(op1)));
+
+ return node_added;
+}
+
+/**
+ * Simulate a be_Keep: pop every vfp operand that is dead after the Keep.
+ *
+ * @param state the x87 state
+ * @param node the node that should be simulated
+ *
+ * @return NODE_ADDED if at least one fpop was scheduled, NO_NODE_ADDED otherwise
+ */
+static int sim_Keep(x87_state *state, ir_node *node)
+{
+ const ir_node *op;
+ const arch_register_t *op_reg;
+ int reg_id;
+ int op_stack_idx;
+ unsigned live;
+ int i, arity;
+ int node_added = NO_NODE_ADDED;
+
+ DB((dbg, LEVEL_1, ">>> %+F\n", node));
+
+ arity = get_irn_arity(node);
+ for(i = 0; i < arity; ++i) {
+ op = get_irn_n(node, i);
+ op_reg = arch_get_irn_register(state->sim->arch_env, op);
+ /* only vfp operands are of interest here */
+ if(arch_register_get_class(op_reg) != &ia32_reg_classes[CLASS_ia32_vfp])
+ continue;
+
+ reg_id = arch_register_get_index(op_reg);
+ /* NOTE(review): live does not depend on i and could be hoisted out of
+ the loop */
+ live = vfp_live_args_after(state->sim, node, 0);
+
+ op_stack_idx = x87_on_stack(state, reg_id);
+ if(op_stack_idx >= 0 && !is_vfp_live(reg_id, live)) {
+ x87_create_fpop(state, sched_next(node), 1);
+ node_added = NODE_ADDED;
+ }
+ }
+
+ DB((dbg, LEVEL_1, "Stack after: "));
+ DEBUG_ONLY(x87_dump_stack(state));
+
+ return node_added;
+}
+
+/**
+ * Attach a be_Keep directly behind @p node so that its result value does
+ * not become trivially dead.
+ *
+ * @param state the x87 state (used to reach the arch_env)
+ * @param node the scheduled node whose value must be kept alive
+ */
+static
+void keep_float_node_alive(x87_state *state, ir_node *node)
+{
+ const arch_register_class_t *cls
+ = arch_get_irn_reg_class(state->sim->arch_env, node, -1);
+ ir_node *in[1];
+ ir_node *keep;
+
+ in[0] = node;
+ keep = be_new_Keep(cls, get_irn_irg(node), get_nodes_block(node), 1, in);
+
+ assert(sched_is_scheduled(node));
+ sched_add_after(node, keep);
+}
+
+/**
+ * Create a copy of a node. Recreate the node if it's a constant.
+ *
+ * @param state the x87 state
+ * @param n the node to be copied
+ *
+ * @return the copy of n
+ */
+static ir_node *create_Copy(x87_state *state, ir_node *n) {
+ x87_simulator *sim = state->sim;
+ ir_graph *irg = get_irn_irg(n);
+ dbg_info *n_dbg = get_irn_dbg_info(n);
+ ir_mode *mode = get_irn_mode(n);
+ ir_node *block = get_nodes_block(n);
+ ir_node *pred = get_irn_n(n, 0);
+ ir_node *(*cnstr)(dbg_info *, ir_graph *, ir_node *, ir_mode *) = NULL;
+ ir_node *res;
+ const arch_register_t *out;
+ const arch_register_t *op1;
+ ia32_x87_attr_t *attr;
+
+ /* Do not copy constants, recreate them. */
+ switch (get_ia32_irn_opcode(pred)) {
+ case iro_ia32_Unknown_VFP:
+ /* an Unknown value is materialized as zero (fldz) */
+ case iro_ia32_fldz:
+ cnstr = new_rd_ia32_fldz;
+ break;
+ case iro_ia32_fld1:
+ cnstr = new_rd_ia32_fld1;
+ break;
+ case iro_ia32_fldpi:
+ cnstr = new_rd_ia32_fldpi;
+ break;
+ case iro_ia32_fldl2e:
+ cnstr = new_rd_ia32_fldl2e;
+ break;
+ case iro_ia32_fldl2t:
+ cnstr = new_rd_ia32_fldl2t;
+ break;
+ case iro_ia32_fldlg2:
+ cnstr = new_rd_ia32_fldlg2;
+ break;
+ case iro_ia32_fldln2:
+ cnstr = new_rd_ia32_fldln2;
+ break;
+ default:
+ break;
+ }
+
+ /* out: register of the copy result, op1: register of the copied value */
+ out = x87_get_irn_register(sim, n);
+ op1 = x87_get_irn_register(sim, pred);
+
+ if (cnstr != NULL) {
+ /* copy a constant */
+ res = (*cnstr)(n_dbg, irg, block, mode);
+
+ x87_push(state, arch_register_get_index(out), res);
+
+ attr = get_ia32_x87_attr(res);
+ /* the pushed result lives in st(0) */
+ attr->x87[2] = &ia32_st_regs[0];
+ } else {
+ int op1_idx = x87_on_stack(state, arch_register_get_index(op1));
+
+ res = new_rd_ia32_fpushCopy(n_dbg, irg, block, pred, mode);
+
+ x87_push(state, arch_register_get_index(out), res);
+
+ attr = get_ia32_x87_attr(res);
+ attr->x87[0] = &ia32_st_regs[op1_idx];
+ /* the pushed result lives in st(0) */
+ attr->x87[2] = &ia32_st_regs[0];
+ }
+ arch_set_irn_register(sim->arch_env, res, out);
+
+ return res;
+} /* create_Copy */
+
+/**
+ * Simulate a be_Copy.
+ *
+ * @param state the x87 state
+ * @param n the node that should be simulated (and patched)
+ *
+ * @return NO_NODE_ADDED
+ */
+static int sim_Copy(x87_state *state, ir_node *n) {
+ x87_simulator *sim = state->sim;
+ ir_node *pred;
+ const arch_register_t *out;
+ const arch_register_t *op1;
+ const arch_register_class_t *class;
+ ir_node *node, *next;
+ ia32_x87_attr_t *attr;
+ int op1_idx, out_idx;
+ unsigned live;
+
+ /* only vfp copies are simulated here */
+ class = arch_get_irn_reg_class(sim->arch_env, n, -1);
+ if (class->regs != ia32_vfp_regs)
+ return 0;
+
+ pred = get_irn_n(n, 0);
+ out = x87_get_irn_register(sim, n);
+ op1 = x87_get_irn_register(sim, pred);
+ live = vfp_live_args_after(sim, n, REGMASK(out));
+
+ DB((dbg, LEVEL_1, ">>> %+F %s -> %s\n", n,
+ arch_register_get_name(op1), arch_register_get_name(out)));
+ DEBUG_ONLY(vfp_dump_live(live));
+
+ /* handle the infamous unknown value */
+ if (arch_register_get_index(op1) == REG_VFP_UKNWN) {
+ /* Operand is still live, a real copy. We need here an fpush that can
+ hold a register, so use the fpushCopy or recreate constants */
+ node = create_Copy(state, n);
+
+ assert(is_ia32_fldz(node));
+ next = sched_next(n);
+ sched_remove(n);
+ exchange(n, node);
+ sched_add_before(next, node);
+
+ DB((dbg, LEVEL_1, "<<< %+F %s -> %s\n", node, op1->name,
+ arch_get_irn_register(sim->arch_env, node)->name));
+ return NO_NODE_ADDED;
+ }
+
+ op1_idx = x87_on_stack(state, arch_register_get_index(op1));
+
+ if (is_vfp_live(arch_register_get_index(op1), live)) {
+ ir_node *pred = get_irn_n(n, 0);
+
+ /* Operand is still live, a real copy. We need here an fpush that can
+ hold a register, so use the fpushCopy or recreate constants */
+ node = create_Copy(state, n);
+
+ /* We have to make sure the old value doesn't go dead (which can happen
+ * when we recreate constants). As the simulator expected that value in
+ * the pred blocks. This is unfortunate as removing it would save us 1
+ * instruction, but we would have to rerun all the simulation to get
+ * this correct...
+ */
+ next = sched_next(n);
+ sched_remove(n);
+ exchange(n, node);
+ sched_add_before(next, node);
+
+ if(get_irn_n_edges(pred) == 0) {
+ keep_float_node_alive(state, pred);
+ }
+
+ DB((dbg, LEVEL_1, "<<< %+F %s -> ?\n", node, op1->name));
+ } else {
+ out_idx = x87_on_stack(state, arch_register_get_index(out));
+
+ if (out_idx >= 0 && out_idx != op1_idx) {
+ /* Matze: out already on stack? how can this happen? */
+ assert(0);
+
+ /* op1 must be killed and placed where out is */
+ if (out_idx == 0) {
+ /* best case, simple remove and rename */
+ x87_patch_insn(n, op_ia32_Pop);
+ attr = get_ia32_x87_attr(n);
+ attr->x87[0] = op1 = &ia32_st_regs[0];
+
+ x87_pop(state);
+ x87_set_st(state, arch_register_get_index(out), n, op1_idx - 1);
+ } else {
+ /* move op1 to tos, store and pop it */
+ if (op1_idx != 0) {
+ x87_create_fxch(state, n, op1_idx);
+ op1_idx = 0;
+ }
+ x87_patch_insn(n, op_ia32_Pop);
+ attr = get_ia32_x87_attr(n);
+ attr->x87[0] = op1 = &ia32_st_regs[out_idx];
+
+ x87_pop(state);
+ x87_set_st(state, arch_register_get_index(out), n, out_idx - 1);
+ }
+ DB((dbg, LEVEL_1, "<<< %+F %s\n", n, op1->name));
+ } else {
+ /* just a virtual copy */
+ x87_set_st(state, arch_register_get_index(out), get_unop_op(n), op1_idx);
+ /* don't remove the node to keep the verifier quiet :),
+ the emitter won't emit any code for the node */
+#if 0
+ sched_remove(n);
+ DB((dbg, LEVEL_1, "<<< KILLED %s\n", get_irn_opname(n)));
+ exchange(n, get_unop_op(n));
+#endif
+ }
+ }
+ return NO_NODE_ADDED;
+} /* sim_Copy */
+
+/**
+ * Find the Proj of a be_Call that carries the first call result.
+ *
+ * @param call the call node
+ *
+ * @return the result Proj, or NULL if no such Proj exists
+ */
+static ir_node *get_call_result_proj(ir_node *call) {
+ const ir_edge_t *edge;
+ ir_node *result = NULL;
+
+ /* scan the users of the call for the first-result Proj */
+ foreach_out_edge(call, edge) {
+ ir_node *user = get_edge_src_irn(edge);
+ long pn = get_Proj_proj(user);
+
+ if (pn == pn_be_Call_first_res) {
+ result = user;
+ break;
+ }
+ }
+
+ return result;
+} /* get_call_result_proj */
+
+/**
+ * Simulate a be_Call.
+ *
+ * @param state the x87 state
+ * @param n the node that should be simulated
+ *
+ * @return NO_NODE_ADDED
+ */
+static int sim_Call(x87_state *state, ir_node *n)
+{
+ ir_type *call_tp = be_Call_get_type(n);
+ ir_type *res_type;
+ ir_mode *mode;
+ ir_node *resproj;
+ const arch_register_t *reg;
+
+ DB((dbg, LEVEL_1, ">>> %+F\n", n));
+
+ /* at the begin of a call the x87 state should be empty */
+ assert(state->depth == 0 && "stack not empty before call");
+
+ if (get_method_n_ress(call_tp) <= 0)
+ goto end_call;
+
+ /*
+ * If the called function returns a float, it is returned in st(0).
+ * This even happens if the return value is NOT used.
+ * Moreover, only one return result is supported.
+ */
+ res_type = get_method_res_type(call_tp, 0);
+ mode = get_type_mode(res_type);
+
+ if (mode == NULL || !mode_is_float(mode))
+ goto end_call;
+
+ resproj = get_call_result_proj(n);
+ assert(resproj != NULL);
+
+ /* push the result onto the simulated stack, it appears in st(0) */
+ reg = x87_get_irn_register(state->sim, resproj);
+ x87_push(state, arch_register_get_index(reg), resproj);
+
+end_call:
+ DB((dbg, LEVEL_1, "Stack after: "));
+ DEBUG_ONLY(x87_dump_stack(state));
+
+ return NO_NODE_ADDED;
+} /* sim_Call */
+
+/**
+ * Simulate a be_Spill.
+ *
+ * @param state the x87 state
+ * @param n the node that should be simulated (and patched)
+ *
+ * Should not happen, spills are lowered before the x87 simulator sees them.
+ */
+static int sim_Spill(x87_state *state, ir_node *n) {
+ assert(0 && "Spill not lowered");
+ return sim_fst(state, n); /* fall back to the generated float store */
+} /* sim_Spill */
+
+/**
+ * Simulate a be_Reload.
+ *
+ * @param state the x87 state
+ * @param n the node that should be simulated (and patched)
+ *
+ * Should not happen, reloads are lowered before the x87 simulator sees them.
+ */
+static int sim_Reload(x87_state *state, ir_node *n) {
+ assert(0 && "Reload not lowered");
+ return sim_fld(state, n); /* fall back to the load simulator */
+} /* sim_Reload */
+
+/**
+ * Simulate a be_Return.
+ *
+ * @param state the x87 state
+ * @param n the node that should be simulated (and patched)
+ *
+ * @return NO_NODE_ADDED
+ */
+static int sim_Return(x87_state *state, ir_node *n) {
+ int num_results = be_Return_get_n_rets(n);
+ int float_results = 0;
+ int k;
+
+ /* count the floating point return values: exactly these must still
+ reside on the x87 stack at the return */
+ for (k = 0; k < num_results; ++k) {
+ ir_node *value = get_irn_n(n, be_pos_Return_val + k);
+
+ if (mode_is_float(get_irn_mode(value)))
+ ++float_results;
+ }
+ assert(x87_get_depth(state) == float_results);
+
+ /* pop them virtually from the simulated stack */
+ for (k = 0; k < float_results; ++k)
+ x87_pop(state);
+
+ return NO_NODE_ADDED;
+} /* sim_Return */
+
+/** Input/output register pair of a Perm. NOTE(review): appears unused in
+ this excerpt -- confirm before removing. */
+typedef struct _perm_data_t {
+ const arch_register_t *in; /**< the input register */
+ const arch_register_t *out; /**< the output register */
+} perm_data_t;
+
+/**
+ * Simulate a be_Perm.
+ *
+ * @param state the x87 state
+ * @param irn the node that should be simulated (and patched)
+ *
+ * @return NO_NODE_ADDED
+ */
+static int sim_Perm(x87_state *state, ir_node *irn) {
+ int i, n;
+ x87_simulator *sim = state->sim;
+ ir_node *pred = get_irn_n(irn, 0);
+ int *stack_pos; /* old stack index of every Perm input */
+ const ir_edge_t *edge;
+
+ /* handle only floating point Perms */
+ if (! mode_is_float(get_irn_mode(pred)))
+ return NO_NODE_ADDED;
+
+ DB((dbg, LEVEL_1, ">>> %+F\n", irn));
+
+ /* Perm is a pure virtual instruction on x87.
+ All inputs must be on the FPU stack and are pairwise
+ different from each other.
+ So, all we need to do is to permutate the stack state. */
+ n = get_irn_arity(irn);
+ NEW_ARR_A(int, stack_pos, n);
+
+ /* collect old stack positions */
+ for (i = 0; i < n; ++i) {
+ const arch_register_t *inreg = x87_get_irn_register(sim, get_irn_n(irn, i));
+ int idx = x87_on_stack(state, arch_register_get_index(inreg));
+
+ assert(idx >= 0 && "Perm argument not on x87 stack");
+
+ stack_pos[i] = idx;
+ }
+ /* now do the permutation: bind each output Proj to the stack slot of
+ the corresponding input */
+ foreach_out_edge(irn, edge) {
+ ir_node *proj = get_edge_src_irn(edge);
+ const arch_register_t *out = x87_get_irn_register(sim, proj);
+ long num = get_Proj_proj(proj);
+
+ assert(0 <= num && num < n && "More Proj's than Perm inputs");
+ x87_set_st(state, arch_register_get_index(out), proj, stack_pos[(unsigned)num]);
+ }
+ DB((dbg, LEVEL_1, "<<< %+F\n", irn));
+
+ return NO_NODE_ADDED;
+} /* sim_Perm */
+
+/**
+ * Simulate a be_Barrier: materialize every Unknown_VFP operand by pushing
+ * a zero (fldz) before the barrier and rewiring the operand to it.
+ *
+ * @param state the x87 state
+ * @param node the node that should be simulated
+ *
+ * @return NO_NODE_ADDED
+ */
+static int sim_Barrier(x87_state *state, ir_node *node) {
+ int i, arity;
+
+ /* materialize unknown if needed */
+ arity = get_irn_arity(node);
+ for(i = 0; i < arity; ++i) {
+ const arch_register_t *reg;
+ ir_node *zero;
+ ir_node *block;
+ ia32_x87_attr_t *attr;
+ ir_node *in = get_irn_n(node, i);
+
+ if(!is_ia32_Unknown_VFP(in))
+ continue;
+
+ /* TODO: not completely correct... */
+ reg = &ia32_vfp_regs[REG_VFP_UKNWN];
+
+ /* create a zero */
+ block = get_nodes_block(node);
+ zero = new_rd_ia32_fldz(NULL, current_ir_graph, block, mode_E);
+ x87_push(state, arch_register_get_index(reg), zero);
+
+ attr = get_ia32_x87_attr(zero);
+ /* the pushed zero lives in st(0) */
+ attr->x87[2] = &ia32_st_regs[0];
+
+ sched_add_before(node, zero);
+
+ set_irn_n(node, i, zero);
+ }
+
+ return NO_NODE_ADDED;
+}
+
+
+/**
+ * Kill any dead registers at block start by popping them from the stack.
+ *
+ * @param sim the simulator handle
+ * @param block the current block
+ * @param start_state the x87 state at the begin of the block
+ *
+ * @return the x87 state after dead register killed
+ */
+static x87_state *x87_kill_deads(x87_simulator *sim, ir_node *block, x87_state *start_state) {
+ x87_state *state = start_state;
+ ir_node *first_insn = sched_first(block);
+ ir_node *keep = NULL;
+ unsigned live = vfp_live_args_after(sim, block, 0);
+ unsigned kill_mask; /* bit i set: stack slot i holds a dead value */
+ int i, depth, num_pop;
+
+ kill_mask = 0;
+ depth = x87_get_depth(state);
+ for (i = depth - 1; i >= 0; --i) {
+ int reg = x87_get_st_reg(state, i);
+
+ if (! is_vfp_live(reg, live))
+ kill_mask |= (1 << i);
+ }
+
+ if (kill_mask) {
+ /* create a new state, will be changed */
+ state = x87_clone_state(sim, state);
+
+ DB((dbg, LEVEL_1, "Killing deads:\n"));
+ DEBUG_ONLY(vfp_dump_live(live));
+ DEBUG_ONLY(x87_dump_stack(state));
+
+ if (kill_mask != 0 && live == 0) {
+ int cpu = sim->isa->arch;
+
+ /* special case: kill all registers */
+ if (ARCH_ATHLON(sim->isa->opt_arch) && ARCH_MMX(cpu)) {
+ if (ARCH_AMD(cpu)) {
+ /* use FEMMS on AMD processors to clear all */
+ keep = new_rd_ia32_femms(NULL, get_irn_irg(block), block);
+ } else {
+ /* use EMMS to clear all */
+ keep = new_rd_ia32_emms(NULL, get_irn_irg(block), block);
+ }
+ sched_add_before(first_insn, keep);
+ keep_alive(keep);
+ x87_emms(state);
+ return state;
+ }
+ }
+ /* now kill registers */
+ while (kill_mask) {
+ /* we can only kill from TOS, so bring them up */
+ if (! (kill_mask & 1)) {
+ /* search from behind, because we can do a double-pop */
+ for (i = depth - 1; i >= 0; --i) {
+ if (kill_mask & (1 << i)) {
+ kill_mask &= ~(1 << i);
+ kill_mask |= 1;
+ break;
+ }
+ }
+
+ /* NOTE(review): slot i is re-bound (reg index -1) to the previous
+ fpop node before the fxch -- presumably to keep the simulated
+ exchange consistent; confirm */
+ if (keep)
+ x87_set_st(state, -1, keep, i);
+ x87_create_fxch(state, first_insn, i);
+ }
+
+ if ((kill_mask & 3) == 3) {
+ /* we can do a double-pop */
+ num_pop = 2;
+ }
+ else {
+ /* only a single pop */
+ num_pop = 1;
+ }
+
+ depth -= num_pop;
+ kill_mask >>= num_pop;
+ keep = x87_create_fpop(state, first_insn, num_pop);
+ }
+ keep_alive(keep);
+ }
+ return state;
+} /* x87_kill_deads */
+
+/**
+ * If we have Phis with unknown operands then we have to make sure that some
+ * value is actually put onto the stack.
+ *
+ * @param state the x87 state at the end of pred_block
+ * @param block the block containing the Phis
+ * @param pred_block the predecessor block at position pos
+ * @param pos the Phi predecessor index to fix
+ */
+static void fix_unknown_phis(x87_state *state, ir_node *block,
+ ir_node *pred_block, int pos)
+{
+ ir_node *node, *op;
+
+ sched_foreach(block, node) {
+ ir_node *zero;
+ const arch_register_t *reg;
+ ia32_x87_attr_t *attr;
+
+ /* Phis are scheduled first, so we may stop at the first non-Phi */
+ if(!is_Phi(node))
+ break;
+
+ op = get_Phi_pred(node, pos);
+ if(!is_ia32_Unknown_VFP(op))
+ continue;
+
+ reg = arch_get_irn_register(state->sim->arch_env, node);
+
+ /* create a zero at end of pred block */
+ zero = new_rd_ia32_fldz(NULL, current_ir_graph, pred_block, mode_E);
+ x87_push(state, arch_register_get_index(reg), zero);
+
+ attr = get_ia32_x87_attr(zero);
+ /* the pushed zero lives in st(0) */
+ attr->x87[2] = &ia32_st_regs[0];
+
+ assert(is_ia32_fldz(zero));
+ sched_add_before(sched_last(pred_block), zero);
+
+ set_Phi_pred(node, pos, zero);
+ }
+}