2 * This file is part of libFirm.
3 * Copyright (C) 2012 University of Karlsruhe.
8 * @brief Handles fpu rounding modes
9 * @author Matthias Braun
11 * The problem we deal with here is that the x86 ABI lets the user control the
12 * fpu rounding mode. This means that for operations such as float to int
13 * conversion, which the C standard specifies as truncating, we have to spill
14 * the current fpu rounding mode, change it, and restore it afterwards.
19 #include "ia32_new_nodes.h"
20 #include "ia32_architecture.h"
21 #include "gen_ia32_regalloc_if.h"
35 #include "bessaconstr.h"
/* Entities for the two constant fpu control words that are loaded by
 * create_fpu_mode_reload() when ia32_cg_config.use_unsafe_floatconv is set.
 * Both start out as NULL and are filled in by create_fpcw_entities(). */
37 static ir_entity *fpcw_round = NULL;
38 static ir_entity *fpcw_truncate = NULL;
/**
 * Creates an entity in the global type that holds a constant of mode mode_Hu.
 *
 * @param value  the constant the entity is initialised with
 * @param name   used as the entity's ident; the ld_ident is set to the same
 *               ident
 *
 * NOTE(review): presumably returns the newly created entity (the return
 * statement is not part of the lines shown here) -- confirm.
 */
40 static ir_entity *create_ent(int value, const char *name)
42 ir_mode *mode = mode_Hu;
43 ir_type *type = new_type_primitive(mode);
44 ir_type *glob = get_glob_type();
/* the primitive type is given an alignment of 4 bytes */
50 set_type_alignment_bytes(type, 4);
52 tv = new_tarval_from_long(value, mode);
53 ent = new_entity(glob, new_id_from_str(name), type);
54 set_entity_ld_ident(ent, get_entity_ident(ent));
/* local visibility, marked with constant linkage */
55 set_entity_visibility(ent, ir_visibility_local);
56 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
/* the initialiser is a Const node built in the const-code graph */
58 cnst_irg = get_const_code_irg();
59 cnst = new_r_Const(cnst_irg, tv);
60 set_atomic_ent_value(ent, cnst);
/**
 * Creates the two constant control-word entities and stores them in
 * fpcw_round and fpcw_truncate.
 *
 * NOTE(review): in the x87 control word, bits 10-11 select the rounding mode.
 * 0xc7f has both bits set (round toward zero) while 0x37f has them cleared
 * (round to nearest), so the values look swapped relative to the entity
 * names. 0xc7f also clears the precision-control bits 8-9. Confirm against
 * the users of these entities before changing anything.
 */
65 static void create_fpcw_entities(void)
67 fpcw_round = create_ent(0xc7f, "_fpcw_round");
68 fpcw_truncate = create_ent(0x37f, "_fpcw_truncate");
/**
 * Spill callback for the fpu control word state; it is handed to
 * be_assure_state() by ia32_setup_fpu_mode().
 *
 * @param env    callback environment
 * @param state  the node that produces the control word value to spill
 * @param force  a spill node is created when this is 1, or when @p state is
 *               not an ia32 ChangeCW node
 *
 * The body also uses a node named `after` as the scheduling point for the
 * new spill.
 *
 * NOTE(review): the return statements are not part of the lines shown here;
 * presumably the created spill node (or NULL if none was created) is
 * returned -- confirm.
 */
71 static ir_node *create_fpu_mode_spill(void *env, ir_node *state, int force,
76 /* we don't spill the fpcw in unsafe mode */
77 if (ia32_cg_config.use_unsafe_floatconv) {
78 ir_node *block = get_nodes_block(state);
79 if (force == 1 || !is_ia32_ChangeCW(state)) {
/* a FnstCWNOP node stands in for the store and is scheduled after `after` */
80 ir_node *spill = new_bd_ia32_FnstCWNOP(NULL, block, state);
81 sched_add_after(after, spill);
/* regular path: store the control word to the stack frame with FnstCW */
87 if (force == 1 || !is_ia32_ChangeCW(state)) {
88 ir_graph *irg = get_irn_irg(state);
89 ir_node *block = get_nodes_block(state);
90 ir_node *noreg = ia32_new_NoReg_gp(irg);
91 ir_node *nomem = get_irg_no_mem(irg);
92 ir_node *frame = get_irg_frame(irg);
94 = new_bd_ia32_FnstCW(NULL, block, frame, noreg, nomem, state);
95 set_ia32_op_type(spill, ia32_AddrModeD);
96 /* use mode_Iu, as movl has a shorter opcode than movw */
97 set_ia32_ls_mode(spill, mode_Iu);
98 set_ia32_use_frame(spill);
/* skip_Proj: the spill is scheduled after the node behind a Proj, not after
 * the Proj itself */
100 sched_add_after(skip_Proj(after), spill);
/**
 * Builds an ia32 FldCW node in @p block whose address is given by @p entity.
 * The node uses source address mode, the load/store mode of the fp_cw
 * register class, and is assigned the FPCW register.
 *
 * @param block   block in which the node is created
 * @param entity  entity set as the address-mode symconst of the node
 *
 * NOTE(review): presumably returns the created node (return statement not
 * shown here) -- confirm.
 */
107 static ir_node *create_fldcw_ent(ir_node *block, ir_entity *entity)
109 ir_graph *irg = get_irn_irg(block);
110 ir_node *nomem = get_irg_no_mem(irg);
111 ir_node *noreg = ia32_new_NoReg_gp(irg);
/* base and index are both noreg and no memory is consumed; the address comes
 * solely from the entity set below */
114 reload = new_bd_ia32_FldCW(NULL, block, noreg, noreg, nomem);
115 set_ia32_op_type(reload, ia32_AddrModeS);
116 set_ia32_ls_mode(reload, ia32_reg_classes[CLASS_ia32_fp_cw].mode);
117 set_ia32_am_sc(reload, entity);
/* NOTE(review): the node addresses an entity and has no frame base, yet it is
 * flagged as using the frame -- confirm this is intended */
118 set_ia32_use_frame(reload);
119 arch_set_irn_register(reload, &ia32_registers[REG_FPCW]);
/**
 * Reload callback for the fpu control word state; it is handed to
 * be_assure_state() by ia32_setup_fpu_mode().
 *
 * @param env     callback environment
 * @param state   node whose graph is used for the created nodes
 * @param spill   used as the memory input of the FldCW that reloads a
 *                previously spilled control word
 * @param before  the new nodes are created in this node's block and
 *                scheduled before it
 *
 * Three code sequences appear below:
 *  - with ia32_cg_config.use_unsafe_floatconv set, a FldCW of one of the
 *    constant entities fpcw_round / fpcw_truncate;
 *  - a FldCW from the stack frame that uses @p spill as memory;
 *  - a sequence that stores the current control word, ORs it with an
 *    immediate in a general purpose value, stores it back and loads it with
 *    FldCW.
 *
 * NOTE(review): the conditions selecting between these sequences and the
 * return statements are not part of the lines shown here -- check them before
 * relying on this summary.
 */
124 static ir_node *create_fpu_mode_reload(void *env, ir_node *state,
125 ir_node *spill, ir_node *before,
128 ir_graph *irg = get_irn_irg(state);
129 ir_node *block = get_nodes_block(before);
130 ir_node *frame = get_irg_frame(irg);
131 ir_node *noreg = ia32_new_NoReg_gp(irg);
132 ir_node *reload = NULL;
135 if (ia32_cg_config.use_unsafe_floatconv) {
/* the constant entities are created on first use */
136 if (fpcw_round == NULL) {
137 create_fpcw_entities();
140 reload = create_fldcw_ent(block, fpcw_round);
142 reload = create_fldcw_ent(block, fpcw_truncate);
144 sched_add_before(before, reload);
/* reload the control word from the frame, consuming the spill as memory */
149 reload = new_bd_ia32_FldCW(NULL, block, frame, noreg, spill);
150 set_ia32_op_type(reload, ia32_AddrModeS);
151 set_ia32_ls_mode(reload, ia32_reg_classes[CLASS_ia32_fp_cw].mode);
152 set_ia32_use_frame(reload);
153 arch_set_irn_register(reload, &ia32_registers[REG_FPCW]);
155 sched_add_before(before, reload);
/* sequence that derives a new control word from the current one; every node
 * is scheduled before `before` in the order it is created */
157 ir_mode *lsmode = ia32_reg_classes[CLASS_ia32_fp_cw].mode;
158 ir_node *nomem = get_irg_no_mem(irg);
159 ir_node *cwstore, *load, *load_res, *orn, *store, *fldcw;
163 assert(last_state != NULL);
/* step 1: store the current control word to the frame */
164 cwstore = new_bd_ia32_FnstCW(NULL, block, frame, noreg, nomem,
166 set_ia32_op_type(cwstore, ia32_AddrModeD);
167 set_ia32_ls_mode(cwstore, lsmode);
168 set_ia32_use_frame(cwstore);
169 sched_add_before(before, cwstore);
/* step 2: load the stored word from the frame; cwstore is the memory input */
171 load = new_bd_ia32_Load(NULL, block, frame, noreg, cwstore);
172 set_ia32_op_type(load, ia32_AddrModeS);
173 set_ia32_ls_mode(load, lsmode);
174 set_ia32_use_frame(load);
175 sched_add_before(before, load);
177 load_res = new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
/* step 3: OR the loaded value with an Immediate created in the start block */
179 /* TODO: make the actual mode configurable in ChangeCW... */
180 or_const = new_bd_ia32_Immediate(NULL, get_irg_start_block(irg),
182 arch_set_irn_register(or_const, &ia32_registers[REG_GP_NOREG]);
183 orn = new_bd_ia32_Or(NULL, block, noreg, noreg, nomem, load_res,
185 sched_add_before(before, orn);
/* step 4: store the modified value back to the frame */
187 store = new_bd_ia32_Store(NULL, block, frame, noreg, nomem, orn);
188 set_ia32_op_type(store, ia32_AddrModeD);
189 /* use mode_Iu, as movl has a shorter opcode than movw */
190 set_ia32_ls_mode(store, mode_Iu);
191 set_ia32_use_frame(store);
192 store_proj = new_r_Proj(store, mode_M, pn_ia32_Store_M);
193 sched_add_before(before, store);
/* step 5: load the control word from the frame; the memory Proj of the store
 * is the memory input */
195 fldcw = new_bd_ia32_FldCW(NULL, block, frame, noreg, store_proj);
196 set_ia32_op_type(fldcw, ia32_AddrModeS);
197 set_ia32_ls_mode(fldcw, lsmode);
198 set_ia32_use_frame(fldcw);
199 arch_set_irn_register(fldcw, &ia32_registers[REG_FPCW]);
200 sched_add_before(before, fldcw);
/** Walker environment used while collecting nodes that define the fpu
 * control word. */
208 typedef struct collect_fpu_mode_nodes_env_t {
/* array of collected nodes; grown with ARR_APP1 by the walker */
209 ir_node **state_nodes;
210 } collect_fpu_mode_nodes_env_t;
/**
 * Graph walker callback: appends @p node to the state_nodes array of the
 * environment if the node is assigned the FPCW register and is not an ia32
 * ChangeCW node. The node's mode is tested with mode_is_data() first.
 *
 * @param node  the node currently visited
 * @param data  pointer to a collect_fpu_mode_nodes_env_t
 */
212 static void collect_fpu_mode_nodes_walker(ir_node *node, void *data)
214 collect_fpu_mode_nodes_env_t *env = (collect_fpu_mode_nodes_env_t*)data;
215 const arch_register_t *reg;
217 if (!mode_is_data(get_irn_mode(node)))
220 reg = arch_get_irn_register(node);
221 if (reg == &ia32_registers[REG_FPCW] && !is_ia32_ChangeCW(node)) {
222 ARR_APP1(ir_node*, env->state_nodes, node);
/**
 * Performs SSA construction for the fpu control word register of @p irg.
 *
 * The graph is walked to collect all nodes that are assigned the FPCW
 * register (ChangeCW nodes excepted). If there are none, the array is freed
 * again. Otherwise the collected nodes are registered as copies of the
 * initial FPCW register value, the users of that initial value are fixed up,
 * liveness information is updated or invalidated, and the FPCW register is
 * assigned to the Phi nodes created by the SSA construction.
 *
 * @param irg  the graph to process
 */
226 static void rewire_fpu_mode_nodes(ir_graph *irg)
228 collect_fpu_mode_nodes_env_t env;
229 be_ssa_construction_env_t senv;
230 const arch_register_t *reg = &ia32_registers[REG_FPCW];
231 ir_node *initial_value;
233 be_lv_t *lv = be_get_irg_liveness(irg);
236 /* do ssa construction for the fpu modes */
237 env.state_nodes = NEW_ARR_F(ir_node*, 0);
238 irg_walk_graph(irg, collect_fpu_mode_nodes_walker, NULL, &env);
240 /* nothing needs to be done, in fact we must not continue as for endless
241 * loops no one is using the initial_value and it will point to a bad node
244 if (ARR_LEN(env.state_nodes) == 0) {
245 DEL_ARR_F(env.state_nodes);
249 initial_value = be_get_initial_reg_value(irg, reg);
250 be_ssa_construction_init(&senv, irg);
251 be_ssa_construction_add_copies(&senv, env.state_nodes,
252 ARR_LEN(env.state_nodes));
253 be_ssa_construction_fix_users(&senv, initial_value);
256 be_ssa_construction_update_liveness_phis(&senv, lv);
257 be_liveness_update(lv, initial_value);
258 len = ARR_LEN(env.state_nodes);
259 for (i = 0; i < len; ++i) {
260 be_liveness_update(lv, env.state_nodes[i]);
263 be_invalidate_live_sets(irg);
266 /* set registers for the phis */
267 phis = be_ssa_construction_get_new_phis(&senv);
269 for (i = 0; i < len; ++i) {
270 ir_node *phi = phis[i];
271 arch_set_irn_register(phi, reg);
/* release the SSA construction environment and the collected node array */
273 be_ssa_construction_destroy(&senv);
274 DEL_ARR_F(env.state_nodes);
276 be_invalidate_live_sets(irg);
/**
 * Entry point of the fpu mode handling for a graph: first rewires the nodes
 * defining the fpu control word via rewire_fpu_mode_nodes(), then calls
 * be_assure_state() for the FPCW register with a NULL environment and
 * create_fpu_mode_spill() / create_fpu_mode_reload() as callbacks.
 *
 * @param irg  the graph to process
 */
279 void ia32_setup_fpu_mode(ir_graph *irg)
281 /* do ssa construction for the fpu modes */
282 rewire_fpu_mode_nodes(irg);
284 /* ensure correct fpu mode for operations */
285 be_assure_state(irg, &ia32_registers[REG_FPCW],
286 NULL, create_fpu_mode_spill, create_fpu_mode_reload);