3 * @brief Handles fpu rounding modes
4 * @author Matthias Braun
7 * The problem we deal with here is that the x86 ABI says the user can control
8 * the FPU rounding mode. This means that for operations such as float-to-int
9 * conversion, which the C standard specifies as truncation, we have to spill,
10 * change and restore the FPU rounding mode between spills.
17 #include "ia32_new_nodes.h"
18 #include "gen_ia32_regalloc_if.h"
25 #include "../beirgmod.h"
26 #include "../bearch.h"
27 #include "../besched.h"
29 #include "../benode_t.h"
30 #include "../bestate.h"
31 #include "../beutil.h"
32 #include "../bessaconstr.h"
/**
 * Spill callback for the fpu control word state; ia32_setup_fpu_mode() hands
 * it to be_assure_state().
 *
 * Creates an ia32 FnstCW node in the block of @p state that addresses memory
 * through the graph's frame node, and schedules it after the node `after`.
 * No node is created when @p state is an ia32 ChangeCW node, unless @p force
 * is 1.
 *
 * @param env    opaque callback data, used as ia32_code_gen_t*
 * @param state  node passed as input to the FnstCW constructor; its block and
 *               graph are used for the new node
 * @param force  1 to create the spill even if @p state is a ChangeCW node
 *
 * NOTE(review): the end of the parameter list, the final argument of the
 * FnstCW constructor call and the return statement are not visible in this
 * excerpt. `after` is used and `nomem` is declared below, so presumably they
 * are the missing parameter and argument, and `spill` is presumably what is
 * returned -- confirm against the full file.
 */
34 static ir_node *create_fpu_mode_spill(void *env, ir_node *state, int force,
37 ia32_code_gen_t *cg = env;
38 ir_node *spill = NULL;
/* a ChangeCW state is only spilled when the caller forces it */
40 if(force == 1 || !is_ia32_ChangeCW(state)) {
41 ir_graph *irg = get_irn_irg(state);
42 ir_node *block = get_nodes_block(state);
43 ir_node *noreg = ia32_new_NoReg_gp(cg);
44 ir_node *nomem = new_NoMem();
45 ir_node *frame = get_irg_frame(irg);
47 spill = new_rd_ia32_FnstCW(NULL, irg, block, frame, noreg, state,
/* mark the node as a destination address mode operation on the frame, using
 * the mode of the fp_cw register class as load/store mode;
 * NOTE(review): ia32_B presumably selects base-register-only addressing --
 * confirm */
49 set_ia32_am_support(spill, ia32_am_Dest);
50 set_ia32_op_type(spill, ia32_AddrModeD);
51 set_ia32_am_flavour(spill, ia32_B);
52 set_ia32_ls_mode(spill, ia32_reg_classes[CLASS_ia32_fp_cw].mode);
53 set_ia32_use_frame(spill);
55 sched_add_after(after, spill);
/**
 * Reload callback for the fpu control word state; ia32_setup_fpu_mode()
 * hands it to be_assure_state().
 *
 * Two node sequences are built here, both scheduled before @p before and
 * both ending in an FldCW node that is assigned the FPCW register:
 *  - a single FldCW that depends on @p spill and reads through the frame;
 *  - FnstCW of `last_state` to the frame, Load of that word, Or with the
 *    immediate 3072, Store of the result and FldCW depending on that Store.
 *
 * @param env     opaque callback data, used as ia32_code_gen_t*
 * @param state   used here to look up the ir_graph
 * @param spill   node the simple FldCW reload depends on
 * @param before  node the new nodes are scheduled before; the new nodes are
 *                created in its block
 *
 * NOTE(review): the end of the parameter list, the condition choosing
 * between the two sequences and the return statement are not visible in this
 * excerpt. Presumably the first sequence is used when a spill exists and the
 * second otherwise, and `last_state` is the missing parameter -- confirm
 * against the full file.
 */
61 static ir_node *create_fpu_mode_reload(void *env, ir_node *state,
62 ir_node *spill, ir_node *before,
65 ia32_code_gen_t *cg = env;
66 ir_graph *irg = get_irn_irg(state);
67 ir_node *block = get_nodes_block(before);
68 ir_node *frame = get_irg_frame(irg);
69 ir_node *noreg = ia32_new_NoReg_gp(cg);
70 ir_node *reload = NULL;
/* first sequence: one FldCW in source address mode on the frame, depending
 * on the spill node */
73 reload = new_rd_ia32_FldCW(NULL, irg, block, frame, noreg, spill);
74 set_ia32_am_support(reload, ia32_am_Source);
75 set_ia32_op_type(reload, ia32_AddrModeS);
76 set_ia32_am_flavour(reload, ia32_B);
77 set_ia32_ls_mode(reload, ia32_reg_classes[CLASS_ia32_fp_cw].mode);
78 set_ia32_use_frame(reload);
79 arch_set_irn_register(cg->arch_env, reload, &ia32_fp_cw_regs[REG_FPCW]);
81 sched_add_before(before, reload);
/* second sequence: build a modified control word from last_state; every
 * memory access below uses the mode of the fp_cw register class */
83 ir_mode *lsmode = ia32_reg_classes[CLASS_ia32_fp_cw].mode;
84 ir_node *nomem = new_NoMem();
85 ir_node *cwstore, *load, *load_res, *or, *store, *fldcw;
87 assert(last_state != NULL);
/* FnstCW with last_state as input, destination address mode on the frame */
88 cwstore = new_rd_ia32_FnstCW(NULL, irg, block, frame, noreg, last_state,
90 set_ia32_am_support(cwstore, ia32_am_Dest);
91 set_ia32_op_type(cwstore, ia32_AddrModeD);
92 set_ia32_am_flavour(cwstore, ia32_B);
93 set_ia32_ls_mode(cwstore, lsmode);
94 set_ia32_use_frame(cwstore);
95 sched_add_before(before, cwstore);
/* Load from the frame, depending on the FnstCW above */
97 load = new_rd_ia32_Load(NULL, irg, block, frame, noreg, cwstore);
98 set_ia32_am_support(load, ia32_am_Source);
99 set_ia32_op_type(load, ia32_AddrModeS);
100 set_ia32_am_flavour(load, ia32_B);
101 set_ia32_ls_mode(load, lsmode);
102 set_ia32_use_frame(load);
103 sched_add_before(before, load);
/* result of the Load as a mode_Iu value; the Proj is scheduled as well */
105 load_res = new_r_Proj(irg, block, load, mode_Iu, pn_ia32_Load_res);
106 sched_add_before(before, load_res);
108 /* TODO: make the actual mode configurable in ChangeCW... */
/* Or the loaded word with the immediate 3072 (0x0C00);
 * NOTE(review): presumably these are the x87 rounding-control bits, i.e. the
 * truncation mode mentioned in the file header -- confirm */
109 or = new_rd_ia32_Or(NULL, irg, block, noreg, noreg, load_res, noreg,
111 set_ia32_Immop_tarval(or, new_tarval_from_long(3072, mode_Iu));
112 sched_add_before(before, or);
/* Store the Or result to the frame */
114 store = new_rd_ia32_Store(NULL, irg, block, frame, noreg, or, nomem);
115 set_ia32_am_support(store, ia32_am_Dest);
116 set_ia32_op_type(store, ia32_AddrModeD);
117 set_ia32_am_flavour(store, ia32_B);
118 set_ia32_ls_mode(store, lsmode);
119 set_ia32_use_frame(store);
120 sched_add_before(before, store);
/* FldCW from the frame, depending on the Store; assigned the FPCW register */
122 fldcw = new_rd_ia32_FldCW(NULL, irg, block, frame, noreg, store);
123 set_ia32_am_support(fldcw, ia32_am_Source);
124 set_ia32_op_type(fldcw, ia32_AddrModeS);
125 set_ia32_am_flavour(fldcw, ia32_B);
126 set_ia32_ls_mode(fldcw, lsmode);
127 set_ia32_use_frame(fldcw);
128 arch_set_irn_register(cg->arch_env, fldcw, &ia32_fp_cw_regs[REG_FPCW]);
129 sched_add_before(before, fldcw);
/** Environment handed to collect_fpu_mode_nodes_walker() via the graph walk. */
137 typedef struct collect_fpu_mode_nodes_env_t {
/* used for the arch_get_irn_register() lookup on every visited node */
138 const arch_env_t *arch_env;
/* array of collected nodes; created with NEW_ARR_F, grown with ARR_APP1 */
139 ir_node **state_nodes;
140 } collect_fpu_mode_nodes_env_t;
/**
 * Graph walker callback: appends @p node to the state_nodes array of the
 * environment if the node is assigned the fpu control word register and is
 * not an ia32 ChangeCW node.
 *
 * @param node  node currently visited
 * @param data  walker data, used as collect_fpu_mode_nodes_env_t*
 */
143 void collect_fpu_mode_nodes_walker(ir_node *node, void *data)
145 collect_fpu_mode_nodes_env_t *env = data;
147 const arch_register_t *reg = arch_get_irn_register(env->arch_env, node);
/* only nodes in the FPCW register that are not ChangeCW are collected */
148 if(reg == &ia32_fp_cw_regs[REG_FPCW] && !is_ia32_ChangeCW(node)) {
149 ARR_APP1(ir_node*, env->state_nodes, node);
/**
 * Runs SSA construction for the fpu control word register of @p birg.
 *
 * All nodes assigned to the FPCW register (ChangeCW nodes excluded, see
 * collect_fpu_mode_nodes_walker()) are collected with a graph walk and
 * handed to be_ssa_construction_add_copies(); be_ssa_construction_fix_users()
 * is then run for the node obtained from be_abi_get_ignore_irn() for that
 * register. Afterwards liveness is updated for the new phis, for that
 * initial node and for every collected node, and the phis reported by the
 * SSA construction get the ignore flag and the FPCW register.
 *
 * If the walk collects no node, the array is freed again before any SSA
 * construction is set up.
 *
 * @param birg  backend graph to process; supplies the ir_graph, liveness,
 *              ABI object and arch_env used below
 *
 * NOTE(review): the declarations of `len`, `i` and `phis`, the statement
 * that follows DEL_ARR_F in the empty case (presumably a return) and the end
 * of the comment explaining that case are not visible in this excerpt --
 * confirm against the full file.
 */
154 void rewire_fpu_mode_nodes(be_irg_t *birg)
156 collect_fpu_mode_nodes_env_t env;
157 be_ssa_construction_env_t senv;
158 const arch_register_t *reg = &ia32_fp_cw_regs[REG_FPCW];
159 ir_graph *irg = be_get_birg_irg(birg);
160 ir_node *initial_value;
162 be_lv_t *lv = be_get_birg_liveness(birg);
165 /* do ssa construction for the fpu modes */
166 env.arch_env = birg->main_env->arch_env;
167 env.state_nodes = NEW_ARR_F(ir_node*, 0);
168 irg_walk_graph(irg, collect_fpu_mode_nodes_walker, NULL, &env);
170 initial_value = be_abi_get_ignore_irn(birg->abi, reg);
172 /* nothing needs to be done, in fact we must not continue as for endless
173 * loops no one is using the initial_value and it will point to a bad node
176 if(ARR_LEN(env.state_nodes) == 0) {
177 DEL_ARR_F(env.state_nodes);
181 be_ssa_construction_init(&senv, birg);
182 be_ssa_construction_add_copies(&senv, env.state_nodes,
183 ARR_LEN(env.state_nodes));
184 be_ssa_construction_fix_users(&senv, initial_value);
187 be_ssa_construction_update_liveness_phis(&senv, lv);
188 be_liveness_update(lv, initial_value);
189 len = ARR_LEN(env.state_nodes);
190 for(i = 0; i < len; ++i) {
191 be_liveness_update(lv, env.state_nodes[i]);
195 /* set registers for the phis */
196 phis = be_ssa_construction_get_new_phis(&senv);
/* NOTE(review): `len` was last set from env.state_nodes above; unless it is
 * re-assigned from `phis` on a line missing from this excerpt, the loop
 * below indexes `phis` with the wrong bound -- confirm against the full
 * file */
198 for(i = 0; i < len; ++i) {
199 ir_node *phi = phis[i];
200 be_set_phi_flags(env.arch_env, phi, arch_irn_flags_ignore);
201 arch_set_irn_register(env.arch_env, phi, reg);
/* release the SSA construction state and the collected node array */
203 be_ssa_construction_destroy(&senv);
204 DEL_ARR_F(env.state_nodes);
/**
 * Sets up fpu control word handling for the graph behind @p cg: first runs
 * rewire_fpu_mode_nodes() on cg->birg, then calls be_assure_state() for the
 * FPCW register with create_fpu_mode_spill() and create_fpu_mode_reload() as
 * callbacks.
 *
 * @param cg  code generator; cg->birg is processed and cg itself is passed
 *            to be_assure_state() as the callback environment
 */
207 void ia32_setup_fpu_mode(ia32_code_gen_t *cg)
209 /* do ssa construction for the fpu modes */
210 rewire_fpu_mode_nodes(cg->birg);
212 /* ensure correct fpu mode for operations */
213 be_assure_state(cg->birg, &ia32_fp_cw_regs[REG_FPCW],
214 cg, create_fpu_mode_spill, create_fpu_mode_reload);