2 * This is the main ia32 firm backend driver.
3 * @author Christian Wuerdig
18 #include <libcore/lc_opts.h>
19 #include <libcore/lc_opts_enum.h>
23 #include "pseudo_irg.h"
27 #include "iredges_t.h"
40 #include "../benode_t.h"
41 #include "../belower.h"
42 #include "../besched_t.h"
45 #include "../beirgmod.h"
46 #include "../be_dbgout.h"
47 #include "../beblocksched.h"
48 #include "../bemachine.h"
49 #include "../beilpsched.h"
50 #include "../bespillslots.h"
51 #include "../bemodule.h"
52 #include "../begnuas.h"
53 #include "../bestate.h"
55 #include "bearch_ia32_t.h"
57 #include "ia32_new_nodes.h"
58 #include "gen_ia32_regalloc_if.h"
59 #include "gen_ia32_machine.h"
60 #include "ia32_transform.h"
61 #include "ia32_emitter.h"
62 #include "ia32_map_regs.h"
63 #include "ia32_optimize.h"
65 #include "ia32_dbg_stat.h"
66 #include "ia32_finish.h"
67 #include "ia32_util.h"
/* File-scope register set shared by ia32_set_firm_reg()/ia32_get_firm_reg() below. */
70 static set *cur_reg_set = NULL;

/* Constructor-callback type used by create_const() to build the unique
 * per-irg constant nodes (NoReg/Unknown/fpcw ChangeCW). */
72 typedef ir_node *(*create_const_node_func) (dbg_info *dbg, ir_graph *irg, ir_node *block);
/**
 * Lazily creates a unique per-irg constant node (NoReg, Unknown, fpcw)
 * in the start block, pins it with a Keep, and schedules both if a
 * schedule already exists.  (NOTE(review): chunk is sampled — the cache
 * check on *place and the declarations are in lines not shown here.)
 */
74 static INLINE ir_node *create_const(ia32_code_gen_t *cg, ir_node **place,
75                                     create_const_node_func func,
86 	block = get_irg_start_block(cg->irg);
87 	res = func(NULL, cg->irg, block);
88 	arch_set_irn_register(cg->arch_env, res, reg);
91 	/* keep the node so it isn't accidently removed when unused ... */
93 	keep = be_new_Keep(arch_register_get_class(reg), cg->irg, block, 1, in);
95 	/* schedule the node if we already have a scheduled program */
96 	startnode = get_irg_start(cg->irg);
97 	if(sched_is_scheduled(startnode)) {
98 		sched_add_after(startnode, res);
		/* the Keep must follow the constant it keeps alive */
99 		sched_add_after(res, keep);
105 /* Creates the unique per irg GP NoReg node. */
/** Returns the unique per-irg GP NoReg node, creating it on first use. */
106 ir_node *ia32_new_NoReg_gp(ia32_code_gen_t *cg) {
107 	return create_const(cg, &cg->noreg_gp, new_rd_ia32_NoReg_GP,
108 	                    &ia32_gp_regs[REG_GP_NOREG]);
/** Returns the unique per-irg virtual-x87 (vfp) NoReg node, creating it on first use. */
111 ir_node *ia32_new_NoReg_vfp(ia32_code_gen_t *cg) {
112 	return create_const(cg, &cg->noreg_vfp, new_rd_ia32_NoReg_VFP,
113 	                    &ia32_vfp_regs[REG_VFP_NOREG]);
/** Returns the unique per-irg SSE (xmm) NoReg node, creating it on first use. */
116 ir_node *ia32_new_NoReg_xmm(ia32_code_gen_t *cg) {
117 	return create_const(cg, &cg->noreg_xmm, new_rd_ia32_NoReg_XMM,
118 	                    &ia32_xmm_regs[REG_XMM_NOREG]);
121 /* Creates the unique per irg FP NoReg node. */
/** Returns the FP NoReg node matching the active FP mode: xmm under SSE2, vfp otherwise. */
122 ir_node *ia32_new_NoReg_fp(ia32_code_gen_t *cg) {
123 	return USE_SSE2(cg) ? ia32_new_NoReg_xmm(cg) : ia32_new_NoReg_vfp(cg);
/** Returns the unique per-irg GP Unknown node, creating it on first use. */
126 ir_node *ia32_new_Unknown_gp(ia32_code_gen_t *cg) {
127 	return create_const(cg, &cg->unknown_gp, new_rd_ia32_Unknown_GP,
128 	                    &ia32_gp_regs[REG_GP_UKNWN]);
/** Returns the unique per-irg virtual-x87 (vfp) Unknown node, creating it on first use. */
131 ir_node *ia32_new_Unknown_vfp(ia32_code_gen_t *cg) {
132 	return create_const(cg, &cg->unknown_vfp, new_rd_ia32_Unknown_VFP,
133 	                    &ia32_vfp_regs[REG_VFP_UKNWN]);
/** Returns the unique per-irg SSE (xmm) Unknown node, creating it on first use. */
136 ir_node *ia32_new_Unknown_xmm(ia32_code_gen_t *cg) {
137 	return create_const(cg, &cg->unknown_xmm, new_rd_ia32_Unknown_XMM,
138 	                    &ia32_xmm_regs[REG_XMM_UKNWN]);
/** Returns the unique per-irg ChangeCW node producing the truncate FPU control word. */
141 ir_node *ia32_new_Fpu_truncate(ia32_code_gen_t *cg) {
142 	return create_const(cg, &cg->fpu_trunc_mode, new_rd_ia32_ChangeCW,
143 	                    &ia32_fp_cw_regs[REG_FPCW]);
148 * Returns gp_noreg or fp_noreg, depending in input requirements.
/**
 * Returns the NoReg node admissible for input @p pos of @p irn:
 * the GP NoReg for a gp-class requirement, the FP NoReg otherwise.
 */
150 ir_node *ia32_get_admissible_noreg(ia32_code_gen_t *cg, ir_node *irn, int pos) {
151 	const arch_register_req_t *req;
153 	req = arch_get_register_req(cg->arch_env, irn, pos);
154 	assert(req != NULL && "Missing register requirements");
155 	if (req->cls == &ia32_reg_classes[CLASS_ia32_gp])
156 		return ia32_new_NoReg_gp(cg);
	/* any non-gp requirement gets the FP NoReg (xmm or vfp, see ia32_new_NoReg_fp) */
158 	return ia32_new_NoReg_fp(cg);
161 /**************************************************
164 * _ __ ___ __ _ __ _| | | ___ ___ _| |_
165 * | '__/ _ \/ _` | / _` | | |/ _ \ / __| | | _|
166 * | | | __/ (_| | | (_| | | | (_) | (__ | | |
167 * |_| \___|\__, | \__,_|_|_|\___/ \___| |_|_|
170 **************************************************/
173 * Return register requirements for an ia32 node.
174 * If the node returns a tuple (mode_T) then the proj's
175 * will be asked for this information.
/**
 * Returns the register requirement for @p node at position @p pos
 * (pos == -1 asks for the output requirement; for Projs the proj
 * number selects the output slot of the predecessor).
 * Blocks, mode_X nodes and mode_T tuples themselves have no requirement.
 */
177 static const arch_register_req_t *ia32_get_irn_reg_req(const void *self,
180 	long node_pos = pos == -1 ? 0 : pos;
181 	ir_mode *mode = is_Block(node) ? NULL : get_irn_mode(node);
183 	if (is_Block(node) || mode == mode_X) {
184 		return arch_no_register_req;
187 	if (mode == mode_T && pos < 0) {
188 		return arch_no_register_req;
193 		return arch_no_register_req;
196 			return arch_no_register_req;
	/* Proj: requirement lives at the predecessor's output slot */
199 		node_pos = (pos == -1) ? get_Proj_proj(node) : pos;
200 		node = skip_Proj_const(node);
203 	if (is_ia32_irn(node)) {
204 		const arch_register_req_t *req;
	/* NOTE(review): in-requirements are indexed with `pos`, out-requirements
	 * with `node_pos` — confirm this asymmetry is intended. */
206 			req = get_ia32_in_req(node, pos);
208 			req = get_ia32_out_req(node, node_pos);
215 	/* unknowns should be transformed already */
216 	assert(!is_Unknown(node));
218 	return arch_no_register_req;
/**
 * Assigns register @p reg to @p irn.  Projs are resolved to their
 * predecessor's output slot; ia32 nodes store the register in their
 * slot array, other (firm) nodes go through the cur_reg_set map.
 */
221 static void ia32_set_irn_reg(const void *self, ir_node *irn, const arch_register_t *reg) {
224 	if (get_irn_mode(irn) == mode_X) {
229 		pos = get_Proj_proj(irn);
230 		irn = skip_Proj(irn);
233 	if (is_ia32_irn(irn)) {
234 		const arch_register_t **slots;
236 		slots = get_ia32_slots(irn);
	/* non-ia32 node: record the assignment in the shared firm-reg map */
239 		ia32_set_firm_reg(irn, reg, cur_reg_set);
/**
 * Returns the register assigned to @p irn, or NULL if none is known.
 * Mirror of ia32_set_irn_reg(): Projs are resolved to the predecessor's
 * output slot; non-ia32 nodes are looked up in cur_reg_set.
 */
243 static const arch_register_t *ia32_get_irn_reg(const void *self, const ir_node *irn) {
245 	const arch_register_t *reg = NULL;
249 	if (get_irn_mode(irn) == mode_X) {
253 		pos = get_Proj_proj(irn);
254 		irn = skip_Proj_const(irn);
257 	if (is_ia32_irn(irn)) {
258 		const arch_register_t **slots;
259 		slots = get_ia32_slots(irn);
	/* non-ia32 node: consult the shared firm-reg map */
262 		reg = ia32_get_firm_reg(irn, cur_reg_set);
/**
 * Classifies @p irn as a bitmask of arch_irn_class_* flags
 * (branch/const/load/store/reload).  Non-ia32 nodes keep only the
 * branch classification (normal bit is stripped).
 */
268 static arch_irn_class_t ia32_classify(const void *self, const ir_node *irn) {
269 	arch_irn_class_t classification = arch_irn_class_normal;
271 	irn = skip_Proj_const(irn);
274 		classification |= arch_irn_class_branch;
276 	if (! is_ia32_irn(irn))
277 		return classification & ~arch_irn_class_normal;
279 	if (is_ia32_Cnst(irn))
280 		classification |= arch_irn_class_const;
283 		classification |= arch_irn_class_load;
285 	if (is_ia32_St(irn) || is_ia32_Store8Bit(irn))
286 		classification |= arch_irn_class_store;
	/* nodes needing a stack entity are treated as reloads by the spiller */
288 	if (is_ia32_need_stackent(irn))
289 		classification |= arch_irn_class_reload;
291 	return classification;
/**
 * Returns the arch_irn flags of @p irn.  Data Projs inherit the
 * out-flags of their ia32 predecessor; ia32 nodes additionally
 * contribute their own node flags.
 */
294 static arch_irn_flags_t ia32_get_flags(const void *self, const ir_node *irn) {
295 	arch_irn_flags_t flags = arch_irn_flags_none;
298 		return arch_irn_flags_ignore;
300 	if(is_Proj(irn) && mode_is_datab(get_irn_mode(irn))) {
301 		ir_node *pred = get_Proj_pred(irn);
303 		if(is_ia32_irn(pred)) {
	/* per-output flags stored on the producing ia32 node */
304 			flags = get_ia32_out_flags(pred, get_Proj_proj(irn));
310 	if (is_ia32_irn(irn)) {
311 		flags |= get_ia32_flags(irn);
318 * The IA32 ABI callback object.
321 be_abi_call_flags_bits_t flags; /**< The call flags. */
322 const arch_isa_t *isa; /**< The ISA handle. */
323 const arch_env_t *aenv; /**< The architecture environment. */
324 ir_graph *irg; /**< The associated graph. */
/** Returns the frame entity attached to an ia32 node, NULL for any other node. */
327 static ir_entity *ia32_get_frame_entity(const void *self, const ir_node *irn) {
328 	return is_ia32_irn(irn) ? get_ia32_frame_ent(irn) : NULL;
/** Attaches frame entity @p ent to @p irn (assumes irn is an ia32 node). */
331 static void ia32_set_frame_entity(const void *self, ir_node *irn, ir_entity *ent) {
332 	set_ia32_frame_ent(irn, ent);
/**
 * Adds the frame bias @p bias to the address-mode offset of @p irn.
 * Pop nodes get special treatment because they adjust the stack
 * pointer before the address is computed (bias fixup in unseen lines).
 */
335 static void ia32_set_frame_offset(const void *self, ir_node *irn, int bias) {
336 	const ia32_irn_ops_t *ops = self;
338 	if (get_ia32_frame_ent(irn)) {
339 		ia32_am_flavour_t am_flav;
341 		if (is_ia32_Pop(irn)) {
342 			int omit_fp = be_abi_omit_fp(ops->cg->birg->abi);
344 			/* Pop nodes modify the stack pointer before calculating the destination
345 			 * address, so fix this here
351 		am_flav = get_ia32_am_flavour(irn);
353 		set_ia32_am_flavour(irn, am_flav);
	/* fold the frame bias into the node's immediate offset */
355 		add_ia32_am_offs_int(irn, bias);
/**
 * Returns the stack-pointer change caused by @p irn:
 * non-zero only for the stack Proj of Push/Pop nodes
 * (return values are in lines not shown here).
 */
359 static int ia32_get_sp_bias(const void *self, const ir_node *irn) {
361 	long proj = get_Proj_proj(irn);
362 	ir_node *pred = get_Proj_pred(irn);
364 	if (is_ia32_Push(pred) && proj == pn_ia32_Push_stack)
366 	if (is_ia32_Pop(pred) && proj == pn_ia32_Pop_stack)
374 * Put all registers which are saved by the prologue/epilogue in a set.
376 * @param self The callback object.
377 * @param s The result set.
/**
 * Put all registers which are saved by the prologue/epilogue in a set.
 * When the frame pointer is omitted, ebp need not be saved and is
 * added to the don't-save set.
 */
379 static void ia32_abi_dont_save_regs(void *self, pset *s)
381 	ia32_abi_env_t *env = self;
382 	if(env->flags.try_omit_fp)
383 		pset_insert_ptr(s, env->isa->bp);
/**
 * Scans the out-edges of the start block for the RegParams and Barrier
 * nodes and returns them via the out-parameters; panics if not found.
 * (Duplicate-detection assignments are in lines not shown here.)
 */
388 static void get_regparams_startbarrier(ir_graph *irg, ir_node **regparams, ir_node **startbarrier)
390 	const ir_edge_t *edge;
391 	ir_node *start_block = get_irg_start_block(irg);
394 	*startbarrier = NULL;
395 	foreach_out_edge(start_block, edge) {
396 		ir_node *src = get_edge_src_irn(edge);
398 		if(be_is_RegParams(src)) {
400 			if(*startbarrier != NULL)
403 		if(be_is_Barrier(src)) {
405 			if(*regparams != NULL)
	/* both nodes must exist in a well-formed backend graph */
410 	panic("Couldn't find regparams and startbarrier!");
/**
 * Routes the FPU control word (fpcw) register through the graph:
 * appends it as an output of the RegParams/start Barrier, then adds it
 * to the Barrier before each Return and appends it to the Return itself.
 */
413 static void add_fpu_edges(be_irg_t *birg)
415 	ir_graph *irg = be_get_birg_irg(birg);
417 	ir_node *startbarrier;
420 	const arch_env_t *arch_env = birg->main_env->arch_env;
421 	const arch_register_t *reg = &ia32_fp_cw_regs[REG_FPCW];
	/* NOTE(review): "®params" below is HTML-entity mojibake for "&regparams"
	 * (&reg; got decoded to the (R) sign) — must be repaired before this
	 * compiles.  TODO confirm against upstream history. */
425 	get_regparams_startbarrier(irg, &regparams, &startbarrier);
427 	fp_cw_reg = be_RegParams_append_out_reg(regparams, arch_env, reg);
429 	fp_cw_reg = be_Barrier_append_node(startbarrier, fp_cw_reg);
430 	pos = get_Proj_proj(fp_cw_reg);
431 	be_set_constr_single_reg(startbarrier, BE_OUT_POS(pos), reg);
432 	arch_set_irn_register(arch_env, fp_cw_reg, reg);
	/* now wire fpcw into every Return in the end block */
435 	end_block = get_irg_end_block(irg);
436 	arity = get_irn_arity(end_block);
437 	for(i = 0; i < arity; ++i) {
439 		ir_node *ret = get_irn_n(end_block, i);
440 		ir_node *end_barrier = NULL;
441 		ir_node *fp_cw_after_end_barrier;
442 		if(!be_is_Return(ret))
445 		/* search the barrier before the return */
446 		arity2 = get_irn_arity(ret);
447 		for(i2 = 0; i2 < arity2; i2++) {
448 			ir_node *proj = get_irn_n(ret, i2);
453 			end_barrier = get_Proj_pred(proj);
454 			if(!be_is_Barrier(end_barrier))
458 		assert(end_barrier != NULL);
460 		/* add fp_cw to the barrier */
461 		fp_cw_after_end_barrier = be_Barrier_append_node(end_barrier, fp_cw_reg);
462 		pos = get_Proj_proj(fp_cw_after_end_barrier);
463 		be_set_constr_single_reg(end_barrier, BE_OUT_POS(pos), reg);
464 		arch_set_irn_register(arch_env, fp_cw_after_end_barrier, reg);
466 		/* and append it to the return node */
467 		be_Return_append_node(ret, fp_cw_after_end_barrier);
/**
 * Counts all callee-save registers over every register class of the ISA.
 * (The increment of the counter is in a line not shown here.)
 */
474 static unsigned count_callee_saves(ia32_code_gen_t *cg)
476 	unsigned callee_saves = 0;
477 	int c, num_reg_classes;
480 	num_reg_classes = arch_isa_get_n_reg_class(isa);
481 	for(c = 0; c < num_reg_classes; ++c) {
482 		int r, num_registers;
483 		arch_register_class_t *regclass = arch_isa_get_reg_class(isa, c);
485 		num_registers = arch_register_class_n_regs(regclass);
486 		for(r = 0; r < num_registers; ++r) {
487 			arch_register_t *reg = arch_register_for_index(regclass, r);
488 			if(arch_register_type_is(reg, callee_save))
/**
 * Creates one Proj per callee-save register from the RegParams node and
 * records it in cg->initial_regs[class][reg] (non-callee-save slots are
 * presumably left NULL/unset — handled in lines not shown here).
 */
496 static void create_callee_save_regprojs(ia32_code_gen_t *cg, ir_node *regparams)
498 	int c, num_reg_classes;
502 	num_reg_classes = arch_isa_get_n_reg_class(isa);
503 	cg->initial_regs = obstack_alloc(cg->obst,
504 	                                 num_reg_classes * sizeof(cg->initial_regs[0]));
506 	for(c = 0; c < num_reg_classes; ++c) {
507 		int r, num_registers;
508 		ir_node **initial_regclass;
509 		arch_register_class_t *regclass = arch_isa_get_reg_class(isa, c);
511 		num_registers = arch_register_class_n_regs(regclass);
	/* NOTE(review): obstack_alloc takes (obstack, size) — the cg->obst
	 * argument is missing here, unlike the call at line 503 above. */
512 		initial_regclass = obstack_alloc(num_registers * sizeof(initial_regclass[0]));
513 		for(r = 0; r < num_registers; ++r) {
515 			arch_register_t *reg = arch_register_for_index(regclass, r);
516 			if(!arch_register_type_is(reg, callee_save))
519 			proj = new_r_Proj(irg, start_block, regparams, n);
520 			be_set_constr_single_reg(regparams, n, reg);
521 			arch_set_irn_register(cg->arch_env, proj, reg);
523 			initial_regclass[r] = proj;
526 		cg->initial_regs[c] = initial_regclass;
/**
 * Appends every callee-save initial-reg Proj (from cg->initial_regs)
 * to cg->obst as a growing pointer array.
 */
530 static void callee_saves_obstack_grow(ia32_code_gen_t *cg)
532 	int c, num_reg_classes;
535 	for(c = 0; c < num_reg_classes; ++c) {
536 		int r, num_registers;
538 		num_registers = arch_register_class_n_regs(regclass);
539 		for(r = 0; r < num_registers; ++r) {
541 			arch_register_t *reg = arch_register_for_index(regclass, r);
542 			if(!arch_register_type_is(reg, callee_save))
545 			proj = cg->initial_regs[c][r];
546 			obstack_ptr_grow(cg->obst, proj);

/** Counts how many parameters are passed in registers (body not visible here). */
551 static unsigned count_parameters_in_regs(ia32_code_gen_t *cg)
/**
 * Generates the function prologue: creates the RegParams node (sized for
 * callee-saves plus register parameters), the callee-save Projs, and the
 * stack setup (push ebp; mov ebp, esp) plus the frame IncSP.
 * NOTE(review): the push/copy sequence below duplicates the code in
 * ia32_abi_prologue() further down — candidate for a shared helper.
 */
556 static void ia32_gen_prologue(ia32_code_gen_t *cg)
558 	ir_graph *irg = cg->irg;
559 	ir_node *start_block = get_irg_start_block(irg);
564 	/* Create the regparams node */
565 	n_regparams_out = count_callee_saves(cg) + count_parameters_in_regs(cg);
566 	regparams = be_new_RegParams(irg, start_block, n_regparams_out);
568 	create_callee_save_regprojs(cg, regparams);
570 	/* Setup the stack */
572 	ir_node *bl = get_irg_start_block(env->irg);
573 	ir_node *curr_sp = be_abi_reg_map_get(reg_map, env->isa->sp);
574 	ir_node *curr_bp = be_abi_reg_map_get(reg_map, env->isa->bp);
575 	ir_node *noreg = ia32_new_NoReg_gp(cg);
	/* push ebp: saves the old frame pointer and moves esp down */
579 	push = new_rd_ia32_Push(NULL, env->irg, bl, noreg, noreg, curr_bp, curr_sp, *mem);
580 	curr_sp = new_r_Proj(env->irg, bl, push, get_irn_mode(curr_sp), pn_ia32_Push_stack);
581 	*mem = new_r_Proj(env->irg, bl, push, mode_M, pn_ia32_Push_M);
583 	/* the push must have SP out register */
584 	arch_set_irn_register(env->aenv, curr_sp, env->isa->sp);
585 	set_ia32_flags(push, arch_irn_flags_ignore);
587 	/* move esp to ebp */
588 	curr_bp = be_new_Copy(env->isa->bp->reg_class, env->irg, bl, curr_sp);
589 	be_set_constr_single_reg(curr_bp, BE_OUT_POS(0), env->isa->bp);
590 	arch_set_irn_register(env->aenv, curr_bp, env->isa->bp);
591 	be_node_set_flags(curr_bp, BE_OUT_POS(0), arch_irn_flags_ignore);
593 	/* beware: the copy must be done before any other sp use */
594 	curr_sp = be_new_CopyKeep_single(env->isa->sp->reg_class, env->irg, bl, curr_sp, curr_bp, get_irn_mode(curr_sp));
595 	be_set_constr_single_reg(curr_sp, BE_OUT_POS(0), env->isa->sp);
596 	arch_set_irn_register(env->aenv, curr_sp, env->isa->sp);
597 	be_node_set_flags(curr_sp, BE_OUT_POS(0), arch_irn_flags_ignore);
599 	be_abi_reg_map_set(reg_map, env->isa->sp, curr_sp);
600 	be_abi_reg_map_set(reg_map, env->isa->bp, curr_bp);
	/* reserve the stack frame; the final size is patched in later */
603 	sp = be_new_IncSP(sp, irg, start_block, initialsp, BE_STACK_FRAME_SIZE_EXPAND);
604 	set_irg_frame(irg, sp);
/**
 * Generates the function epilogue: a Barrier pinning all callee-save and
 * result registers before the stack frame is torn down, then the IncSP
 * that releases the frame.
 */
607 static void ia32_gen_epilogue(ia32_code_gen_t *cg)
609 	int n_callee_saves = count_callee_saves(cg);
610 	int n_results_regs = 0;
613 	ir_node *end_block = get_irg_end_block(irg);
616 	/* We have to make sure that all reloads occur before the stack frame
617 	   gets destroyed, so we create a barrier for all callee-save and return
619 	barrier_size = n_callee_saves + n_results_regs;
620 	barrier = be_new_Barrier(irg, end_block, barrier_size,
622 	/* simply remove the stack frame here */
623 	curr_sp = be_new_IncSP(env->isa->sp, env->irg, bl, curr_sp, BE_STACK_FRAME_SIZE_SHRINK);
	/* keep the IncSP ordered after all memory operations */
624 	add_irn_dep(curr_sp, *mem);
629 * Generate the routine prologue.
631 * @param self The callback object.
632 * @param mem A pointer to the mem node. Update this if you define new memory.
633 * @param reg_map A map mapping all callee_save/ignore/parameter registers to their defining nodes.
635 * @return The register which shall be used as a stack frame base.
637 * All nodes which define registers in @p reg_map must keep @p reg_map current.
/**
 * ABI callback: generates the routine prologue.  When the frame pointer
 * is not omitted, emits push ebp / mov ebp,esp (as Push + Copy/CopyKeep
 * nodes) and updates reg_map accordingly.
 *
 * @param self    The callback object (ia32_abi_env_t).
 * @param mem     In/out: the current memory node; updated by the Push.
 * @param reg_map Map of callee_save/ignore/parameter registers to defining nodes.
 * @return The register to use as stack frame base (in lines not shown).
 */
639 static const arch_register_t *ia32_abi_prologue(void *self, ir_node **mem, pmap *reg_map)
641 	ia32_abi_env_t *env = self;
642 	const ia32_isa_t *isa = (ia32_isa_t *)env->isa;
643 	ia32_code_gen_t *cg = isa->cg;
645 	if (! env->flags.try_omit_fp) {
646 		ir_node *bl = get_irg_start_block(env->irg);
647 		ir_node *curr_sp = be_abi_reg_map_get(reg_map, env->isa->sp);
648 		ir_node *curr_bp = be_abi_reg_map_get(reg_map, env->isa->bp);
649 		ir_node *noreg = ia32_new_NoReg_gp(cg);
		/* push ebp: save old frame pointer, move esp down */
653 		push = new_rd_ia32_Push(NULL, env->irg, bl, noreg, noreg, curr_bp, curr_sp, *mem);
654 		curr_sp = new_r_Proj(env->irg, bl, push, get_irn_mode(curr_sp), pn_ia32_Push_stack);
655 		*mem = new_r_Proj(env->irg, bl, push, mode_M, pn_ia32_Push_M);
657 		/* the push must have SP out register */
658 		arch_set_irn_register(env->aenv, curr_sp, env->isa->sp);
659 		set_ia32_flags(push, arch_irn_flags_ignore);
661 		/* move esp to ebp */
662 		curr_bp = be_new_Copy(env->isa->bp->reg_class, env->irg, bl, curr_sp);
663 		be_set_constr_single_reg(curr_bp, BE_OUT_POS(0), env->isa->bp);
664 		arch_set_irn_register(env->aenv, curr_bp, env->isa->bp);
665 		be_node_set_flags(curr_bp, BE_OUT_POS(0), arch_irn_flags_ignore);
667 		/* beware: the copy must be done before any other sp use */
668 		curr_sp = be_new_CopyKeep_single(env->isa->sp->reg_class, env->irg, bl, curr_sp, curr_bp, get_irn_mode(curr_sp));
669 		be_set_constr_single_reg(curr_sp, BE_OUT_POS(0), env->isa->sp);
670 		arch_set_irn_register(env->aenv, curr_sp, env->isa->sp);
671 		be_node_set_flags(curr_sp, BE_OUT_POS(0), arch_irn_flags_ignore);
673 		be_abi_reg_map_set(reg_map, env->isa->sp, curr_sp);
674 		be_abi_reg_map_set(reg_map, env->isa->bp, curr_bp);
683 * Generate the routine epilogue.
684 * @param self The callback object.
685 * @param bl The block for the epilog
686 * @param mem A pointer to the mem node. Update this if you define new memory.
687 * @param reg_map A map mapping all callee_save/ignore/parameter registers to their defining nodes.
688 * @return The register which shall be used as a stack frame base.
690 * All nodes which define registers in @p reg_map must keep @p reg_map current.
/**
 * ABI callback: generates the routine epilogue.
 * Omit-fp case: just an IncSP releasing the frame.  Otherwise a Leave
 * (or mov esp,ebp + pop ebp) restores the caller's frame, and reg_map is
 * updated with the new sp/bp values.
 *
 * @param self    The callback object (ia32_abi_env_t).
 * @param bl      The block for the epilogue.
 * @param mem     In/out: the current memory node; updated by the Pop.
 * @param reg_map Map of callee_save/ignore/parameter registers to defining nodes.
 */
692 static void ia32_abi_epilogue(void *self, ir_node *bl, ir_node **mem, pmap *reg_map)
694 	ia32_abi_env_t *env = self;
695 	ir_node *curr_sp = be_abi_reg_map_get(reg_map, env->isa->sp);
696 	ir_node *curr_bp = be_abi_reg_map_get(reg_map, env->isa->bp);
698 	if (env->flags.try_omit_fp) {
699 		/* simply remove the stack frame here */
700 		curr_sp = be_new_IncSP(env->isa->sp, env->irg, bl, curr_sp, BE_STACK_FRAME_SIZE_SHRINK);
701 		add_irn_dep(curr_sp, *mem);
703 		const ia32_isa_t *isa = (ia32_isa_t *)env->isa;
704 		ia32_code_gen_t *cg = isa->cg;
705 		ir_mode *mode_bp = env->isa->bp->reg_class->mode;
707 		/* gcc always emits a leave at the end of a routine */
		/* NOTE(review): "1 ||" forces the Leave branch unconditionally;
		 * the mov/pop alternative below is currently dead code. */
708 		if (1 || ARCH_AMD(isa->opt_arch)) {
712 			leave = new_rd_ia32_Leave(NULL, env->irg, bl, curr_sp, curr_bp);
713 			set_ia32_flags(leave, arch_irn_flags_ignore);
714 			curr_bp = new_r_Proj(current_ir_graph, bl, leave, mode_bp, pn_ia32_Leave_frame);
715 			curr_sp = new_r_Proj(current_ir_graph, bl, leave, get_irn_mode(curr_sp), pn_ia32_Leave_stack);
717 			ir_node *noreg = ia32_new_NoReg_gp(cg);
720 			/* copy ebp to esp */
721 			curr_sp = be_new_SetSP(env->isa->sp, env->irg, bl, curr_sp, curr_bp, *mem);
			/* pop ebp: restore the caller's frame pointer */
724 			pop = new_rd_ia32_Pop(NULL, env->irg, bl, noreg, noreg, curr_sp, *mem);
725 			set_ia32_flags(pop, arch_irn_flags_ignore);
726 			curr_bp = new_r_Proj(current_ir_graph, bl, pop, mode_bp, pn_ia32_Pop_res);
727 			curr_sp = new_r_Proj(current_ir_graph, bl, pop, get_irn_mode(curr_sp), pn_ia32_Pop_stack);
729 			*mem = new_r_Proj(current_ir_graph, bl, pop, mode_M, pn_ia32_Pop_M);
731 		arch_set_irn_register(env->aenv, curr_sp, env->isa->sp);
732 		arch_set_irn_register(env->aenv, curr_bp, env->isa->bp);
735 	be_abi_reg_map_set(reg_map, env->isa->sp, curr_sp);
736 	be_abi_reg_map_set(reg_map, env->isa->bp, curr_bp);
740 * Initialize the callback object.
741 * @param call The call object.
742 * @param aenv The architecture environment.
743 * @param irg The graph with the method.
744 * @return Some pointer. This pointer is passed to all other callback functions as self object.
/**
 * ABI callback: allocates and initializes the callback object from the
 * call flags and environment.  Ownership: the returned pointer is freed
 * by ia32_abi_done().
 */
746 static void *ia32_abi_init(const be_abi_call_t *call, const arch_env_t *aenv, ir_graph *irg)
748 	ia32_abi_env_t *env = xmalloc(sizeof(env[0]));
749 	be_abi_call_flags_t fl = be_abi_call_get_flags(call);
750 	env->flags = fl.bits;
753 	env->isa = aenv->isa;
758 * Destroy the callback object.
759 * @param self The callback object.
/** ABI callback: destroys the callback object allocated by ia32_abi_init(). */
761 static void ia32_abi_done(void *self) {
766 * Produces the type which sits between the stack args and the locals on the stack.
767 * it will contain the return address and space to store the old base pointer.
768 * @return The Firm type modeling the ABI between type.
/**
 * Produces the type which sits between the stack args and the locals:
 * return address plus (unless fp is omitted) the saved old base pointer.
 * Both variants are built once and cached in function-local statics.
 * NOTE(review): the static caches make this non-thread-safe — fine for
 * a single-threaded compiler, but worth confirming.
 */
770 static ir_type *ia32_abi_get_between_type(void *self)
772 #define IDENT(s) 	new_id_from_chars(s, sizeof(s)-1)
773 	static ir_type *omit_fp_between_type = NULL;
774 	static ir_type *between_type = NULL;
776 	ia32_abi_env_t *env = self;
778 	if (! between_type) {
779 		ir_entity *old_bp_ent;
780 		ir_entity *ret_addr_ent;
781 		ir_entity *omit_fp_ret_addr_ent;
783 		ir_type *old_bp_type = new_type_primitive(IDENT("bp"), mode_Iu);
784 		ir_type *ret_addr_type = new_type_primitive(IDENT("return_addr"), mode_Iu);
		/* layout with saved ebp: [old_bp][ret_addr] */
786 		between_type = new_type_struct(IDENT("ia32_between_type"));
787 		old_bp_ent = new_entity(between_type, IDENT("old_bp"), old_bp_type);
788 		ret_addr_ent = new_entity(between_type, IDENT("ret_addr"), ret_addr_type);
790 		set_entity_offset(old_bp_ent, 0);
791 		set_entity_offset(ret_addr_ent, get_type_size_bytes(old_bp_type));
792 		set_type_size_bytes(between_type, get_type_size_bytes(old_bp_type) + get_type_size_bytes(ret_addr_type));
793 		set_type_state(between_type, layout_fixed);
		/* omit-fp layout: only [ret_addr] */
795 		omit_fp_between_type = new_type_struct(IDENT("ia32_between_type_omit_fp"));
796 		omit_fp_ret_addr_ent = new_entity(omit_fp_between_type, IDENT("ret_addr"), ret_addr_type);
798 		set_entity_offset(omit_fp_ret_addr_ent, 0);
799 		set_type_size_bytes(omit_fp_between_type, get_type_size_bytes(ret_addr_type));
800 		set_type_state(omit_fp_between_type, layout_fixed);
803 	return env->flags.try_omit_fp ? omit_fp_between_type : between_type;
808 * Get the estimated cycle count for @p irn.
810 * @param self The this pointer.
811 * @param irn The node.
813 * @return The estimated cycle count for this operation
/**
 * Estimates the cycle count for @p irn: the node's latency, adjusted
 * for CopyB size and with a memory penalty for address-mode operations.
 */
815 static int ia32_get_op_estimated_cost(const void *self, const ir_node *irn)
818 	ia32_op_type_t op_tp;
819 	const ia32_irn_ops_t *ops = self;
823 	if (!is_ia32_irn(irn))
826 	assert(is_ia32_irn(irn));
828 	cost  = get_ia32_latency(irn);
829 	op_tp = get_ia32_op_type(irn);
831 	if (is_ia32_CopyB(irn)) {
833 		if (ARCH_INTEL(ops->cg->arch))
836 	else if (is_ia32_CopyB_i(irn)) {
837 		int size = get_tarval_long(get_ia32_Immop_tarval(irn));
		/* NOTE(review): (4/3) is integer division == 1, so the ceil() is a
		 * no-op and this is effectively 20 + size; (4.0/3) was likely meant. */
838 		cost     = 20 + (int)ceil((4/3) * size);
839 		if (ARCH_INTEL(ops->cg->arch))
842 	/* in case of address mode operations add additional cycles */
843 	else if (op_tp == ia32_AddrModeD || op_tp == ia32_AddrModeS) {
845 			In case of stack access add 5 cycles (we assume stack is in cache),
846 			other memory operations cost 20 cycles.
848 		cost += is_ia32_use_frame(irn) ? 5 : 20;
855 * Returns the inverse operation if @p irn, recalculating the argument at position @p i.
857 * @param irn The original operation
858 * @param i Index of the argument we want the inverse operation to yield
859 * @param inverse struct to be filled with the resulting inverse op
860 * @param obstack The obstack to use for allocation of the returned nodes array
861 * @return The inverse operation or NULL if operation invertible
/**
 * Returns the inverse operation of @p irn, recalculating the argument at
 * position @p i (Add<->Sub, Xor self-inverse, Not/Neg self-inverse).
 * Only normal-mode ia32 nodes with a real operand position (2 or 3) are
 * invertible; NULL-returns for the reject cases are in lines not shown.
 *
 * @param irn     The original operation.
 * @param i       Index of the argument the inverse should yield.
 * @param inverse Struct filled with the resulting inverse op.
 * @param obst    Obstack used to allocate the returned nodes array.
 */
863 static arch_inverse_t *ia32_get_inverse(const void *self, const ir_node *irn, int i, arch_inverse_t *inverse, struct obstack *obst) {
867 	ir_node *block, *noreg, *nomem;
870 	/* we cannot invert non-ia32 irns */
871 	if (! is_ia32_irn(irn))
874 	/* operand must always be a real operand (not base, index or mem) */
875 	if (i != 2 && i != 3)
878 	/* we don't invert address mode operations */
879 	if (get_ia32_op_type(irn) != ia32_Normal)
882 	irg      = get_irn_irg(irn);
883 	block    = get_nodes_block(irn);
884 	mode     = get_irn_mode(irn);
885 	irn_mode = get_irn_mode(irn);
886 	noreg    = get_irn_n(irn, 0);
887 	nomem    = new_r_NoMem(irg);
888 	dbg      = get_irn_dbg_info(irn);
890 	/* initialize structure */
891 	inverse->nodes = obstack_alloc(obst, 2 * sizeof(inverse->nodes[0]));
895 	switch (get_ia32_irn_opcode(irn)) {
897 			if (get_ia32_immop_type(irn) == ia32_ImmConst) {
898 				/* we have an add with a const here */
899 				/* invers == add with negated const */
900 				inverse->nodes[0] = new_rd_ia32_Add(dbg, irg, block, noreg, noreg, get_irn_n(irn, i), noreg, nomem);
902 				copy_ia32_Immop_attr(inverse->nodes[0], (ir_node *)irn);
903 				set_ia32_Immop_tarval(inverse->nodes[0], tarval_neg(get_ia32_Immop_tarval(irn)));
904 				set_ia32_commutative(inverse->nodes[0]);
906 			else if (get_ia32_immop_type(irn) == ia32_ImmSymConst) {
907 				/* we have an add with a symconst here */
908 				/* invers == sub with const */
909 				inverse->nodes[0] = new_rd_ia32_Sub(dbg, irg, block, noreg, noreg, get_irn_n(irn, i), noreg, nomem);
911 				copy_ia32_Immop_attr(inverse->nodes[0], (ir_node *)irn);
914 				/* normal add: inverse == sub */
915 				inverse->nodes[0] = new_rd_ia32_Sub(dbg, irg, block, noreg, noreg, (ir_node*) irn, get_irn_n(irn, i ^ 1), nomem);
920 			if (get_ia32_immop_type(irn) != ia32_ImmNone) {
921 				/* we have a sub with a const/symconst here */
922 				/* invers == add with this const */
923 				inverse->nodes[0] = new_rd_ia32_Add(dbg, irg, block, noreg, noreg, get_irn_n(irn, i), noreg, nomem);
924 				inverse->costs   += (get_ia32_immop_type(irn) == ia32_ImmSymConst) ? 5 : 1;
925 				copy_ia32_Immop_attr(inverse->nodes[0], (ir_node *)irn);
				/* sub: which operand to recompute decides add vs. reversed sub */
930 					inverse->nodes[0] = new_rd_ia32_Add(dbg, irg, block, noreg, noreg, (ir_node*) irn, get_irn_n(irn, 3), nomem);
933 					inverse->nodes[0] = new_rd_ia32_Sub(dbg, irg, block, noreg, noreg, get_irn_n(irn, 2), (ir_node*) irn, nomem);
939 			if (get_ia32_immop_type(irn) != ia32_ImmNone) {
940 				/* xor with const: inverse = xor */
941 				inverse->nodes[0] = new_rd_ia32_Xor(dbg, irg, block, noreg, noreg, get_irn_n(irn, i), noreg, nomem);
942 				inverse->costs   += (get_ia32_immop_type(irn) == ia32_ImmSymConst) ? 5 : 1;
943 				copy_ia32_Immop_attr(inverse->nodes[0], (ir_node *)irn);
				/* xor is its own inverse */
947 				inverse->nodes[0] = new_rd_ia32_Xor(dbg, irg, block, noreg, noreg, (ir_node *) irn, get_irn_n(irn, i), nomem);
			/* Not and Neg are involutions: applying them again inverts */
952 			inverse->nodes[0] = new_rd_ia32_Not(dbg, irg, block, noreg, noreg, (ir_node*) irn, nomem);
957 			inverse->nodes[0] = new_rd_ia32_Neg(dbg, irg, block, noreg, noreg, (ir_node*) irn, nomem);
962 			/* inverse operation not supported */
/**
 * Maps a value mode to the mode used for spilling it
 * (float modes get a special spill mode — return values are in
 * lines not shown here).
 */
969 static ir_mode *get_spill_mode_mode(const ir_mode *mode)
971 	if(mode_is_float(mode))
980 static ir_mode *get_spill_mode(const ir_node *node)
982 ir_mode *mode = get_irn_mode(node);
983 return get_spill_mode_mode(mode);
987 * Checks wether an addressmode reload for a node with mode mode is compatible
988 * with a spillslot of mode spill_mode
/**
 * Checks whether an address-mode reload of a value with @p mode is
 * compatible with a spill slot of @p spillmode: float modes must match
 * exactly (the non-float case is in lines not shown here).
 */
990 static int ia32_is_spillmode_compatible(const ir_mode *mode, const ir_mode *spillmode)
992 	if(mode_is_float(mode)) {
993 		return mode == spillmode;
1000 * Check if irn can load it's operand at position i from memory (source addressmode).
1001 * @param self Pointer to irn ops itself
1002 * @param irn The irn to be checked
1003 * @param i The operands position
1004 * @return Non-Zero if operand can be loaded
/**
 * Checks whether @p irn can load its operand at position @p i directly
 * from memory (source address mode).
 *
 * @param self Pointer to the irn ops.
 * @param irn  The node to be checked.
 * @param i    The operand position (2 or 3 are the real operands).
 * @return Non-zero iff the operand can be folded into a memory load.
 */
1006 static int ia32_possible_memory_operand(const void *self, const ir_node *irn, unsigned int i) {
1007 	ir_node *op = get_irn_n(irn, i);
1008 	const ir_mode *mode = get_irn_mode(op);
1009 	const ir_mode *spillmode = get_spill_mode(op);
1011 	if (! is_ia32_irn(irn)                            ||  /* must be an ia32 irn */
1012 	    get_irn_arity(irn) != 5                       ||  /* must be a binary operation */
1013 	    get_ia32_op_type(irn) != ia32_Normal          ||  /* must not already be a addressmode irn */
1014 	    ! (get_ia32_am_support(irn) & ia32_am_Source) ||  /* must be capable of source addressmode */
1015 	    ! ia32_is_spillmode_compatible(mode, spillmode) ||
1016 	    (i != 2 && i != 3)                            ||  /* a "real" operand position must be requested */
1017 	    (i == 2 && ! is_ia32_commutative(irn))        ||  /* if first operand requested irn must be commutative */
1018 	    is_ia32_use_frame(irn))                           /* must not already use frame */
/**
 * Folds operand @p i of @p irn into a frame load (source address mode):
 * swaps operands if needed (commutative case), switches the node to
 * AddrModeS, and rewires base to the frame and the mem-in to @p spill.
 */
1024 static void ia32_perform_memory_operand(const void *self, ir_node *irn, ir_node *spill, unsigned int i) {
1025 	const ia32_irn_ops_t *ops = self;
1026 	ia32_code_gen_t      *cg  = ops->cg;
1028 	assert(ia32_possible_memory_operand(self, irn, i) && "Cannot perform memory operand change");
	/* commutative case: move the requested operand into the foldable slot */
1031 		ir_node *tmp = get_irn_n(irn, 3);
1032 		set_irn_n(irn, 3, get_irn_n(irn, 2));
1033 		set_irn_n(irn, 2, tmp);
1036 	set_ia32_am_support(irn, ia32_am_Source);
1037 	set_ia32_op_type(irn, ia32_AddrModeS);
1038 	set_ia32_am_flavour(irn, ia32_B);
1039 	set_ia32_ls_mode(irn, get_irn_mode(get_irn_n(irn, i)));
1040 	set_ia32_use_frame(irn);
1041 	set_ia32_need_stackent(irn);
	/* in 0: frame base, in 3: noreg placeholder, in 4: memory from the spill */
1043 	set_irn_n(irn, 0, get_irg_frame(get_irn_irg(irn)));
1044 	set_irn_n(irn, 3, ia32_get_admissible_noreg(cg, irn, 3));
1045 	set_irn_n(irn, 4, spill);
1047 	//FIXME DBG_OPT_AM_S(reload, irn);
/* ABI callback vtable wired into the backend ABI handling
 * (init/done/prologue/epilogue entries are in lines not shown here). */
1050 static const be_abi_callbacks_t ia32_abi_callbacks = {
1053 	ia32_abi_get_between_type,
1054 	ia32_abi_dont_save_regs,
1059 /* fill register allocator interface */

/* irn-ops vtable: the register allocator's view of ia32 nodes. */
1061 static const arch_irn_ops_if_t ia32_irn_ops_if = {
1062 	ia32_get_irn_reg_req,
1067 	ia32_get_frame_entity,
1068 	ia32_set_frame_entity,
1069 	ia32_set_frame_offset,
1072 	ia32_get_op_estimated_cost,
1073 	ia32_possible_memory_operand,
1074 	ia32_perform_memory_operand,

/* Global irn-ops instance handed to the backend framework. */
1077 ia32_irn_ops_t ia32_irn_ops = {
1084 /**************************************************
1087 * ___ ___ __| | ___ __ _ ___ _ __ _| |_
1088 * / __/ _ \ / _` |/ _ \/ _` |/ _ \ '_ \ | | _|
1089 * | (_| (_) | (_| | __/ (_| | __/ | | | | | |
1090 * \___\___/ \__,_|\___|\__, |\___|_| |_| |_|_|
1093 **************************************************/
1096 * Transforms the standard firm graph into
1097 * an ia32 firm graph
/**
 * Code-generator hook: transforms the standard firm graph into an ia32
 * firm graph, then runs address-mode optimization and code placement,
 * dumping intermediate graphs when enabled.
 */
1099 static void ia32_prepare_graph(void *self) {
1100 	ia32_code_gen_t *cg = self;
1101 	DEBUG_ONLY(firm_dbg_module_t *old_mod = cg->mod;)
1103 	FIRM_DBG_REGISTER(cg->mod, "firm.be.ia32.transform");
1105 	/* transform psi condition trees */
1106 	ia32_pre_transform_phase(cg);
1108 	/* transform all remaining nodes */
1109 	ia32_transform_graph(cg);
1110 	//add_fpu_edges(cg->birg);
1112 	// Matze: disabled for now. Because after transformation start block has no
1113 	// self-loop anymore so it might be merged with its successor block. This
1114 	// will bring several nodes to the startblock which sometimes get scheduled
1115 	// before the initial IncSP/Barrier
1116 	//local_optimize_graph(cg->irg);
1119 		be_dump(cg->irg, "-transformed", dump_ir_block_graph_sched);
1121 	/* optimize address mode */
1122 	FIRM_DBG_REGISTER(cg->mod, "firm.be.ia32.am");
1123 	ia32_optimize_addressmode(cg);
1126 		be_dump(cg->irg, "-am", dump_ir_block_graph_sched);
1128 	/* do code placement, to optimize the position of constants */
1129 	place_code(cg->irg);
1132 		be_dump(cg->irg, "-place", dump_ir_block_graph_sched);
	/* restore the debug module that was active before this phase */
1134 	DEBUG_ONLY(cg->mod = old_mod;)
1138 * Dummy functions for hooks we don't need but which must be filled.
/** Dummy hook: nothing to do before scheduling, but the interface requires it. */
1140 static void ia32_before_sched(void *self) {
/**
 * Recursively removes @p irn and any predecessors that become unused,
 * rerouting a dangling memory Proj around the removed node.  Memory
 * edges are not followed (that would delete live stores).
 *
 * @param already_visited Bitset guarding against revisiting nodes.
 */
1143 static void remove_unused_nodes(ir_node *irn, bitset_t *already_visited) {
1146 	ir_node *mem_proj = NULL;
1151 	mode = get_irn_mode(irn);
1153 	/* check if we already saw this node or the node has more than one user */
1154 	if (bitset_contains_irn(already_visited, irn) || get_irn_n_edges(irn) > 1) {
1158 	/* mark irn visited */
1159 	bitset_add_irn(already_visited, irn);
1161 	/* non-Tuple nodes with one user: ok, return */
	/* NOTE(review): the comment says "one user" but the code tests >= 1,
	 * which includes zero-user nodes only if edges count differently —
	 * TODO confirm intended condition. */
1162 	if (get_irn_n_edges(irn) >= 1 && mode != mode_T) {
1166 	/* tuple node has one user which is not the mem proj-> ok */
1167 	if (mode == mode_T && get_irn_n_edges(irn) == 1) {
1168 		mem_proj = ia32_get_proj_for_mode(irn, mode_M);
1169 		if (mem_proj == NULL) {
1174 	arity = get_irn_arity(irn);
1175 	for (i = 0; i < arity; ++i) {
1176 		ir_node *pred = get_irn_n(irn, i);
1178 		/* do not follow memory edges or we will accidentally remove stores */
1179 		if (get_irn_mode(pred) == mode_M) {
1180 			if(mem_proj != NULL) {
				/* bypass the removed node: route mem users to the predecessor */
1181 				edges_reroute(mem_proj, pred, get_irn_irg(mem_proj));
1187 		set_irn_n(irn, i, new_Bad());
1190 			The current node is about to be removed: if the predecessor
1191 			has only this node as user, it need to be removed as well.
1193 		if (get_irn_n_edges(pred) <= 1)
1194 			remove_unused_nodes(pred, already_visited);
1197 	// we need to set the presd to Bad again to also get the memory edges
1198 	arity = get_irn_arity(irn);
1199 	for (i = 0; i < arity; ++i) {
1200 		set_irn_n(irn, i, new_Bad());
	/* unlink from the schedule if it was scheduled (body in unseen lines) */
1203 	if (sched_is_scheduled(irn)) {
/** Graph-walker callback: triggers removal of dead ia32 Load nodes. */
1208 static void remove_unused_loads_walker(ir_node *irn, void *env) {
1209 	bitset_t *already_visited = env;
1210 	if (is_ia32_Ld(irn) && ! bitset_contains_irn(already_visited, irn))
1211 		remove_unused_nodes(irn, env);
/**
 * State-handling callback: spills the FPU control word via FnstCW to a
 * frame slot.  ChangeCW values need no spill unless @p force is set.
 *
 * @return The spill node, or NULL when no spill was needed.
 */
1214 static ir_node *create_fpu_mode_spill(void *env, ir_node *value, int force)
1216 	ia32_code_gen_t *cg = env;
1217 	ir_graph *irg = get_irn_irg(value);
1218 	ir_node *block = get_nodes_block(value);
1219 	ir_node *noreg = ia32_new_NoReg_gp(cg);
1220 	ir_node *nomem = new_NoMem();
1221 	ir_node *spill = NULL;
1223 	if(force == 1 || !is_ia32_ChangeCW(value)) {
1224 		spill = new_rd_ia32_FnstCW(NULL, irg, block, noreg, noreg, value,
		/* destination address mode: store the control word to the frame */
1226 		set_ia32_am_support(spill, ia32_am_Dest);
1227 		set_ia32_op_type(spill, ia32_AddrModeD);
1228 		set_ia32_am_flavour(spill, ia32_B);
1229 		set_ia32_ls_mode(spill, ia32_reg_classes[CLASS_ia32_fp_cw].mode);
1230 		set_ia32_use_frame(spill);
1232 		sched_add_after(value, spill);
	/* NOTE(review): unconditional stderr output — looks like leftover
	 * debugging; consider removing or gating behind DEBUG_ONLY. */
1235 	ir_fprintf(stderr, "Created spill of %+F (forced %d)\n", value, force);
/**
 * be_assure_state callback: reloads the x87 FPU control word before @p before.
 * With an existing spill slot a single FldCW suffices; otherwise the current
 * control word is stored, loaded into a GP register, OR-ed with the rounding/
 * precision bits and written back via FldCW.
 *
 * @param env     the ia32 code generator (ia32_code_gen_t *)
 * @param value   the fpcw value to restore
 * @param spill   the spill node created by create_fpu_mode_spill (may be NULL)
 * @param before  reload is scheduled immediately before this node
 * @return presumably the node producing the reloaded state — TODO confirm
 *         (return statement not visible in this excerpt)
 */
1239 static ir_node *create_fpu_mode_reload(void *env, ir_node *value,
1240 ir_node *spill, ir_node *before)
1242 ia32_code_gen_t *cg = env;
1243 ir_graph *irg = get_irn_irg(value);
1244 ir_node *block = get_nodes_block(before);
1245 ir_node *noreg = ia32_new_NoReg_gp(cg);
1246 ir_node *reload = NULL;
/* fast path: a spill slot exists, just reload the control word from it */
1249 reload = new_rd_ia32_FldCW(NULL, irg, block, noreg, noreg, spill);
1250 set_ia32_am_support(reload, ia32_am_Source);
1251 set_ia32_op_type(reload, ia32_AddrModeS);
1252 set_ia32_am_flavour(reload, ia32_B);
1253 set_ia32_ls_mode(reload, ia32_reg_classes[CLASS_ia32_fp_cw].mode);
1254 set_ia32_use_frame(reload);
1256 sched_add_before(before, reload);
/* slow path: no spill — store cw, modify it in a GP register, write back */
1258 ir_mode *lsmode = ia32_reg_classes[CLASS_ia32_fp_cw].mode;
1259 ir_node *nomem = new_NoMem();
1260 ir_node *cwstore, *load, *load_res, *or, *store, *fldcw;
1262 /* TODO: value is not correct... */
1263 /* TODO: reuse existing spills... */
1264 cwstore = new_rd_ia32_FnstCW(NULL, irg, block, noreg, noreg, value,
1266 set_ia32_am_support(cwstore, ia32_am_Dest);
1267 set_ia32_op_type(cwstore, ia32_AddrModeD);
1268 set_ia32_am_flavour(cwstore, ia32_B);
1269 set_ia32_ls_mode(cwstore, lsmode);
1270 set_ia32_use_frame(cwstore);
1271 sched_add_before(before, cwstore);
1273 load = new_rd_ia32_Load(NULL, irg, block, noreg, noreg, cwstore);
1274 set_ia32_am_support(load, ia32_am_Source);
1275 set_ia32_op_type(load, ia32_AddrModeS);
1276 set_ia32_am_flavour(load, ia32_B);
1277 set_ia32_ls_mode(load, lsmode);
1278 set_ia32_use_frame(load);
1279 sched_add_before(before, load);
1281 load_res = new_r_Proj(irg, block, load, mode_Iu, pn_ia32_Load_res);
1282 sched_add_before(before, load_res);
1284 /* TODO: make the actual mode configurable in ChangeCW... */
1285 or = new_rd_ia32_Or(NULL, irg, block, noreg, noreg, load_res, noreg,
/* 0x3072 sets the desired control-word bits (immediate operand of the Or) */
1287 set_ia32_Immop_tarval(or, new_tarval_from_long(0x3072, mode_Iu));
1288 sched_add_before(before, or);
1290 store = new_rd_ia32_Store(NULL, irg, block, noreg, noreg, or, nomem);
1291 set_ia32_am_support(store, ia32_am_Dest);
1292 set_ia32_op_type(store, ia32_AddrModeD);
1293 set_ia32_am_flavour(store, ia32_B);
1294 set_ia32_ls_mode(store, lsmode);
1295 set_ia32_use_frame(store);
1296 sched_add_before(before, store);
1298 fldcw = new_rd_ia32_FldCW(NULL, irg, block, noreg, noreg, store);
1299 set_ia32_am_support(fldcw, ia32_am_Source);
1300 set_ia32_op_type(fldcw, ia32_AddrModeS);
1301 set_ia32_am_flavour(fldcw, ia32_B);
1302 set_ia32_ls_mode(fldcw, lsmode);
1303 set_ia32_use_frame(fldcw);
1304 sched_add_before(before, fldcw);
1309 ir_fprintf(stderr, "Create reload of %+F (spill %+F) before %+F\n",
1310 value, spill, before);
1315 * Called before the register allocator.
1316 * Calculate a block schedule here. We need it for the x87
1317 * simulator and the emitter.
/* @param self  the ia32 code generator (ia32_code_gen_t *) */
1319 static void ia32_before_ra(void *self) {
1320 ia32_code_gen_t *cg = self;
1321 bitset_t *already_visited = bitset_irg_alloca(cg->irg);
1324 Handle special case:
1325 There are sometimes unused loads, only pinned by memory.
1326 We need to remove those Loads and all other nodes which won't be used
1327 after removing the Load from schedule.
1329 irg_walk_graph(cg->irg, NULL, remove_unused_loads_walker, already_visited);
/* make the fpcw state available wherever it is needed (spill/reload hooks) */
1331 be_assure_state(cg->birg, &ia32_fp_cw_regs[REG_FPCW],
1332 cg, create_fpu_mode_spill, create_fpu_mode_reload);
1333 be_dump(cg->irg, "-assure-state", dump_ir_block_graph_sched);
1338 * Transforms a be_Reload into a ia32 Load.
/*
 * The reload's frame entity, mode and register are carried over to the new
 * ia32 Load (xLoad/vfld/xxLoad for float/x87/128bit-SSE, plain Load for GP).
 *
 * @param cg    the ia32 code generator
 * @param node  the be_Reload node to replace
 */
1340 static void transform_to_Load(ia32_code_gen_t *cg, ir_node *node) {
1341 ir_graph *irg = get_irn_irg(node);
1342 dbg_info *dbg = get_irn_dbg_info(node);
1343 ir_node *block = get_nodes_block(node);
1344 ir_entity *ent = be_get_frame_entity(node);
1345 ir_mode *mode = get_irn_mode(node);
1346 ir_mode *spillmode = get_spill_mode(node);
1347 ir_node *noreg = ia32_new_NoReg_gp(cg);
1348 ir_node *sched_point = NULL;
1349 ir_node *ptr = get_irg_frame(irg);
1350 ir_node *mem = get_irn_n(node, be_pos_Reload_mem);
1351 ir_node *new_op, *proj;
1352 const arch_register_t *reg;
/* remember the insertion point so the replacement keeps the schedule slot */
1354 if (sched_is_scheduled(node)) {
1355 sched_point = sched_prev(node);
1358 if (mode_is_float(spillmode)) {
1360 new_op = new_rd_ia32_xLoad(dbg, irg, block, ptr, noreg, mem);
1362 new_op = new_rd_ia32_vfld(dbg, irg, block, ptr, noreg, mem);
1364 else if (get_mode_size_bits(spillmode) == 128) {
1365 // Reload 128 bit sse registers
1366 new_op = new_rd_ia32_xxLoad(dbg, irg, block, ptr, noreg, mem);
1369 new_op = new_rd_ia32_Load(dbg, irg, block, ptr, noreg, mem);
1371 set_ia32_am_support(new_op, ia32_am_Source);
1372 set_ia32_op_type(new_op, ia32_AddrModeS);
1373 set_ia32_am_flavour(new_op, ia32_B);
1374 set_ia32_ls_mode(new_op, spillmode);
1375 set_ia32_frame_ent(new_op, ent);
1376 set_ia32_use_frame(new_op);
1378 DBG_OPT_RELOAD2LD(node, new_op);
1380 proj = new_rd_Proj(dbg, irg, block, new_op, mode, pn_ia32_Load_res);
1383 sched_add_after(sched_point, new_op);
1384 sched_add_after(new_op, proj);
1389 /* copy the register from the old node to the new Load */
1390 reg = arch_get_irn_register(cg->arch_env, node);
1391 arch_set_irn_register(cg->arch_env, new_op, reg);
1393 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(cg, node));
1395 exchange(node, proj);
1399 * Transforms a be_Spill node into a ia32 Store.
/*
 * Selects the store variant from the spilled value's mode (xStore/vfst for
 * float, xxStore for 128bit SSE, Store8Bit for 8bit, Store otherwise).
 * Spills of Unknown values are elided.
 *
 * @param cg    the ia32 code generator
 * @param node  the be_Spill node to replace
 */
1401 static void transform_to_Store(ia32_code_gen_t *cg, ir_node *node) {
1402 ir_graph *irg = get_irn_irg(node);
1403 dbg_info *dbg = get_irn_dbg_info(node);
1404 ir_node *block = get_nodes_block(node);
1405 ir_entity *ent = be_get_frame_entity(node);
1406 const ir_node *spillval = get_irn_n(node, be_pos_Spill_val);
1407 ir_mode *mode = get_spill_mode(spillval);
1408 ir_node *noreg = ia32_new_NoReg_gp(cg);
1409 ir_node *nomem = new_rd_NoMem(irg);
1410 ir_node *ptr = get_irg_frame(irg);
1411 ir_node *val = get_irn_n(node, be_pos_Spill_val);
1413 ir_node *sched_point = NULL;
1415 if (sched_is_scheduled(node)) {
1416 sched_point = sched_prev(node);
1419 /* No need to spill unknown values... */
1420 if(is_ia32_Unknown_GP(val) ||
1421 is_ia32_Unknown_VFP(val) ||
1422 is_ia32_Unknown_XMM(val)) {
/* NOTE(review): 'store' is exchanged here before the assignments below —
 * its definition is not visible in this excerpt; verify against full source */
1427 exchange(node, store);
1431 if (mode_is_float(mode)) {
1433 store = new_rd_ia32_xStore(dbg, irg, block, ptr, noreg, val, nomem);
1435 store = new_rd_ia32_vfst(dbg, irg, block, ptr, noreg, val, nomem);
1437 else if (get_mode_size_bits(mode) == 128) {
1438 // Spill 128 bit SSE registers
1439 store = new_rd_ia32_xxStore(dbg, irg, block, ptr, noreg, val, nomem);
1441 else if (get_mode_size_bits(mode) == 8) {
1442 store = new_rd_ia32_Store8Bit(dbg, irg, block, ptr, noreg, val, nomem);
1445 store = new_rd_ia32_Store(dbg, irg, block, ptr, noreg, val, nomem);
1448 set_ia32_am_support(store, ia32_am_Dest);
1449 set_ia32_op_type(store, ia32_AddrModeD);
1450 set_ia32_am_flavour(store, ia32_B);
1451 set_ia32_ls_mode(store, mode);
1452 set_ia32_frame_ent(store, ent);
1453 set_ia32_use_frame(store);
1454 SET_IA32_ORIG_NODE(store, ia32_get_old_node_name(cg, node));
1455 DBG_OPT_SPILL2ST(node, store);
1458 sched_add_after(sched_point, store);
1462 exchange(node, store);
/**
 * Creates an ia32 Push that pushes a 32bit frame-entity slot, scheduled
 * before @p schedpoint. Used by transform_MemPerm.
 *
 * @param cg          the ia32 code generator
 * @param node        node providing dbg info and block
 * @param schedpoint  the push is scheduled before this node
 * @param sp          current stack pointer value
 * @param mem         memory dependency of the push
 * @param ent         frame entity to push from
 * @return the created Push node
 */
1465 static ir_node *create_push(ia32_code_gen_t *cg, ir_node *node, ir_node *schedpoint, ir_node *sp, ir_node *mem, ir_entity *ent) {
1466 ir_graph *irg = get_irn_irg(node);
1467 dbg_info *dbg = get_irn_dbg_info(node);
1468 ir_node *block = get_nodes_block(node);
1469 ir_node *noreg = ia32_new_NoReg_gp(cg);
1470 ir_node *frame = get_irg_frame(irg);
1472 ir_node *push = new_rd_ia32_Push(dbg, irg, block, frame, noreg, noreg, sp, mem);
1474 set_ia32_frame_ent(push, ent);
1475 set_ia32_use_frame(push);
1476 set_ia32_op_type(push, ia32_AddrModeS);
1477 set_ia32_am_flavour(push, ia32_B);
1478 set_ia32_ls_mode(push, mode_Is);
1480 sched_add_before(schedpoint, push);
/**
 * Creates an ia32 Pop that pops a 32bit value into a frame-entity slot,
 * scheduled before @p schedpoint. Counterpart to create_push.
 *
 * @param cg          the ia32 code generator
 * @param node        node providing dbg info and block
 * @param schedpoint  the pop is scheduled before this node
 * @param sp          current stack pointer value
 * @param ent         frame entity to pop into
 * @return the created Pop node
 */
1484 static ir_node *create_pop(ia32_code_gen_t *cg, ir_node *node, ir_node *schedpoint, ir_node *sp, ir_entity *ent) {
1485 ir_graph *irg = get_irn_irg(node);
1486 dbg_info *dbg = get_irn_dbg_info(node);
1487 ir_node *block = get_nodes_block(node);
1488 ir_node *noreg = ia32_new_NoReg_gp(cg);
1489 ir_node *frame = get_irg_frame(irg);
1491 ir_node *pop = new_rd_ia32_Pop(dbg, irg, block, frame, noreg, sp, new_NoMem());
1493 set_ia32_frame_ent(pop, ent);
1494 set_ia32_use_frame(pop);
1495 set_ia32_op_type(pop, ia32_AddrModeD);
1496 set_ia32_am_flavour(pop, ia32_am_OB);
1497 set_ia32_ls_mode(pop, mode_Is);
1499 sched_add_before(schedpoint, pop);
/**
 * Creates a scheduled Proj extracting the new stack pointer value from a
 * Push/Pop node and pins it to the ESP register.
 *
 * @param cg          the ia32 code generator
 * @param node        node providing dbg info and block
 * @param pred        the Push/Pop node producing the stack pointer
 * @param pos         projection number of the stack-pointer result
 * @param schedpoint  the Proj is scheduled before this node
 * @return presumably the created Proj — return not visible in this excerpt
 */
1504 static ir_node* create_spproj(ia32_code_gen_t *cg, ir_node *node, ir_node *pred, int pos, ir_node *schedpoint) {
1505 ir_graph *irg = get_irn_irg(node);
1506 dbg_info *dbg = get_irn_dbg_info(node);
1507 ir_node *block = get_nodes_block(node);
1508 ir_mode *spmode = mode_Iu;
1509 const arch_register_t *spreg = &ia32_gp_regs[REG_ESP];
1512 sp = new_rd_Proj(dbg, irg, block, pred, spmode, pos);
1513 arch_set_irn_register(cg->arch_env, sp, spreg);
1514 sched_add_before(schedpoint, sp);
1520 * Transform memperm, currently we do this the ugly way and produce
1521 * push/pop into/from memory cascades. This is possible without using
/*
 * All inputs are pushed onto the stack first; they are then popped into
 * the output entities in reverse order. 64bit entities are handled with a
 * second push/pop at offset 4. A Keep holds the final stack pointer alive,
 * and the MemPerm's memory Projs are rerouted to the matching Pops.
 *
 * @param cg    the ia32 code generator
 * @param node  the be_MemPerm node to lower
 */
1524 static void transform_MemPerm(ia32_code_gen_t *cg, ir_node *node) {
1525 ir_graph *irg = get_irn_irg(node);
1526 ir_node *block = get_nodes_block(node);
1530 ir_node *sp = be_abi_get_ignore_irn(cg->birg->abi, &ia32_gp_regs[REG_ESP]);
1531 const ir_edge_t *edge;
1532 const ir_edge_t *next;
1535 arity = be_get_MemPerm_entity_arity(node);
1536 pops = alloca(arity * sizeof(pops[0]));
/* phase 1: push all input entities onto the stack */
1539 for(i = 0; i < arity; ++i) {
1540 ir_entity *ent = be_get_MemPerm_in_entity(node, i);
1541 ir_type *enttype = get_entity_type(ent);
1542 int entbits = get_type_size_bits(enttype);
1543 ir_node *mem = get_irn_n(node, i + 1);
1546 assert( (entbits == 32 || entbits == 64) && "spillslot on x86 should be 32 or 64 bit");
1548 push = create_push(cg, node, node, sp, mem, ent);
1549 sp = create_spproj(cg, node, push, pn_ia32_Push_stack, node);
1551 // add another push after the first one
1552 push = create_push(cg, node, node, sp, mem, ent);
1553 add_ia32_am_offs_int(push, 4);
1554 sp = create_spproj(cg, node, push, pn_ia32_Push_stack, node);
1557 set_irn_n(node, i, new_Bad());
/* phase 2: pop into the output entities, in reverse order */
1561 for(i = arity - 1; i >= 0; --i) {
1562 ir_entity *ent = be_get_MemPerm_out_entity(node, i);
1563 ir_type *enttype = get_entity_type(ent);
1564 int entbits = get_type_size_bits(enttype);
1568 assert( (entbits == 32 || entbits == 64) && "spillslot on x86 should be 32 or 64 bit");
1570 pop = create_pop(cg, node, node, sp, ent);
1571 sp = create_spproj(cg, node, pop, pn_ia32_Pop_stack, node);
1573 add_ia32_am_offs_int(pop, 4);
1575 // add another pop after the first one
1576 pop = create_pop(cg, node, node, sp, ent);
1577 sp = create_spproj(cg, node, pop, pn_ia32_Pop_stack, node);
/* keep the final stack pointer alive */
1584 keep = be_new_Keep(&ia32_reg_classes[CLASS_ia32_gp], irg, block, 1, in);
1585 sched_add_before(node, keep);
1587 // exchange memprojs
1588 foreach_out_edge_safe(node, edge, next) {
1589 ir_node *proj = get_edge_src_irn(edge);
1590 int p = get_Proj_proj(proj);
1594 set_Proj_pred(proj, pops[p]);
1595 set_Proj_proj(proj, 3);
/* disconnect the now-dead MemPerm from its inputs */
1599 arity = get_irn_arity(node);
1600 for(i = 0; i < arity; ++i) {
1601 set_irn_n(node, i, new_Bad());
1607 * Block-Walker: Calls the transform functions Spill and Reload.
/*
 * @param block  the block whose schedule is rewritten
 * @param env    the ia32 code generator (ia32_code_gen_t *)
 */
1609 static void ia32_after_ra_walker(ir_node *block, void *env) {
1610 ir_node *node, *prev;
1611 ia32_code_gen_t *cg = env;
1613 /* beware: the schedule is changed here */
/* iterate backwards and fetch prev before transforming, since the
 * transforms remove the current node from the schedule */
1614 for (node = sched_last(block); !sched_is_begin(node); node = prev) {
1615 prev = sched_prev(node);
1617 if (be_is_Reload(node)) {
1618 transform_to_Load(cg, node);
1619 } else if (be_is_Spill(node)) {
1620 transform_to_Store(cg, node);
1621 } else if(be_is_MemPerm(node)) {
1622 transform_MemPerm(cg, node);
1628 * Collects nodes that need frame entities assigned.
/*
 * Registers be_Reloads and frame-using ia32 loads with the frame entity
 * coalescer; asserts that only store-like nodes may remain without one.
 *
 * @param node  the currently visited node
 * @param data  the frame entity coalescer environment (be_fec_env_t *)
 */
1630 static void ia32_collect_frame_entity_nodes(ir_node *node, void *data)
1632 be_fec_env_t *env = data;
1634 if (be_is_Reload(node) && be_get_frame_entity(node) == NULL) {
1635 const ir_mode *mode = get_spill_mode_mode(get_irn_mode(node));
1636 int align = get_mode_size_bytes(mode);
1637 be_node_needs_frame_entity(env, node, mode, align);
1638 } else if(is_ia32_irn(node) && get_ia32_frame_ent(node) == NULL
1639 && is_ia32_use_frame(node)) {
1640 if (is_ia32_need_stackent(node) || is_ia32_Load(node)) {
1641 const ir_mode *mode = get_ia32_ls_mode(node);
1642 int align = get_mode_size_bytes(mode);
1643 be_node_needs_frame_entity(env, node, mode, align);
1644 } else if (is_ia32_vfild(node) || is_ia32_xLoad(node)) {
1645 const ir_mode *mode = get_ia32_ls_mode(node);
1647 be_node_needs_frame_entity(env, node, mode, align);
1648 } else if (is_ia32_SetST0(node)) {
1649 const ir_mode *mode = get_ia32_ls_mode(node);
1651 be_node_needs_frame_entity(env, node, mode, align);
/* stores get their frame entity from their input; everything else
 * reaching this point is unexpected */
1654 if(!is_ia32_Store(node)
1655 && !is_ia32_xStore(node)
1656 && !is_ia32_xStoreSimple(node)
1657 && !is_ia32_vfist(node)
1658 && !is_ia32_GetST0(node)) {
1667 * We transform Spill and Reload here. This needs to be done before
1668 * stack biasing otherwise we would miss the corrected offset for these nodes.
/* @param self  the ia32 code generator (ia32_code_gen_t *) */
1670 static void ia32_after_ra(void *self) {
1671 ia32_code_gen_t *cg = self;
1672 ir_graph *irg = cg->irg;
1673 be_fec_env_t *fec_env = be_new_frame_entity_coalescer(cg->birg);
1675 /* create and coalesce frame entities */
1676 irg_walk_graph(irg, NULL, ia32_collect_frame_entity_nodes, fec_env);
1677 be_assign_entities(fec_env);
1678 be_free_frame_entity_coalescer(fec_env);
1680 irg_block_walk_graph(irg, NULL, ia32_after_ra_walker, cg);
1682 ia32_finish_irg(irg, cg);
1686 * Last touchups for the graph before emit: x87 simulation to replace the
1687 * virtual with real x87 instructions, creating a block schedule and peephole
/* @param self  the ia32 code generator (ia32_code_gen_t *) */
1690 static void ia32_finish(void *self) {
1691 ia32_code_gen_t *cg = self;
1692 ir_graph *irg = cg->irg;
1694 /* if we do x87 code generation, rewrite all the virtual instructions and registers */
1695 if (cg->used_fp == fp_x87 || cg->force_sim) {
1696 x87_simulate_graph(cg->arch_env, cg->birg);
1699 /* create block schedule, this also removes empty blocks which might
1700 * produce critical edges */
1701 cg->blk_sched = be_create_block_schedule(irg, cg->birg->exec_freq);
1703 /* do peephole optimisations */
1704 ia32_peephole_optimization(irg, cg);
1708 * Emits the code, closes the output file and frees
1709 * the code generator interface.
/* @param self  the ia32 code generator (ia32_code_gen_t *) */
1711 static void ia32_codegen(void *self) {
1712 ia32_code_gen_t *cg = self;
1713 ir_graph *irg = cg->irg;
1715 ia32_gen_routine(cg, irg);
1719 /* remove it from the isa */
1722 /* de-allocate code generator */
1723 del_set(cg->reg_set);
1727 static void *ia32_cg_init(be_irg_t *birg);
/**
 * The ia32 code generator interface: maps the backend driver's phase hooks
 * onto the functions defined above.
 */
1729 static const arch_code_generator_if_t ia32_code_gen_if = {
1731 NULL, /* before abi introduce hook */
1734 ia32_before_sched, /* before scheduling hook */
1735 ia32_before_ra, /* before register allocation hook */
1736 ia32_after_ra, /* after register allocation hook */
1737 ia32_finish, /* called before codegen */
1738 ia32_codegen /* emit && done */
1742 * Initializes a IA32 code generator.
/*
 * Allocates and fills a fresh ia32_code_gen_t for the given be_irg, copying
 * ISA-level settings for convenient access.
 *
 * @param birg  the backend irg to generate code for
 * @return the new code generator (as arch_code_generator_t *)
 */
1744 static void *ia32_cg_init(be_irg_t *birg) {
1745 ia32_isa_t *isa = (ia32_isa_t *)birg->main_env->arch_env->isa;
1746 ia32_code_gen_t *cg = xcalloc(1, sizeof(*cg));
1748 cg->impl = &ia32_code_gen_if;
1749 cg->irg = birg->irg;
1750 cg->reg_set = new_set(ia32_cmp_irn_reg_assoc, 1024);
1751 cg->arch_env = birg->main_env->arch_env;
1754 cg->blk_sched = NULL;
1755 cg->fp_kind = isa->fp_kind;
1756 cg->used_fp = fp_none;
1757 cg->dump = (birg->main_env->options->dump_flags & DUMP_BE) ? 1 : 0;
1759 FIRM_DBG_REGISTER(cg->mod, "firm.be.ia32.cg");
1761 /* copy optimizations from isa for easier access */
1763 cg->arch = isa->arch;
1764 cg->opt_arch = isa->opt_arch;
/* reset the name obstack for the new irg's node names */
1770 if (isa->name_obst) {
1771 obstack_free(isa->name_obst, NULL);
1772 obstack_init(isa->name_obst);
1776 cur_reg_set = cg->reg_set;
1778 ia32_irn_ops.cg = cg;
1780 return (arch_code_generator_t *)cg;
1785 /*****************************************************************
1786 * ____ _ _ _____ _____
1787 * | _ \ | | | | |_ _|/ ____| /\
1788 * | |_) | __ _ ___| | _____ _ __ __| | | | | (___ / \
1789 * | _ < / _` |/ __| |/ / _ \ '_ \ / _` | | | \___ \ / /\ \
1790 * | |_) | (_| | (__| < __/ | | | (_| | _| |_ ____) / ____ \
1791 * |____/ \__,_|\___|_|\_\___|_| |_|\__,_| |_____|_____/_/ \_\
1793 *****************************************************************/
1796 * Set output modes for GCC
1798 static const tarval_mode_info mo_integer = {
1805 * set the tarval output mode of all integer modes to decimal
1807 static void set_tarval_output_modes(void)
1811 for (i = get_irp_n_modes() - 1; i >= 0; --i) {
1812 ir_mode *mode = get_irp_mode(i);
1814 if (mode_is_int(mode))
1815 set_tarval_mode_output_option(mode, &mo_integer);
/* forward declaration; defined at the end of this file */
1819 const arch_isa_if_t ia32_isa_if;
1822 * The template that generates a new ISA object.
1823 * Note that this template can be changed by command line
1826 static ia32_isa_t ia32_isa_template = {
1828 &ia32_isa_if, /* isa interface implementation */
1829 &ia32_gp_regs[REG_ESP], /* stack pointer register */
1830 &ia32_gp_regs[REG_EBP], /* base pointer register */
1831 -1, /* stack direction */
1832 NULL, /* main environment */
1834 {}, /* emitter environment */
1835 NULL, /* 16bit register names */
1836 NULL, /* 8bit register names */
1840 IA32_OPT_INCDEC | /* optimize add 1, sub 1 into inc/dec default: on */
1841 IA32_OPT_DOAM | /* optimize address mode default: on */
1842 IA32_OPT_LEA | /* optimize for LEAs default: on */
1843 IA32_OPT_PLACECNST | /* place constants immediately before instructions, default: on */
1844 IA32_OPT_IMMOPS | /* operations can use immediates, default: on */
1845 IA32_OPT_PUSHARGS), /* create pushs for function argument passing, default: on */
1846 arch_pentium_4, /* instruction architecture */
1847 arch_pentium_4, /* optimize for architecture */
1848 fp_sse2, /* use sse2 unit */
1849 NULL, /* current code generator */
1851 NULL, /* name obstack */
1852 0 /* name obst size */
1857 * Initializes the backend ISA.
/*
 * Creates the ia32 ISA object from the (command-line adjusted) template,
 * registers opcodes/registers, selects the FP unit for the target CPU and
 * prepares the emitter.
 *
 * @param file_handle  output file for the assembler code
 * @return presumably the new ia32_isa_t — return not visible in this excerpt
 */
1859 static void *ia32_init(FILE *file_handle) {
1860 static int inited = 0;
1866 set_tarval_output_modes();
1868 isa = xmalloc(sizeof(*isa));
1869 memcpy(isa, &ia32_isa_template, sizeof(*isa));
1871 ia32_register_init(isa);
1872 ia32_create_opcodes();
1873 ia32_register_copy_attr_func();
1875 if ((ARCH_INTEL(isa->arch) && isa->arch < arch_pentium_4) ||
1876 (ARCH_AMD(isa->arch) && isa->arch < arch_athlon))
1877 /* no SSE2 for these cpu's */
1878 isa->fp_kind = fp_x87;
1880 if (ARCH_INTEL(isa->opt_arch) && isa->opt_arch >= arch_pentium_4) {
1881 /* Pentium 4 don't like inc and dec instructions */
1882 isa->opt &= ~IA32_OPT_INCDEC;
1885 be_emit_init_env(&isa->emit, file_handle);
1886 isa->regs_16bit = pmap_create();
1887 isa->regs_8bit = pmap_create();
1888 isa->types = pmap_create();
1889 isa->tv_ent = pmap_create();
1890 isa->cpu = ia32_init_machine_description();
1892 ia32_build_16bit_reg_map(isa->regs_16bit);
1893 ia32_build_8bit_reg_map(isa->regs_8bit);
1895 /* patch register names of x87 registers */
1896 ia32_st_regs[0].name = "st";
1897 ia32_st_regs[1].name = "st(1)";
1898 ia32_st_regs[2].name = "st(2)";
1899 ia32_st_regs[3].name = "st(3)";
1900 ia32_st_regs[4].name = "st(4)";
1901 ia32_st_regs[5].name = "st(5)";
1902 ia32_st_regs[6].name = "st(6)";
1903 ia32_st_regs[7].name = "st(7)";
1906 isa->name_obst = xmalloc(sizeof(*isa->name_obst));
1907 obstack_init(isa->name_obst);
1910 ia32_handle_intrinsics();
1912 /* needed for the debug support */
1913 be_gas_emit_switch_section(&isa->emit, GAS_SECTION_TEXT);
1914 be_emit_cstring(&isa->emit, ".Ltext0:\n");
1915 be_emit_write_line(&isa->emit);
1925 * Closes the output file and frees the ISA structure.
/* @param self  the ia32 ISA object (ia32_isa_t *) */
1927 static void ia32_done(void *self) {
1928 ia32_isa_t *isa = self;
1930 /* emit now all global declarations */
1931 be_gas_emit_decls(&isa->emit, isa->arch_isa.main_env);
1933 pmap_destroy(isa->regs_16bit);
1934 pmap_destroy(isa->regs_8bit);
1935 pmap_destroy(isa->tv_ent);
1936 pmap_destroy(isa->types);
1939 obstack_free(isa->name_obst, NULL);
1942 be_emit_destroy_env(&isa->emit);
1949 * Return the number of register classes for this architecture.
1950 * We report always these:
1951 * - the general purpose registers
1952 * - the SSE floating point register set
1953 * - the virtual floating point registers
1954 * - the SSE vector register set
1956 static int ia32_get_n_reg_class(const void *self) {
1961 * Return the register class for index i.
1963 static const arch_register_class_t *ia32_get_reg_class(const void *self, int i) {
1966 return &ia32_reg_classes[CLASS_ia32_gp];
1968 return &ia32_reg_classes[CLASS_ia32_xmm];
1970 return &ia32_reg_classes[CLASS_ia32_vfp];
1972 return &ia32_reg_classes[CLASS_ia32_fp_cw];
1974 assert(0 && "Invalid ia32 register class requested.");
1980 * Get the register class which shall be used to store a value of a given mode.
1981 * @param self The this pointer.
1982 * @param mode The mode in question.
1983 * @return A register class which can hold values of the given mode.
1985 const arch_register_class_t *ia32_get_reg_class_for_mode(const void *self, const ir_mode *mode) {
1986 const ia32_isa_t *isa = self;
1987 if (mode_is_float(mode)) {
/* floats go to xmm registers with SSE2, virtual x87 registers otherwise */
1988 return USE_SSE2(isa) ? &ia32_reg_classes[CLASS_ia32_xmm] : &ia32_reg_classes[CLASS_ia32_vfp];
1991 return &ia32_reg_classes[CLASS_ia32_gp];
1995 * Get the ABI restrictions for procedure calls.
1996 * @param self The this pointer.
1997 * @param method_type The type of the method (procedure) in question.
1998 * @param abi The abi object to be modified
2000 static void ia32_get_call_abi(const void *self, ir_type *method_type, be_abi_call_t *abi) {
2001 const ia32_isa_t *isa = self;
2004 unsigned cc = get_method_calling_convention(method_type);
2005 int n = get_method_n_params(method_type);
2008 int i, ignore_1, ignore_2;
2010 const arch_register_t *reg;
2011 be_abi_call_flags_t call_flags = be_abi_call_get_flags(abi);
/* P6-class cores prefer mov over push for argument passing */
2013 unsigned use_push = !IS_P6_ARCH(isa->opt_arch);
2015 /* set abi flags for calls */
2016 call_flags.bits.left_to_right = 0; /* always last arg first on stack */
2017 call_flags.bits.store_args_sequential = use_push;
2018 /* call_flags.bits.try_omit_fp not changed: can handle both settings */
2019 call_flags.bits.fp_free = 0; /* the frame pointer is fixed in IA32 */
2020 call_flags.bits.call_has_imm = 1; /* IA32 calls can have immediate address */
2022 /* set stack parameter passing style */
2023 be_abi_call_set_flags(abi, call_flags, &ia32_abi_callbacks);
2025 /* collect the mode for each type */
2026 modes = alloca(n * sizeof(modes[0]));
2028 for (i = 0; i < n; i++) {
2029 tp = get_method_param_type(method_type, i);
2030 modes[i] = get_type_mode(tp);
2033 /* set register parameters */
2034 if (cc & cc_reg_param) {
2035 /* determine the number of parameters passed via registers */
2036 biggest_n = ia32_get_n_regparam_class(n, modes, &ignore_1, &ignore_2);
2038 /* loop over all parameters and set the register requirements */
2039 for (i = 0; i <= biggest_n; i++) {
2040 reg = ia32_get_RegParam_reg(n, modes, i, cc);
2041 assert(reg && "kaputt");
2042 be_abi_call_param_reg(abi, i, reg);
2049 /* set stack parameters */
2050 for (i = stack_idx; i < n; i++) {
2051 /* parameters on the stack are 32 bit aligned */
2052 be_abi_call_param_stack(abi, i, 4, 0, 0);
2056 /* set return registers */
2057 n = get_method_n_ress(method_type);
2059 assert(n <= 2 && "more than two results not supported");
2061 /* In case of 64bit returns, we will have two 32bit values */
2063 tp = get_method_res_type(method_type, 0);
2064 mode = get_type_mode(tp);
2066 assert(!mode_is_float(mode) && "two FP results not supported");
2068 tp = get_method_res_type(method_type, 1);
2069 mode = get_type_mode(tp);
2071 assert(!mode_is_float(mode) && "mixed INT, FP results not supported");
/* 64bit integers are returned in the EAX:EDX pair */
2073 be_abi_call_res_reg(abi, 0, &ia32_gp_regs[REG_EAX]);
2074 be_abi_call_res_reg(abi, 1, &ia32_gp_regs[REG_EDX]);
2077 const arch_register_t *reg;
2079 tp = get_method_res_type(method_type, 0);
2080 assert(is_atomic_type(tp));
2081 mode = get_type_mode(tp);
/* single result: floats in vf0 (x87 st0), integers in eax */
2083 reg = mode_is_float(mode) ? &ia32_vfp_regs[REG_VF0] : &ia32_gp_regs[REG_EAX];
2085 be_abi_call_res_reg(abi, 0, reg);
/** Returns the ia32 irn ops for any node (same ops for all nodes). */
2090 static const void *ia32_get_irn_ops(const arch_irn_handler_t *self, const ir_node *irn) {
2091 return &ia32_irn_ops;
2094 const arch_irn_handler_t ia32_irn_handler = {
2098 const arch_irn_handler_t *ia32_get_irn_handler(const void *self) {
2099 return &ia32_irn_handler;
/** Scheduler callback: only ia32 nodes appear in the schedule (1), others
 *  are decided by the default heuristic (-1). */
2102 int ia32_to_appear_in_schedule(void *block_env, const ir_node *irn) {
2103 return is_ia32_irn(irn) ? 1 : -1;
2107 * Initializes the code generator interface.
2109 static const arch_code_generator_if_t *ia32_get_code_generator_if(void *self) {
2110 return &ia32_code_gen_if;
2114 * Returns the estimated execution time of an ia32 irn.
2116 static sched_timestep_t ia32_sched_exectime(void *env, const ir_node *irn) {
2117 const arch_env_t *arch_env = env;
2118 return is_ia32_irn(irn) ? ia32_get_op_estimated_cost(arch_get_irn_ops(arch_env, irn), irn) : 1;
2121 list_sched_selector_t ia32_sched_selector;
2124 * Returns the reg_pressure scheduler with to_appear_in_schedule() overloaded
2126 static const list_sched_selector_t *ia32_get_list_sched_selector(const void *self, list_sched_selector_t *selector) {
2127 memcpy(&ia32_sched_selector, selector, sizeof(ia32_sched_selector));
2128 ia32_sched_selector.exectime = ia32_sched_exectime;
2129 ia32_sched_selector.to_appear_in_schedule = ia32_to_appear_in_schedule;
2130 return &ia32_sched_selector;
/** Returns the ILP scheduler selector (body not visible in this excerpt). */
2133 static const ilp_sched_selector_t *ia32_get_ilp_sched_selector(const void *self) {
2138 * Returns the necessary byte alignment for storing a register of given class.
2140 static int ia32_get_reg_class_alignment(const void *self, const arch_register_class_t *cls) {
2141 ir_mode *mode = arch_register_class_mode(cls);
2142 int bytes = get_mode_size_bytes(mode);
/* wide float registers (SSE/x87) get their alignment capped — the capped
 * value is on an elided line; verify against full source */
2144 if (mode_is_float(mode) && bytes > 8)
/**
 * Returns the execution units a node may run on: ia32 nodes carry their own
 * unit list; backend Call/Return use the branch units, Barriers and other
 * backend nodes fall back to the dummy unit.
 */
2149 static const be_execution_unit_t ***ia32_get_allowed_execution_units(const void *self, const ir_node *irn) {
2150 static const be_execution_unit_t *_allowed_units_BRANCH[] = {
2151 &ia32_execution_units_BRANCH[IA32_EXECUNIT_TP_BRANCH_BRANCH1],
2152 &ia32_execution_units_BRANCH[IA32_EXECUNIT_TP_BRANCH_BRANCH2],
2155 static const be_execution_unit_t *_allowed_units_GP[] = {
2156 &ia32_execution_units_GP[IA32_EXECUNIT_TP_GP_GP_EAX],
2157 &ia32_execution_units_GP[IA32_EXECUNIT_TP_GP_GP_EBX],
2158 &ia32_execution_units_GP[IA32_EXECUNIT_TP_GP_GP_ECX],
2159 &ia32_execution_units_GP[IA32_EXECUNIT_TP_GP_GP_EDX],
2160 &ia32_execution_units_GP[IA32_EXECUNIT_TP_GP_GP_ESI],
2161 &ia32_execution_units_GP[IA32_EXECUNIT_TP_GP_GP_EDI],
2162 &ia32_execution_units_GP[IA32_EXECUNIT_TP_GP_GP_EBP],
2165 static const be_execution_unit_t *_allowed_units_DUMMY[] = {
2166 &be_machine_execution_units_DUMMY[0],
2169 static const be_execution_unit_t **_units_callret[] = {
2170 _allowed_units_BRANCH,
2173 static const be_execution_unit_t **_units_other[] = {
2177 static const be_execution_unit_t **_units_dummy[] = {
2178 _allowed_units_DUMMY,
2181 const be_execution_unit_t ***ret;
2183 if (is_ia32_irn(irn)) {
2184 ret = get_ia32_exec_units(irn);
2186 else if (is_be_node(irn)) {
2187 if (be_is_Call(irn) || be_is_Return(irn)) {
2188 ret = _units_callret;
2190 else if (be_is_Barrier(irn)) {
2205 * Return the abstract ia32 machine.
2207 static const be_machine_t *ia32_get_machine(const void *self) {
2208 const ia32_isa_t *isa = self;
2213 * Return irp irgs in the desired order.
2215 static ir_graph **ia32_get_irg_list(const void *self, ir_graph ***irg_list) {
2220 * Allows or disallows the creation of Psi nodes for the given Phi nodes.
2221 * @return 1 if allowed, 0 otherwise
2223 static int ia32_is_psi_allowed(ir_node *sel, ir_node *phi_list, int i, int j)
2225 ir_node *cmp, *cmp_a, *phi;
2228 /* we don't want long long an floating point Psi */
2229 #define IS_BAD_PSI_MODE(mode) (mode_is_float(mode) || get_mode_size_bits(mode) > 32)
2231 if (get_irn_mode(sel) != mode_b)
2234 cmp = get_Proj_pred(sel);
2235 cmp_a = get_Cmp_left(cmp);
2236 mode = get_irn_mode(cmp_a);
2238 if (IS_BAD_PSI_MODE(mode))
2241 /* check the Phi nodes */
2242 for (phi = phi_list; phi; phi = get_irn_link(phi)) {
2243 ir_node *pred_i = get_irn_n(phi, i);
2244 ir_node *pred_j = get_irn_n(phi, j);
2245 ir_mode *mode_i = get_irn_mode(pred_i);
2246 ir_mode *mode_j = get_irn_mode(pred_j);
2248 if (IS_BAD_PSI_MODE(mode_i) || IS_BAD_PSI_MODE(mode_j))
2252 #undef IS_BAD_PSI_MODE
/** Context for ia32_create_intrinsic_fkt; entities are created lazily. */
2257 static ia32_intrinsic_env_t intrinsic_env = {
2258 NULL, /**< the irg, these entities belong to */
2259 NULL, /**< entity for first div operand (move into FPU) */
2260 NULL, /**< entity for second div operand (move into FPU) */
2261 NULL, /**< entity for converts ll -> d */
2262 NULL, /**< entity for converts d -> ll */
2266 * Returns the libFirm configuration parameter for this backend.
2268 static const backend_params *ia32_get_libfirm_params(void) {
2269 static const opt_if_conv_info_t ifconv = {
2270 4, /* maxdepth, doesn't matter for Psi-conversion */
2271 ia32_is_psi_allowed /* allows or disallows Psi creation for given selector */
2273 static const arch_dep_params_t ad = {
2274 1, /* also use subs */
2275 4, /* maximum shifts */
2276 31, /* maximum shift amount */
2278 1, /* allow Mulhs */
2279 1, /* allow Mulus */
2280 32 /* Mulh allowed up to 32 bit */
2282 static backend_params p = {
2283 NULL, /* no additional opcodes */
2284 NULL, /* will be set later */
2285 1, /* need dword lowering */
2286 ia32_create_intrinsic_fkt,
2287 &intrinsic_env, /* context for ia32_create_intrinsic_fkt */
2288 NULL, /* will be set later */
2292 p.if_conv_info = &ifconv;
/* Command line option tables — presumably inside the backend's option
 * registration function; enclosing definition not visible in this excerpt. */
2296 /* instruction set architectures. */
2297 static const lc_opt_enum_int_items_t arch_items[] = {
2298 { "386", arch_i386, },
2299 { "486", arch_i486, },
2300 { "pentium", arch_pentium, },
2301 { "586", arch_pentium, },
2302 { "pentiumpro", arch_pentium_pro, },
2303 { "686", arch_pentium_pro, },
2304 { "pentiummmx", arch_pentium_mmx, },
2305 { "pentium2", arch_pentium_2, },
2306 { "p2", arch_pentium_2, },
2307 { "pentium3", arch_pentium_3, },
2308 { "p3", arch_pentium_3, },
2309 { "pentium4", arch_pentium_4, },
2310 { "p4", arch_pentium_4, },
2311 { "pentiumm", arch_pentium_m, },
2312 { "pm", arch_pentium_m, },
2313 { "core", arch_core, },
2315 { "athlon", arch_athlon, },
2316 { "athlon64", arch_athlon_64, },
2317 { "opteron", arch_opteron, },
/* the -barch/-bopt options write directly into the ISA template */
2321 static lc_opt_enum_int_var_t arch_var = {
2322 &ia32_isa_template.arch, arch_items
2325 static lc_opt_enum_int_var_t opt_arch_var = {
2326 &ia32_isa_template.opt_arch, arch_items
2329 static const lc_opt_enum_int_items_t fp_unit_items[] = {
2331 { "sse2", fp_sse2 },
2335 static lc_opt_enum_int_var_t fp_unit_var = {
2336 &ia32_isa_template.fp_kind, fp_unit_items
2339 static const lc_opt_enum_int_items_t gas_items[] = {
2340 { "normal", GAS_FLAVOUR_NORMAL },
2341 { "mingw", GAS_FLAVOUR_MINGW },
2345 static lc_opt_enum_int_var_t gas_var = {
2346 (int*) &be_gas_flavour, gas_items
2349 static const lc_opt_table_entry_t ia32_options[] = {
2350 LC_OPT_ENT_ENUM_INT("arch", "select the instruction architecture", &arch_var),
2351 LC_OPT_ENT_ENUM_INT("opt", "optimize for instruction architecture", &opt_arch_var),
2352 LC_OPT_ENT_ENUM_INT("fpunit", "select the floating point unit", &fp_unit_var),
2353 LC_OPT_ENT_NEGBIT("noaddrmode", "do not use address mode", &ia32_isa_template.opt, IA32_OPT_DOAM),
2354 LC_OPT_ENT_NEGBIT("nolea", "do not optimize for LEAs", &ia32_isa_template.opt, IA32_OPT_LEA),
2355 LC_OPT_ENT_NEGBIT("noplacecnst", "do not place constants", &ia32_isa_template.opt, IA32_OPT_PLACECNST),
2356 LC_OPT_ENT_NEGBIT("noimmop", "no operations with immediates", &ia32_isa_template.opt, IA32_OPT_IMMOPS),
2357 LC_OPT_ENT_NEGBIT("nopushargs", "do not create pushs for function arguments", &ia32_isa_template.opt, IA32_OPT_PUSHARGS),
2358 LC_OPT_ENT_ENUM_INT("gasmode", "set the GAS compatibility mode", &gas_var),
/** The public ISA interface of the ia32 backend. */
2362 const arch_isa_if_t ia32_isa_if = {
2365 ia32_get_n_reg_class,
2367 ia32_get_reg_class_for_mode,
2369 ia32_get_irn_handler,
2370 ia32_get_code_generator_if,
2371 ia32_get_list_sched_selector,
2372 ia32_get_ilp_sched_selector,
2373 ia32_get_reg_class_alignment,
2374 ia32_get_libfirm_params,
2375 ia32_get_allowed_execution_units,
/** Module constructor: registers the ia32 options and the ISA interface. */
2380 void be_init_arch_ia32(void)
2382 lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
2383 lc_opt_entry_t *ia32_grp = lc_opt_get_grp(be_grp, "ia32");
2385 lc_opt_add_table(ia32_grp, ia32_options);
2386 be_register_isa_if("ia32", &ia32_isa_if);
2389 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_arch_ia32);