 * This is the main ia32 firm backend driver.
 * @author Christian Wuerdig
#include <libcore/lc_opts.h>
#include <libcore/lc_opts_enum.h>
#include "pseudo_irg.h"
#include "iredges_t.h"
#include "../beabi.h" /* the general register allocator interface */
#include "../benode_t.h"
#include "../belower.h"
#include "../besched_t.h"
#include "../beirgmod.h"
#include "../be_dbgout.h"
#include "../beblocksched.h"
#include "../bemachine.h"
#include "../beilpsched.h"
#include "../bespillslots.h"
#include "../bemodule.h"
#include "../begnuas.h"
#include "bearch_ia32_t.h"
#include "ia32_new_nodes.h" /* ia32 nodes interface */
#include "gen_ia32_regalloc_if.h" /* the generated interface (register type and class definitions) */
#include "gen_ia32_machine.h"
#include "ia32_transform.h"
#include "ia32_emitter.h"
#include "ia32_map_regs.h"
#include "ia32_optimize.h"
#include "ia32_dbg_stat.h"
#include "ia32_finish.h"
#include "ia32_util.h"

static set *cur_reg_set = NULL;

typedef ir_node *(*create_const_node_func) (dbg_info *dbg, ir_graph *irg, ir_node *block);

static INLINE ir_node *create_const(ia32_code_gen_t *cg, ir_node **place,
                                    create_const_node_func func, arch_register_t* reg)
	block = get_irg_start_block(cg->irg);
	res = func(NULL, cg->irg, block);
	arch_set_irn_register(cg->arch_env, res, reg);
	/* keep the node so it isn't accidentally removed when unused ... */
	keep = be_new_Keep(arch_register_get_class(reg), cg->irg, block, 1, in);

	/* schedule the node if we already have a scheduled program */
	startnode = get_irg_start(cg->irg);
	if(sched_is_scheduled(startnode)) {
		sched_add_after(startnode, res);
		sched_add_after(res, keep);
/* Creates the unique per irg GP NoReg node. */
ir_node *ia32_new_NoReg_gp(ia32_code_gen_t *cg) {
	return create_const(cg, &cg->noreg_gp, new_rd_ia32_NoReg_GP,
	                    &ia32_gp_regs[REG_GP_NOREG]);

ir_node *ia32_new_NoReg_vfp(ia32_code_gen_t *cg) {
	return create_const(cg, &cg->noreg_vfp, new_rd_ia32_NoReg_VFP,
	                    &ia32_vfp_regs[REG_VFP_NOREG]);

ir_node *ia32_new_NoReg_xmm(ia32_code_gen_t *cg) {
	return create_const(cg, &cg->noreg_xmm, new_rd_ia32_NoReg_XMM,
	                    &ia32_xmm_regs[REG_XMM_NOREG]);

/* Creates the unique per irg FP NoReg node. */
ir_node *ia32_new_NoReg_fp(ia32_code_gen_t *cg) {
	return USE_SSE2(cg) ? ia32_new_NoReg_xmm(cg) : ia32_new_NoReg_vfp(cg);

ir_node *ia32_new_Unknown_gp(ia32_code_gen_t *cg) {
	return create_const(cg, &cg->unknown_gp, new_rd_ia32_Unknown_GP,
	                    &ia32_gp_regs[REG_GP_UKNWN]);

ir_node *ia32_new_Unknown_vfp(ia32_code_gen_t *cg) {
	return create_const(cg, &cg->unknown_vfp, new_rd_ia32_Unknown_VFP,
	                    &ia32_vfp_regs[REG_VFP_UKNWN]);

ir_node *ia32_new_Unknown_xmm(ia32_code_gen_t *cg) {
	return create_const(cg, &cg->unknown_xmm, new_rd_ia32_Unknown_XMM,
	                    &ia32_xmm_regs[REG_XMM_UKNWN]);
 * Returns gp_noreg or fp_noreg, depending on input requirements.
ir_node *ia32_get_admissible_noreg(ia32_code_gen_t *cg, ir_node *irn, int pos) {
	const arch_register_req_t *req;

	req = arch_get_register_req(cg->arch_env, irn, pos);
	assert(req != NULL && "Missing register requirements");
	if (req->cls == &ia32_reg_classes[CLASS_ia32_gp])
		return ia32_new_NoReg_gp(cg);

	return ia32_new_NoReg_fp(cg);
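/* Illustrative usage sketch (not part of the original file): a transform
 * routine that has to park an unused address-mode input can ask for a
 * matching NoReg, e.g.
 *
 *     ir_node *noreg = ia32_get_admissible_noreg(cg, node, 2);
 *     set_irn_n(node, 2, noreg);
 *
 * The helper inspects the register requirements of the given input position
 * and hands back the GP or FP NoReg accordingly. */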
/**************************************************
 *
 *          register allocator interface
 *
 **************************************************/
 * Return register requirements for an ia32 node.
 * If the node returns a tuple (mode_T) then the Projs
 * will be asked for this information.
static const arch_register_req_t *ia32_get_irn_reg_req(const void *self,
	long node_pos = pos == -1 ? 0 : pos;
	ir_mode *mode = is_Block(node) ? NULL : get_irn_mode(node);

	if (is_Block(node) || mode == mode_X) {
		return arch_no_register_req;

	if (mode == mode_T && pos < 0) {
		return arch_no_register_req;

		return arch_no_register_req;

		return arch_no_register_req;

	node_pos = (pos == -1) ? get_Proj_proj(node) : pos;
	node = skip_Proj_const(node);

	if (is_ia32_irn(node)) {
		const arch_register_req_t *req;
		req = get_ia32_in_req(node, pos);
		req = get_ia32_out_req(node, node_pos);

	/* unknowns should be transformed already */
	assert(!is_Unknown(node));

	return arch_no_register_req;
static void ia32_set_irn_reg(const void *self, ir_node *irn, const arch_register_t *reg) {
	if (get_irn_mode(irn) == mode_X) {

	pos = get_Proj_proj(irn);
	irn = skip_Proj(irn);

	if (is_ia32_irn(irn)) {
		const arch_register_t **slots;
		slots = get_ia32_slots(irn);

	ia32_set_firm_reg(irn, reg, cur_reg_set);

static const arch_register_t *ia32_get_irn_reg(const void *self, const ir_node *irn) {
	const arch_register_t *reg = NULL;

	if (get_irn_mode(irn) == mode_X) {

	pos = get_Proj_proj(irn);
	irn = skip_Proj_const(irn);

	if (is_ia32_irn(irn)) {
		const arch_register_t **slots;
		slots = get_ia32_slots(irn);

	reg = ia32_get_firm_reg(irn, cur_reg_set);
static arch_irn_class_t ia32_classify(const void *self, const ir_node *irn) {
	arch_irn_class_t classification = arch_irn_class_normal;

	irn = skip_Proj_const(irn);

		classification |= arch_irn_class_branch;

	if (! is_ia32_irn(irn))
		return classification & ~arch_irn_class_normal;

	if (is_ia32_Cnst(irn))
		classification |= arch_irn_class_const;

		classification |= arch_irn_class_load;

	if (is_ia32_St(irn) || is_ia32_Store8Bit(irn))
		classification |= arch_irn_class_store;

	if (is_ia32_need_stackent(irn))
		classification |= arch_irn_class_reload;

	return classification;
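/* Note (illustrative, not in the original source): arch_irn_class_t acts as
 * a bit set here, so one node may carry several classes at once.  An ia32
 * Load that accesses a spill slot, for instance, ends up classified as
 *
 *     arch_irn_class_load | arch_irn_class_reload
 *
 * which is why the flags are or'ed together instead of returned early. */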
static arch_irn_flags_t ia32_get_flags(const void *self, const ir_node *irn) {
	arch_irn_flags_t flags = arch_irn_flags_none;

		return arch_irn_flags_ignore;

	if(is_Proj(irn) && mode_is_datab(get_irn_mode(irn))) {
		ir_node *pred = get_Proj_pred(irn);

		if(is_ia32_irn(pred)) {
			flags = get_ia32_out_flags(pred, get_Proj_proj(irn));

	if (is_ia32_irn(irn)) {
		flags |= get_ia32_flags(irn);
 * The IA32 ABI callback object.
	be_abi_call_flags_bits_t flags; /**< The call flags. */
	const arch_isa_t *isa;          /**< The ISA handle. */
	const arch_env_t *aenv;         /**< The architecture environment. */
	ir_graph *irg;                  /**< The associated graph. */

static ir_entity *ia32_get_frame_entity(const void *self, const ir_node *irn) {
	return is_ia32_irn(irn) ? get_ia32_frame_ent(irn) : NULL;

static void ia32_set_frame_entity(const void *self, ir_node *irn, ir_entity *ent) {
	set_ia32_frame_ent(irn, ent);
static void ia32_set_frame_offset(const void *self, ir_node *irn, int bias) {
	const ia32_irn_ops_t *ops = self;

	if (get_ia32_frame_ent(irn)) {
		ia32_am_flavour_t am_flav;

		if (is_ia32_Pop(irn)) {
			int omit_fp = be_abi_omit_fp(ops->cg->birg->abi);

			/* Pop nodes modify the stack pointer before calculating the
			 * destination address, so fix this here */

		am_flav = get_ia32_am_flavour(irn);
		set_ia32_am_flavour(irn, am_flav);

		add_ia32_am_offs_int(irn, bias);
static int ia32_get_sp_bias(const void *self, const ir_node *irn) {
	long proj = get_Proj_proj(irn);
	ir_node *pred = get_Proj_pred(irn);

	if (is_ia32_Push(pred) && proj == pn_ia32_Push_stack)

	if (is_ia32_Pop(pred) && proj == pn_ia32_Pop_stack)
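/* Sketch of the elided returns (an assumption, following the usual 4 byte
 * stack slots of this backend):
 *
 *     if (is_ia32_Push(pred) && proj == pn_ia32_Push_stack)
 *         return 4;      // a Push moves esp down by one word
 *     if (is_ia32_Pop(pred) && proj == pn_ia32_Pop_stack)
 *         return -4;     // a Pop moves esp back up
 *
 * Only the sign convention matters to the generic stack-bias walker. */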
 * Put all registers which are saved by the prologue/epilogue in a set.
 *
 * @param self The callback object.
 * @param s    The result set.
static void ia32_abi_dont_save_regs(void *self, pset *s)
	ia32_abi_env_t *env = self;
	if(env->flags.try_omit_fp)
		pset_insert_ptr(s, env->isa->bp);
static unsigned count_callee_saves(ia32_code_gen_t *cg)
	unsigned callee_saves = 0;
	int c, num_reg_classes;

	num_reg_classes = arch_isa_get_n_reg_class(isa);
	for(c = 0; c < num_reg_classes; ++c) {
		int r, num_registers;
		arch_register_class_t *regclass = arch_isa_get_reg_class(isa, c);

		num_registers = arch_register_class_n_regs(regclass);
		for(r = 0; r < num_registers; ++r) {
			arch_register_t *reg = arch_register_for_index(regclass, r);
			if(arch_register_type_is(reg, callee_save))
static void create_callee_save_regprojs(ia32_code_gen_t *cg, ir_node *regparams)
	int c, num_reg_classes;

	num_reg_classes = arch_isa_get_n_reg_class(isa);
	cg->initial_regs = obstack_alloc(cg->obst,
	                                 num_reg_classes * sizeof(cg->initial_regs[0]));

	for(c = 0; c < num_reg_classes; ++c) {
		int r, num_registers;
		ir_node **initial_regclass;
		arch_register_class_t *regclass = arch_isa_get_reg_class(isa, c);

		num_registers = arch_register_class_n_regs(regclass);
		/* obstack_alloc needs the obstack as its first argument */
		initial_regclass = obstack_alloc(cg->obst, num_registers * sizeof(initial_regclass[0]));
		for(r = 0; r < num_registers; ++r) {
			arch_register_t *reg = arch_register_for_index(regclass, r);
			if(!arch_register_type_is(reg, callee_save))

			proj = new_r_Proj(irg, start_block, regparams, n);
			be_set_constr_single_reg(regparams, n, reg);
			arch_set_irn_register(cg->arch_env, proj, reg);

			initial_regclass[r] = proj;

		cg->initial_regs[c] = initial_regclass;
static void callee_saves_obstack_grow(ia32_code_gen_t *cg)
	int c, num_reg_classes;

	for(c = 0; c < num_reg_classes; ++c) {
		int r, num_registers;

		num_registers = arch_register_class_n_regs(regclass);
		for(r = 0; r < num_registers; ++r) {
			arch_register_t *reg = arch_register_for_index(regclass, r);
			if(!arch_register_type_is(reg, callee_save))

			proj = cg->initial_regs[c][r];
			obstack_ptr_grow(cg->obst, proj);

static unsigned count_parameters_in_regs(ia32_code_gen_t *cg)

static void ia32_gen_prologue(ia32_code_gen_t *cg)
	ir_graph *irg = cg->irg;
	ir_node *start_block = get_irg_start_block(irg);

	/* Create the regparams node */
	n_regparams_out = count_callee_saves(cg) + count_parameters_in_regs(cg);
	regparams = be_new_RegParams(irg, start_block, n_regparams_out);

	create_callee_save_regprojs(cg, regparams);
	/* Setup the stack */
		ir_node *bl = get_irg_start_block(env->irg);
		ir_node *curr_sp = be_abi_reg_map_get(reg_map, env->isa->sp);
		ir_node *curr_bp = be_abi_reg_map_get(reg_map, env->isa->bp);
		ir_node *noreg = ia32_new_NoReg_gp(cg);

		push = new_rd_ia32_Push(NULL, env->irg, bl, noreg, noreg, curr_bp, curr_sp, *mem);
		curr_sp = new_r_Proj(env->irg, bl, push, get_irn_mode(curr_sp), pn_ia32_Push_stack);
		*mem = new_r_Proj(env->irg, bl, push, mode_M, pn_ia32_Push_M);

		/* the push must have SP out register */
		arch_set_irn_register(env->aenv, curr_sp, env->isa->sp);
		set_ia32_flags(push, arch_irn_flags_ignore);

		/* move esp to ebp */
		curr_bp = be_new_Copy(env->isa->bp->reg_class, env->irg, bl, curr_sp);
		be_set_constr_single_reg(curr_bp, BE_OUT_POS(0), env->isa->bp);
		arch_set_irn_register(env->aenv, curr_bp, env->isa->bp);
		be_node_set_flags(curr_bp, BE_OUT_POS(0), arch_irn_flags_ignore);

		/* beware: the copy must be done before any other sp use */
		curr_sp = be_new_CopyKeep_single(env->isa->sp->reg_class, env->irg, bl, curr_sp, curr_bp, get_irn_mode(curr_sp));
		be_set_constr_single_reg(curr_sp, BE_OUT_POS(0), env->isa->sp);
		arch_set_irn_register(env->aenv, curr_sp, env->isa->sp);
		be_node_set_flags(curr_sp, BE_OUT_POS(0), arch_irn_flags_ignore);

		be_abi_reg_map_set(reg_map, env->isa->sp, curr_sp);
		be_abi_reg_map_set(reg_map, env->isa->bp, curr_bp);

	sp = be_new_IncSP(sp, irg, start_block, initialsp, BE_STACK_FRAME_SIZE_EXPAND);
	set_irg_frame(irg, sp);
static void ia32_gen_epilogue(ia32_code_gen_t *cg)
	int n_callee_saves = count_callee_saves(cg);
	int n_results_regs = 0;

	ir_node *end_block = get_irg_end_block(irg);

	/* We have to make sure that all reloads occur before the stack frame
	   gets destroyed, so we create a barrier for all callee-save and return
	barrier_size = n_callee_saves + n_results_regs;
	barrier = be_new_Barrier(irg, end_block, barrier_size,

	/* simply remove the stack frame here */
	curr_sp = be_new_IncSP(env->isa->sp, env->irg, bl, curr_sp, BE_STACK_FRAME_SIZE_SHRINK);
	add_irn_dep(curr_sp, *mem);
 * Generate the routine prologue.
 *
 * @param self    The callback object.
 * @param mem     A pointer to the mem node. Update this if you define new memory.
 * @param reg_map A map mapping all callee_save/ignore/parameter registers to their defining nodes.
 *
 * @return        The register which shall be used as a stack frame base.
 *
 * All nodes which define registers in @p reg_map must keep @p reg_map current.
static const arch_register_t *ia32_abi_prologue(void *self, ir_node **mem, pmap *reg_map)
	ia32_abi_env_t *env = self;
	const ia32_isa_t *isa = (ia32_isa_t *)env->isa;
	ia32_code_gen_t *cg = isa->cg;

	if (! env->flags.try_omit_fp) {
		ir_node *bl = get_irg_start_block(env->irg);
		ir_node *curr_sp = be_abi_reg_map_get(reg_map, env->isa->sp);
		ir_node *curr_bp = be_abi_reg_map_get(reg_map, env->isa->bp);
		ir_node *noreg = ia32_new_NoReg_gp(cg);

		push = new_rd_ia32_Push(NULL, env->irg, bl, noreg, noreg, curr_bp, curr_sp, *mem);
		curr_sp = new_r_Proj(env->irg, bl, push, get_irn_mode(curr_sp), pn_ia32_Push_stack);
		*mem = new_r_Proj(env->irg, bl, push, mode_M, pn_ia32_Push_M);

		/* the push must have SP out register */
		arch_set_irn_register(env->aenv, curr_sp, env->isa->sp);
		set_ia32_flags(push, arch_irn_flags_ignore);

		/* move esp to ebp */
		curr_bp = be_new_Copy(env->isa->bp->reg_class, env->irg, bl, curr_sp);
		be_set_constr_single_reg(curr_bp, BE_OUT_POS(0), env->isa->bp);
		arch_set_irn_register(env->aenv, curr_bp, env->isa->bp);
		be_node_set_flags(curr_bp, BE_OUT_POS(0), arch_irn_flags_ignore);

		/* beware: the copy must be done before any other sp use */
		curr_sp = be_new_CopyKeep_single(env->isa->sp->reg_class, env->irg, bl, curr_sp, curr_bp, get_irn_mode(curr_sp));
		be_set_constr_single_reg(curr_sp, BE_OUT_POS(0), env->isa->sp);
		arch_set_irn_register(env->aenv, curr_sp, env->isa->sp);
		be_node_set_flags(curr_sp, BE_OUT_POS(0), arch_irn_flags_ignore);

		be_abi_reg_map_set(reg_map, env->isa->sp, curr_sp);
		be_abi_reg_map_set(reg_map, env->isa->bp, curr_bp);
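/* For orientation (illustrative, not part of the original file): the node
 * sequence built above corresponds to the classic frame-pointer prologue
 *
 *     push ebp          ; the ia32_Push of curr_bp
 *     mov  ebp, esp     ; the Copy from curr_sp to curr_bp
 *
 * with the CopyKeep pinning esp so that no other stack user can be
 * scheduled before the copy has happened. */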
 * Generate the routine epilogue.
 *
 * @param self    The callback object.
 * @param bl      The block for the epilogue.
 * @param mem     A pointer to the mem node. Update this if you define new memory.
 * @param reg_map A map mapping all callee_save/ignore/parameter registers to their defining nodes.
 *
 * All nodes which define registers in @p reg_map must keep @p reg_map current.
static void ia32_abi_epilogue(void *self, ir_node *bl, ir_node **mem, pmap *reg_map)
	ia32_abi_env_t *env = self;
	ir_node *curr_sp = be_abi_reg_map_get(reg_map, env->isa->sp);
	ir_node *curr_bp = be_abi_reg_map_get(reg_map, env->isa->bp);

	if (env->flags.try_omit_fp) {
		/* simply remove the stack frame here */
		curr_sp = be_new_IncSP(env->isa->sp, env->irg, bl, curr_sp, BE_STACK_FRAME_SIZE_SHRINK);
		add_irn_dep(curr_sp, *mem);

		const ia32_isa_t *isa = (ia32_isa_t *)env->isa;
		ia32_code_gen_t *cg = isa->cg;
		ir_mode *mode_bp = env->isa->bp->reg_class->mode;

		/* gcc always emits a leave at the end of a routine */
		if (1 || ARCH_AMD(isa->opt_arch)) {
			leave = new_rd_ia32_Leave(NULL, env->irg, bl, curr_sp, curr_bp);
			set_ia32_flags(leave, arch_irn_flags_ignore);
			curr_bp = new_r_Proj(current_ir_graph, bl, leave, mode_bp, pn_ia32_Leave_frame);
			curr_sp = new_r_Proj(current_ir_graph, bl, leave, get_irn_mode(curr_sp), pn_ia32_Leave_stack);

			ir_node *noreg = ia32_new_NoReg_gp(cg);

			/* copy ebp to esp */
			curr_sp = be_new_SetSP(env->isa->sp, env->irg, bl, curr_sp, curr_bp, *mem);

			pop = new_rd_ia32_Pop(NULL, env->irg, bl, noreg, noreg, curr_sp, *mem);
			set_ia32_flags(pop, arch_irn_flags_ignore);
			curr_bp = new_r_Proj(current_ir_graph, bl, pop, mode_bp, pn_ia32_Pop_res);
			curr_sp = new_r_Proj(current_ir_graph, bl, pop, get_irn_mode(curr_sp), pn_ia32_Pop_stack);

			*mem = new_r_Proj(current_ir_graph, bl, pop, mode_M, pn_ia32_Pop_M);

		arch_set_irn_register(env->aenv, curr_sp, env->isa->sp);
		arch_set_irn_register(env->aenv, curr_bp, env->isa->bp);

	be_abi_reg_map_set(reg_map, env->isa->sp, curr_sp);
	be_abi_reg_map_set(reg_map, env->isa->bp, curr_bp);
 * Initialize the callback object.
 * @param call The call object.
 * @param aenv The architecture environment.
 * @param irg  The graph with the method.
 * @return     Some pointer. This pointer is passed to all other callback functions as self object.
static void *ia32_abi_init(const be_abi_call_t *call, const arch_env_t *aenv, ir_graph *irg)
	ia32_abi_env_t *env = xmalloc(sizeof(env[0]));
	be_abi_call_flags_t fl = be_abi_call_get_flags(call);
	env->flags = fl.bits;
	env->isa = aenv->isa;

 * Destroy the callback object.
 * @param self The callback object.
static void ia32_abi_done(void *self) {
 * Produces the type which sits between the stack args and the locals on the stack.
 * It will contain the return address and space to store the old base pointer.
 * @return The Firm type modeling the ABI between-type.
static ir_type *ia32_abi_get_between_type(void *self)
#define IDENT(s) new_id_from_chars(s, sizeof(s)-1)
	static ir_type *omit_fp_between_type = NULL;
	static ir_type *between_type = NULL;

	ia32_abi_env_t *env = self;

	if (! between_type) {
		ir_entity *old_bp_ent;
		ir_entity *ret_addr_ent;
		ir_entity *omit_fp_ret_addr_ent;

		ir_type *old_bp_type = new_type_primitive(IDENT("bp"), mode_Iu);
		ir_type *ret_addr_type = new_type_primitive(IDENT("return_addr"), mode_Iu);

		between_type = new_type_struct(IDENT("ia32_between_type"));
		old_bp_ent = new_entity(between_type, IDENT("old_bp"), old_bp_type);
		ret_addr_ent = new_entity(between_type, IDENT("ret_addr"), ret_addr_type);

		set_entity_offset(old_bp_ent, 0);
		set_entity_offset(ret_addr_ent, get_type_size_bytes(old_bp_type));
		set_type_size_bytes(between_type, get_type_size_bytes(old_bp_type) + get_type_size_bytes(ret_addr_type));
		set_type_state(between_type, layout_fixed);

		omit_fp_between_type = new_type_struct(IDENT("ia32_between_type_omit_fp"));
		omit_fp_ret_addr_ent = new_entity(omit_fp_between_type, IDENT("ret_addr"), ret_addr_type);

		set_entity_offset(omit_fp_ret_addr_ent, 0);
		set_type_size_bytes(omit_fp_between_type, get_type_size_bytes(ret_addr_type));
		set_type_state(omit_fp_between_type, layout_fixed);

	return env->flags.try_omit_fp ? omit_fp_between_type : between_type;
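/* Illustrative layout (not part of the original file): with a frame pointer
 * the between-type occupies the eight bytes between the caller's stack
 * arguments and the callee's locals:
 *
 *     higher addresses
 *     +------------------+
 *     | stack arguments  |
 *     +------------------+
 *     | return address   |  <- ret_addr_ent, offset 4
 *     +------------------+
 *     | old base pointer |  <- old_bp_ent, offset 0
 *     +------------------+
 *     | locals           |
 *     lower addresses
 *
 * When the frame pointer is omitted, only the return address slot remains. */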
 * Get the estimated cycle count for @p irn.
 *
 * @param self The this pointer.
 * @param irn  The node.
 *
 * @return     The estimated cycle count for this operation
static int ia32_get_op_estimated_cost(const void *self, const ir_node *irn)
	ia32_op_type_t op_tp;
	const ia32_irn_ops_t *ops = self;

	if (!is_ia32_irn(irn))

	assert(is_ia32_irn(irn));

	cost = get_ia32_latency(irn);
	op_tp = get_ia32_op_type(irn);

	if (is_ia32_CopyB(irn)) {
		if (ARCH_INTEL(ops->cg->arch))

	else if (is_ia32_CopyB_i(irn)) {
		int size = get_tarval_long(get_ia32_Immop_tarval(irn));
		/* 4/3 must be a floating point division, otherwise it truncates to 1 */
		cost = 20 + (int)ceil((4.0 / 3.0) * size);
		if (ARCH_INTEL(ops->cg->arch))

	/* in case of address mode operations add additional cycles */
	else if (op_tp == ia32_AddrModeD || op_tp == ia32_AddrModeS) {
		/*
			In case of stack access add 5 cycles (we assume stack is in cache),
			other memory operations cost 20 cycles.
		*/
		cost += is_ia32_use_frame(irn) ? 5 : 20;
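/* Worked example (illustrative): an Add with latency 1 whose second operand
 * was folded into a stack-frame access (ia32_AddrModeS plus use_frame) is
 * estimated at 1 + 5 = 6 cycles; the same Add reading arbitrary memory is
 * estimated at 1 + 20 = 21 cycles. */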
 * Returns the inverse operation of @p irn, recalculating the argument at position @p i.
 *
 * @param irn     The original operation
 * @param i       Index of the argument we want the inverse operation to yield
 * @param inverse struct to be filled with the resulting inverse op
 * @param obstack The obstack to use for allocation of the returned nodes array
 * @return        The inverse operation or NULL if the operation is not invertible
static arch_inverse_t *ia32_get_inverse(const void *self, const ir_node *irn, int i, arch_inverse_t *inverse, struct obstack *obst) {
	ir_node *block, *noreg, *nomem;

	/* we cannot invert non-ia32 irns */
	if (! is_ia32_irn(irn))

	/* operand must always be a real operand (not base, index or mem) */
	if (i != 2 && i != 3)

	/* we don't invert address mode operations */
	if (get_ia32_op_type(irn) != ia32_Normal)

	irg = get_irn_irg(irn);
	block = get_nodes_block(irn);
	mode = get_irn_mode(irn);
	irn_mode = get_irn_mode(irn);
	noreg = get_irn_n(irn, 0);
	nomem = new_r_NoMem(irg);
	dbg = get_irn_dbg_info(irn);

	/* initialize structure */
	inverse->nodes = obstack_alloc(obst, 2 * sizeof(inverse->nodes[0]));

	switch (get_ia32_irn_opcode(irn)) {
		if (get_ia32_immop_type(irn) == ia32_ImmConst) {
			/* we have an add with a const here */
			/* inverse == add with negated const */
			inverse->nodes[0] = new_rd_ia32_Add(dbg, irg, block, noreg, noreg, get_irn_n(irn, i), noreg, nomem);
			copy_ia32_Immop_attr(inverse->nodes[0], (ir_node *)irn);
			set_ia32_Immop_tarval(inverse->nodes[0], tarval_neg(get_ia32_Immop_tarval(irn)));
			set_ia32_commutative(inverse->nodes[0]);

		else if (get_ia32_immop_type(irn) == ia32_ImmSymConst) {
			/* we have an add with a symconst here */
			/* inverse == sub with const */
			inverse->nodes[0] = new_rd_ia32_Sub(dbg, irg, block, noreg, noreg, get_irn_n(irn, i), noreg, nomem);
			copy_ia32_Immop_attr(inverse->nodes[0], (ir_node *)irn);

			/* normal add: inverse == sub */
			inverse->nodes[0] = new_rd_ia32_Sub(dbg, irg, block, noreg, noreg, (ir_node*) irn, get_irn_n(irn, i ^ 1), nomem);

		if (get_ia32_immop_type(irn) != ia32_ImmNone) {
			/* we have a sub with a const/symconst here */
			/* inverse == add with this const */
			inverse->nodes[0] = new_rd_ia32_Add(dbg, irg, block, noreg, noreg, get_irn_n(irn, i), noreg, nomem);
			inverse->costs += (get_ia32_immop_type(irn) == ia32_ImmSymConst) ? 5 : 1;
			copy_ia32_Immop_attr(inverse->nodes[0], (ir_node *)irn);

			inverse->nodes[0] = new_rd_ia32_Add(dbg, irg, block, noreg, noreg, (ir_node*) irn, get_irn_n(irn, 3), nomem);
			inverse->nodes[0] = new_rd_ia32_Sub(dbg, irg, block, noreg, noreg, get_irn_n(irn, 2), (ir_node*) irn, nomem);
		if (get_ia32_immop_type(irn) != ia32_ImmNone) {
			/* xor with const: inverse = xor */
			inverse->nodes[0] = new_rd_ia32_Xor(dbg, irg, block, noreg, noreg, get_irn_n(irn, i), noreg, nomem);
			inverse->costs += (get_ia32_immop_type(irn) == ia32_ImmSymConst) ? 5 : 1;
			copy_ia32_Immop_attr(inverse->nodes[0], (ir_node *)irn);

			inverse->nodes[0] = new_rd_ia32_Xor(dbg, irg, block, noreg, noreg, (ir_node *) irn, get_irn_n(irn, i), nomem);

		inverse->nodes[0] = new_rd_ia32_Not(dbg, irg, block, noreg, noreg, (ir_node*) irn, nomem);

		inverse->nodes[0] = new_rd_ia32_Neg(dbg, irg, block, noreg, noreg, (ir_node*) irn, nomem);

		/* inverse operation not supported */
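/* Illustrative example (not in the original source): for n = Add(x, y) the
 * operand x can be recomputed from the result as Sub(n, y), which is what
 * the "normal add: inverse == sub" case above builds.  Such inverses allow
 * the backend to rematerialize an overwritten operand instead of keeping it
 * alive in a register. */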
static ir_mode *get_spill_mode_mode(const ir_mode *mode)
	if(mode_is_float(mode))

 * Get the mode that should be used for spilling value node
static ir_mode *get_spill_mode(const ir_node *node)
	ir_mode *mode = get_irn_mode(node);
	return get_spill_mode_mode(mode);
 * Checks whether an addressmode reload for a node with mode mode is compatible
 * with a spillslot of mode spill_mode
static int ia32_is_spillmode_compatible(const ir_mode *mode, const ir_mode *spillmode)
	if(mode_is_float(mode)) {
		return mode == spillmode;
 * Check if irn can load its operand at position i from memory (source addressmode).
 * @param self Pointer to irn ops itself
 * @param irn  The irn to be checked
 * @param i    The operands position
 * @return     Non-zero if operand can be loaded
static int ia32_possible_memory_operand(const void *self, const ir_node *irn, unsigned int i) {
	ir_node *op = get_irn_n(irn, i);
	const ir_mode *mode = get_irn_mode(op);
	const ir_mode *spillmode = get_spill_mode(op);

	if (! is_ia32_irn(irn)                            ||  /* must be an ia32 irn */
	    get_irn_arity(irn) != 5                       ||  /* must be a binary operation */
	    get_ia32_op_type(irn) != ia32_Normal          ||  /* must not already be an addressmode irn */
	    ! (get_ia32_am_support(irn) & ia32_am_Source) ||  /* must be capable of source addressmode */
	    ! ia32_is_spillmode_compatible(mode, spillmode) ||
	    (i != 2 && i != 3)                            ||  /* a "real" operand position must be requested */
	    (i == 2 && ! is_ia32_commutative(irn))        ||  /* if first operand requested irn must be commutative */
	    is_ia32_use_frame(irn))                           /* must not already use frame */
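/* Illustrative effect (not part of the original file): when all checks above
 * succeed, the reload of a spilled operand can be folded into the user, e.g.
 *
 *     mov eax, [ebp-8]
 *     add ecx, eax           ==>     add ecx, [ebp-8]
 *
 * (the offset is hypothetical); ia32_perform_memory_operand below performs
 * the actual rewiring. */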
static void ia32_perform_memory_operand(const void *self, ir_node *irn, ir_node *spill, unsigned int i) {
	const ia32_irn_ops_t *ops = self;
	ia32_code_gen_t *cg = ops->cg;

	assert(ia32_possible_memory_operand(self, irn, i) && "Cannot perform memory operand change");

		ir_node *tmp = get_irn_n(irn, 3);
		set_irn_n(irn, 3, get_irn_n(irn, 2));
		set_irn_n(irn, 2, tmp);

	set_ia32_am_support(irn, ia32_am_Source);
	set_ia32_op_type(irn, ia32_AddrModeS);
	set_ia32_am_flavour(irn, ia32_B);
	set_ia32_ls_mode(irn, get_irn_mode(get_irn_n(irn, i)));
	set_ia32_use_frame(irn);
	set_ia32_need_stackent(irn);

	set_irn_n(irn, 0, get_irg_frame(get_irn_irg(irn)));
	set_irn_n(irn, 3, ia32_get_admissible_noreg(cg, irn, 3));
	set_irn_n(irn, 4, spill);

	//FIXME DBG_OPT_AM_S(reload, irn);
static const be_abi_callbacks_t ia32_abi_callbacks = {
	ia32_abi_get_between_type,
	ia32_abi_dont_save_regs,

/* fill register allocator interface */

static const arch_irn_ops_if_t ia32_irn_ops_if = {
	ia32_get_irn_reg_req,
	ia32_get_frame_entity,
	ia32_set_frame_entity,
	ia32_set_frame_offset,
	ia32_get_op_estimated_cost,
	ia32_possible_memory_operand,
	ia32_perform_memory_operand,

ia32_irn_ops_t ia32_irn_ops = {
/**************************************************
 *
 *               codegen interface
 *
 **************************************************/
 * Transforms the standard firm graph into
 * an ia32 firm graph
static void ia32_prepare_graph(void *self) {
	ia32_code_gen_t *cg = self;
	DEBUG_ONLY(firm_dbg_module_t *old_mod = cg->mod;)

	FIRM_DBG_REGISTER(cg->mod, "firm.be.ia32.transform");

	/* transform psi condition trees */
	ia32_pre_transform_phase(cg);

	/* transform all remaining nodes */
	ia32_transform_graph(cg);

	// Matze: disabled for now. Because after transformation start block has no
	// self-loop anymore so it might be merged with its successor block. This
	// will bring several nodes to the startblock which sometimes get scheduled
	// before the initial IncSP/Barrier
	//local_optimize_graph(cg->irg);

	be_dump(cg->irg, "-transformed", dump_ir_block_graph_sched);

	/* optimize address mode */
	FIRM_DBG_REGISTER(cg->mod, "firm.be.ia32.am");
	ia32_optimize_addressmode(cg);

	/* do code placement, to optimize the position of constants */
	place_code(cg->irg);

	be_dump(cg->irg, "-am", dump_ir_block_graph_sched);

	DEBUG_ONLY(cg->mod = old_mod;)
 * Dummy functions for hooks we don't need but which must be filled.
static void ia32_before_sched(void *self) {

static void remove_unused_nodes(ir_node *irn, bitset_t *already_visited) {
	ir_node *mem_proj = NULL;

	mode = get_irn_mode(irn);

	/* check if we already saw this node or the node has more than one user */
	if (bitset_contains_irn(already_visited, irn) || get_irn_n_edges(irn) > 1) {

	/* mark irn visited */
	bitset_add_irn(already_visited, irn);

	/* non-Tuple nodes with one user: ok, return */
	if (get_irn_n_edges(irn) >= 1 && mode != mode_T) {

	/* tuple node has one user which is not the mem proj -> ok */
	if (mode == mode_T && get_irn_n_edges(irn) == 1) {
		mem_proj = ia32_get_proj_for_mode(irn, mode_M);
		if (mem_proj == NULL) {

	arity = get_irn_arity(irn);
	for (i = 0; i < arity; ++i) {
		ir_node *pred = get_irn_n(irn, i);

		/* do not follow memory edges or we will accidentally remove stores */
		if (get_irn_mode(pred) == mode_M) {
			if(mem_proj != NULL) {
				edges_reroute(mem_proj, pred, get_irn_irg(mem_proj));

		set_irn_n(irn, i, new_Bad());
		/*
			The current node is about to be removed: if the predecessor
			has only this node as user, it needs to be removed as well.
		*/
		if (get_irn_n_edges(pred) <= 1)
			remove_unused_nodes(pred, already_visited);

	// we need to set the preds to Bad again to also get the memory edges
	arity = get_irn_arity(irn);
	for (i = 0; i < arity; ++i) {
		set_irn_n(irn, i, new_Bad());

	if (sched_is_scheduled(irn)) {

static void remove_unused_loads_walker(ir_node *irn, void *env) {
	bitset_t *already_visited = env;
	if (is_ia32_Ld(irn) && ! bitset_contains_irn(already_visited, irn))
		remove_unused_nodes(irn, env);
 * Called before the register allocator.
 * Calculate a block schedule here. We need it for the x87
 * simulator and the emitter.
static void ia32_before_ra(void *self) {
	ia32_code_gen_t *cg = self;
	bitset_t *already_visited = bitset_irg_alloca(cg->irg);

	/*
		Handle special case:
		There are sometimes unused loads, only pinned by memory.
		We need to remove those Loads and all other nodes which won't be used
		after removing the Load from schedule.
	*/
	irg_walk_graph(cg->irg, NULL, remove_unused_loads_walker, already_visited);
 * Transforms a be_Reload into an ia32 Load.
static void transform_to_Load(ia32_code_gen_t *cg, ir_node *node) {
	ir_graph *irg = get_irn_irg(node);
	dbg_info *dbg = get_irn_dbg_info(node);
	ir_node *block = get_nodes_block(node);
	ir_entity *ent = be_get_frame_entity(node);
	ir_mode *mode = get_irn_mode(node);
	ir_mode *spillmode = get_spill_mode(node);
	ir_node *noreg = ia32_new_NoReg_gp(cg);
	ir_node *sched_point = NULL;
	ir_node *ptr = get_irg_frame(irg);
	ir_node *mem = get_irn_n(node, be_pos_Reload_mem);
	ir_node *new_op, *proj;
	const arch_register_t *reg;

	if (sched_is_scheduled(node)) {
		sched_point = sched_prev(node);

	if (mode_is_float(spillmode)) {
			new_op = new_rd_ia32_xLoad(dbg, irg, block, ptr, noreg, mem);
			new_op = new_rd_ia32_vfld(dbg, irg, block, ptr, noreg, mem);
	else if (get_mode_size_bits(spillmode) == 128) {
		// Reload 128 bit SSE registers
		new_op = new_rd_ia32_xxLoad(dbg, irg, block, ptr, noreg, mem);

		new_op = new_rd_ia32_Load(dbg, irg, block, ptr, noreg, mem);

	set_ia32_am_support(new_op, ia32_am_Source);
	set_ia32_op_type(new_op, ia32_AddrModeS);
	set_ia32_am_flavour(new_op, ia32_B);
	set_ia32_ls_mode(new_op, spillmode);
	set_ia32_frame_ent(new_op, ent);
	set_ia32_use_frame(new_op);

	DBG_OPT_RELOAD2LD(node, new_op);

	proj = new_rd_Proj(dbg, irg, block, new_op, mode, pn_ia32_Load_res);

		sched_add_after(sched_point, new_op);
		sched_add_after(new_op, proj);

	/* copy the register from the old node to the new Load */
	reg = arch_get_irn_register(cg->arch_env, node);
	arch_set_irn_register(cg->arch_env, new_op, reg);

	SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(cg, node));

	exchange(node, proj);
 * Transforms a be_Spill node into an ia32 Store.
static void transform_to_Store(ia32_code_gen_t *cg, ir_node *node) {
	ir_graph *irg = get_irn_irg(node);
	dbg_info *dbg = get_irn_dbg_info(node);
	ir_node *block = get_nodes_block(node);
	ir_entity *ent = be_get_frame_entity(node);
	const ir_node *spillval = get_irn_n(node, be_pos_Spill_val);
	ir_mode *mode = get_spill_mode(spillval);
	ir_node *noreg = ia32_new_NoReg_gp(cg);
	ir_node *nomem = new_rd_NoMem(irg);
	ir_node *ptr = get_irg_frame(irg);
	ir_node *val = get_irn_n(node, be_pos_Spill_val);
	ir_node *sched_point = NULL;

	if (sched_is_scheduled(node)) {
		sched_point = sched_prev(node);

	if (mode_is_float(mode)) {
			store = new_rd_ia32_xStore(dbg, irg, block, ptr, noreg, val, nomem);
			store = new_rd_ia32_vfst(dbg, irg, block, ptr, noreg, val, nomem);
	else if (get_mode_size_bits(mode) == 128) {
		// Spill 128 bit SSE registers
		store = new_rd_ia32_xxStore(dbg, irg, block, ptr, noreg, val, nomem);
	else if (get_mode_size_bits(mode) == 8) {
		store = new_rd_ia32_Store8Bit(dbg, irg, block, ptr, noreg, val, nomem);

		store = new_rd_ia32_Store(dbg, irg, block, ptr, noreg, val, nomem);

	set_ia32_am_support(store, ia32_am_Dest);
	set_ia32_op_type(store, ia32_AddrModeD);
	set_ia32_am_flavour(store, ia32_B);
	set_ia32_ls_mode(store, mode);
	set_ia32_frame_ent(store, ent);
	set_ia32_use_frame(store);

	DBG_OPT_SPILL2ST(node, store);
	SET_IA32_ORIG_NODE(store, ia32_get_old_node_name(cg, node));

		sched_add_after(sched_point, store);

	exchange(node, store);
static ir_node *create_push(ia32_code_gen_t *cg, ir_node *node, ir_node *schedpoint, ir_node *sp, ir_node *mem, ir_entity *ent) {
	ir_graph *irg = get_irn_irg(node);
	dbg_info *dbg = get_irn_dbg_info(node);
	ir_node *block = get_nodes_block(node);
	ir_node *noreg = ia32_new_NoReg_gp(cg);
	ir_node *frame = get_irg_frame(irg);

	ir_node *push = new_rd_ia32_Push(dbg, irg, block, frame, noreg, noreg, sp, mem);

	set_ia32_frame_ent(push, ent);
	set_ia32_use_frame(push);
	set_ia32_op_type(push, ia32_AddrModeS);
	set_ia32_am_flavour(push, ia32_B);
	set_ia32_ls_mode(push, mode_Is);

	sched_add_before(schedpoint, push);

static ir_node *create_pop(ia32_code_gen_t *cg, ir_node *node, ir_node *schedpoint, ir_node *sp, ir_entity *ent) {
	ir_graph *irg = get_irn_irg(node);
	dbg_info *dbg = get_irn_dbg_info(node);
	ir_node *block = get_nodes_block(node);
	ir_node *noreg = ia32_new_NoReg_gp(cg);
	ir_node *frame = get_irg_frame(irg);

	ir_node *pop = new_rd_ia32_Pop(dbg, irg, block, frame, noreg, sp, new_NoMem());

	set_ia32_frame_ent(pop, ent);
	set_ia32_use_frame(pop);
	set_ia32_op_type(pop, ia32_AddrModeD);
	set_ia32_am_flavour(pop, ia32_am_OB);
	set_ia32_ls_mode(pop, mode_Is);

	sched_add_before(schedpoint, pop);

static ir_node* create_spproj(ia32_code_gen_t *cg, ir_node *node, ir_node *pred, int pos, ir_node *schedpoint) {
	ir_graph *irg = get_irn_irg(node);
	dbg_info *dbg = get_irn_dbg_info(node);
	ir_node *block = get_nodes_block(node);
	ir_mode *spmode = mode_Iu;
	const arch_register_t *spreg = &ia32_gp_regs[REG_ESP];

	sp = new_rd_Proj(dbg, irg, block, pred, spmode, pos);
	arch_set_irn_register(cg->arch_env, sp, spreg);
	sched_add_before(schedpoint, sp);
 * Transform memperm, currently we do this the ugly way and produce
 * push/pop into/from memory cascades. This is possible without using
 * any registers; see the sketch after this function.
1310 ir_graph *irg = get_irn_irg(node);
1311 ir_node *block = get_nodes_block(node);
1315 ir_node *sp = be_abi_get_ignore_irn(cg->birg->abi, &ia32_gp_regs[REG_ESP]);
1316 const ir_edge_t *edge;
1317 const ir_edge_t *next;
1320 arity = be_get_MemPerm_entity_arity(node);
1321 pops = alloca(arity * sizeof(pops[0]));
1324 for(i = 0; i < arity; ++i) {
1325 ir_entity *ent = be_get_MemPerm_in_entity(node, i);
1326 ir_type *enttype = get_entity_type(ent);
1327 int entbits = get_type_size_bits(enttype);
1328 ir_node *mem = get_irn_n(node, i + 1);
1331 assert( (entbits == 32 || entbits == 64) && "spillslot on x86 should be 32 or 64 bit");
1333 push = create_push(cg, node, node, sp, mem, ent);
1334 sp = create_spproj(cg, node, push, pn_ia32_Push_stack, node);
1336 // add another push after the first one
1337 push = create_push(cg, node, node, sp, mem, ent);
1338 add_ia32_am_offs_int(push, 4);
1339 sp = create_spproj(cg, node, push, pn_ia32_Push_stack, node);
1342 set_irn_n(node, i, new_Bad());
1346 for(i = arity - 1; i >= 0; --i) {
1347 ir_entity *ent = be_get_MemPerm_out_entity(node, i);
1348 ir_type *enttype = get_entity_type(ent);
1349 int entbits = get_type_size_bits(enttype);
1353 assert( (entbits == 32 || entbits == 64) && "spillslot on x86 should be 32 or 64 bit");
1355 pop = create_pop(cg, node, node, sp, ent);
1356 sp = create_spproj(cg, node, pop, pn_ia32_Pop_stack, node);
1358 add_ia32_am_offs_int(pop, 4);
1360 // add another pop after the first one
1361 pop = create_pop(cg, node, node, sp, ent);
1362 sp = create_spproj(cg, node, pop, pn_ia32_Pop_stack, node);
1369 keep = be_new_Keep(&ia32_reg_classes[CLASS_ia32_gp], irg, block, 1, in);
1370 sched_add_before(node, keep);
1372 // exchange memprojs
1373 foreach_out_edge_safe(node, edge, next) {
1374 ir_node *proj = get_edge_src_irn(edge);
1375 int p = get_Proj_proj(proj);
1379 set_Proj_pred(proj, pops[p]);
1380 set_Proj_proj(proj, 3);
1384 arity = get_irn_arity(node);
1385 for(i = 0; i < arity; ++i) {
1386 set_irn_n(node, i, new_Bad());
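/* Illustrative sketch (not in the original source): for a MemPerm that swaps
 * two 32 bit spill slots A and B, the cascade built above amounts to
 *
 *     push dword [A]
 *     push dword [B]
 *     pop  dword [A]        ; A now holds B's old value
 *     pop  dword [B]        ; B now holds A's old value
 *
 * All entries are pushed first and then popped to their destinations in
 * reverse order, so the exchange needs no data register at all. */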
 * Block-Walker: Calls the transform functions Spill and Reload.
static void ia32_after_ra_walker(ir_node *block, void *env) {
	ir_node *node, *prev;
	ia32_code_gen_t *cg = env;

	/* beware: the schedule is changed here */
	for (node = sched_last(block); !sched_is_begin(node); node = prev) {
		prev = sched_prev(node);

		if (be_is_Reload(node)) {
			transform_to_Load(cg, node);
		} else if (be_is_Spill(node)) {
			transform_to_Store(cg, node);
		} else if(be_is_MemPerm(node)) {
			transform_MemPerm(cg, node);
 * Collects nodes that need frame entities assigned.
static void ia32_collect_frame_entity_nodes(ir_node *node, void *data)
	be_fec_env_t *env = data;

	if (be_is_Reload(node) && be_get_frame_entity(node) == NULL) {
		const ir_mode *mode = get_spill_mode_mode(get_irn_mode(node));
		int align = get_mode_size_bytes(mode);
		be_node_needs_frame_entity(env, node, mode, align);
	} else if(is_ia32_irn(node) && get_ia32_frame_ent(node) == NULL
	          && is_ia32_use_frame(node)) {
		if (is_ia32_need_stackent(node) || is_ia32_Load(node)) {
			const ir_mode *mode = get_ia32_ls_mode(node);
			int align = get_mode_size_bytes(mode);
			be_node_needs_frame_entity(env, node, mode, align);
		} else if (is_ia32_vfild(node) || is_ia32_xLoad(node)) {
			const ir_mode *mode = get_ia32_ls_mode(node);
			be_node_needs_frame_entity(env, node, mode, align);
		} else if (is_ia32_SetST0(node)) {
			const ir_mode *mode = get_ia32_ls_mode(node);
			be_node_needs_frame_entity(env, node, mode, align);

			if(!is_ia32_Store(node)
			   && !is_ia32_xStore(node)
			   && !is_ia32_xStoreSimple(node)
			   && !is_ia32_vfist(node)
			   && !is_ia32_GetST0(node)) {
 * We transform Spill and Reload here. This needs to be done before
 * stack biasing, otherwise we would miss the corrected offset for these nodes.
static void ia32_after_ra(void *self) {
	ia32_code_gen_t *cg = self;
	ir_graph *irg = cg->irg;
	be_fec_env_t *fec_env = be_new_frame_entity_coalescer(cg->birg);

	/* create and coalesce frame entities */
	irg_walk_graph(irg, NULL, ia32_collect_frame_entity_nodes, fec_env);
	be_assign_entities(fec_env);
	be_free_frame_entity_coalescer(fec_env);

	irg_block_walk_graph(irg, NULL, ia32_after_ra_walker, cg);

	ia32_finish_irg(irg, cg);
 * Last touchups for the graph before emit: x87 simulation to replace the
 * virtual with real x87 instructions, creating a block schedule and peephole
static void ia32_finish(void *self) {
	ia32_code_gen_t *cg = self;
	ir_graph *irg = cg->irg;

	/* if we do x87 code generation, rewrite all the virtual instructions and registers */
	if (cg->used_fp == fp_x87 || cg->force_sim) {
		x87_simulate_graph(cg->arch_env, cg->birg);

	/* create block schedule, this also removes empty blocks which might
	 * produce critical edges */
	cg->blk_sched = be_create_block_schedule(irg, cg->birg->exec_freq);

	/* do peephole optimisations */
	ia32_peephole_optimization(irg, cg);

 * Emits the code, closes the output file and frees
 * the code generator interface.
static void ia32_codegen(void *self) {
	ia32_code_gen_t *cg = self;
	ir_graph *irg = cg->irg;

	ia32_gen_routine(cg, irg);

	/* remove it from the isa */

	/* de-allocate code generator */
	del_set(cg->reg_set);
static void *ia32_cg_init(be_irg_t *birg);

static const arch_code_generator_if_t ia32_code_gen_if = {
	NULL,              /* before abi introduce hook */
	ia32_before_sched, /* before scheduling hook */
	ia32_before_ra,    /* before register allocation hook */
	ia32_after_ra,     /* after register allocation hook */
	ia32_finish,       /* called before codegen */
	ia32_codegen       /* emit && done */
 * Initializes an IA32 code generator.
static void *ia32_cg_init(be_irg_t *birg) {
	ia32_isa_t *isa = (ia32_isa_t *)birg->main_env->arch_env->isa;
	ia32_code_gen_t *cg = xcalloc(1, sizeof(*cg));

	cg->impl = &ia32_code_gen_if;
	cg->irg = birg->irg;
	cg->reg_set = new_set(ia32_cmp_irn_reg_assoc, 1024);
	cg->arch_env = birg->main_env->arch_env;
	cg->blk_sched = NULL;
	cg->fp_kind = isa->fp_kind;
	cg->used_fp = fp_none;
	cg->dump = (birg->main_env->options->dump_flags & DUMP_BE) ? 1 : 0;

	FIRM_DBG_REGISTER(cg->mod, "firm.be.ia32.cg");

	/* copy optimizations from isa for easier access */
	cg->arch = isa->arch;
	cg->opt_arch = isa->opt_arch;

	if (isa->name_obst) {
		obstack_free(isa->name_obst, NULL);
		obstack_init(isa->name_obst);

	cur_reg_set = cg->reg_set;

	ia32_irn_ops.cg = cg;

	return (arch_code_generator_t *)cg;
/*****************************************************************
 *
 *                        Backend ISA
 *
 *****************************************************************/
 * Set output modes for GCC
static const tarval_mode_info mo_integer = {

 * set the tarval output mode of all integer modes to decimal
static void set_tarval_output_modes(void)
	for (i = get_irp_n_modes() - 1; i >= 0; --i) {
		ir_mode *mode = get_irp_mode(i);

		if (mode_is_int(mode))
			set_tarval_mode_output_option(mode, &mo_integer);

const arch_isa_if_t ia32_isa_if;
 * The template that generates a new ISA object.
 * Note that this template can be changed by command line arguments.
static ia32_isa_t ia32_isa_template = {
	&ia32_isa_if,           /* isa interface implementation */
	&ia32_gp_regs[REG_ESP], /* stack pointer register */
	&ia32_gp_regs[REG_EBP], /* base pointer register */
	-1,                     /* stack direction */
	NULL,                   /* main environment */
	{},                     /* emitter environment */
	NULL,                   /* 16bit register names */
	NULL,                   /* 8bit register names */
	IA32_OPT_INCDEC |       /* optimize add 1, sub 1 into inc/dec, default: on */
	IA32_OPT_DOAM |         /* optimize address mode, default: on */
	IA32_OPT_LEA |          /* optimize for LEAs, default: on */
	IA32_OPT_PLACECNST |    /* place constants immediately before instructions, default: on */
	IA32_OPT_IMMOPS |       /* operations can use immediates, default: on */
	IA32_OPT_PUSHARGS),     /* create pushs for function argument passing, default: on */
	arch_pentium_4,         /* instruction architecture */
	arch_pentium_4,         /* optimize for architecture */
	fp_sse2,                /* use sse2 unit */
	NULL,                   /* current code generator */
	NULL,                   /* name obstack */
	0                       /* name obst size */
 * Initializes the backend ISA.
static void *ia32_init(FILE *file_handle) {
	static int inited = 0;

	set_tarval_output_modes();

	isa = xmalloc(sizeof(*isa));
	memcpy(isa, &ia32_isa_template, sizeof(*isa));

	ia32_register_init(isa);
	ia32_create_opcodes();
	ia32_register_copy_attr_func();

	if ((ARCH_INTEL(isa->arch) && isa->arch < arch_pentium_4) ||
	    (ARCH_AMD(isa->arch) && isa->arch < arch_athlon))
		/* no SSE2 for these cpu's */
		isa->fp_kind = fp_x87;

	if (ARCH_INTEL(isa->opt_arch) && isa->opt_arch >= arch_pentium_4) {
		/* the Pentium 4 doesn't like inc and dec instructions */
		isa->opt &= ~IA32_OPT_INCDEC;

	be_emit_init_env(&isa->emit, file_handle);
	isa->regs_16bit = pmap_create();
	isa->regs_8bit = pmap_create();
	isa->types = pmap_create();
	isa->tv_ent = pmap_create();
	isa->cpu = ia32_init_machine_description();

	ia32_build_16bit_reg_map(isa->regs_16bit);
	ia32_build_8bit_reg_map(isa->regs_8bit);

	/* patch register names of x87 registers */
	ia32_st_regs[0].name = "st";
	ia32_st_regs[1].name = "st(1)";
	ia32_st_regs[2].name = "st(2)";
	ia32_st_regs[3].name = "st(3)";
	ia32_st_regs[4].name = "st(4)";
	ia32_st_regs[5].name = "st(5)";
	ia32_st_regs[6].name = "st(6)";
	ia32_st_regs[7].name = "st(7)";

	isa->name_obst = xmalloc(sizeof(*isa->name_obst));
	obstack_init(isa->name_obst);

	ia32_handle_intrinsics();

	/* needed for the debug support */
	be_gas_emit_switch_section(&isa->emit, GAS_SECTION_TEXT);
	be_emit_cstring(&isa->emit, ".Ltext0:\n");
	be_emit_write_line(&isa->emit);
 * Closes the output file and frees the ISA structure.
static void ia32_done(void *self) {
	ia32_isa_t *isa = self;

	/* now emit all global declarations */
	be_gas_emit_decls(&isa->emit, isa->arch_isa.main_env);

	pmap_destroy(isa->regs_16bit);
	pmap_destroy(isa->regs_8bit);
	pmap_destroy(isa->tv_ent);
	pmap_destroy(isa->types);

	obstack_free(isa->name_obst, NULL);

	be_emit_destroy_env(&isa->emit);
 * Return the number of register classes for this architecture.
 * We always report these:
 *  - the general purpose registers
 *  - the SSE floating point register set
 *  - the virtual floating point registers
 *  - the SSE vector register set
static int ia32_get_n_reg_class(const void *self) {
 * Return the register class for index i.
static const arch_register_class_t *ia32_get_reg_class(const void *self, int i) {
		return &ia32_reg_classes[CLASS_ia32_gp];
		return &ia32_reg_classes[CLASS_ia32_xmm];
		return &ia32_reg_classes[CLASS_ia32_vfp];
	assert(0 && "Invalid ia32 register class requested.");
 * Get the register class which shall be used to store a value of a given mode.
 * @param self The this pointer.
 * @param mode The mode in question.
 * @return A register class which can hold values of the given mode.
const arch_register_class_t *ia32_get_reg_class_for_mode(const void *self, const ir_mode *mode) {
	const ia32_isa_t *isa = self;
	if (mode_is_float(mode)) {
		return USE_SSE2(isa) ? &ia32_reg_classes[CLASS_ia32_xmm] : &ia32_reg_classes[CLASS_ia32_vfp];

	return &ia32_reg_classes[CLASS_ia32_gp];
 * Get the ABI restrictions for procedure calls.
 * @param self        The this pointer.
 * @param method_type The type of the method (procedure) in question.
 * @param abi         The abi object to be modified
static void ia32_get_call_abi(const void *self, ir_type *method_type, be_abi_call_t *abi) {
	const ia32_isa_t *isa = self;

	unsigned cc = get_method_calling_convention(method_type);
	int n = get_method_n_params(method_type);

	int i, ignore_1, ignore_2;

	const arch_register_t *reg;
	be_abi_call_flags_t call_flags = be_abi_call_get_flags(abi);

	unsigned use_push = !IS_P6_ARCH(isa->opt_arch);

	/* set abi flags for calls */
	call_flags.bits.left_to_right = 0; /* always last arg first on stack */
	call_flags.bits.store_args_sequential = use_push;
	/* call_flags.bits.try_omit_fp not changed: can handle both settings */
	call_flags.bits.fp_free = 0; /* the frame pointer is fixed in IA32 */
	call_flags.bits.call_has_imm = 1; /* IA32 calls can have immediate address */

	/* set stack parameter passing style */
	be_abi_call_set_flags(abi, call_flags, &ia32_abi_callbacks);

	/* collect the mode for each type */
	modes = alloca(n * sizeof(modes[0]));

	for (i = 0; i < n; i++) {
		tp = get_method_param_type(method_type, i);
		modes[i] = get_type_mode(tp);

	/* set register parameters */
	if (cc & cc_reg_param) {
		/* determine the number of parameters passed via registers */
		biggest_n = ia32_get_n_regparam_class(n, modes, &ignore_1, &ignore_2);

		/* loop over all parameters and set the register requirements */
		for (i = 0; i <= biggest_n; i++) {
			reg = ia32_get_RegParam_reg(n, modes, i, cc);
			assert(reg && "no register found for parameter");
			be_abi_call_param_reg(abi, i, reg);

	/* set stack parameters */
	for (i = stack_idx; i < n; i++) {
		/* parameters on the stack are 32 bit aligned */
		be_abi_call_param_stack(abi, i, 4, 0, 0);

	/* set return registers */
	n = get_method_n_ress(method_type);

	assert(n <= 2 && "more than two results not supported");

	/* In case of 64bit returns, we will have two 32bit values */
		tp = get_method_res_type(method_type, 0);
		mode = get_type_mode(tp);

		assert(!mode_is_float(mode) && "two FP results not supported");

		tp = get_method_res_type(method_type, 1);
		mode = get_type_mode(tp);

		assert(!mode_is_float(mode) && "mixed INT, FP results not supported");

		be_abi_call_res_reg(abi, 0, &ia32_gp_regs[REG_EAX]);
		be_abi_call_res_reg(abi, 1, &ia32_gp_regs[REG_EDX]);

		const arch_register_t *reg;

		tp = get_method_res_type(method_type, 0);
		assert(is_atomic_type(tp));
		mode = get_type_mode(tp);

		reg = mode_is_float(mode) ? &ia32_vfp_regs[REG_VF0] : &ia32_gp_regs[REG_EAX];

		be_abi_call_res_reg(abi, 0, reg);
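/* Illustrative summary (not part of the original file) of the return value
 * conventions set up above:
 *
 *     int       f(void);   ->  eax
 *     float     f(void);   ->  virtual vf0 (st0 after x87 simulation)
 *     long long f(void);   ->  eax/edx pair (res 0 and res 1)
 *
 * Treat the exact mapping of the 64 bit halves to res 0/1 as an assumption;
 * only the eax/edx pairing is fixed by the code above. */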
static const void *ia32_get_irn_ops(const arch_irn_handler_t *self, const ir_node *irn) {
	return &ia32_irn_ops;

const arch_irn_handler_t ia32_irn_handler = {

const arch_irn_handler_t *ia32_get_irn_handler(const void *self) {
	return &ia32_irn_handler;

int ia32_to_appear_in_schedule(void *block_env, const ir_node *irn) {
	return is_ia32_irn(irn) ? 1 : -1;

 * Initializes the code generator interface.
static const arch_code_generator_if_t *ia32_get_code_generator_if(void *self) {
	return &ia32_code_gen_if;

 * Returns the estimated execution time of an ia32 irn.
static sched_timestep_t ia32_sched_exectime(void *env, const ir_node *irn) {
	const arch_env_t *arch_env = env;
	return is_ia32_irn(irn) ? ia32_get_op_estimated_cost(arch_get_irn_ops(arch_env, irn), irn) : 1;

list_sched_selector_t ia32_sched_selector;

 * Returns the reg_pressure scheduler with to_appear_in_schedule() overloaded
static const list_sched_selector_t *ia32_get_list_sched_selector(const void *self, list_sched_selector_t *selector) {
	memcpy(&ia32_sched_selector, selector, sizeof(ia32_sched_selector));
	ia32_sched_selector.exectime = ia32_sched_exectime;
	ia32_sched_selector.to_appear_in_schedule = ia32_to_appear_in_schedule;
	return &ia32_sched_selector;
static const ilp_sched_selector_t *ia32_get_ilp_sched_selector(const void *self) {
 * Returns the necessary byte alignment for storing a register of a given class.
static int ia32_get_reg_class_alignment(const void *self, const arch_register_class_t *cls) {
	ir_mode *mode = arch_register_class_mode(cls);
	int bytes = get_mode_size_bytes(mode);

	if (mode_is_float(mode) && bytes > 8)
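/* Note (illustrative, not in the original source): the elided return value
 * here is presumably 16, since 128 bit SSE spill slots want 16 byte aligned
 * stack positions; every other class can simply use its register size as
 * the alignment. */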
static const be_execution_unit_t ***ia32_get_allowed_execution_units(const void *self, const ir_node *irn) {
	static const be_execution_unit_t *_allowed_units_BRANCH[] = {
		&ia32_execution_units_BRANCH[IA32_EXECUNIT_TP_BRANCH_BRANCH1],
		&ia32_execution_units_BRANCH[IA32_EXECUNIT_TP_BRANCH_BRANCH2],

	static const be_execution_unit_t *_allowed_units_GP[] = {
		&ia32_execution_units_GP[IA32_EXECUNIT_TP_GP_GP_EAX],
		&ia32_execution_units_GP[IA32_EXECUNIT_TP_GP_GP_EBX],
		&ia32_execution_units_GP[IA32_EXECUNIT_TP_GP_GP_ECX],
		&ia32_execution_units_GP[IA32_EXECUNIT_TP_GP_GP_EDX],
		&ia32_execution_units_GP[IA32_EXECUNIT_TP_GP_GP_ESI],
		&ia32_execution_units_GP[IA32_EXECUNIT_TP_GP_GP_EDI],
		&ia32_execution_units_GP[IA32_EXECUNIT_TP_GP_GP_EBP],

	static const be_execution_unit_t *_allowed_units_DUMMY[] = {
		&be_machine_execution_units_DUMMY[0],

	static const be_execution_unit_t **_units_callret[] = {
		_allowed_units_BRANCH,

	static const be_execution_unit_t **_units_other[] = {

	static const be_execution_unit_t **_units_dummy[] = {
		_allowed_units_DUMMY,

	const be_execution_unit_t ***ret;

	if (is_ia32_irn(irn)) {
		ret = get_ia32_exec_units(irn);
	else if (is_be_node(irn)) {
		if (be_is_Call(irn) || be_is_Return(irn)) {
			ret = _units_callret;
		else if (be_is_Barrier(irn)) {
 * Return the abstract ia32 machine.
static const be_machine_t *ia32_get_machine(const void *self) {
	const ia32_isa_t *isa = self;

 * Return irp irgs in the desired order.
static ir_graph **ia32_get_irg_list(const void *self, ir_graph ***irg_list) {
 * Allows or disallows the creation of Psi nodes for the given Phi nodes.
 * @return 1 if allowed, 0 otherwise
static int ia32_is_psi_allowed(ir_node *sel, ir_node *phi_list, int i, int j)
	ir_node *cmp, *cmp_a, *phi;

	/* we don't want long long and floating point Psis */
#define IS_BAD_PSI_MODE(mode) (mode_is_float(mode) || get_mode_size_bits(mode) > 32)

	if (get_irn_mode(sel) != mode_b)

	cmp = get_Proj_pred(sel);
	cmp_a = get_Cmp_left(cmp);
	mode = get_irn_mode(cmp_a);

	if (IS_BAD_PSI_MODE(mode))

	/* check the Phi nodes */
	for (phi = phi_list; phi; phi = get_irn_link(phi)) {
		ir_node *pred_i = get_irn_n(phi, i);
		ir_node *pred_j = get_irn_n(phi, j);
		ir_mode *mode_i = get_irn_mode(pred_i);
		ir_mode *mode_j = get_irn_mode(pred_j);

		if (IS_BAD_PSI_MODE(mode_i) || IS_BAD_PSI_MODE(mode_j))

#undef IS_BAD_PSI_MODE
static ia32_intrinsic_env_t intrinsic_env = {
	NULL, /**< the irg, these entities belong to */
	NULL, /**< entity for first div operand (move into FPU) */
	NULL, /**< entity for second div operand (move into FPU) */
	NULL, /**< entity for converts ll -> d */
	NULL, /**< entity for converts d -> ll */

 * Returns the libFirm configuration parameter for this backend.
static const backend_params *ia32_get_libfirm_params(void) {
	static const opt_if_conv_info_t ifconv = {
		4,                  /* maxdepth, doesn't matter for Psi-conversion */
		ia32_is_psi_allowed /* allows or disallows Psi creation for given selector */

	static const arch_dep_params_t ad = {
		1,  /* also use subs */
		4,  /* maximum shifts */
		31, /* maximum shift amount */
		1,  /* allow Mulhs */
		1,  /* allow Mulus */
		32  /* Mulh allowed up to 32 bit */

	static backend_params p = {
		NULL, /* no additional opcodes */
		NULL, /* will be set later */
		1,    /* need dword lowering */
		ia32_create_intrinsic_fkt,
		&intrinsic_env, /* context for ia32_create_intrinsic_fkt */
		NULL, /* will be set later */

	p.if_conv_info = &ifconv;
/* instruction set architectures. */
static const lc_opt_enum_int_items_t arch_items[] = {
	{ "386",        arch_i386, },
	{ "486",        arch_i486, },
	{ "pentium",    arch_pentium, },
	{ "586",        arch_pentium, },
	{ "pentiumpro", arch_pentium_pro, },
	{ "686",        arch_pentium_pro, },
	{ "pentiummmx", arch_pentium_mmx, },
	{ "pentium2",   arch_pentium_2, },
	{ "p2",         arch_pentium_2, },
	{ "pentium3",   arch_pentium_3, },
	{ "p3",         arch_pentium_3, },
	{ "pentium4",   arch_pentium_4, },
	{ "p4",         arch_pentium_4, },
	{ "pentiumm",   arch_pentium_m, },
	{ "pm",         arch_pentium_m, },
	{ "core",       arch_core, },
	{ "athlon",     arch_athlon, },
	{ "athlon64",   arch_athlon_64, },
	{ "opteron",    arch_opteron, },

static lc_opt_enum_int_var_t arch_var = {
	&ia32_isa_template.arch, arch_items

static lc_opt_enum_int_var_t opt_arch_var = {
	&ia32_isa_template.opt_arch, arch_items

static const lc_opt_enum_int_items_t fp_unit_items[] = {
	{ "sse2", fp_sse2 },

static lc_opt_enum_int_var_t fp_unit_var = {
	&ia32_isa_template.fp_kind, fp_unit_items

static const lc_opt_enum_int_items_t gas_items[] = {
	{ "normal", GAS_FLAVOUR_NORMAL },
	{ "mingw",  GAS_FLAVOUR_MINGW },

static lc_opt_enum_int_var_t gas_var = {
	(int*) &be_gas_flavour, gas_items

static const lc_opt_table_entry_t ia32_options[] = {
	LC_OPT_ENT_ENUM_INT("arch",      "select the instruction architecture",   &arch_var),
	LC_OPT_ENT_ENUM_INT("opt",       "optimize for instruction architecture", &opt_arch_var),
	LC_OPT_ENT_ENUM_INT("fpunit",    "select the floating point unit",        &fp_unit_var),
	LC_OPT_ENT_NEGBIT("noaddrmode",  "do not use address mode",               &ia32_isa_template.opt, IA32_OPT_DOAM),
	LC_OPT_ENT_NEGBIT("nolea",       "do not optimize for LEAs",              &ia32_isa_template.opt, IA32_OPT_LEA),
	LC_OPT_ENT_NEGBIT("noplacecnst", "do not place constants",                &ia32_isa_template.opt, IA32_OPT_PLACECNST),
	LC_OPT_ENT_NEGBIT("noimmop",     "no operations with immediates",         &ia32_isa_template.opt, IA32_OPT_IMMOPS),
	LC_OPT_ENT_NEGBIT("nopushargs",  "do not create pushs for function arguments", &ia32_isa_template.opt, IA32_OPT_PUSHARGS),
	LC_OPT_ENT_ENUM_INT("gasmode",   "set the GAS compatibility mode",        &gas_var),
const arch_isa_if_t ia32_isa_if = {
	ia32_get_n_reg_class,
	ia32_get_reg_class_for_mode,
	ia32_get_irn_handler,
	ia32_get_code_generator_if,
	ia32_get_list_sched_selector,
	ia32_get_ilp_sched_selector,
	ia32_get_reg_class_alignment,
	ia32_get_libfirm_params,
	ia32_get_allowed_execution_units,

void be_init_arch_ia32(void)
	lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
	lc_opt_entry_t *ia32_grp = lc_opt_get_grp(be_grp, "ia32");

	lc_opt_add_table(ia32_grp, ia32_options);
	be_register_isa_if("ia32", &ia32_isa_if);

BE_REGISTER_MODULE_CONSTRUCTOR(be_init_arch_ia32);