2 * This is the main ia32 firm backend driver.
3 * @author Christian Wuerdig
18 #include <libcore/lc_opts.h>
19 #include <libcore/lc_opts_enum.h>
23 #include "pseudo_irg.h"
27 #include "iredges_t.h"
36 #include "../beabi.h" /* the general register allocator interface */
37 #include "../benode_t.h"
38 #include "../belower.h"
39 #include "../besched_t.h"
42 #include "../beirgmod.h"
43 #include "../be_dbgout.h"
44 #include "../beblocksched.h"
45 #include "../bemachine.h"
46 #include "../beilpsched.h"
47 #include "../bespillslots.h"
48 #include "../bemodule.h"
49 #include "../begnuas.h"
51 #include "bearch_ia32_t.h"
53 #include "ia32_new_nodes.h" /* ia32 nodes interface */
54 #include "gen_ia32_regalloc_if.h" /* the generated interface (register type and class definitions) */
55 #include "gen_ia32_machine.h"
56 #include "ia32_transform.h"
57 #include "ia32_emitter.h"
58 #include "ia32_map_regs.h"
59 #include "ia32_optimize.h"
61 #include "ia32_dbg_stat.h"
62 #include "ia32_finish.h"
63 #include "ia32_util.h"
/* Backing set for the firm-register mapping used by the ia32_set_irn_reg /
 * ia32_get_irn_reg callbacks below (via ia32_set_firm_reg/ia32_get_firm_reg). */
66 static set *cur_reg_set = NULL;

/* Constructor signature shared by all per-irg NoReg/Unknown node creators. */
68 typedef ir_node *(*create_const_node_func) (dbg_info *dbg, ir_graph *irg, ir_node *block);
/**
 * Creates a unique constant node for @p cg, places it in the start block,
 * pins it to @p reg and keeps it alive with a be_Keep.
 * NOTE(review): the caching via @p place is not visible in this excerpt —
 * presumably *place is checked/filled around these lines; confirm in full file.
 */
70 static INLINE ir_node *create_const(ia32_code_gen_t *cg, ir_node **place,
71 create_const_node_func func, arch_register_t* reg)
81 block = get_irg_start_block(cg->irg);
82 res = func(NULL, cg->irg, block);
83 arch_set_irn_register(cg->arch_env, res, reg);
86 /* keep the node so it isn't accidentally removed when unused ... */
88 keep = be_new_Keep(arch_register_get_class(reg), cg->irg, block, 1, in);
90 /* schedule the node if we already have a scheduled program */
91 startnode = get_irg_start(cg->irg);
92 if(sched_is_scheduled(startnode)) {
93 sched_add_after(startnode, res);
94 sched_add_after(res, keep);
100 /* Creates the unique per irg GP NoReg node. */
101 ir_node *ia32_new_NoReg_gp(ia32_code_gen_t *cg) {
102 return create_const(cg, &cg->noreg_gp, new_rd_ia32_NoReg_GP,
103 &ia32_gp_regs[REG_GP_NOREG]);
/* Creates the unique per irg virtual-FP NoReg node. */
106 ir_node *ia32_new_NoReg_vfp(ia32_code_gen_t *cg) {
107 return create_const(cg, &cg->noreg_vfp, new_rd_ia32_NoReg_VFP,
108 &ia32_vfp_regs[REG_VFP_NOREG]);
/* Creates the unique per irg SSE (xmm) NoReg node. */
111 ir_node *ia32_new_NoReg_xmm(ia32_code_gen_t *cg) {
112 return create_const(cg, &cg->noreg_xmm, new_rd_ia32_NoReg_XMM,
113 &ia32_xmm_regs[REG_XMM_NOREG]);
116 /* Creates the unique per irg FP NoReg node. */
/* Dispatches to the xmm variant when SSE2 is used, to the vfp variant otherwise. */
117 ir_node *ia32_new_NoReg_fp(ia32_code_gen_t *cg) {
118 return USE_SSE2(cg) ? ia32_new_NoReg_xmm(cg) : ia32_new_NoReg_vfp(cg);
/* Creates the unique per irg GP Unknown node. */
121 ir_node *ia32_new_Unknown_gp(ia32_code_gen_t *cg) {
122 return create_const(cg, &cg->unknown_gp, new_rd_ia32_Unknown_GP,
123 &ia32_gp_regs[REG_GP_UKNWN]);
/* Creates the unique per irg virtual-FP Unknown node. */
126 ir_node *ia32_new_Unknown_vfp(ia32_code_gen_t *cg) {
127 return create_const(cg, &cg->unknown_vfp, new_rd_ia32_Unknown_VFP,
128 &ia32_vfp_regs[REG_VFP_UKNWN]);
/* Creates the unique per irg SSE (xmm) Unknown node. */
131 ir_node *ia32_new_Unknown_xmm(ia32_code_gen_t *cg) {
132 return create_const(cg, &cg->unknown_xmm, new_rd_ia32_Unknown_XMM,
133 &ia32_xmm_regs[REG_XMM_UKNWN]);
/**
138 * Returns gp_noreg or fp_noreg, depending in input requirements.
 * Queries the register requirement of input @p pos of @p irn and picks
 * the NoReg of the matching register class (GP vs. FP).
 */
140 ir_node *ia32_get_admissible_noreg(ia32_code_gen_t *cg, ir_node *irn, int pos) {
141 const arch_register_req_t *req;
143 req = arch_get_register_req(cg->arch_env, irn, pos);
144 assert(req != NULL && "Missing register requirements");
145 if (req->cls == &ia32_reg_classes[CLASS_ia32_gp])
146 return ia32_new_NoReg_gp(cg);
/* everything that is not GP is treated as FP here */
148 return ia32_new_NoReg_fp(cg);
151 /**************************************************
154 * _ __ ___ __ _ __ _| | | ___ ___ _| |_
155 * | '__/ _ \/ _` | / _` | | |/ _ \ / __| | | _|
156 * | | | __/ (_| | | (_| | | | (_) | (__ | | |
157 * |_| \___|\__, | \__,_|_|_|\___/ \___| |_|_|
160 **************************************************/
/**
163 * Return register requirements for an ia32 node.
164 * If the node returns a tuple (mode_T) then the proj's
165 * will be asked for this information.
 * Returns arch_no_register_req for Blocks, control flow (mode_X) and
 * unresolved tuple results; otherwise queries the generated in/out
 * requirement tables of the ia32 node.
 */
167 static const arch_register_req_t *ia32_get_irn_reg_req(const void *self,
170 long node_pos = pos == -1 ? 0 : pos;
171 ir_mode *mode = is_Block(node) ? NULL : get_irn_mode(node);
/* Blocks and control-flow nodes carry no register requirements */
173 if (is_Block(node) || mode == mode_X) {
174 return arch_no_register_req;
/* a tuple itself has no requirement; its Projs will be asked instead */
177 if (mode == mode_T && pos < 0) {
178 return arch_no_register_req;
183 return arch_no_register_req;
186 return arch_no_register_req;
/* for a Proj, translate the proj number into the output position */
189 node_pos = (pos == -1) ? get_Proj_proj(node) : pos;
190 node = skip_Proj_const(node);
193 if (is_ia32_irn(node)) {
194 const arch_register_req_t *req;
196 req = get_ia32_in_req(node, pos);
198 req = get_ia32_out_req(node, node_pos);
205 /* unknowns should be transformed already */
206 assert(!is_Unknown(node));
208 return arch_no_register_req;
/**
 * Records the register assigned to @p irn (or, for a Proj, to the
 * corresponding output slot of its predecessor). ia32 nodes store the
 * register in their slots array, other firm nodes go into cur_reg_set.
 */
211 static void ia32_set_irn_reg(const void *self, ir_node *irn, const arch_register_t *reg) {
/* control-flow projs carry no register */
214 if (get_irn_mode(irn) == mode_X) {
219 pos = get_Proj_proj(irn);
220 irn = skip_Proj(irn);
223 if (is_ia32_irn(irn)) {
224 const arch_register_t **slots;
226 slots = get_ia32_slots(irn);
/* non-ia32 node: remember the assignment in the shared firm-reg set */
229 ia32_set_firm_reg(irn, reg, cur_reg_set);
/**
 * Returns the register assigned to @p irn, mirroring ia32_set_irn_reg:
 * ia32 nodes are read from their slots array, others from cur_reg_set.
 * May return NULL when no register was assigned (e.g. mode_X projs).
 */
233 static const arch_register_t *ia32_get_irn_reg(const void *self, const ir_node *irn) {
235 const arch_register_t *reg = NULL;
239 if (get_irn_mode(irn) == mode_X) {
243 pos = get_Proj_proj(irn);
244 irn = skip_Proj_const(irn);
247 if (is_ia32_irn(irn)) {
248 const arch_register_t **slots;
249 slots = get_ia32_slots(irn);
252 reg = ia32_get_firm_reg(irn, cur_reg_set);
/**
 * Classifies @p irn for the backend (normal/branch/const/load/store/reload).
 * The classification is a bitmask; several bits may be set at once.
 */
258 static arch_irn_class_t ia32_classify(const void *self, const ir_node *irn) {
259 arch_irn_class_t classification = arch_irn_class_normal;
261 irn = skip_Proj_const(irn);
264 classification |= arch_irn_class_branch;
/* non-ia32 nodes get no further ia32-specific classification */
266 if (! is_ia32_irn(irn))
267 return classification & ~arch_irn_class_normal;
269 if (is_ia32_Cnst(irn))
270 classification |= arch_irn_class_const;
273 classification |= arch_irn_class_load;
275 if (is_ia32_St(irn) || is_ia32_Store8Bit(irn))
276 classification |= arch_irn_class_store;
/* nodes that need a stack entity act as reloads for the spiller */
278 if (is_ia32_need_stackent(irn))
279 classification |= arch_irn_class_reload;
281 return classification;
/**
 * Returns the backend flags of @p irn. For a data Proj the out-flags of
 * the producing ia32 node are used; ia32 nodes additionally contribute
 * their own flag word.
 */
284 static arch_irn_flags_t ia32_get_flags(const void *self, const ir_node *irn) {
285 arch_irn_flags_t flags = arch_irn_flags_none;
288 return arch_irn_flags_ignore;
290 if(is_Proj(irn) && mode_is_datab(get_irn_mode(irn))) {
291 ir_node *pred = get_Proj_pred(irn);
293 if(is_ia32_irn(pred)) {
294 flags = get_ia32_out_flags(pred, get_Proj_proj(irn));
300 if (is_ia32_irn(irn)) {
301 flags |= get_ia32_flags(irn);
/**
308 * The IA32 ABI callback object.
 * NOTE(review): the struct head/typedef line is not visible in this excerpt.
 */
311 be_abi_call_flags_bits_t flags; /**< The call flags. */
312 const arch_isa_t *isa; /**< The ISA handle. */
313 const arch_env_t *aenv; /**< The architecture environment. */
314 ir_graph *irg; /**< The associated graph. */
/* Returns the frame entity of an ia32 node, NULL for all other nodes. */
317 static ir_entity *ia32_get_frame_entity(const void *self, const ir_node *irn) {
318 return is_ia32_irn(irn) ? get_ia32_frame_ent(irn) : NULL;
/* Sets the frame entity of @p irn (callback for the spill-slot coalescer). */
321 static void ia32_set_frame_entity(const void *self, ir_node *irn, ir_entity *ent) {
322 set_ia32_frame_ent(irn, ent);
/**
 * Adds the frame offset @p bias to the address-mode offset of @p irn.
 * Pop is special-cased because it modifies the stack pointer before the
 * destination address is computed.
 */
325 static void ia32_set_frame_offset(const void *self, ir_node *irn, int bias) {
326 const ia32_irn_ops_t *ops = self;
328 if (get_ia32_frame_ent(irn)) {
329 ia32_am_flavour_t am_flav;
331 if (is_ia32_Pop(irn)) {
332 int omit_fp = be_abi_omit_fp(ops->cg->birg->abi);
334 /* Pop nodes modify the stack pointer before calculating the destination
335 * address, so fix this here
341 am_flav = get_ia32_am_flavour(irn);
343 set_ia32_am_flavour(irn, am_flav);
345 add_ia32_am_offs_int(irn, bias);
/**
 * Returns the stack-pointer change caused by @p irn: Push grows, Pop
 * shrinks the stack (return values are in the elided lines of this excerpt).
 */
349 static int ia32_get_sp_bias(const void *self, const ir_node *irn) {
351 long proj = get_Proj_proj(irn);
352 ir_node *pred = get_Proj_pred(irn);
354 if (is_ia32_Push(pred) && proj == pn_ia32_Push_stack)
356 if (is_ia32_Pop(pred) && proj == pn_ia32_Pop_stack)
/**
364 * Put all registers which are saved by the prologue/epilogue in a set.
366 * @param self The callback object.
367 * @param s The result set.
 * When the frame pointer is omitted, ebp need not be saved by the generic
 * ABI code, so it is added to the "don't save" set here.
 */
369 static void ia32_abi_dont_save_regs(void *self, pset *s)
371 ia32_abi_env_t *env = self;
372 if(env->flags.try_omit_fp)
373 pset_insert_ptr(s, env->isa->bp);
/**
 * Counts all callee-save registers over all register classes of the ISA.
 */
377 static unsigned count_callee_saves(ia32_code_gen_t *cg)
379 unsigned callee_saves = 0;
380 int c, num_reg_classes;
383 num_reg_classes = arch_isa_get_n_reg_class(isa);
384 for(c = 0; c < num_reg_classes; ++c) {
385 int r, num_registers;
386 arch_register_class_t *regclass = arch_isa_get_reg_class(isa, c);
388 num_registers = arch_register_class_n_regs(regclass);
389 for(r = 0; r < num_registers; ++r) {
390 arch_register_t *reg = arch_register_for_index(regclass, r);
391 if(arch_register_type_is(reg, callee_save))
/**
 * Creates one Proj per callee-save register on the @p regparams node and
 * records them per register class in cg->initial_regs.
 */
399 static void create_callee_save_regprojs(ia32_code_gen_t *cg, ir_node *regparams)
401 int c, num_reg_classes;
405 num_reg_classes = arch_isa_get_n_reg_class(isa);
406 cg->initial_regs = obstack_alloc(cg->obst,
407 num_reg_classes * sizeof(cg->initial_regs[0]));
409 for(c = 0; c < num_reg_classes; ++c) {
410 int r, num_registers;
411 ir_node **initial_regclass;
412 arch_register_class_t *regclass = arch_isa_get_reg_class(isa, c);
414 num_registers = arch_register_class_n_regs(regclass);
/* NOTE(review): obstack_alloc is called with one argument here (no obstack) —
 * looks like a bug or an elided argument in this excerpt; confirm in full file. */
415 initial_regclass = obstack_alloc(num_registers * sizeof(initial_regclass[0]));
416 for(r = 0; r < num_registers; ++r) {
418 arch_register_t *reg = arch_register_for_index(regclass, r);
419 if(!arch_register_type_is(reg, callee_save))
422 proj = new_r_Proj(irg, start_block, regparams, n);
423 be_set_constr_single_reg(regparams, n, reg);
424 arch_set_irn_register(cg->arch_env, proj, reg);
426 initial_regclass[r] = proj;
429 cg->initial_regs[c] = initial_regclass;
/**
 * Appends the recorded callee-save Projs (cg->initial_regs) to cg->obst,
 * presumably to build an in-array for a keep/barrier node — confirm in full file.
 */
433 static void callee_saves_obstack_grow(ia32_code_gen_t *cg)
435 int c, num_reg_classes;
438 for(c = 0; c < num_reg_classes; ++c) {
439 int r, num_registers;
441 num_registers = arch_register_class_n_regs(regclass);
442 for(r = 0; r < num_registers; ++r) {
444 arch_register_t *reg = arch_register_for_index(regclass, r);
445 if(!arch_register_type_is(reg, callee_save))
448 proj = cg->initial_regs[c][r];
449 obstack_ptr_grow(cg->obst, proj);
/* Counts how many parameters are passed in registers (body elided in this excerpt). */
454 static unsigned count_parameters_in_regs(ia32_code_gen_t *cg)
/**
 * Generates the function prologue: creates the RegParams node with outputs
 * for callee-save registers and register parameters, pushes the old frame
 * pointer, copies esp to ebp and sets up the initial IncSP.
 * NOTE(review): the inner push/copy sequence duplicates ia32_abi_prologue
 * below almost verbatim — candidate for a shared helper.
 */
459 static void ia32_gen_prologue(ia32_code_gen_t *cg)
461 ir_graph *irg = cg->irg;
462 ir_node *start_block = get_irg_start_block(irg);
467 /* Create the regparams node */
468 n_regparams_out = count_callee_saves(cg) + count_parameters_in_regs(cg);
469 regparams = be_new_RegParams(irg, start_block, n_regparams_out);
471 create_callee_save_regprojs(cg, regparams);
473 /* Setup the stack */
475 ir_node *bl = get_irg_start_block(env->irg);
476 ir_node *curr_sp = be_abi_reg_map_get(reg_map, env->isa->sp);
477 ir_node *curr_bp = be_abi_reg_map_get(reg_map, env->isa->bp);
478 ir_node *noreg = ia32_new_NoReg_gp(cg);
/* push ebp: saves the old frame pointer on the stack */
482 push = new_rd_ia32_Push(NULL, env->irg, bl, noreg, noreg, curr_bp, curr_sp, *mem);
483 curr_sp = new_r_Proj(env->irg, bl, push, get_irn_mode(curr_sp), pn_ia32_Push_stack);
484 *mem = new_r_Proj(env->irg, bl, push, mode_M, pn_ia32_Push_M);
486 /* the push must have SP out register */
487 arch_set_irn_register(env->aenv, curr_sp, env->isa->sp);
488 set_ia32_flags(push, arch_irn_flags_ignore);
490 /* move esp to ebp */
491 curr_bp = be_new_Copy(env->isa->bp->reg_class, env->irg, bl, curr_sp);
492 be_set_constr_single_reg(curr_bp, BE_OUT_POS(0), env->isa->bp);
493 arch_set_irn_register(env->aenv, curr_bp, env->isa->bp);
494 be_node_set_flags(curr_bp, BE_OUT_POS(0), arch_irn_flags_ignore);
496 /* beware: the copy must be done before any other sp use */
497 curr_sp = be_new_CopyKeep_single(env->isa->sp->reg_class, env->irg, bl, curr_sp, curr_bp, get_irn_mode(curr_sp));
498 be_set_constr_single_reg(curr_sp, BE_OUT_POS(0), env->isa->sp);
499 arch_set_irn_register(env->aenv, curr_sp, env->isa->sp);
500 be_node_set_flags(curr_sp, BE_OUT_POS(0), arch_irn_flags_ignore);
502 be_abi_reg_map_set(reg_map, env->isa->sp, curr_sp);
503 be_abi_reg_map_set(reg_map, env->isa->bp, curr_bp);
/* reserve the stack frame */
506 sp = be_new_IncSP(sp, irg, start_block, initialsp, BE_STACK_FRAME_SIZE_EXPAND);
507 set_irg_frame(irg, sp);
/**
 * Generates the function epilogue: creates a Barrier covering all
 * callee-save and result registers so reloads happen before the stack
 * frame is destroyed, then removes the frame with an IncSP shrink.
 */
510 static void ia32_gen_epilogue(ia32_code_gen_t *cg)
512 int n_callee_saves = count_callee_saves(cg);
513 int n_results_regs = 0;
516 ir_node *end_block = get_irg_end_block(irg);
519 /* We have to make sure that all reloads occur before the stack frame
520 gets destroyed, so we create a barrier for all callee-save and return
522 barrier_size = n_callee_saves + n_results_regs;
523 barrier = be_new_Barrier(irg, end_block, barrier_size,
525 /* simply remove the stack frame here */
526 curr_sp = be_new_IncSP(env->isa->sp, env->irg, bl, curr_sp, BE_STACK_FRAME_SIZE_SHRINK);
527 add_irn_dep(curr_sp, *mem);
/**
532 * Generate the routine prologue.
534 * @param self The callback object.
535 * @param mem A pointer to the mem node. Update this if you define new memory.
536 * @param reg_map A map mapping all callee_save/ignore/parameter registers to their defining nodes.
538 * @return The register which shall be used as a stack frame base.
540 * All nodes which define registers in @p reg_map must keep @p reg_map current.
 *
 * When the frame pointer is not omitted: push ebp, copy esp to ebp and
 * record the new sp/bp nodes in @p reg_map.
 */
542 static const arch_register_t *ia32_abi_prologue(void *self, ir_node **mem, pmap *reg_map)
544 ia32_abi_env_t *env = self;
545 const ia32_isa_t *isa = (ia32_isa_t *)env->isa;
546 ia32_code_gen_t *cg = isa->cg;
548 if (! env->flags.try_omit_fp) {
549 ir_node *bl = get_irg_start_block(env->irg);
550 ir_node *curr_sp = be_abi_reg_map_get(reg_map, env->isa->sp);
551 ir_node *curr_bp = be_abi_reg_map_get(reg_map, env->isa->bp);
552 ir_node *noreg = ia32_new_NoReg_gp(cg);
/* push ebp: save the caller's frame pointer */
556 push = new_rd_ia32_Push(NULL, env->irg, bl, noreg, noreg, curr_bp, curr_sp, *mem);
557 curr_sp = new_r_Proj(env->irg, bl, push, get_irn_mode(curr_sp), pn_ia32_Push_stack);
558 *mem = new_r_Proj(env->irg, bl, push, mode_M, pn_ia32_Push_M);
560 /* the push must have SP out register */
561 arch_set_irn_register(env->aenv, curr_sp, env->isa->sp);
562 set_ia32_flags(push, arch_irn_flags_ignore);
564 /* move esp to ebp */
565 curr_bp = be_new_Copy(env->isa->bp->reg_class, env->irg, bl, curr_sp);
566 be_set_constr_single_reg(curr_bp, BE_OUT_POS(0), env->isa->bp);
567 arch_set_irn_register(env->aenv, curr_bp, env->isa->bp);
568 be_node_set_flags(curr_bp, BE_OUT_POS(0), arch_irn_flags_ignore);
570 /* beware: the copy must be done before any other sp use */
571 curr_sp = be_new_CopyKeep_single(env->isa->sp->reg_class, env->irg, bl, curr_sp, curr_bp, get_irn_mode(curr_sp));
572 be_set_constr_single_reg(curr_sp, BE_OUT_POS(0), env->isa->sp);
573 arch_set_irn_register(env->aenv, curr_sp, env->isa->sp);
574 be_node_set_flags(curr_sp, BE_OUT_POS(0), arch_irn_flags_ignore);
576 be_abi_reg_map_set(reg_map, env->isa->sp, curr_sp);
577 be_abi_reg_map_set(reg_map, env->isa->bp, curr_bp);
/**
586 * Generate the routine epilogue.
587 * @param self The callback object.
588 * @param bl The block for the epilog
589 * @param mem A pointer to the mem node. Update this if you define new memory.
590 * @param reg_map A map mapping all callee_save/ignore/parameter registers to their defining nodes.
591 * @return The register which shall be used as a stack frame base.
593 * All nodes which define registers in @p reg_map must keep @p reg_map current.
 *
 * With omitted frame pointer only an IncSP shrink is emitted; otherwise the
 * frame is torn down via Leave (or SetSP+Pop on the never-taken else path).
 */
595 static void ia32_abi_epilogue(void *self, ir_node *bl, ir_node **mem, pmap *reg_map)
597 ia32_abi_env_t *env = self;
598 ir_node *curr_sp = be_abi_reg_map_get(reg_map, env->isa->sp);
599 ir_node *curr_bp = be_abi_reg_map_get(reg_map, env->isa->bp);
601 if (env->flags.try_omit_fp) {
602 /* simply remove the stack frame here */
603 curr_sp = be_new_IncSP(env->isa->sp, env->irg, bl, curr_sp, BE_STACK_FRAME_SIZE_SHRINK);
604 add_irn_dep(curr_sp, *mem);
606 const ia32_isa_t *isa = (ia32_isa_t *)env->isa;
607 ia32_code_gen_t *cg = isa->cg;
608 ir_mode *mode_bp = env->isa->bp->reg_class->mode;
610 /* gcc always emits a leave at the end of a routine */
/* NOTE(review): "1 ||" forces the Leave branch; the SetSP+Pop alternative
 * below is currently dead code kept on purpose (see comment above). */
611 if (1 || ARCH_AMD(isa->opt_arch)) {
615 leave = new_rd_ia32_Leave(NULL, env->irg, bl, curr_sp, curr_bp);
616 set_ia32_flags(leave, arch_irn_flags_ignore);
617 curr_bp = new_r_Proj(current_ir_graph, bl, leave, mode_bp, pn_ia32_Leave_frame);
618 curr_sp = new_r_Proj(current_ir_graph, bl, leave, get_irn_mode(curr_sp), pn_ia32_Leave_stack);
620 ir_node *noreg = ia32_new_NoReg_gp(cg);
623 /* copy ebp to esp */
624 curr_sp = be_new_SetSP(env->isa->sp, env->irg, bl, curr_sp, curr_bp, *mem);
/* pop ebp: restore the caller's frame pointer */
627 pop = new_rd_ia32_Pop(NULL, env->irg, bl, noreg, noreg, curr_sp, *mem);
628 set_ia32_flags(pop, arch_irn_flags_ignore);
629 curr_bp = new_r_Proj(current_ir_graph, bl, pop, mode_bp, pn_ia32_Pop_res);
630 curr_sp = new_r_Proj(current_ir_graph, bl, pop, get_irn_mode(curr_sp), pn_ia32_Pop_stack);
632 *mem = new_r_Proj(current_ir_graph, bl, pop, mode_M, pn_ia32_Pop_M);
634 arch_set_irn_register(env->aenv, curr_sp, env->isa->sp);
635 arch_set_irn_register(env->aenv, curr_bp, env->isa->bp);
638 be_abi_reg_map_set(reg_map, env->isa->sp, curr_sp);
639 be_abi_reg_map_set(reg_map, env->isa->bp, curr_bp);
/**
643 * Initialize the callback object.
644 * @param call The call object.
645 * @param aenv The architecture environment.
646 * @param irg The graph with the method.
647 * @return Some pointer. This pointer is passed to all other callback functions as self object.
 * Allocates an ia32_abi_env_t and fills it from the call flags and the ISA.
 */
649 static void *ia32_abi_init(const be_abi_call_t *call, const arch_env_t *aenv, ir_graph *irg)
651 ia32_abi_env_t *env = xmalloc(sizeof(env[0]));
652 be_abi_call_flags_t fl = be_abi_call_get_flags(call);
653 env->flags = fl.bits;
656 env->isa = aenv->isa;
/**
661 * Destroy the callback object.
662 * @param self The callback object.
 */
664 static void ia32_abi_done(void *self) {
/**
669 * Produces the type which sits between the stack args and the locals on the stack.
670 * it will contain the return address and space to store the old base pointer.
671 * @return The Firm type modeling the ABI between type.
 * Both variants (with/without frame pointer) are built lazily once and
 * cached in function-local statics; the omit-fp variant holds only the
 * return address.
 */
673 static ir_type *ia32_abi_get_between_type(void *self)
675 #define IDENT(s) new_id_from_chars(s, sizeof(s)-1)
676 static ir_type *omit_fp_between_type = NULL;
677 static ir_type *between_type = NULL;
679 ia32_abi_env_t *env = self;
681 if (! between_type) {
682 ir_entity *old_bp_ent;
683 ir_entity *ret_addr_ent;
684 ir_entity *omit_fp_ret_addr_ent;
686 ir_type *old_bp_type = new_type_primitive(IDENT("bp"), mode_Iu);
687 ir_type *ret_addr_type = new_type_primitive(IDENT("return_addr"), mode_Iu);
689 between_type = new_type_struct(IDENT("ia32_between_type"));
690 old_bp_ent = new_entity(between_type, IDENT("old_bp"), old_bp_type);
691 ret_addr_ent = new_entity(between_type, IDENT("ret_addr"), ret_addr_type);
693 set_entity_offset(old_bp_ent, 0);
694 set_entity_offset(ret_addr_ent, get_type_size_bytes(old_bp_type));
695 set_type_size_bytes(between_type, get_type_size_bytes(old_bp_type) + get_type_size_bytes(ret_addr_type));
696 set_type_state(between_type, layout_fixed);
698 omit_fp_between_type = new_type_struct(IDENT("ia32_between_type_omit_fp"));
699 omit_fp_ret_addr_ent = new_entity(omit_fp_between_type, IDENT("ret_addr"), ret_addr_type);
701 set_entity_offset(omit_fp_ret_addr_ent, 0);
702 set_type_size_bytes(omit_fp_between_type, get_type_size_bytes(ret_addr_type));
703 set_type_state(omit_fp_between_type, layout_fixed);
706 return env->flags.try_omit_fp ? omit_fp_between_type : between_type;
/**
711 * Get the estimated cycle count for @p irn.
713 * @param self The this pointer.
714 * @param irn The node.
716 * @return The estimated cycle count for this operation
 * Non-ia32 nodes are not costed here; CopyB gets size-based estimates and
 * address-mode operations get a memory-access surcharge.
 */
718 static int ia32_get_op_estimated_cost(const void *self, const ir_node *irn)
721 ia32_op_type_t op_tp;
722 const ia32_irn_ops_t *ops = self;
726 if (!is_ia32_irn(irn))
729 assert(is_ia32_irn(irn));
731 cost = get_ia32_latency(irn);
732 op_tp = get_ia32_op_type(irn);
734 if (is_ia32_CopyB(irn)) {
736 if (ARCH_INTEL(ops->cg->arch))
739 else if (is_ia32_CopyB_i(irn)) {
740 int size = get_tarval_long(get_ia32_Immop_tarval(irn));
/* NOTE(review): (4/3) is integer division and evaluates to 1, so the factor
 * is lost — almost certainly meant (4.0/3.0) * size. Fix in full file. */
741 cost = 20 + (int)ceil((4/3) * size);
742 if (ARCH_INTEL(ops->cg->arch))
745 /* in case of address mode operations add additional cycles */
746 else if (op_tp == ia32_AddrModeD || op_tp == ia32_AddrModeS) {
748 In case of stack access add 5 cycles (we assume stack is in cache),
749 other memory operations cost 20 cycles.
751 cost += is_ia32_use_frame(irn) ? 5 : 20;
/**
758 * Returns the inverse operation if @p irn, recalculating the argument at position @p i.
760 * @param irn The original operation
761 * @param i Index of the argument we want the inverse operation to yield
762 * @param inverse struct to be filled with the resulting inverse op
763 * @param obstack The obstack to use for allocation of the returned nodes array
 * @return The inverse operation or NULL if operation is not invertible
 * Only normal (non address-mode) ia32 nodes with a real operand position
 * (2 or 3) are handled; supported opcodes: Add, Sub, Xor, Not, Neg.
 */
766 static arch_inverse_t *ia32_get_inverse(const void *self, const ir_node *irn, int i, arch_inverse_t *inverse, struct obstack *obst) {
770 ir_node *block, *noreg, *nomem;
773 /* we cannot invert non-ia32 irns */
774 if (! is_ia32_irn(irn))
777 /* operand must always be a real operand (not base, index or mem) */
778 if (i != 2 && i != 3)
781 /* we don't invert address mode operations */
782 if (get_ia32_op_type(irn) != ia32_Normal)
785 irg = get_irn_irg(irn);
786 block = get_nodes_block(irn);
787 mode = get_irn_mode(irn);
788 irn_mode = get_irn_mode(irn);
789 noreg = get_irn_n(irn, 0);
790 nomem = new_r_NoMem(irg);
791 dbg = get_irn_dbg_info(irn);
793 /* initialize structure */
794 inverse->nodes = obstack_alloc(obst, 2 * sizeof(inverse->nodes[0]));
798 switch (get_ia32_irn_opcode(irn)) {
800 if (get_ia32_immop_type(irn) == ia32_ImmConst) {
801 /* we have an add with a const here */
802 /* inverse == add with negated const */
803 inverse->nodes[0] = new_rd_ia32_Add(dbg, irg, block, noreg, noreg, get_irn_n(irn, i), noreg, nomem);
805 copy_ia32_Immop_attr(inverse->nodes[0], (ir_node *)irn);
806 set_ia32_Immop_tarval(inverse->nodes[0], tarval_neg(get_ia32_Immop_tarval(irn)));
807 set_ia32_commutative(inverse->nodes[0]);
809 else if (get_ia32_immop_type(irn) == ia32_ImmSymConst) {
810 /* we have an add with a symconst here */
811 /* inverse == sub with const */
812 inverse->nodes[0] = new_rd_ia32_Sub(dbg, irg, block, noreg, noreg, get_irn_n(irn, i), noreg, nomem);
814 copy_ia32_Immop_attr(inverse->nodes[0], (ir_node *)irn);
817 /* normal add: inverse == sub */
818 inverse->nodes[0] = new_rd_ia32_Sub(dbg, irg, block, noreg, noreg, (ir_node*) irn, get_irn_n(irn, i ^ 1), nomem);
823 if (get_ia32_immop_type(irn) != ia32_ImmNone) {
824 /* we have a sub with a const/symconst here */
825 /* inverse == add with this const */
826 inverse->nodes[0] = new_rd_ia32_Add(dbg, irg, block, noreg, noreg, get_irn_n(irn, i), noreg, nomem);
827 inverse->costs += (get_ia32_immop_type(irn) == ia32_ImmSymConst) ? 5 : 1;
828 copy_ia32_Immop_attr(inverse->nodes[0], (ir_node *)irn);
/* sub without immediate: which side is reconstructed depends on i */
833 inverse->nodes[0] = new_rd_ia32_Add(dbg, irg, block, noreg, noreg, (ir_node*) irn, get_irn_n(irn, 3), nomem);
836 inverse->nodes[0] = new_rd_ia32_Sub(dbg, irg, block, noreg, noreg, get_irn_n(irn, 2), (ir_node*) irn, nomem);
842 if (get_ia32_immop_type(irn) != ia32_ImmNone) {
843 /* xor with const: inverse = xor */
844 inverse->nodes[0] = new_rd_ia32_Xor(dbg, irg, block, noreg, noreg, get_irn_n(irn, i), noreg, nomem);
845 inverse->costs += (get_ia32_immop_type(irn) == ia32_ImmSymConst) ? 5 : 1;
846 copy_ia32_Immop_attr(inverse->nodes[0], (ir_node *)irn);
/* xor is self-inverse: x ^ y ^ y == x */
850 inverse->nodes[0] = new_rd_ia32_Xor(dbg, irg, block, noreg, noreg, (ir_node *) irn, get_irn_n(irn, i), nomem);
/* Not and Neg are their own inverses */
855 inverse->nodes[0] = new_rd_ia32_Not(dbg, irg, block, noreg, noreg, (ir_node*) irn, nomem);
860 inverse->nodes[0] = new_rd_ia32_Neg(dbg, irg, block, noreg, noreg, (ir_node*) irn, nomem);
865 /* inverse operation not supported */
/**
 * Maps a value mode to the mode used when spilling it (float modes get a
 * dedicated spill mode; the non-float return is elided in this excerpt).
 */
872 static ir_mode *get_spill_mode_mode(const ir_mode *mode)
874 if(mode_is_float(mode))
883 static ir_mode *get_spill_mode(const ir_node *node)
885 ir_mode *mode = get_irn_mode(node);
886 return get_spill_mode_mode(mode);
/**
890 * Checks whether an addressmode reload for a node with mode mode is compatible
891 * with a spillslot of mode spill_mode
 * Float modes must match exactly; the integer case is elided in this excerpt.
 */
893 static int ia32_is_spillmode_compatible(const ir_mode *mode, const ir_mode *spillmode)
895 if(mode_is_float(mode)) {
896 return mode == spillmode;
/**
903 * Check if irn can load it's operand at position i from memory (source addressmode).
904 * @param self Pointer to irn ops itself
905 * @param irn The irn to be checked
906 * @param i The operands position
907 * @return Non-Zero if operand can be loaded
 */
909 static int ia32_possible_memory_operand(const void *self, const ir_node *irn, unsigned int i) {
910 ir_node *op = get_irn_n(irn, i);
911 const ir_mode *mode = get_irn_mode(op);
912 const ir_mode *spillmode = get_spill_mode(op);
/* all of the following conditions must hold for source address mode */
914 if (! is_ia32_irn(irn) || /* must be an ia32 irn */
915 get_irn_arity(irn) != 5 || /* must be a binary operation */
916 get_ia32_op_type(irn) != ia32_Normal || /* must not already be a addressmode irn */
917 ! (get_ia32_am_support(irn) & ia32_am_Source) || /* must be capable of source addressmode */
918 ! ia32_is_spillmode_compatible(mode, spillmode) ||
919 (i != 2 && i != 3) || /* a "real" operand position must be requested */
920 (i == 2 && ! is_ia32_commutative(irn)) || /* if first operand requested irn must be commutative */
921 is_ia32_use_frame(irn)) /* must not already use frame */
/**
 * Rewrites @p irn so that operand @p i is loaded from the spill slot
 * (source address mode): operands may be swapped (commutative case),
 * then base is set to the frame, the operand is replaced by NoReg and
 * the memory input becomes the spill.
 */
927 static void ia32_perform_memory_operand(const void *self, ir_node *irn, ir_node *spill, unsigned int i) {
928 const ia32_irn_ops_t *ops = self;
929 ia32_code_gen_t *cg = ops->cg;
931 assert(ia32_possible_memory_operand(self, irn, i) && "Cannot perform memory operand change");
/* if the first (commutative) operand was requested, swap operands 2 and 3 */
934 ir_node *tmp = get_irn_n(irn, 3);
935 set_irn_n(irn, 3, get_irn_n(irn, 2));
936 set_irn_n(irn, 2, tmp);
939 set_ia32_am_support(irn, ia32_am_Source);
940 set_ia32_op_type(irn, ia32_AddrModeS);
941 set_ia32_am_flavour(irn, ia32_B);
942 set_ia32_ls_mode(irn, get_irn_mode(get_irn_n(irn, i)));
943 set_ia32_use_frame(irn);
944 set_ia32_need_stackent(irn);
/* base := frame, operand := noreg, mem := spill */
946 set_irn_n(irn, 0, get_irg_frame(get_irn_irg(irn)));
947 set_irn_n(irn, 3, ia32_get_admissible_noreg(cg, irn, 3));
948 set_irn_n(irn, 4, spill);
950 //FIXME DBG_OPT_AM_S(reload, irn);
/* ABI callback vtable handed to the generic backend ABI code. */
953 static const be_abi_callbacks_t ia32_abi_callbacks = {
956 ia32_abi_get_between_type,
957 ia32_abi_dont_save_regs,
962 /* fill register allocator interface */
/* Node-operations vtable wiring the callbacks defined above into the
 * register allocator interface. */
964 static const arch_irn_ops_if_t ia32_irn_ops_if = {
965 ia32_get_irn_reg_req,
970 ia32_get_frame_entity,
971 ia32_set_frame_entity,
972 ia32_set_frame_offset,
975 ia32_get_op_estimated_cost,
976 ia32_possible_memory_operand,
977 ia32_perform_memory_operand,
/* Exported irn-ops instance (initializer elided in this excerpt). */
980 ia32_irn_ops_t ia32_irn_ops = {
987 /**************************************************
990 * ___ ___ __| | ___ __ _ ___ _ __ _| |_
991 * / __/ _ \ / _` |/ _ \/ _` |/ _ \ '_ \ | | _|
992 * | (_| (_) | (_| | __/ (_| | __/ | | | | | |
993 * \___\___/ \__,_|\___|\__, |\___|_| |_| |_|_|
996 **************************************************/
/**
999 * Transforms the standard firm graph into
1000 * an ia32 firm graph
 * Phases: psi condition-tree transform, full node transform, then
 * address-mode optimization; dumps the graph after each major phase.
 */
1002 static void ia32_prepare_graph(void *self) {
1003 ia32_code_gen_t *cg = self;
1004 DEBUG_ONLY(firm_dbg_module_t *old_mod = cg->mod;)
1006 FIRM_DBG_REGISTER(cg->mod, "firm.be.ia32.transform");
1008 /* 1st: transform psi condition trees */
1009 ia32_pre_transform_phase(cg);
1011 /* 2nd: transform all remaining nodes */
1012 ia32_transform_graph(cg);
1013 // Matze: disabled for now. Because after transformation start block has no
1014 // self-loop anymore so it might be merged with its successor block. This
1015 // will bring several nodes to the startblock which sometimes get scheduled
1016 // before the initial IncSP/Barrier
1017 //local_optimize_graph(cg->irg);
1020 be_dump(cg->irg, "-transformed", dump_ir_block_graph_sched);
1022 /* 3rd: optimize address mode */
1023 FIRM_DBG_REGISTER(cg->mod, "firm.be.ia32.am");
1024 ia32_optimize_addressmode(cg);
1027 be_dump(cg->irg, "-am", dump_ir_block_graph_sched);
/* restore the previous debug module */
1029 DEBUG_ONLY(cg->mod = old_mod;)
/**
1033 * Dummy functions for hooks we don't need but which must be filled.
 */
1035 static void ia32_before_sched(void *self) {
/**
 * Recursively detaches @p irn (an unused node) from the graph: inputs are
 * set to Bad, memory users are rerouted to the memory predecessor, and
 * predecessors that become unused are removed as well. The node is also
 * taken out of the schedule if one exists.
 */
1038 static void remove_unused_nodes(ir_node *irn, bitset_t *already_visited) {
1041 ir_node *mem_proj = NULL;
1046 mode = get_irn_mode(irn);
1048 /* check if we already saw this node or the node has more than one user */
1049 if (bitset_contains_irn(already_visited, irn) || get_irn_n_edges(irn) > 1) {
1053 /* mark irn visited */
1054 bitset_add_irn(already_visited, irn);
1056 /* non-Tuple nodes with one user: ok, return */
1057 if (get_irn_n_edges(irn) >= 1 && mode != mode_T) {
1061 /* tuple node has one user which is not the mem proj-> ok */
1062 if (mode == mode_T && get_irn_n_edges(irn) == 1) {
1063 mem_proj = ia32_get_proj_for_mode(irn, mode_M);
1064 if (mem_proj == NULL) {
1069 arity = get_irn_arity(irn);
1070 for (i = 0; i < arity; ++i) {
1071 ir_node *pred = get_irn_n(irn, i);
1073 /* do not follow memory edges or we will accidentally remove stores */
1074 if (get_irn_mode(pred) == mode_M) {
1075 if(mem_proj != NULL) {
1076 edges_reroute(mem_proj, pred, get_irn_irg(mem_proj));
1082 set_irn_n(irn, i, new_Bad());
1085 The current node is about to be removed: if the predecessor
1086 has only this node as user, it need to be removed as well.
1088 if (get_irn_n_edges(pred) <= 1)
1089 remove_unused_nodes(pred, already_visited);
1092 // we need to set the preds to Bad again to also get the memory edges
1093 arity = get_irn_arity(irn);
1094 for (i = 0; i < arity; ++i) {
1095 set_irn_n(irn, i, new_Bad());
1098 if (sched_is_scheduled(irn)) {
/* Graph-walker callback: removes unvisited ia32 Load nodes that are unused. */
1103 static void remove_unused_loads_walker(ir_node *irn, void *env) {
1104 bitset_t *already_visited = env;
1105 if (is_ia32_Ld(irn) && ! bitset_contains_irn(already_visited, irn))
1106 remove_unused_nodes(irn, env);
/**
1110 * Called before the register allocator.
1111 * Calculate a block schedule here. We need it for the x87
1112 * simulator and the emitter.
 */
1114 static void ia32_before_ra(void *self) {
1115 ia32_code_gen_t *cg = self;
1116 bitset_t *already_visited = bitset_irg_alloca(cg->irg);
1119 Handle special case:
1120 There are sometimes unused loads, only pinned by memory.
1121 We need to remove those Loads and all other nodes which won't be used
1122 after removing the Load from schedule.
1124 irg_walk_graph(cg->irg, NULL, remove_unused_loads_walker, already_visited);
/**
1129 * Transforms a be_Reload into a ia32 Load.
 * Chooses xLoad/vfld/xxLoad/Load depending on the spill mode, copies the
 * frame entity and register from the old node, splices the new nodes into
 * the schedule and finally exchanges the Reload with the Load result Proj.
 */
1131 static void transform_to_Load(ia32_code_gen_t *cg, ir_node *node) {
1132 ir_graph *irg = get_irn_irg(node);
1133 dbg_info *dbg = get_irn_dbg_info(node);
1134 ir_node *block = get_nodes_block(node);
1135 ir_entity *ent = be_get_frame_entity(node);
1136 ir_mode *mode = get_irn_mode(node);
1137 ir_mode *spillmode = get_spill_mode(node);
1138 ir_node *noreg = ia32_new_NoReg_gp(cg);
1139 ir_node *sched_point = NULL;
1140 ir_node *ptr = get_irg_frame(irg);
1141 ir_node *mem = get_irn_n(node, be_pos_Reload_mem);
1142 ir_node *new_op, *proj;
1143 const arch_register_t *reg;
/* remember the schedule position so the new Load can be spliced in */
1145 if (sched_is_scheduled(node)) {
1146 sched_point = sched_prev(node);
/* pick the load variant matching the spill mode */
1149 if (mode_is_float(spillmode)) {
1151 new_op = new_rd_ia32_xLoad(dbg, irg, block, ptr, noreg, mem);
1153 new_op = new_rd_ia32_vfld(dbg, irg, block, ptr, noreg, mem);
1155 else if (get_mode_size_bits(spillmode) == 128) {
1156 // Reload 128 bit sse registers
1157 new_op = new_rd_ia32_xxLoad(dbg, irg, block, ptr, noreg, mem);
1160 new_op = new_rd_ia32_Load(dbg, irg, block, ptr, noreg, mem);
1162 set_ia32_am_support(new_op, ia32_am_Source);
1163 set_ia32_op_type(new_op, ia32_AddrModeS);
1164 set_ia32_am_flavour(new_op, ia32_B);
1165 set_ia32_ls_mode(new_op, spillmode);
1166 set_ia32_frame_ent(new_op, ent);
1167 set_ia32_use_frame(new_op);
1169 DBG_OPT_RELOAD2LD(node, new_op);
1171 proj = new_rd_Proj(dbg, irg, block, new_op, mode, pn_ia32_Load_res);
1174 sched_add_after(sched_point, new_op);
1175 sched_add_after(new_op, proj);
1180 /* copy the register from the old node to the new Load */
1181 reg = arch_get_irn_register(cg->arch_env, node);
1182 arch_set_irn_register(cg->arch_env, new_op, reg);
1184 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(cg, node));
1186 exchange(node, proj);
/**
1190 * Transforms a be_Spill node into a ia32 Store.
 * Chooses xStore/vfst/xxStore/Store8Bit/Store depending on the spill mode,
 * copies the frame entity, splices the Store into the schedule and
 * exchanges the Spill with it.
 */
1192 static void transform_to_Store(ia32_code_gen_t *cg, ir_node *node) {
1193 ir_graph *irg = get_irn_irg(node);
1194 dbg_info *dbg = get_irn_dbg_info(node);
1195 ir_node *block = get_nodes_block(node);
1196 ir_entity *ent = be_get_frame_entity(node);
1197 const ir_node *spillval = get_irn_n(node, be_pos_Spill_val);
1198 ir_mode *mode = get_spill_mode(spillval);
1199 ir_node *noreg = ia32_new_NoReg_gp(cg);
1200 ir_node *nomem = new_rd_NoMem(irg);
1201 ir_node *ptr = get_irg_frame(irg);
1202 ir_node *val = get_irn_n(node, be_pos_Spill_val);
1204 ir_node *sched_point = NULL;
1206 if (sched_is_scheduled(node)) {
1207 sched_point = sched_prev(node);
/* pick the store variant matching the spill mode */
1210 if (mode_is_float(mode)) {
1212 store = new_rd_ia32_xStore(dbg, irg, block, ptr, noreg, val, nomem);
1214 store = new_rd_ia32_vfst(dbg, irg, block, ptr, noreg, val, nomem);
1216 else if (get_mode_size_bits(mode) == 128) {
1217 // Spill 128 bit SSE registers
1218 store = new_rd_ia32_xxStore(dbg, irg, block, ptr, noreg, val, nomem);
1220 else if (get_mode_size_bits(mode) == 8) {
1221 store = new_rd_ia32_Store8Bit(dbg, irg, block, ptr, noreg, val, nomem);
1224 store = new_rd_ia32_Store(dbg, irg, block, ptr, noreg, val, nomem);
1227 set_ia32_am_support(store, ia32_am_Dest);
1228 set_ia32_op_type(store, ia32_AddrModeD);
1229 set_ia32_am_flavour(store, ia32_B);
1230 set_ia32_ls_mode(store, mode);
1231 set_ia32_frame_ent(store, ent);
1232 set_ia32_use_frame(store);
1234 DBG_OPT_SPILL2ST(node, store);
1235 SET_IA32_ORIG_NODE(store, ia32_get_old_node_name(cg, node));
1238 sched_add_after(sched_point, store);
1242 exchange(node, store);
/*
 * Build an ia32 Push that reads a 32-bit word from frame entity @p ent
 * (frame pointer addressed, base-only flavour) and pushes it onto the stack.
 * @p sp is the current stack pointer value, @p mem the memory dependency.
 * The Push is scheduled immediately before @p schedpoint; the created node
 * is returned (return statement elided in this listing).
 */
1245 static ir_node *create_push(ia32_code_gen_t *cg, ir_node *node, ir_node *schedpoint, ir_node *sp, ir_node *mem, ir_entity *ent) {
1246 ir_graph *irg = get_irn_irg(node);
1247 dbg_info *dbg = get_irn_dbg_info(node);
1248 ir_node *block = get_nodes_block(node);
1249 ir_node *noreg = ia32_new_NoReg_gp(cg);
1250 ir_node *frame = get_irg_frame(irg);
1252 ir_node *push = new_rd_ia32_Push(dbg, irg, block, frame, noreg, noreg, sp, mem);
1254 set_ia32_frame_ent(push, ent);
1255 set_ia32_use_frame(push);
1256 set_ia32_op_type(push, ia32_AddrModeS);
1257 set_ia32_am_flavour(push, ia32_B);
1258 set_ia32_ls_mode(push, mode_Is);
1260 sched_add_before(schedpoint, push);
/*
 * Build an ia32 Pop that pops a 32-bit word from the stack into frame
 * entity @p ent (destination address mode on the frame pointer).  @p sp is
 * the current stack pointer value.  The Pop is scheduled immediately before
 * @p schedpoint; the created node is returned (return statement elided in
 * this listing).
 */
1264 static ir_node *create_pop(ia32_code_gen_t *cg, ir_node *node, ir_node *schedpoint, ir_node *sp, ir_entity *ent) {
1265 ir_graph *irg = get_irn_irg(node);
1266 dbg_info *dbg = get_irn_dbg_info(node);
1267 ir_node *block = get_nodes_block(node);
1268 ir_node *noreg = ia32_new_NoReg_gp(cg);
1269 ir_node *frame = get_irg_frame(irg);
1271 ir_node *pop = new_rd_ia32_Pop(dbg, irg, block, frame, noreg, sp, new_NoMem());
1273 set_ia32_frame_ent(pop, ent);
1274 set_ia32_use_frame(pop);
1275 set_ia32_op_type(pop, ia32_AddrModeD);
1276 set_ia32_am_flavour(pop, ia32_am_OB);
1277 set_ia32_ls_mode(pop, mode_Is);
1279 sched_add_before(schedpoint, pop);
/*
 * Create a Proj extracting the updated stack pointer (output @p pos) from a
 * Push/Pop node @p pred, pin it to the ESP register and schedule it before
 * @p schedpoint.  Returns the Proj (return statement elided in this listing).
 */
1284 static ir_node* create_spproj(ia32_code_gen_t *cg, ir_node *node, ir_node *pred, int pos, ir_node *schedpoint) {
1285 ir_graph *irg = get_irn_irg(node);
1286 dbg_info *dbg = get_irn_dbg_info(node);
1287 ir_node *block = get_nodes_block(node);
1288 ir_mode *spmode = mode_Iu;
1289 const arch_register_t *spreg = &ia32_gp_regs[REG_ESP];
1292 sp = new_rd_Proj(dbg, irg, block, pred, spmode, pos);
1293 arch_set_irn_register(cg->arch_env, sp, spreg);
1294 sched_add_before(schedpoint, sp);
1300 * Transform memperm, currently we do this the ugly way and produce
1301 * push/pop into/from memory cascades. This is possible without using
/*
 * Lower a be_MemPerm (permutation of spill-slot contents) into a cascade of
 * Pushes (save all input entities on the stack) followed by Pops in reverse
 * order (write them back into the output entities).  64-bit entities take
 * two push/pop pairs with a +4 byte offset on the second.  A Keep holds the
 * final stack pointer alive; memory Projs of the MemPerm are rerouted to the
 * corresponding Pop, then all MemPerm inputs are severed with Bad.
 * NOTE(review): several lines (declarations, 64-bit condition, closing
 * braces) are elided from this listing.
 */
1304 static void transform_MemPerm(ia32_code_gen_t *cg, ir_node *node) {
1305 ir_graph *irg = get_irn_irg(node);
1306 ir_node *block = get_nodes_block(node);
1310 ir_node *sp = be_abi_get_ignore_irn(cg->birg->abi, &ia32_gp_regs[REG_ESP]);
1311 const ir_edge_t *edge;
1312 const ir_edge_t *next;
1315 arity = be_get_MemPerm_entity_arity(node);
1316 pops = alloca(arity * sizeof(pops[0]));
/* first pass: push the value of every input entity onto the stack */
1319 for(i = 0; i < arity; ++i) {
1320 ir_entity *ent = be_get_MemPerm_in_entity(node, i);
1321 ir_type *enttype = get_entity_type(ent);
1322 int entbits = get_type_size_bits(enttype);
1323 ir_node *mem = get_irn_n(node, i + 1);
1326 assert( (entbits == 32 || entbits == 64) && "spillslot on x86 should be 32 or 64 bit");
1328 push = create_push(cg, node, node, sp, mem, ent);
1329 sp = create_spproj(cg, node, push, pn_ia32_Push_stack, node);
1331 // add another push after the first one
1332 push = create_push(cg, node, node, sp, mem, ent);
1333 add_ia32_am_offs_int(push, 4);
1334 sp = create_spproj(cg, node, push, pn_ia32_Push_stack, node);
1337 set_irn_n(node, i, new_Bad());
/* second pass: pop the values back into the output entities, reversed */
1341 for(i = arity - 1; i >= 0; --i) {
1342 ir_entity *ent = be_get_MemPerm_out_entity(node, i);
1343 ir_type *enttype = get_entity_type(ent);
1344 int entbits = get_type_size_bits(enttype);
1348 assert( (entbits == 32 || entbits == 64) && "spillslot on x86 should be 32 or 64 bit");
1350 pop = create_pop(cg, node, node, sp, ent);
1351 sp = create_spproj(cg, node, pop, pn_ia32_Pop_stack, node);
1353 add_ia32_am_offs_int(pop, 4);
1355 // add another pop after the first one
1356 pop = create_pop(cg, node, node, sp, ent);
1357 sp = create_spproj(cg, node, pop, pn_ia32_Pop_stack, node);
/* keep the final stack pointer value alive */
1364 keep = be_new_Keep(&ia32_reg_classes[CLASS_ia32_gp], irg, block, 1, in);
1365 sched_add_before(node, keep);
1367 // exchange memprojs
1368 foreach_out_edge_safe(node, edge, next) {
1369 ir_node *proj = get_edge_src_irn(edge);
1370 int p = get_Proj_proj(proj);
1374 set_Proj_pred(proj, pops[p]);
1375 set_Proj_proj(proj, 3);
/* sever all remaining inputs of the now-dead MemPerm */
1379 arity = get_irn_arity(node);
1380 for(i = 0; i < arity; ++i) {
1381 set_irn_n(node, i, new_Bad());
1387 * Block-Walker: Calls the transform functions Spill and Reload.
/*
 * Walk one block's schedule backwards and lower every generic Reload, Spill
 * and MemPerm node into concrete ia32 instructions.  Iterating backwards
 * with a saved predecessor keeps the walk valid while the transform
 * functions rewrite the schedule.  @p env is the ia32_code_gen_t.
 */
1389 static void ia32_after_ra_walker(ir_node *block, void *env) {
1390 ir_node *node, *prev;
1391 ia32_code_gen_t *cg = env;
1393 /* beware: the schedule is changed here */
1394 for (node = sched_last(block); !sched_is_begin(node); node = prev) {
1395 prev = sched_prev(node);
1397 if (be_is_Reload(node)) {
1398 transform_to_Load(cg, node);
1399 } else if (be_is_Spill(node)) {
1400 transform_to_Store(cg, node);
1401 } else if(be_is_MemPerm(node)) {
1402 transform_MemPerm(cg, node);
1408 * Collects nodes that need frame entities assigned.
/*
 * Graph-walker callback: register every node that still needs a frame
 * entity with the frame entity coalescer (@p data).  Covered are be_Reload
 * nodes without an entity and ia32 frame-using nodes (loads, vfild/xLoad,
 * SetST0) whose entity is still NULL.  The alignment for the float-load
 * cases is elided from this listing — presumably derived from the ls_mode;
 * TODO confirm.  The trailing negative check over the store opcodes guards
 * an elided branch (likely an assertion for unexpected frame users).
 */
1410 static void ia32_collect_frame_entity_nodes(ir_node *node, void *data)
1412 be_fec_env_t *env = data;
1414 if (be_is_Reload(node) && be_get_frame_entity(node) == NULL) {
1415 const ir_mode *mode = get_spill_mode_mode(get_irn_mode(node));
1416 int align = get_mode_size_bytes(mode);
1417 be_node_needs_frame_entity(env, node, mode, align);
1418 } else if(is_ia32_irn(node) && get_ia32_frame_ent(node) == NULL
1419 && is_ia32_use_frame(node)) {
1420 if (is_ia32_need_stackent(node) || is_ia32_Load(node)) {
1421 const ir_mode *mode = get_ia32_ls_mode(node);
1422 int align = get_mode_size_bytes(mode);
1423 be_node_needs_frame_entity(env, node, mode, align);
1424 } else if (is_ia32_vfild(node) || is_ia32_xLoad(node)) {
1425 const ir_mode *mode = get_ia32_ls_mode(node);
1427 be_node_needs_frame_entity(env, node, mode, align);
1428 } else if (is_ia32_SetST0(node)) {
1429 const ir_mode *mode = get_ia32_ls_mode(node);
1431 be_node_needs_frame_entity(env, node, mode, align);
1434 if(!is_ia32_Store(node)
1435 && !is_ia32_xStore(node)
1436 && !is_ia32_xStoreSimple(node)
1437 && !is_ia32_vfist(node)
1438 && !is_ia32_GetST0(node)) {
1447 * We transform Spill and Reload here. This needs to be done before
1448 * stack biasing otherwise we would miss the corrected offset for these nodes.
/*
 * After-register-allocation hook: assign and coalesce frame entities for
 * spill slots, lower Spill/Reload/MemPerm to ia32 instructions, then run the
 * final ia32 fixups (ia32_finish_irg).  Must run before stack biasing so
 * the lowered nodes receive corrected stack offsets.  @p self is the
 * ia32_code_gen_t.
 */
1450 static void ia32_after_ra(void *self) {
1451 ia32_code_gen_t *cg = self;
1452 ir_graph *irg = cg->irg;
1453 be_fec_env_t *fec_env = be_new_frame_entity_coalescer(cg->birg);
1455 /* create and coalesce frame entities */
1456 irg_walk_graph(irg, NULL, ia32_collect_frame_entity_nodes, fec_env);
1457 be_assign_entities(fec_env);
1458 be_free_frame_entity_coalescer(fec_env);
1460 irg_block_walk_graph(irg, NULL, ia32_after_ra_walker, cg);
1462 ia32_finish_irg(irg, cg);
1466 * Last touchups for the graph before emit: x87 simulation to replace the
1467 * virtual with real x87 instructions, creating a block schedule and peephole
/*
 * Finish hook (runs before code emission): replace virtual x87 nodes with
 * real x87 instructions when x87 code generation is in effect, compute the
 * block schedule and apply peephole optimisations.  @p self is the
 * ia32_code_gen_t.
 */
1470 static void ia32_finish(void *self) {
1471 ia32_code_gen_t *cg = self;
1472 ir_graph *irg = cg->irg;
1474 /* if we do x87 code generation, rewrite all the virtual instructions and registers */
1475 if (cg->used_fp == fp_x87 || cg->force_sim) {
1476 x87_simulate_graph(cg->arch_env, cg->birg);
1479 /* create block schedule, this also removes empty blocks which might
1480 * produce critical edges */
1481 cg->blk_sched = be_create_block_schedule(irg, cg->birg->exec_freq);
1483 /* do peephole optimisations */
1484 ia32_peephole_optimization(irg, cg);
1488 * Emits the code, closes the output file and frees
1489 * the code generator interface.
/*
 * Final hook: emit the assembly for the irg, detach the code generator from
 * the isa and release its resources.  @p self is the ia32_code_gen_t.
 */
1491 static void ia32_codegen(void *self) {
1492 ia32_code_gen_t *cg = self;
1493 ir_graph *irg = cg->irg;
1495 ia32_gen_routine(cg, irg);
1499 /* remove it from the isa */
1502 /* de-allocate code generator */
1503 del_set(cg->reg_set);
1507 static void *ia32_cg_init(be_irg_t *birg);
/* Code generator interface: the backend driver calls these hooks in order
 * over a compilation unit (some entries elided in this listing). */
1509 static const arch_code_generator_if_t ia32_code_gen_if = {
1511 NULL, /* before abi introduce hook */
1514 ia32_before_sched, /* before scheduling hook */
1515 ia32_before_ra, /* before register allocation hook */
1516 ia32_after_ra, /* after register allocation hook */
1517 ia32_finish, /* called before codegen */
1518 ia32_codegen /* emit && done */
1522 * Initializes a IA32 code generator.
/*
 * Allocate and initialise a fresh ia32 code generator for @p birg, copying
 * the relevant settings (fp kind, target/optimisation architecture) from
 * the isa.  Also resets the isa's name obstack and publishes the new cg via
 * cur_reg_set and ia32_irn_ops.  Returns the cg as the generic
 * arch_code_generator_t.
 */
1524 static void *ia32_cg_init(be_irg_t *birg) {
1525 ia32_isa_t *isa = (ia32_isa_t *)birg->main_env->arch_env->isa;
1526 ia32_code_gen_t *cg = xcalloc(1, sizeof(*cg));
1528 cg->impl = &ia32_code_gen_if;
1529 cg->irg = birg->irg;
1530 cg->reg_set = new_set(ia32_cmp_irn_reg_assoc, 1024);
1531 cg->arch_env = birg->main_env->arch_env;
1534 cg->blk_sched = NULL;
1535 cg->fp_kind = isa->fp_kind;
1536 cg->used_fp = fp_none;
1537 cg->dump = (birg->main_env->options->dump_flags & DUMP_BE) ? 1 : 0;
1539 FIRM_DBG_REGISTER(cg->mod, "firm.be.ia32.cg");
1541 /* copy optimizations from isa for easier access */
1543 cg->arch = isa->arch;
1544 cg->opt_arch = isa->opt_arch;
/* recycle the per-irg name obstack: free old contents, re-init for this cg */
1550 if (isa->name_obst) {
1551 obstack_free(isa->name_obst, NULL);
1552 obstack_init(isa->name_obst);
1556 cur_reg_set = cg->reg_set;
/* the irn ops callbacks need access to the current code generator */
1558 ia32_irn_ops.cg = cg;
1560 return (arch_code_generator_t *)cg;
1565 /*****************************************************************
1566 * ____ _ _ _____ _____
1567 * | _ \ | | | | |_ _|/ ____| /\
1568 * | |_) | __ _ ___| | _____ _ __ __| | | | | (___ / \
1569 * | _ < / _` |/ __| |/ / _ \ '_ \ / _` | | | \___ \ / /\ \
1570 * | |_) | (_| | (__| < __/ | | | (_| | _| |_ ____) / ____ \
1571 * |____/ \__,_|\___|_|\_\___|_| |_|\__,_| |_____|_____/_/ \_\
1573 *****************************************************************/
1576 * Set output modes for GCC
/* tarval printer configuration used for all integer modes (body elided). */
1578 static const tarval_mode_info mo_integer = {
1585 * set the tarval output mode of all integer modes to decimal
/* Install mo_integer as the tarval printer for every integer mode in irp,
 * so emitted integer constants are GAS/GCC compatible. */
1587 static void set_tarval_output_modes(void)
1591 for (i = get_irp_n_modes() - 1; i >= 0; --i) {
1592 ir_mode *mode = get_irp_mode(i);
1594 if (mode_is_int(mode))
1595 set_tarval_mode_output_option(mode, &mo_integer);
1599 const arch_isa_if_t ia32_isa_if;
1602 * The template that generates a new ISA object.
1603 * Note that this template can be changed by command line
/* Default isa settings; the lc_opt tables below point into this template so
 * command-line arguments mutate it before ia32_init() copies it. */
1606 static ia32_isa_t ia32_isa_template = {
1608 &ia32_isa_if, /* isa interface implementation */
1609 &ia32_gp_regs[REG_ESP], /* stack pointer register */
1610 &ia32_gp_regs[REG_EBP], /* base pointer register */
1611 -1, /* stack direction */
1612 NULL, /* main environment */
1614 {}, /* emitter environment */
1615 NULL, /* 16bit register names */
1616 NULL, /* 8bit register names */
1620 IA32_OPT_INCDEC | /* optimize add 1, sub 1 into inc/dec default: on */
1621 IA32_OPT_DOAM | /* optimize address mode default: on */
1622 IA32_OPT_LEA | /* optimize for LEAs default: on */
1623 IA32_OPT_PLACECNST | /* place constants immediately before instructions, default: on */
1624 IA32_OPT_IMMOPS | /* operations can use immediates, default: on */
1625 IA32_OPT_PUSHARGS), /* create pushs for function argument passing, default: on */
1626 arch_pentium_4, /* instruction architecture */
1627 arch_pentium_4, /* optimize for architecture */
1628 fp_sse2, /* use sse2 unit */
1629 NULL, /* current code generator */
1631 NULL, /* name obstack */
1632 0 /* name obst size */
1637 * Initializes the backend ISA.
/*
 * Create the ia32 isa instance from the (possibly option-patched) template:
 * register the register classes and opcodes, downgrade fp_sse2 to x87 on
 * CPUs without SSE2, disable inc/dec when optimising for Pentium 4, set up
 * the emitter, register-name maps and machine description, and emit the
 * initial text-section prologue needed for debug support.  Returns the isa
 * (return statement elided in this listing).
 */
1639 static void *ia32_init(FILE *file_handle) {
1640 static int inited = 0;
1646 set_tarval_output_modes();
1648 isa = xmalloc(sizeof(*isa));
1649 memcpy(isa, &ia32_isa_template, sizeof(*isa));
1651 ia32_register_init(isa);
1652 ia32_create_opcodes();
1653 ia32_register_copy_attr_func();
1655 if ((ARCH_INTEL(isa->arch) && isa->arch < arch_pentium_4) ||
1656 (ARCH_AMD(isa->arch) && isa->arch < arch_athlon))
1657 /* no SSE2 for these cpu's */
1658 isa->fp_kind = fp_x87;
1660 if (ARCH_INTEL(isa->opt_arch) && isa->opt_arch >= arch_pentium_4) {
1661 /* Pentium 4 don't like inc and dec instructions */
1662 isa->opt &= ~IA32_OPT_INCDEC;
1665 be_emit_init_env(&isa->emit, file_handle);
1666 isa->regs_16bit = pmap_create();
1667 isa->regs_8bit = pmap_create();
1668 isa->types = pmap_create();
1669 isa->tv_ent = pmap_create();
1670 isa->cpu = ia32_init_machine_description();
1672 ia32_build_16bit_reg_map(isa->regs_16bit);
1673 ia32_build_8bit_reg_map(isa->regs_8bit);
1675 /* patch register names of x87 registers */
1676 ia32_st_regs[0].name = "st";
1677 ia32_st_regs[1].name = "st(1)";
1678 ia32_st_regs[2].name = "st(2)";
1679 ia32_st_regs[3].name = "st(3)";
1680 ia32_st_regs[4].name = "st(4)";
1681 ia32_st_regs[5].name = "st(5)";
1682 ia32_st_regs[6].name = "st(6)";
1683 ia32_st_regs[7].name = "st(7)";
/* obstack holding the pretty-printed "original node" names for comments */
1686 isa->name_obst = xmalloc(sizeof(*isa->name_obst));
1687 obstack_init(isa->name_obst);
1690 ia32_handle_intrinsics();
1692 /* needed for the debug support */
1693 be_gas_emit_switch_section(&isa->emit, GAS_SECTION_TEXT);
1694 be_emit_cstring(&isa->emit, ".Ltext0:\n");
1695 be_emit_write_line(&isa->emit);
1705 * Closes the output file and frees the ISA structure.
/*
 * Tear-down counterpart of ia32_init(): emit the collected global
 * declarations, destroy the register/type/tarval pmaps and the name
 * obstack, and shut down the emitter environment.  @p self is the
 * ia32_isa_t.
 */
1707 static void ia32_done(void *self) {
1708 ia32_isa_t *isa = self;
1710 /* emit now all global declarations */
1711 be_gas_emit_decls(&isa->emit, isa->arch_isa.main_env);
1713 pmap_destroy(isa->regs_16bit);
1714 pmap_destroy(isa->regs_8bit);
1715 pmap_destroy(isa->tv_ent);
1716 pmap_destroy(isa->types);
1719 obstack_free(isa->name_obst, NULL);
1722 be_emit_destroy_env(&isa->emit);
1729 * Return the number of register classes for this architecture.
1730 * We report always these:
1731 * - the general purpose registers
1732 * - the SSE floating point register set
1733 * - the virtual floating point registers
1734 * - the SSE vector register set
/* Body (the returned constant) is elided in this listing. */
1736 static int ia32_get_n_reg_class(const void *self) {
1741 * Return the register class for index i.
/* Maps class index -> register class: gp, xmm, vfp (selector lines for the
 * index comparison are elided in this listing); asserts on other indices. */
1743 static const arch_register_class_t *ia32_get_reg_class(const void *self, int i) {
1746 return &ia32_reg_classes[CLASS_ia32_gp];
1748 return &ia32_reg_classes[CLASS_ia32_xmm];
1750 return &ia32_reg_classes[CLASS_ia32_vfp];
1752 assert(0 && "Invalid ia32 register class requested.");
1758 * Get the register class which shall be used to store a value of a given mode.
1759 * @param self The this pointer.
1760 * @param mode The mode in question.
1761 * @return A register class which can hold values of the given mode.
/* Float modes go to xmm (SSE2) or vfp (x87) depending on the isa's fp kind;
 * everything else is held in general purpose registers. */
1763 const arch_register_class_t *ia32_get_reg_class_for_mode(const void *self, const ir_mode *mode) {
1764 const ia32_isa_t *isa = self;
1765 if (mode_is_float(mode)) {
1766 return USE_SSE2(isa) ? &ia32_reg_classes[CLASS_ia32_xmm] : &ia32_reg_classes[CLASS_ia32_vfp];
1769 return &ia32_reg_classes[CLASS_ia32_gp];
1773 * Get the ABI restrictions for procedure calls.
1774 * @param self The this pointer.
1775 * @param method_type The type of the method (procedure) in question.
1776 * @param abi The abi object to be modified
/*
 * Fill the abi object: set the ia32 call flags (right-to-left stack args,
 * sequential pushes unless optimising for a P6 core, fixed frame pointer,
 * immediate call addresses), assign register parameters when the calling
 * convention allows them, put the remaining parameters on the stack with
 * 4-byte alignment, and bind the result registers (EAX/EDX for a 64-bit
 * integer pair, VF0 or EAX for a single float/int result).
 */
1778 static void ia32_get_call_abi(const void *self, ir_type *method_type, be_abi_call_t *abi) {
1779 const ia32_isa_t *isa = self;
1782 unsigned cc = get_method_calling_convention(method_type);
1783 int n = get_method_n_params(method_type);
1786 int i, ignore_1, ignore_2;
1788 const arch_register_t *reg;
1789 be_abi_call_flags_t call_flags = be_abi_call_get_flags(abi);
1791 unsigned use_push = !IS_P6_ARCH(isa->opt_arch);
1793 /* set abi flags for calls */
1794 call_flags.bits.left_to_right = 0; /* always last arg first on stack */
1795 call_flags.bits.store_args_sequential = use_push;
1796 /* call_flags.bits.try_omit_fp not changed: can handle both settings */
1797 call_flags.bits.fp_free = 0; /* the frame pointer is fixed in IA32 */
1798 call_flags.bits.call_has_imm = 1; /* IA32 calls can have immediate address */
1800 /* set stack parameter passing style */
1801 be_abi_call_set_flags(abi, call_flags, &ia32_abi_callbacks);
1803 /* collect the mode for each type */
1804 modes = alloca(n * sizeof(modes[0]));
1806 for (i = 0; i < n; i++) {
1807 tp = get_method_param_type(method_type, i);
1808 modes[i] = get_type_mode(tp);
1811 /* set register parameters */
1812 if (cc & cc_reg_param) {
1813 /* determine the number of parameters passed via registers */
1814 biggest_n = ia32_get_n_regparam_class(n, modes, &ignore_1, &ignore_2);
1816 /* loop over all parameters and set the register requirements */
1817 for (i = 0; i <= biggest_n; i++) {
1818 reg = ia32_get_RegParam_reg(n, modes, i, cc);
1819 assert(reg && "kaputt");
1820 be_abi_call_param_reg(abi, i, reg);
1827 /* set stack parameters */
1828 for (i = stack_idx; i < n; i++) {
1829 /* parameters on the stack are 32 bit aligned */
1830 be_abi_call_param_stack(abi, i, 4, 0, 0);
1834 /* set return registers */
1835 n = get_method_n_ress(method_type);
1837 assert(n <= 2 && "more than two results not supported");
1839 /* In case of 64bit returns, we will have two 32bit values */
1841 tp = get_method_res_type(method_type, 0);
1842 mode = get_type_mode(tp);
1844 assert(!mode_is_float(mode) && "two FP results not supported");
1846 tp = get_method_res_type(method_type, 1);
1847 mode = get_type_mode(tp);
1849 assert(!mode_is_float(mode) && "mixed INT, FP results not supported");
/* low word in EAX, high word in EDX */
1851 be_abi_call_res_reg(abi, 0, &ia32_gp_regs[REG_EAX]);
1852 be_abi_call_res_reg(abi, 1, &ia32_gp_regs[REG_EDX]);
1855 const arch_register_t *reg;
1857 tp = get_method_res_type(method_type, 0);
1858 assert(is_atomic_type(tp));
1859 mode = get_type_mode(tp);
1861 reg = mode_is_float(mode) ? &ia32_vfp_regs[REG_VF0] : &ia32_gp_regs[REG_EAX];
1863 be_abi_call_res_reg(abi, 0, reg);
/* Every ir_node is handled by the single ia32_irn_ops instance. */
1868 static const void *ia32_get_irn_ops(const arch_irn_handler_t *self, const ir_node *irn) {
1869 return &ia32_irn_ops;
/* The node handler simply dispatches to ia32_get_irn_ops above. */
1872 const arch_irn_handler_t ia32_irn_handler = {
1876 const arch_irn_handler_t *ia32_get_irn_handler(const void *self) {
1877 return &ia32_irn_handler;
/* Scheduler callback: 1 for ia32 nodes (schedule them), -1 for others
 * (let the default policy decide). */
1880 int ia32_to_appear_in_schedule(void *block_env, const ir_node *irn) {
1881 return is_ia32_irn(irn) ? 1 : -1;
1885 * Initializes the code generator interface.
/* Returns the static ia32 code generator interface defined above. */
1887 static const arch_code_generator_if_t *ia32_get_code_generator_if(void *self) {
1888 return &ia32_code_gen_if;
1892 * Returns the estimated execution time of an ia32 irn.
/* Cost callback for the list scheduler: estimated cost for ia32 nodes,
 * a flat 1 for everything else. */
1894 static sched_timestep_t ia32_sched_exectime(void *env, const ir_node *irn) {
1895 const arch_env_t *arch_env = env;
1896 return is_ia32_irn(irn) ? ia32_get_op_estimated_cost(arch_get_irn_ops(arch_env, irn), irn) : 1;
/* Writable copy of the chosen selector with ia32-specific overrides. */
1899 list_sched_selector_t ia32_sched_selector;
1902 * Returns the reg_pressure scheduler with to_appear_in_schedule() overloaded
/* Clone the given selector and override exectime/to_appear_in_schedule with
 * the ia32 callbacks above. */
1904 static const list_sched_selector_t *ia32_get_list_sched_selector(const void *self, list_sched_selector_t *selector) {
1905 memcpy(&ia32_sched_selector, selector, sizeof(ia32_sched_selector));
1906 ia32_sched_selector.exectime = ia32_sched_exectime;
1907 ia32_sched_selector.to_appear_in_schedule = ia32_to_appear_in_schedule;
1908 return &ia32_sched_selector;
/* ILP scheduler selector hook (body elided in this listing). */
1911 static const ilp_sched_selector_t *ia32_get_ilp_sched_selector(const void *self) {
1916 * Returns the necessary byte alignment for storing a register of given class.
/* Alignment equals the class mode's size in bytes; wide float registers
 * (> 8 bytes, i.e. SSE/x87) get a capped alignment (cap value elided in
 * this listing — TODO confirm, presumably 16). */
1918 static int ia32_get_reg_class_alignment(const void *self, const arch_register_class_t *cls) {
1919 ir_mode *mode = arch_register_class_mode(cls);
1920 int bytes = get_mode_size_bytes(mode);
1922 if (mode_is_float(mode) && bytes > 8)
/*
 * Return the execution units @p irn may run on, for the machine-description
 * based (ILP) scheduler: ia32 nodes carry their own unit list; backend Call
 * and Return nodes run on the branch units; Barrier (and, presumably, other
 * be nodes — remainder elided in this listing) fall back to the dummy unit.
 */
1927 static const be_execution_unit_t ***ia32_get_allowed_execution_units(const void *self, const ir_node *irn) {
1928 static const be_execution_unit_t *_allowed_units_BRANCH[] = {
1929 &ia32_execution_units_BRANCH[IA32_EXECUNIT_TP_BRANCH_BRANCH1],
1930 &ia32_execution_units_BRANCH[IA32_EXECUNIT_TP_BRANCH_BRANCH2],
1933 static const be_execution_unit_t *_allowed_units_GP[] = {
1934 &ia32_execution_units_GP[IA32_EXECUNIT_TP_GP_GP_EAX],
1935 &ia32_execution_units_GP[IA32_EXECUNIT_TP_GP_GP_EBX],
1936 &ia32_execution_units_GP[IA32_EXECUNIT_TP_GP_GP_ECX],
1937 &ia32_execution_units_GP[IA32_EXECUNIT_TP_GP_GP_EDX],
1938 &ia32_execution_units_GP[IA32_EXECUNIT_TP_GP_GP_ESI],
1939 &ia32_execution_units_GP[IA32_EXECUNIT_TP_GP_GP_EDI],
1940 &ia32_execution_units_GP[IA32_EXECUNIT_TP_GP_GP_EBP],
1943 static const be_execution_unit_t *_allowed_units_DUMMY[] = {
1944 &be_machine_execution_units_DUMMY[0],
1947 static const be_execution_unit_t **_units_callret[] = {
1948 _allowed_units_BRANCH,
1951 static const be_execution_unit_t **_units_other[] = {
1955 static const be_execution_unit_t **_units_dummy[] = {
1956 _allowed_units_DUMMY,
1959 const be_execution_unit_t ***ret;
1961 if (is_ia32_irn(irn)) {
1962 ret = get_ia32_exec_units(irn);
1964 else if (is_be_node(irn)) {
1965 if (be_is_Call(irn) || be_is_Return(irn)) {
1966 ret = _units_callret;
1968 else if (be_is_Barrier(irn)) {
1983 * Return the abstract ia32 machine.
/* Returns the isa's machine description (return statement elided). */
1985 static const be_machine_t *ia32_get_machine(const void *self) {
1986 const ia32_isa_t *isa = self;
1991 * Return irp irgs in the desired order.
/* Compilation order hook (body elided in this listing). */
1993 static ir_graph **ia32_get_irg_list(const void *self, ir_graph ***irg_list) {
1998 * Allows or disallows the creation of Psi nodes for the given Phi nodes.
1999 * @return 1 if allowed, 0 otherwise
/* A Psi (conditional move) is only allowed when neither the compared value
 * nor any Phi operand on the two merged paths is a float or wider than
 * 32 bit — ia32 cmov cannot handle those. */
2001 static int ia32_is_psi_allowed(ir_node *sel, ir_node *phi_list, int i, int j)
2003 ir_node *cmp, *cmp_a, *phi;
2006 /* we don't want long long and floating point Psi */
2007 #define IS_BAD_PSI_MODE(mode) (mode_is_float(mode) || get_mode_size_bits(mode) > 32)
2009 if (get_irn_mode(sel) != mode_b)
2012 cmp = get_Proj_pred(sel);
2013 cmp_a = get_Cmp_left(cmp);
2014 mode = get_irn_mode(cmp_a);
2016 if (IS_BAD_PSI_MODE(mode))
2019 /* check the Phi nodes */
2020 for (phi = phi_list; phi; phi = get_irn_link(phi)) {
2021 ir_node *pred_i = get_irn_n(phi, i);
2022 ir_node *pred_j = get_irn_n(phi, j);
2023 ir_mode *mode_i = get_irn_mode(pred_i);
2024 ir_mode *mode_j = get_irn_mode(pred_j);
2026 if (IS_BAD_PSI_MODE(mode_i) || IS_BAD_PSI_MODE(mode_j))
2030 #undef IS_BAD_PSI_MODE
/* Shared context for ia32_create_intrinsic_fkt; entities are created lazily. */
2035 static ia32_intrinsic_env_t intrinsic_env = {
2036 NULL, /**< the irg, these entities belong to */
2037 NULL, /**< entity for first div operand (move into FPU) */
2038 NULL, /**< entity for second div operand (move into FPU) */
2039 NULL, /**< entity for converts ll -> d */
2040 NULL, /**< entity for converts d -> ll */
2044 * Returns the libFirm configuration parameter for this backend.
/* Hand libFirm the ia32-specific tuning knobs: if-conversion policy
 * (ia32_is_psi_allowed), arch_dep parameters for strength reduction, dword
 * lowering and the intrinsic factory.  Returns &p (elided in listing). */
2046 static const backend_params *ia32_get_libfirm_params(void) {
2047 static const opt_if_conv_info_t ifconv = {
2048 4, /* maxdepth, doesn't matter for Psi-conversion */
2049 ia32_is_psi_allowed /* allows or disallows Psi creation for given selector */
2051 static const arch_dep_params_t ad = {
2052 1, /* also use subs */
2053 4, /* maximum shifts */
2054 31, /* maximum shift amount */
2056 1, /* allow Mulhs */
2057 1, /* allow Mulus */
2058 32 /* Mulh allowed up to 32 bit */
2060 static backend_params p = {
2061 NULL, /* no additional opcodes */
2062 NULL, /* will be set later */
2063 1, /* need dword lowering */
2064 ia32_create_intrinsic_fkt,
2065 &intrinsic_env, /* context for ia32_create_intrinsic_fkt */
2066 NULL, /* will be set later */
2070 p.if_conv_info = &ifconv;
2074 /* instruction set architectures. */
/* Command-line option tables (libcore lc_opt): the enum/flag variables point
 * into ia32_isa_template, so parsed options take effect on the template
 * before ia32_init() copies it. */
2075 static const lc_opt_enum_int_items_t arch_items[] = {
2076 { "386", arch_i386, },
2077 { "486", arch_i486, },
2078 { "pentium", arch_pentium, },
2079 { "586", arch_pentium, },
2080 { "pentiumpro", arch_pentium_pro, },
2081 { "686", arch_pentium_pro, },
2082 { "pentiummmx", arch_pentium_mmx, },
2083 { "pentium2", arch_pentium_2, },
2084 { "p2", arch_pentium_2, },
2085 { "pentium3", arch_pentium_3, },
2086 { "p3", arch_pentium_3, },
2087 { "pentium4", arch_pentium_4, },
2088 { "p4", arch_pentium_4, },
2089 { "pentiumm", arch_pentium_m, },
2090 { "pm", arch_pentium_m, },
2091 { "core", arch_core, },
2093 { "athlon", arch_athlon, },
2094 { "athlon64", arch_athlon_64, },
2095 { "opteron", arch_opteron, },
2099 static lc_opt_enum_int_var_t arch_var = {
2100 &ia32_isa_template.arch, arch_items
2103 static lc_opt_enum_int_var_t opt_arch_var = {
2104 &ia32_isa_template.opt_arch, arch_items
2107 static const lc_opt_enum_int_items_t fp_unit_items[] = {
2109 { "sse2", fp_sse2 },
2113 static lc_opt_enum_int_var_t fp_unit_var = {
2114 &ia32_isa_template.fp_kind, fp_unit_items
2117 static const lc_opt_enum_int_items_t gas_items[] = {
2118 { "normal", GAS_FLAVOUR_NORMAL },
2119 { "mingw", GAS_FLAVOUR_MINGW },
2123 static lc_opt_enum_int_var_t gas_var = {
2124 (int*) &be_gas_flavour, gas_items
2127 static const lc_opt_table_entry_t ia32_options[] = {
2128 LC_OPT_ENT_ENUM_INT("arch", "select the instruction architecture", &arch_var),
2129 LC_OPT_ENT_ENUM_INT("opt", "optimize for instruction architecture", &opt_arch_var),
2130 LC_OPT_ENT_ENUM_INT("fpunit", "select the floating point unit", &fp_unit_var),
2131 LC_OPT_ENT_NEGBIT("noaddrmode", "do not use address mode", &ia32_isa_template.opt, IA32_OPT_DOAM),
2132 LC_OPT_ENT_NEGBIT("nolea", "do not optimize for LEAs", &ia32_isa_template.opt, IA32_OPT_LEA),
2133 LC_OPT_ENT_NEGBIT("noplacecnst", "do not place constants", &ia32_isa_template.opt, IA32_OPT_PLACECNST),
2134 LC_OPT_ENT_NEGBIT("noimmop", "no operations with immediates", &ia32_isa_template.opt, IA32_OPT_IMMOPS),
2135 LC_OPT_ENT_NEGBIT("nopushargs", "do not create pushs for function arguments", &ia32_isa_template.opt, IA32_OPT_PUSHARGS),
2136 LC_OPT_ENT_ENUM_INT("gasmode", "set the GAS compatibility mode", &gas_var),
/* The isa interface vtable handed to the generic backend (some entries,
 * e.g. init/done/get_reg_class, elided in this listing). */
2140 const arch_isa_if_t ia32_isa_if = {
2143 ia32_get_n_reg_class,
2145 ia32_get_reg_class_for_mode,
2147 ia32_get_irn_handler,
2148 ia32_get_code_generator_if,
2149 ia32_get_list_sched_selector,
2150 ia32_get_ilp_sched_selector,
2151 ia32_get_reg_class_alignment,
2152 ia32_get_libfirm_params,
2153 ia32_get_allowed_execution_units,
/* Module constructor: register the ia32 option table under be.ia32 and the
 * isa interface under the name "ia32". */
2158 void be_init_arch_ia32(void)
2160 lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
2161 lc_opt_entry_t *ia32_grp = lc_opt_get_grp(be_grp, "ia32");
2163 lc_opt_add_table(ia32_grp, ia32_options);
2164 be_register_isa_if("ia32", &ia32_isa_if);
2167 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_arch_ia32);