2 * This is the main ia32 firm backend driver.
3 * @author Christian Wuerdig
20 #include <libcore/lc_opts.h>
21 #include <libcore/lc_opts_enum.h>
22 #endif /* WITH_LIBCORE */
26 #include "pseudo_irg.h"
30 #include "iredges_t.h"
38 #include "../beabi.h" /* the general register allocator interface */
39 #include "../benode_t.h"
40 #include "../belower.h"
41 #include "../besched_t.h"
44 #include "bearch_ia32_t.h"
46 #include "ia32_new_nodes.h" /* ia32 nodes interface */
47 #include "gen_ia32_regalloc_if.h" /* the generated interface (register type and class defenitions) */
48 #include "ia32_gen_decls.h" /* interface declaration emitter */
49 #include "ia32_transform.h"
50 #include "ia32_emitter.h"
51 #include "ia32_map_regs.h"
52 #include "ia32_optimize.h"
54 #include "ia32_dbg_stat.h"
55 #include "ia32_finish.h"
56 #include "ia32_util.h"
58 #define DEBUG_MODULE "firm.be.ia32.isa"
61 static set *cur_reg_set = NULL;
64 #define is_Start(irn) (get_irn_opcode(irn) == iro_Start)
/* NOTE(review): this excerpt carries original line numbers as a prefix on
 * every line, and the numbering has gaps — closing braces and some
 * statements are not visible here. Code left byte-identical; only
 * comments added. */
66 /* Creates the unique per irg GP NoReg node. */
67 ir_node *ia32_new_NoReg_gp(ia32_code_gen_t *cg) {
68 	return be_abi_get_callee_save_irn(cg->birg->abi, &ia32_gp_regs[REG_GP_NOREG]);
71 /* Creates the unique per irg FP NoReg node. */
72 ir_node *ia32_new_NoReg_fp(ia32_code_gen_t *cg) {
	/* Picks the SSE2 xmm NoReg or the virtual-x87 vfp NoReg depending on
	 * the code generator's floating point mode (USE_SSE2). */
73 	return be_abi_get_callee_save_irn(cg->birg->abi,
74 		USE_SSE2(cg) ? &ia32_xmm_regs[REG_XMM_NOREG] : &ia32_vfp_regs[REG_VFP_NOREG]);
77 /**************************************************
80 * _ __ ___ __ _ __ _| | | ___ ___ _| |_
81 * | '__/ _ \/ _` | / _` | | |/ _ \ / __| | | _|
82 * | | | __/ (_| | | (_| | | | (_) | (__ | | |
83 * |_| \___|\__, | \__,_|_|_|\___/ \___| |_|_|
86 **************************************************/
/* Skips Proj nodes to reach the producing node (body not visible in this
 * excerpt). */
88 static ir_node *my_skip_proj(const ir_node *n) {
96  * Return register requirements for an ia32 node.
97  * If the node returns a tuple (mode_T) then the proj's
98  * will be asked for this information.
/* @param self  the ia32_irn_ops_t callback object (provides the cg)
 * @param req   out-parameter filled with the register requirement
 * @param irn   the node (may be a Proj; it is skipped below)
 * @param pos   input position, or negative for an OUT requirement
 * @return      presumably req on success / NULL for non-ia32 nodes — the
 *              return statements are not visible here; verify in full file. */
100 static const arch_register_req_t *ia32_get_irn_reg_req(const void *self, arch_register_req_t *req, const ir_node *irn, int pos) {
101 	const ia32_irn_ops_t      *ops = self;
102 	const ia32_register_req_t *irn_req;
103 	long                       node_pos = pos == -1 ? 0 : pos;
104 	ir_mode                   *mode     = is_Block(irn) ? NULL : get_irn_mode(irn);
105 	FIRM_DBG_REGISTER(firm_dbg_module_t *mod, DEBUG_MODULE);
	/* Blocks and mode_M/mode_X nodes carry no register values: nothing to do. */
107 	if (is_Block(irn) || mode == mode_M || mode == mode_X) {
108 		DBG((mod, LEVEL_1, "ignoring Block, mode_M, mode_X node %+F\n", irn));
	/* A tuple node itself has no single OUT requirement; its Projs do. */
112 	if (mode == mode_T && pos < 0) {
113 		DBG((mod, LEVEL_1, "ignoring request OUT requirements for node %+F\n", irn));
117 	DBG((mod, LEVEL_1, "get requirements at pos %d for %+F ... ", pos, irn));
	/* For a Proj, translate its proj number into the out position of the
	 * producer, then continue with the producer itself. */
121 		node_pos = ia32_translate_proj_pos(irn);
127 		irn = my_skip_proj(irn);
129 		DB((mod, LEVEL_1, "skipping Proj, going to %+F at pos %d ... ", irn, node_pos));
132 	if (is_ia32_irn(irn)) {
		/* pos >= 0 asks for an IN requirement, otherwise the OUT
		 * requirement at node_pos (the intervening branch lines are not
		 * visible in this excerpt). */
134 			irn_req = get_ia32_in_req(irn, pos);
137 			irn_req = get_ia32_out_req(irn, node_pos);
140 		DB((mod, LEVEL_1, "returning reqs for %+F at pos %d\n", irn, pos));
142 		memcpy(req, &(irn_req->req), sizeof(*req));
		/* Propagate should_be_same / should_be_different constraints by
		 * resolving the referenced input operand. */
144 		if (arch_register_req_is(&(irn_req->req), should_be_same)) {
145 			assert(irn_req->same_pos >= 0 && "should be same constraint for in -> out NYI");
146 			req->other_same = get_irn_n(irn, irn_req->same_pos);
149 		if (arch_register_req_is(&(irn_req->req), should_be_different)) {
150 			assert(irn_req->different_pos >= 0 && "should be different constraint for in -> out NYI");
151 			req->other_different = get_irn_n(irn, irn_req->different_pos);
155 		/* treat Unknowns like Const with default requirements */
156 		if (is_Unknown(irn)) {
157 			DB((mod, LEVEL_1, "returning UKNWN reqs for %+F\n", irn));
158 			if (mode_is_float(mode)) {
159 				if (USE_SSE2(ops->cg))
160 					memcpy(req, &(ia32_default_req_ia32_xmm_xmm_UKNWN), sizeof(*req));
162 					memcpy(req, &(ia32_default_req_ia32_vfp_vfp_UKNWN), sizeof(*req));
164 			else if (mode_is_int(mode) || mode_is_reference(mode))
165 				memcpy(req, &(ia32_default_req_ia32_gp_gp_UKNWN), sizeof(*req));
166 			else if (mode == mode_T || mode == mode_M) {
167 				DBG((mod, LEVEL_1, "ignoring Unknown node %+F\n", irn));
171 				assert(0 && "unsupported Unknown-Mode");
174 		DB((mod, LEVEL_1, "returning NULL for %+F (not ia32)\n", irn));
/* Records the register assigned to irn. For ia32 nodes the register is
 * stored in the node's own slot array; for other nodes it is kept in the
 * global cur_reg_set. mode_X (control flow) nodes are skipped. */
182 static void ia32_set_irn_reg(const void *self, ir_node *irn, const arch_register_t *reg) {
184 	const ia32_irn_ops_t *ops = self;
186 	if (get_irn_mode(irn) == mode_X) {
190 	DBG((ops->cg->mod, LEVEL_1, "ia32 assigned register %s to node %+F\n", reg->name, irn));
	/* Projs: the register belongs to the out position of the producer. */
193 		pos = ia32_translate_proj_pos(irn);
194 		irn = my_skip_proj(irn);
197 	if (is_ia32_irn(irn)) {
198 		const arch_register_t **slots;
200 		slots      = get_ia32_slots(irn);
		/* non-ia32 node: fall back to the firm-node register map */
204 		ia32_set_firm_reg(irn, reg, cur_reg_set);
/* Returns the register previously assigned to irn, or NULL (mode_X nodes
 * and unassigned nodes). Mirrors ia32_set_irn_reg above. */
208 static const arch_register_t *ia32_get_irn_reg(const void *self, const ir_node *irn) {
210 	const arch_register_t *reg = NULL;
214 	if (get_irn_mode(irn) == mode_X) {
218 		pos = ia32_translate_proj_pos(irn);
219 		irn = my_skip_proj(irn);
222 	if (is_ia32_irn(irn)) {
223 		const arch_register_t **slots;
224 		slots = get_ia32_slots(irn);
228 		reg = ia32_get_firm_reg(irn, cur_reg_set);
/* Classifies irn for the register allocator: accumulates branch / const /
 * load / store / reload flags on top of arch_irn_class_normal. */
234 static arch_irn_class_t ia32_classify(const void *self, const ir_node *irn) {
235 	arch_irn_class_t classification = arch_irn_class_normal;
237 	irn = my_skip_proj(irn);
	/* condition for the branch flag is not visible in this excerpt */
240 		classification |= arch_irn_class_branch;
	/* non-ia32 nodes are not "normal" ia32 instructions */
242 	if (! is_ia32_irn(irn))
243 		return classification & ~arch_irn_class_normal;
245 	if (is_ia32_Cnst(irn))
246 		classification |= arch_irn_class_const;
249 		classification |= arch_irn_class_load;
251 	if (is_ia32_St(irn) || is_ia32_Store8Bit(irn))
252 		classification |= arch_irn_class_store;
254 	if (is_ia32_got_reload(irn))
255 		classification |= arch_irn_class_reload;
257 	return classification;
/* Returns the arch flags of irn. Stack-pointer Projs of Push/Pop/AddSP are
 * reported as modify_sp; ia32 nodes return their stored flags; everything
 * else is "ignore". */
260 static arch_irn_flags_t ia32_get_flags(const void *self, const ir_node *irn) {
263 		ir_node *pred = get_Proj_pred(irn);
264 		if(is_ia32_Push(pred) && get_Proj_proj(irn) == pn_ia32_Push_stack) {
265 			return arch_irn_flags_modify_sp;
267 		if(is_ia32_Pop(pred) && get_Proj_proj(irn) == pn_ia32_Pop_stack) {
268 			return arch_irn_flags_modify_sp;
270 		if(is_ia32_AddSP(pred) && get_Proj_proj(irn) == pn_ia32_AddSP_stack) {
271 			return arch_irn_flags_modify_sp;
275 	irn = my_skip_proj(irn);
276 	if (is_ia32_irn(irn))
277 		return get_ia32_flags(irn);
280 	return arch_irn_flags_ignore;
/* Frame entity accessors: the entity describing the stack-frame slot the
 * node addresses (NULL for non-ia32 nodes). */
285 static entity *ia32_get_frame_entity(const void *self, const ir_node *irn) {
286 	return is_ia32_irn(irn) ? get_ia32_frame_ent(irn) : NULL;
289 static void ia32_set_frame_entity(const void *self, ir_node *irn, entity *ent) {
290 	set_ia32_frame_ent(irn, ent);
/* Applies the stack bias to a frame-entity node: the bias is rendered as a
 * decimal string and stored either as the node's immediate (ia32_Normal) or
 * added to its address-mode offset. */
293 static void ia32_set_frame_offset(const void *self, ir_node *irn, int bias) {
295 	const ia32_irn_ops_t *ops = self;
297 	if (get_ia32_frame_ent(irn)) {
298 		ia32_am_flavour_t am_flav = get_ia32_am_flavour(irn);
300 		/* Pop nodes modify the stack pointer before reading the destination
301 		 * address, so fix this here
		/* the adjustment applied for Pop is not visible in this excerpt */
303 		if(is_ia32_Pop(irn)) {
307 		DBG((ops->cg->mod, LEVEL_1, "stack biased %+F with %d\n", irn, bias));
309 		snprintf(buf, sizeof(buf), "%d", bias);
311 		if (get_ia32_op_type(irn) == ia32_Normal) {
312 			set_ia32_cnst(irn, buf);
315 			add_ia32_am_offs(irn, buf);
317 			set_ia32_am_flavour(irn, am_flav);
/* Returns the stack pointer delta produced by irn (for stack-pointer Projs
 * of Push/Pop). Return values are not visible in this excerpt — presumably
 * +4/-4 word sizes; verify against the full file. */
322 static int ia32_get_sp_bias(const void *self, const ir_node *irn) {
324 		int proj = get_Proj_proj(irn);
325 		ir_node *pred = get_Proj_pred(irn);
327 		if(is_ia32_Push(pred) && proj == 0)
329 		else if(is_ia32_Pop(pred) && proj == 1)
	/* fields of the ABI callback environment (struct head not visible here) */
337 	be_abi_call_flags_bits_t flags;
338 	const arch_isa_t *isa;
339 	const arch_env_t *aenv;
/* Allocates and initializes the per-irg ABI callback environment from the
 * call flags and the architecture environment. Caller owns the returned
 * memory (freed by the matching ABI done-callback, not visible here). */
343 static void *ia32_abi_init(const be_abi_call_t *call, const arch_env_t *aenv, ir_graph *irg)
345 	ia32_abi_env_t *env    = xmalloc(sizeof(env[0]));
346 	be_abi_call_flags_t fl = be_abi_call_get_flags(call);
347 	env->flags = fl.bits;
350 	env->isa   = aenv->isa;
355  * Put all registers which are saved by the prologue/epilogue in a set.
357  * @param self  The callback object.
358  * @param s     The result set.
/* If the frame pointer is omitted, ebp need not be saved by the generic
 * spill logic, so it is added to the "don't save" set. */
360 static void ia32_abi_dont_save_regs(void *self, pset *s)
362 	ia32_abi_env_t *env = self;
363 	if(env->flags.try_omit_fp)
364 		pset_insert_ptr(s, env->isa->bp);
368  * Generate the routine prologue.
370  * @param self    The callback object.
371  * @param mem     A pointer to the mem node. Update this if you define new memory.
372  * @param reg_map A map mapping all callee_save/ignore/parameter registers to their defining nodes.
374  * @return        The register which shall be used as a stack frame base.
376  * All nodes which define registers in @p reg_map must keep @p reg_map current.
378 static const arch_register_t *ia32_abi_prologue(void *self, ir_node **mem, pmap *reg_map)
380 	ia32_abi_env_t *env = self;
	/* frame pointer used: emit push ebp; mov ebp, esp */
382 	if (! env->flags.try_omit_fp) {
383 		ir_node *bl      = get_irg_start_block(env->irg);
384 		ir_node *curr_sp = be_abi_reg_map_get(reg_map, env->isa->sp);
385 		ir_node *curr_bp = be_abi_reg_map_get(reg_map, env->isa->bp);
		/* push ebp */
389 		push    = new_rd_ia32_Push(NULL, env->irg, bl, curr_sp, curr_bp, *mem);
390 		curr_sp = new_r_Proj(env->irg, bl, push, get_irn_mode(curr_sp), pn_ia32_Push_stack);
391 		*mem    = new_r_Proj(env->irg, bl, push, mode_M, pn_ia32_Push_M);
393 		/* the push must have SP out register */
394 		arch_set_irn_register(env->aenv, curr_sp, env->isa->sp);
395 		set_ia32_flags(push, arch_irn_flags_ignore);
397 		/* move esp to ebp */
398 		curr_bp  = be_new_Copy(env->isa->bp->reg_class, env->irg, bl, curr_sp);
399 		be_set_constr_single_reg(curr_bp, BE_OUT_POS(0), env->isa->bp);
400 		arch_set_irn_register(env->aenv, curr_bp, env->isa->bp);
401 		be_node_set_flags(curr_bp, BE_OUT_POS(0), arch_irn_flags_ignore);
403 		/* beware: the copy must be done before any other sp use */
404 		curr_sp = be_new_CopyKeep_single(env->isa->sp->reg_class, env->irg, bl, curr_sp, curr_bp, get_irn_mode(curr_sp));
405 		be_set_constr_single_reg(curr_sp, BE_OUT_POS(0), env->isa->sp);
406 		arch_set_irn_register(env->aenv, curr_sp, env->isa->sp);
407 		be_node_set_flags(curr_sp, BE_OUT_POS(0), arch_irn_flags_ignore);
409 		be_abi_reg_map_set(reg_map, env->isa->sp, curr_sp);
410 		be_abi_reg_map_set(reg_map, env->isa->bp, curr_bp);
419  * Generate the routine epilogue.
420  * @param self    The callback object.
421  * @param bl      The block for the epilog
422  * @param mem     A pointer to the mem node. Update this if you define new memory.
423  * @param reg_map A map mapping all callee_save/ignore/parameter registers to their defining nodes.
424  * @return        The register which shall be used as a stack frame base.
426  * All nodes which define registers in @p reg_map must keep @p reg_map current.
428 static void ia32_abi_epilogue(void *self, ir_node *bl, ir_node **mem, pmap *reg_map)
430 	ia32_abi_env_t *env     = self;
431 	ir_node        *curr_sp = be_abi_reg_map_get(reg_map, env->isa->sp);
432 	ir_node        *curr_bp = be_abi_reg_map_get(reg_map, env->isa->bp);
434 	if (env->flags.try_omit_fp) {
435 		/* simply remove the stack frame here */
436 		curr_sp = be_new_IncSP(env->isa->sp, env->irg, bl, curr_sp, BE_STACK_FRAME_SIZE_SHRINK);
437 		add_irn_dep(curr_sp, *mem);
440 		const ia32_isa_t *isa     = (ia32_isa_t *)env->isa;
441 		ir_mode          *mode_bp = env->isa->bp->reg_class->mode;
443 		/* gcc always emits a leave at the end of a routine */
		/* NOTE(review): "1 ||" forces the leave branch unconditionally,
		 * making the pop-based alternative below dead code — presumably
		 * intentional (see comment above), but worth confirming. */
444 		if (1 || ARCH_AMD(isa->opt_arch)) {
			/* leave: restores ebp and esp in one instruction */
448 			leave   = new_rd_ia32_Leave(NULL, env->irg, bl, curr_sp, curr_bp);
449 			set_ia32_flags(leave, arch_irn_flags_ignore);
450 			curr_bp = new_r_Proj(current_ir_graph, bl, leave, mode_bp, pn_ia32_Leave_frame);
451 			curr_sp = new_r_Proj(current_ir_graph, bl, leave, get_irn_mode(curr_sp), pn_ia32_Leave_stack);
452 			*mem    = new_r_Proj(current_ir_graph, bl, leave, mode_M, pn_ia32_Leave_M);
457 			/* copy ebp to esp */
458 			curr_sp = be_new_SetSP(env->isa->sp, env->irg, bl, curr_sp, curr_bp, *mem);
			/* pop ebp */
461 			pop     = new_rd_ia32_Pop(NULL, env->irg, bl, curr_sp, *mem);
462 			set_ia32_flags(pop, arch_irn_flags_ignore);
463 			curr_bp = new_r_Proj(current_ir_graph, bl, pop, mode_bp, pn_ia32_Pop_res);
464 			curr_sp = new_r_Proj(current_ir_graph, bl, pop, get_irn_mode(curr_sp), pn_ia32_Pop_stack);
465 			*mem    = new_r_Proj(current_ir_graph, bl, pop, mode_M, pn_ia32_Pop_M);
467 		arch_set_irn_register(env->aenv, curr_sp, env->isa->sp);
468 		arch_set_irn_register(env->aenv, curr_bp, env->isa->bp);
471 	be_abi_reg_map_set(reg_map, env->isa->sp, curr_sp);
472 	be_abi_reg_map_set(reg_map, env->isa->bp, curr_bp);
476  * Produces the type which sits between the stack args and the locals on the stack.
477  * it will contain the return address and space to store the old base pointer.
478  * @return The Firm type modeling the ABI between type.
/* Both variants are built lazily on first call and cached in function-local
 * statics — NOTE(review): not thread-safe, fine if the backend is
 * single-threaded (appears to be the project convention). */
480 static ir_type *ia32_abi_get_between_type(void *self)
482 #define IDENT(s) new_id_from_chars(s, sizeof(s)-1)
483 	static ir_type *omit_fp_between_type = NULL;
484 	static ir_type *between_type         = NULL;
486 	ia32_abi_env_t *env = self;
488 	if ( !between_type) {
490 		entity *ret_addr_ent;
491 		entity *omit_fp_ret_addr_ent;
493 		ir_type *old_bp_type   = new_type_primitive(IDENT("bp"), mode_P);
494 		ir_type *ret_addr_type = new_type_primitive(IDENT("return_addr"), mode_P);
		/* with frame pointer: [old ebp][return address] */
496 		between_type           = new_type_struct(IDENT("ia32_between_type"));
497 		old_bp_ent             = new_entity(between_type, IDENT("old_bp"), old_bp_type);
498 		ret_addr_ent           = new_entity(between_type, IDENT("ret_addr"), ret_addr_type);
500 		set_entity_offset_bytes(old_bp_ent, 0);
501 		set_entity_offset_bytes(ret_addr_ent, get_type_size_bytes(old_bp_type));
502 		set_type_size_bytes(between_type, get_type_size_bytes(old_bp_type) + get_type_size_bytes(ret_addr_type));
503 		set_type_state(between_type, layout_fixed);
		/* omit-fp variant: only the return address */
505 		omit_fp_between_type = new_type_struct(IDENT("ia32_between_type_omit_fp"));
506 		omit_fp_ret_addr_ent = new_entity(omit_fp_between_type, IDENT("ret_addr"), ret_addr_type);
508 		set_entity_offset_bytes(omit_fp_ret_addr_ent, 0);
509 		set_type_size_bytes(omit_fp_between_type, get_type_size_bytes(ret_addr_type));
510 		set_type_state(omit_fp_between_type, layout_fixed);
513 	return env->flags.try_omit_fp ? omit_fp_between_type : between_type;
518  * Get the estimated cycle count for @p irn.
520  * @param self The this pointer.
521  * @param irn  The node.
523  * @return     The estimated cycle count for this operation
525 static int ia32_get_op_estimated_cost(const void *self, const ir_node *irn)
528 	ia32_op_type_t op_tp;
529 	const ia32_irn_ops_t *ops = self;
534 	assert(is_ia32_irn(irn));
	/* base latency from the node spec, then adjust for special cases */
536 	cost  = get_ia32_latency(irn);
537 	op_tp = get_ia32_op_type(irn);
539 	if (is_ia32_CopyB(irn)) {
541 		if (ARCH_INTEL(ops->cg->arch))
544 	else if (is_ia32_CopyB_i(irn)) {
		/* cost of an immediate-size CopyB grows with the copied size */
545 		int size = get_tarval_long(get_ia32_Immop_tarval(irn));
		/* NOTE(review): (4/3) is integer division == 1, so this is
		 * effectively 20 + size; presumably 4.0/3.0 was intended —
		 * confirm against the full file/history. */
546 		cost     = 20 + (int)ceil((4/3) * size);
547 		if (ARCH_INTEL(ops->cg->arch))
550 	/* in case of address mode operations add additional cycles */
551 	else if (op_tp == ia32_AddrModeD || op_tp == ia32_AddrModeS) {
553 			In case of stack access add 5 cycles (we assume stack is in cache),
554 			other memory operations cost 20 cycles.
556 		cost += is_ia32_use_frame(irn) ? 5 : 20;
563  * Returns the inverse operation if @p irn, recalculating the argument at position @p i.
565  * @param irn       The original operation
566  * @param i         Index of the argument we want the inverse operation to yield
567  * @param inverse   struct to be filled with the resulting inverse op
568  * @param obstack   The obstack to use for allocation of the returned nodes array
569  * @return          The inverse operation or NULL if operation invertible
571 static arch_inverse_t *ia32_get_inverse(const void *self, const ir_node *irn, int i, arch_inverse_t *inverse, struct obstack *obst) {
574 	ir_node *block, *noreg, *nomem;
577 	/* we cannot invert non-ia32 irns */
578 	if (! is_ia32_irn(irn))
581 	/* operand must always be a real operand (not base, index or mem) */
582 	if (i != 2 && i != 3)
585 	/* we don't invert address mode operations */
586 	if (get_ia32_op_type(irn) != ia32_Normal)
589 	irg   = get_irn_irg(irn);
590 	block = get_nodes_block(irn);
591 	mode  = get_ia32_res_mode(irn);
592 	noreg = get_irn_n(irn, 0);
593 	nomem = new_r_NoMem(irg);
595 	/* initialize structure */
596 	inverse->nodes = obstack_alloc(obst, 2 * sizeof(inverse->nodes[0]));
	/* dispatch on the ia32 opcode; each case builds the inverse node and
	 * remembers the proj number of its result */
600 	switch (get_ia32_irn_opcode(irn)) {
602 			if (get_ia32_immop_type(irn) == ia32_ImmConst) {
603 				/* we have an add with a const here */
604 				/* invers == add with negated const */
605 				inverse->nodes[0] = new_rd_ia32_Add(NULL, irg, block, noreg, noreg, get_irn_n(irn, i), noreg, nomem);
606 				pnc               = pn_ia32_Add_res;
608 				copy_ia32_Immop_attr(inverse->nodes[0], (ir_node *)irn);
609 				set_ia32_Immop_tarval(inverse->nodes[0], tarval_neg(get_ia32_Immop_tarval(irn)));
610 				set_ia32_commutative(inverse->nodes[0]);
612 			else if (get_ia32_immop_type(irn) == ia32_ImmSymConst) {
613 				/* we have an add with a symconst here */
614 				/* invers == sub with const */
615 				inverse->nodes[0] = new_rd_ia32_Sub(NULL, irg, block, noreg, noreg, get_irn_n(irn, i), noreg, nomem);
616 				pnc               = pn_ia32_Sub_res;
618 				copy_ia32_Immop_attr(inverse->nodes[0], (ir_node *)irn);
621 				/* normal add: inverse == sub */
622 				ir_node *proj = ia32_get_res_proj(irn);
625 				inverse->nodes[0] = new_rd_ia32_Sub(NULL, irg, block, noreg, noreg, proj, get_irn_n(irn, i ^ 1), nomem);
626 				pnc               = pn_ia32_Sub_res;
631 			if (get_ia32_immop_type(irn) != ia32_ImmNone) {
632 				/* we have a sub with a const/symconst here */
633 				/* invers == add with this const */
634 				inverse->nodes[0] = new_rd_ia32_Add(NULL, irg, block, noreg, noreg, get_irn_n(irn, i), noreg, nomem);
635 				pnc               = pn_ia32_Add_res;
636 				inverse->costs    += (get_ia32_immop_type(irn) == ia32_ImmSymConst) ? 5 : 1;
637 				copy_ia32_Immop_attr(inverse->nodes[0], (ir_node *)irn);
641 				ir_node *proj = ia32_get_res_proj(irn);
				/* sub is not commutative: which side to rebuild depends on i */
645 					inverse->nodes[0] = new_rd_ia32_Add(NULL, irg, block, noreg, noreg, proj, get_irn_n(irn, 3), nomem);
648 					inverse->nodes[0] = new_rd_ia32_Sub(NULL, irg, block, noreg, noreg, get_irn_n(irn, 2), proj, nomem);
650 				pnc = pn_ia32_Sub_res;
655 			if (get_ia32_immop_type(irn) != ia32_ImmNone) {
656 				/* xor with const: inverse = xor */
657 				inverse->nodes[0] = new_rd_ia32_Eor(NULL, irg, block, noreg, noreg, get_irn_n(irn, i), noreg, nomem);
658 				pnc               = pn_ia32_Eor_res;
659 				inverse->costs    += (get_ia32_immop_type(irn) == ia32_ImmSymConst) ? 5 : 1;
660 				copy_ia32_Immop_attr(inverse->nodes[0], (ir_node *)irn);
				/* xor is self-inverse: x ^ y ^ y == x */
664 				inverse->nodes[0] = new_rd_ia32_Eor(NULL, irg, block, noreg, noreg, (ir_node *)irn, get_irn_n(irn, i), nomem);
665 				pnc               = pn_ia32_Eor_res;
			/* Not is self-inverse */
670 			ir_node *proj = ia32_get_res_proj(irn);
673 			inverse->nodes[0] = new_rd_ia32_Not(NULL, irg, block, noreg, noreg, proj, nomem);
674 			pnc               = pn_ia32_Not_res;
678 		case iro_ia32_Minus: {
			/* Minus is self-inverse */
679 			ir_node *proj = ia32_get_res_proj(irn);
682 			inverse->nodes[0] = new_rd_ia32_Minus(NULL, irg, block, noreg, noreg, proj, nomem);
683 			pnc               = pn_ia32_Minus_res;
688 			/* inverse operation not supported */
	/* finish: set result mode and create the result proj */
692 	set_ia32_res_mode(inverse->nodes[0], mode);
693 	inverse->nodes[1] = new_r_Proj(irg, block, inverse->nodes[0], mode, pnc);
699  * Check if irn can load it's operand at position i from memory (source addressmode).
700  * @param self   Pointer to irn ops itself
701  * @param irn    The irn to be checked
702  * @param i      The operands position
703  * @return       Non-Zero if operand can be loaded
705 static int ia32_possible_memory_operand(const void *self, const ir_node *irn, unsigned int i) {
706 	if (! is_ia32_irn(irn)                            ||  /* must be an ia32 irn */
707 	    get_irn_arity(irn) != 5                       ||  /* must be a binary operation */
708 	    get_ia32_op_type(irn) != ia32_Normal          ||  /* must not already be a addressmode irn */
709 	    ! (get_ia32_am_support(irn) & ia32_am_Source) ||  /* must be capable of source addressmode */
710 	    (i != 2 && i != 3)                            ||  /* a "real" operand position must be requested */
711 	    (i == 2 && ! is_ia32_commutative(irn))        ||  /* if first operand requested irn must be commutative */
712 	    is_ia32_use_frame(irn))                           /* must not already use frame */
/* Folds the spill slot of @p spill into irn as a source address-mode
 * operand: operand i is replaced by a frame-relative load (base = frame,
 * mem = spill). Precondition: ia32_possible_memory_operand(irn, i). */
718 static void ia32_perform_memory_operand(const void *self, ir_node *irn, ir_node *spill, unsigned int i) {
719 	assert(ia32_possible_memory_operand(self, irn, i) && "Cannot perform memory operand change");
	/* if operand 2 was requested, swap operands (legal: irn is commutative) */
722 		ir_node *tmp = get_irn_n(irn, 3);
723 		set_irn_n(irn, 3, get_irn_n(irn, 2));
724 		set_irn_n(irn, 2, tmp);
727 	set_ia32_am_support(irn, ia32_am_Source);
728 	set_ia32_op_type(irn, ia32_AddrModeS);
729 	set_ia32_am_flavour(irn, ia32_B);
730 	set_ia32_ls_mode(irn, get_irn_mode(get_irn_n(irn, i)));
731 	//TODO this will fail, if spill is a PhiM (give PhiMs entities?)
732 	set_ia32_frame_ent(irn, be_get_frame_entity(spill));
733 	set_ia32_use_frame(irn);
734 	set_ia32_got_reload(irn);
	/* base = frame pointer, mem input = the spill */
736 	set_irn_n(irn, 0, get_irg_frame(get_irn_irg(irn)));
737 	set_irn_n(irn, 4, spill);
740 		Input at position one is index register, which is NoReg.
741 		We would need cg object to get a real noreg, but we cannot
744 	set_irn_n(irn, 3, get_irn_n(irn, 1));
746 	//FIXME DBG_OPT_AM_S(reload, irn);
/* ABI callback vtable wired into the generic backend (some designator
 * lines are not visible in this excerpt). */
749 static const be_abi_callbacks_t ia32_abi_callbacks = {
752 	ia32_abi_get_between_type,
753 	ia32_abi_dont_save_regs,
758 /* fill register allocator interface */
760 static const arch_irn_ops_if_t ia32_irn_ops_if = {
761 	ia32_get_irn_reg_req,
766 	ia32_get_frame_entity,
767 	ia32_set_frame_entity,
768 	ia32_set_frame_offset,
771 	ia32_get_op_estimated_cost,
772 	ia32_possible_memory_operand,
773 	ia32_perform_memory_operand,
776 ia32_irn_ops_t ia32_irn_ops = {
783 /**************************************************
786 * ___ ___ __| | ___ __ _ ___ _ __ _| |_
787 * / __/ _ \ / _` |/ _ \/ _` |/ _ \ '_ \ | | _|
788 * | (_| (_) | (_| | __/ (_| | __/ | | | | | |
789 * \___\___/ \__,_|\___|\__, |\___|_| |_| |_|_|
792 **************************************************/
/* Reroutes all users of the Conv nodes collected in cg->kill_conv to the
 * Conv's operand, effectively deleting the Convs. */
794 static void ia32_kill_convs(ia32_code_gen_t *cg) {
797 	/* BEWARE: the Projs are inserted in the set */
798 	foreach_nodeset(cg->kill_conv, irn) {
799 		ir_node *in = get_irn_n(get_Proj_pred(irn), 2);
800 		edges_reroute(irn, in, cg->birg->irg);
805  * Transforms the standard firm graph into
/* an ia32 firm graph: constants/psi trees first, then a block-wise walk
 * transforming the remaining nodes, finally address-mode optimization. */
808 static void ia32_prepare_graph(void *self) {
809 	ia32_code_gen_t *cg = self;
810 	dom_front_info_t *dom;
811 	DEBUG_ONLY(firm_dbg_module_t *old_mod = cg->mod;)
813 	FIRM_DBG_REGISTER(cg->mod, "firm.be.ia32.transform");
815 	/* 1st: transform constants and psi condition trees */
816 	ia32_pre_transform_phase(cg);
818 	/* 2nd: transform all remaining nodes */
819 	ia32_register_transformers();
820 	dom = be_compute_dominance_frontiers(cg->irg);
822 	cg->kill_conv = new_nodeset(5);
823 	irg_walk_blkwise_graph(cg->irg, NULL, ia32_transform_node, cg);
825 	del_nodeset(cg->kill_conv);
827 	be_free_dominance_frontiers(dom);
830 		be_dump(cg->irg, "-transformed", dump_ir_block_graph_sched);
832 	/* 3rd: optimize address mode */
833 	FIRM_DBG_REGISTER(cg->mod, "firm.be.ia32.am");
834 	ia32_optimize_addressmode(cg);
837 		be_dump(cg->irg, "-am", dump_ir_block_graph_sched);
	/* restore the debug module swapped out at the top */
839 	DEBUG_ONLY(cg->mod = old_mod;)
843  * Dummy functions for hooks we don't need but which must be filled.
845 static void ia32_before_sched(void *self) {
/* Recursively unlinks irn (and predecessors that thereby lose their last
 * user) by replacing its inputs with Bad and removing it from the
 * schedule. Memory edges are not followed to avoid deleting stores. */
848 static void remove_unused_nodes(ir_node *irn, bitset_t *already_visited) {
856 	mode = get_irn_mode(irn);
858 	/* check if we already saw this node or the node has more than one user */
859 	if (bitset_contains_irn(already_visited, irn) || get_irn_n_edges(irn) > 1)
862 	/* mark irn visited */
863 	bitset_add_irn(already_visited, irn);
865 	/* non-Tuple nodes with one user: ok, return */
866 	if (get_irn_n_edges(irn) >= 1 && mode != mode_T)
869 	/* tuple node has one user which is not the mem proj-> ok */
870 	if (mode == mode_T && get_irn_n_edges(irn) == 1) {
871 		mem_proj = ia32_get_proj_for_mode(irn, mode_M);
876 	for (i = get_irn_arity(irn) - 1; i >= 0; i--) {
877 		ir_node *pred = get_irn_n(irn, i);
879 		/* do not follow memory edges or we will accidentally remove stores */
880 		if (is_Proj(pred) && get_irn_mode(pred) == mode_M)
883 		set_irn_n(irn, i, new_Bad());
886 		The current node is about to be removed: if the predecessor
887 		has only this node as user, it need to be removed as well.
889 		if (get_irn_n_edges(pred) <= 1)
890 			remove_unused_nodes(pred, already_visited);
893 	if (sched_is_scheduled(irn))
/* irg walker: start removal at unused ia32 Load nodes. */
897 static void remove_unused_loads_walker(ir_node *irn, void *env) {
898 	bitset_t *already_visited = env;
899 	if (is_ia32_Ld(irn) && ! bitset_contains_irn(already_visited, irn))
900 		remove_unused_nodes(irn, env);
904  * Called before the register allocator.
905  * Calculate a block schedule here. We need it for the x87
906  * simulator and the emitter.
908 static void ia32_before_ra(void *self) {
909 	ia32_code_gen_t *cg              = self;
910 	bitset_t        *already_visited = bitset_irg_malloc(cg->irg);
912 	cg->blk_sched = sched_create_block_schedule(cg->irg);
916 		There are sometimes unused loads, only pinned by memory.
917 		We need to remove those Loads and all other nodes which won't be used
918 		after removing the Load from schedule.
920 	irg_walk_graph(cg->irg, remove_unused_loads_walker, NULL, already_visited);
921 	bitset_free(already_visited);
926  * Transforms a be node into a Load.
/* Replaces a be Spill/Reload-style node with a concrete ia32 load
 * (xLoad for SSE2 floats, vfld for x87 floats, Load otherwise) from the
 * node's frame entity, preserving schedule position and register. */
928 static void transform_to_Load(ia32_transform_env_t *env) {
929 	ir_node *irn         = env->irn;
930 	entity  *ent         = be_get_frame_entity(irn);
931 	ir_mode *mode        = env->mode;
932 	ir_node *noreg       = ia32_new_NoReg_gp(env->cg);
933 	ir_node *nomem       = new_rd_NoMem(env->irg);
934 	ir_node *sched_point = NULL;
935 	ir_node *ptr         = get_irn_n(irn, 0);
936 	ir_node *mem         = be_is_Reload(irn) ? get_irn_n(irn, 1) : nomem;
937 	ir_node *new_op, *proj;
938 	const arch_register_t *reg;
940 	if (sched_is_scheduled(irn)) {
941 		sched_point = sched_prev(irn);
944 	if (mode_is_float(mode)) {
945 		if (USE_SSE2(env->cg))
946 			new_op = new_rd_ia32_xLoad(env->dbg, env->irg, env->block, ptr, noreg, mem);
948 			new_op = new_rd_ia32_vfld(env->dbg, env->irg, env->block, ptr, noreg, mem);
951 		new_op = new_rd_ia32_Load(env->dbg, env->irg, env->block, ptr, noreg, mem);
	/* mark the load as a frame access with source address mode */
954 	set_ia32_am_support(new_op, ia32_am_Source);
955 	set_ia32_op_type(new_op, ia32_AddrModeS);
956 	set_ia32_am_flavour(new_op, ia32_B);
957 	set_ia32_ls_mode(new_op, mode);
958 	set_ia32_frame_ent(new_op, ent);
959 	set_ia32_use_frame(new_op);
961 	DBG_OPT_RELOAD2LD(irn, new_op);
963 	proj = new_rd_Proj(env->dbg, env->irg, env->block, new_op, mode, pn_Load_res);
	/* keep the original's place in the schedule */
966 		sched_add_after(sched_point, new_op);
967 		sched_add_after(new_op, proj);
972 	/* copy the register from the old node to the new Load */
973 	reg = arch_get_irn_register(env->cg->arch_env, irn);
974 	arch_set_irn_register(env->cg->arch_env, new_op, reg);
976 	SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env->cg, irn));
982  * Transforms a be node into a Store.
/* Replaces a be Spill node with a concrete ia32 store (xStore for SSE2
 * floats, vfst for x87 floats, Store8Bit/Store otherwise) into the node's
 * frame entity, preserving the schedule position. */
984 static void transform_to_Store(ia32_transform_env_t *env) {
985 	ir_node *irn   = env->irn;
986 	entity  *ent   = be_get_frame_entity(irn);
987 	ir_mode *mode  = env->mode;
988 	ir_node *noreg = ia32_new_NoReg_gp(env->cg);
989 	ir_node *nomem = new_rd_NoMem(env->irg);
990 	ir_node *ptr   = get_irn_n(irn, 0);
991 	ir_node *val   = get_irn_n(irn, 1);
992 	ir_node *new_op, *proj;
993 	ir_node *sched_point = NULL;
995 	if (sched_is_scheduled(irn)) {
996 		sched_point = sched_prev(irn);
999 	if (mode_is_float(mode)) {
1000 		if (USE_SSE2(env->cg))
1001 			new_op = new_rd_ia32_xStore(env->dbg, env->irg, env->block, ptr, noreg, val, nomem);
1003 			new_op = new_rd_ia32_vfst(env->dbg, env->irg, env->block, ptr, noreg, val, nomem);
1005 	else if (get_mode_size_bits(mode) == 8) {
1006 		new_op = new_rd_ia32_Store8Bit(env->dbg, env->irg, env->block, ptr, noreg, val, nomem);
1009 		new_op = new_rd_ia32_Store(env->dbg, env->irg, env->block, ptr, noreg, val, nomem);
	/* mark the store as a frame access with destination address mode */
1012 	set_ia32_am_support(new_op, ia32_am_Dest);
1013 	set_ia32_op_type(new_op, ia32_AddrModeD);
1014 	set_ia32_am_flavour(new_op, ia32_B);
1015 	set_ia32_ls_mode(new_op, mode);
1016 	set_ia32_frame_ent(new_op, ent);
1017 	set_ia32_use_frame(new_op);
1019 	DBG_OPT_SPILL2ST(irn, new_op);
1021 	proj = new_rd_Proj(env->dbg, env->irg, env->block, new_op, mode_M, pn_ia32_Store_M);
1024 		sched_add_after(sched_point, new_op);
1028 	SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env->cg, irn));
	/* the memory proj replaces the old node for all users */
1030 	exchange(irn, proj);
/* Builds an ia32 Push reading from frame entity @p ent (optional extra
 * address-mode offset string), scheduled before @p schedpoint. */
1033 static ir_node *create_push(ia32_transform_env_t *env, ir_node *schedpoint, ir_node *sp, ir_node *mem, entity *ent, const char *offset) {
1034 	ir_node *noreg = ia32_new_NoReg_gp(env->cg);
1036 	ir_node *push = new_rd_ia32_Push(env->dbg, env->irg, env->block, sp, noreg, mem);
1038 	set_ia32_frame_ent(push, ent);
1039 	set_ia32_use_frame(push);
1040 	set_ia32_op_type(push, ia32_AddrModeS);
1041 	set_ia32_am_flavour(push, ia32_B);
1042 	set_ia32_ls_mode(push, mode_Is);
1044 		add_ia32_am_offs(push, offset);
1046 	sched_add_before(schedpoint, push);
/* Builds an ia32 Pop writing to frame entity @p ent, scheduled before
 * @p schedpoint. */
1050 static ir_node *create_pop(ia32_transform_env_t *env, ir_node *schedpoint, ir_node *sp, entity *ent, const char *offset) {
1051 	ir_node *pop = new_rd_ia32_Pop(env->dbg, env->irg, env->block, sp, new_NoMem());
1053 	set_ia32_frame_ent(pop, ent);
1054 	set_ia32_use_frame(pop);
1055 	set_ia32_op_type(pop, ia32_AddrModeD);
1056 	set_ia32_am_flavour(pop, ia32_B);
1057 	set_ia32_ls_mode(pop, mode_Is);
1059 		add_ia32_am_offs(pop, offset);
1061 	sched_add_before(schedpoint, pop);
/* Creates a scheduled Proj extracting the new stack pointer from @p pred,
 * inheriting the sp register from @p oldsp. */
1066 static ir_node* create_spproj(ia32_transform_env_t *env, ir_node *pred, int pos, ir_node *schedpoint, const ir_node *oldsp) {
1067 	ir_mode *spmode = get_irn_mode(oldsp);
1068 	const arch_register_t *spreg = arch_get_irn_register(env->cg->arch_env, oldsp);
1071 	sp = new_rd_Proj(env->dbg, env->irg, env->block, pred, spmode, pos);
1072 	arch_set_irn_register(env->cg->arch_env, sp, spreg);
1073 	sched_add_before(schedpoint, sp);
/* Lowers a be MemPerm node into a push/pop cascade: every in-entity is
 * pushed (64-bit slots as two 32-bit pushes), then every out-entity is
 * popped in reverse order, and the MemPerm's memory Projs are rerouted to
 * the corresponding pops. */
1078 static void transform_MemPerm(ia32_transform_env_t *env) {
1080 	 * Transform memperm, currently we do this the ugly way and produce
1081 	 * push/pop into/from memory cascades. This is possible without using
1084 	ir_node *node = env->irn;
1086 	ir_node *sp = get_irn_n(node, 0);
1087 	const ir_edge_t *edge;
1088 	const ir_edge_t *next;
1091 	arity = be_get_MemPerm_entity_arity(node);
1092 	pops = alloca(arity * sizeof(pops[0]));
	/* phase 1: push all source entities */
1095 	for(i = 0; i < arity; ++i) {
1096 		entity *ent = be_get_MemPerm_in_entity(node, i);
1097 		ir_type *enttype = get_entity_type(ent);
1098 		int entbits = get_type_size_bits(enttype);
1099 		ir_node *mem = get_irn_n(node, i + 1);
1102 		assert( (entbits == 32 || entbits == 64) && "spillslot on x86 should be 32 or 64 bit");
1104 		push = create_push(env, node, sp, mem, ent, NULL);
1105 		sp = create_spproj(env, push, 0, node, sp);
			/* 64-bit entity: second push for the upper half at offset 4 */
1107 			// add another push after the first one
1108 			push = create_push(env, node, sp, mem, ent, "4");
1109 			sp = create_spproj(env, push, 0, node, sp);
1112 		set_irn_n(node, i, new_Bad());
	/* phase 2: pop into the destination entities, reversed */
1116 	for(i = arity - 1; i >= 0; --i) {
1117 		entity *ent = be_get_MemPerm_out_entity(node, i);
1118 		ir_type *enttype = get_entity_type(ent);
1119 		int entbits = get_type_size_bits(enttype);
1123 		assert( (entbits == 32 || entbits == 64) && "spillslot on x86 should be 32 or 64 bit");
1125 		pop = create_pop(env, node, sp, ent, NULL);
			/* 64-bit entity: second pop for the upper half at offset 4 */
1127 			// add another pop after the first one
1128 			sp = create_spproj(env, pop, 1, node, sp);
1129 			pop = create_pop(env, node, sp, ent, "4");
1132 		sp = create_spproj(env, pop, 1, node, sp);
1138 	// exchange memprojs
1139 	foreach_out_edge_safe(node, edge, next) {
1140 		ir_node *proj = get_edge_src_irn(edge);
1141 		int p = get_Proj_proj(proj);
1145 		set_Proj_pred(proj, pops[p]);
		/* proj number 3 presumably matches pn_ia32_Pop_M — confirm against
		 * the generated node spec */
1146 		set_Proj_proj(proj, 3);
1153  * Fix the mode of Spill/Reload
/* Maps the value mode to the mode actually used for the spill slot (the
 * float branch's replacement mode is not visible in this excerpt). */
1155 static ir_mode *fix_spill_mode(ia32_code_gen_t *cg, ir_mode *mode)
1157 	if (mode_is_float(mode)) {
1169  * Block-Walker: Calls the transform functions Spill and Reload.
/* Walks the block schedule backwards (it is mutated by the transforms) and
 * lowers be Reload/Spill/MemPerm nodes into concrete ia32 code. */
1171 static void ia32_after_ra_walker(ir_node *block, void *env) {
1172 	ir_node *node, *prev;
1173 	ia32_code_gen_t *cg = env;
1174 	ia32_transform_env_t tenv;
1177 	tenv.irg   = current_ir_graph;
1179 	DEBUG_ONLY(tenv.mod = cg->mod;)
1181 	/* beware: the schedule is changed here */
1182 	for (node = sched_last(block); !sched_is_begin(node); node = prev) {
1183 		prev = sched_prev(node);
1184 		if (be_is_Reload(node)) {
1185 			/* we always reload the whole register */
1186 			tenv.dbg  = get_irn_dbg_info(node);
1188 			tenv.mode = fix_spill_mode(cg, get_irn_mode(node));
1189 			transform_to_Load(&tenv);
1191 		else if (be_is_Spill(node)) {
1192 			ir_node *spillval = get_irn_n(node, be_pos_Spill_val);
1193 			/* we always spill the whole register */
1194 			tenv.dbg  = get_irn_dbg_info(node);
1196 			tenv.mode = fix_spill_mode(cg, get_irn_mode(spillval));
1197 			transform_to_Store(&tenv);
1199 		else if(be_is_MemPerm(node)) {
1200 			tenv.dbg = get_irn_dbg_info(node);
1202 			transform_MemPerm(&tenv);
1208  * We transform Spill and Reload here. This needs to be done before
1209  * stack biasing otherwise we would miss the corrected offset for these nodes.
1211  * If x87 instruction should be emitted, run the x87 simulator and patch
1212  * the virtual instructions. This must obviously be done after register allocation.
1214 static void ia32_after_ra(void *self) {
1215 	ia32_code_gen_t *cg = self;
1217 	irg_block_walk_graph(cg->irg, NULL, ia32_after_ra_walker, self);
1219 	/* if we do x87 code generation, rewrite all the virtual instructions and registers */
1220 	if (cg->used_fp == fp_x87 || cg->force_sim) {
1221 		x87_simulate_graph(cg->arch_env, cg->irg, cg->blk_sched);
1226  * Last touchups for the graph before emit
/* "Finish" hook: final per-irg fixups before code emission
 * (implementation lives in ia32_finish.c). */
1228 static void ia32_finish(void *self) {
1229 	ia32_code_gen_t *cg = self;
1230 	ir_graph *irg = cg->irg;
1232 	ia32_finish_irg(irg, cg);
1236  * Emits the code, closes the output file and frees
1237  * the code generator interface.
1239 static void ia32_codegen(void *self) {
1240 	ia32_code_gen_t *cg = self;
1241 	ir_graph *irg = cg->irg;
	/* emit the assembly for this irg into the isa-wide output file */
1243 	ia32_gen_routine(cg->isa->out, irg, cg);
1247 	/* remove it from the isa */
1250 	/* de-allocate code generator */
1251 	del_set(cg->reg_set);
1255 static void *ia32_cg_init(const be_irg_t *birg);
/* Code generator interface: the hooks the generic backend driver invokes
 * over a code generator's lifetime (see the per-entry comments). */
1257 static const arch_code_generator_if_t ia32_code_gen_if = {
1259 	NULL, /* before abi introduce hook */
1261 	ia32_before_sched, /* before scheduling hook */
1262 	ia32_before_ra, /* before register allocation hook */
1263 	ia32_after_ra, /* after register allocation hook */
1264 	ia32_finish, /* called before codegen */
1265 	ia32_codegen /* emit && done */
1269  * Initializes an IA32 code generator.
1271 static void *ia32_cg_init(const be_irg_t *birg) {
1272 	ia32_isa_t *isa = (ia32_isa_t *)birg->main_env->arch_env->isa;
1273 	ia32_code_gen_t *cg = xcalloc(1, sizeof(*cg));
1275 	cg->impl = &ia32_code_gen_if;
1276 	cg->irg = birg->irg;
1277 	cg->reg_set = new_set(ia32_cmp_irn_reg_assoc, 1024);
1278 	cg->arch_env = birg->main_env->arch_env;
1281 	cg->blk_sched = NULL;
1282 	cg->fp_to_gp = NULL;
1283 	cg->gp_to_fp = NULL;
1284 	cg->fp_kind = isa->fp_kind;
1285 	cg->used_fp = fp_none;
1286 	cg->dump = (birg->main_env->options->dump_flags & DUMP_BE) ? 1 : 0;
1288 	FIRM_DBG_REGISTER(cg->mod, "firm.be.ia32.cg");
1290 	/* copy optimizations from isa for easier access */
1292 	cg->arch = isa->arch;
1293 	cg->opt_arch = isa->opt_arch;
	/* recycle the name obstack between irgs instead of letting it grow */
1299 	if (isa->name_obst_size) {
1300 		//printf("freed %d bytes from name obst\n", isa->name_obst_size);
1301 		isa->name_obst_size = 0;
1302 		obstack_free(isa->name_obst, NULL);
1303 		obstack_init(isa->name_obst);
	/* NOTE(review): file-global state below makes this backend
	 * non-reentrant -- only one code generator may be active at a time */
1307 	cur_reg_set = cg->reg_set;
1309 	ia32_irn_ops.cg = cg;
1311 	return (arch_code_generator_t *)cg;
1316 /*****************************************************************
1317 * ____ _ _ _____ _____
1318 * | _ \ | | | | |_ _|/ ____| /\
1319 * | |_) | __ _ ___| | _____ _ __ __| | | | | (___ / \
1320 * | _ < / _` |/ __| |/ / _ \ '_ \ / _` | | | \___ \ / /\ \
1321 * | |_) | (_| | (__| < __/ | | | (_| | _| |_ ____) / ____ \
1322 * |____/ \__,_|\___|_|\_\___|_| |_|\__,_| |_____|_____/_/ \_\
1324 *****************************************************************/
1327  * Set output modes for GCC
1329 static const tarval_mode_info mo_integer = {
1336  * set the tarval output mode to C-semantics
/* Registers mo_integer as the printing style for all integer-like modes,
 * so emitted constant values follow C semantics. */
1338 static void set_tarval_output_modes(void)
1340 	set_tarval_mode_output_option(get_modeLs(), &mo_integer);
1341 	set_tarval_mode_output_option(get_modeLu(), &mo_integer);
1342 	set_tarval_mode_output_option(get_modeIs(), &mo_integer);
1343 	set_tarval_mode_output_option(get_modeIu(), &mo_integer);
1344 	set_tarval_mode_output_option(get_modeHs(), &mo_integer);
1345 	set_tarval_mode_output_option(get_modeHu(), &mo_integer);
1346 	set_tarval_mode_output_option(get_modeBs(), &mo_integer);
1347 	set_tarval_mode_output_option(get_modeBu(), &mo_integer);
1348 	set_tarval_mode_output_option(get_modeC(), &mo_integer);
1349 	set_tarval_mode_output_option(get_modeU(), &mo_integer);
	/* NOTE(review): modeIu was already registered above -- this repeated
	 * call is redundant (harmless, but can be removed) */
1350 	set_tarval_mode_output_option(get_modeIu(), &mo_integer);
1355  * The template that generates a new ISA object.
1356  * Note that this template can be changed by command line
/* ia32_init() memcpy's this template into a fresh isa object; the lc_opt
 * option variables below point directly into the template, so command-line
 * settings must be parsed before ia32_init() runs to take effect. */
1359 static ia32_isa_t ia32_isa_template = {
1361 	&ia32_isa_if, /* isa interface implementation */
1362 	&ia32_gp_regs[REG_ESP], /* stack pointer register */
1363 	&ia32_gp_regs[REG_EBP], /* base pointer register */
1364 	-1, /* stack direction */
1366 	NULL, /* 16bit register names */
1367 	NULL, /* 8bit register names */
1371 	IA32_OPT_INCDEC | /* optimize add 1, sub 1 into inc/dec default: on */
1372 	IA32_OPT_DOAM | /* optimize address mode default: on */
1373 	IA32_OPT_LEA | /* optimize for LEAs default: on */
1374 	IA32_OPT_PLACECNST | /* place constants immediately before instructions, default: on */
1375 	IA32_OPT_IMMOPS | /* operations can use immediates, default: on */
1376 	IA32_OPT_EXTBB), /* use extended basic block scheduling, default: on */
1377 	arch_pentium_4, /* instruction architecture */
1378 	arch_pentium_4, /* optimize for architecture */
1379 	fp_sse2, /* use sse2 unit */
1380 	NULL, /* current code generator */
1382 	NULL, /* name obstack */
1383 	0 /* name obst size */
1388  * Initializes the backend ISA.
/* Builds the ia32 isa object from the (possibly option-patched) template,
 * selects the FP unit according to the target architecture, and emits the
 * assembler prologue to the output file. */
1390 static void *ia32_init(FILE *file_handle) {
1391 	static int inited = 0;
1397 	set_tarval_output_modes();
1399 	isa = xmalloc(sizeof(*isa));
1400 	memcpy(isa, &ia32_isa_template, sizeof(*isa));
1402 	ia32_register_init(isa);
1403 	ia32_create_opcodes();
	/* fall back to the x87 unit where the selected CPU has no SSE2 */
1405 	if ((ARCH_INTEL(isa->arch) && isa->arch < arch_pentium_4) ||
1406 	(ARCH_AMD(isa->arch) && isa->arch < arch_athlon))
1407 	/* no SSE2 for these CPUs */
1408 	isa->fp_kind = fp_x87;
1410 	if (ARCH_INTEL(isa->opt_arch) && isa->opt_arch >= arch_pentium_4) {
1411 	/* the Pentium 4 doesn't like inc and dec instructions */
1412 	isa->opt &= ~IA32_OPT_INCDEC;
1415 	isa->regs_16bit = pmap_create();
1416 	isa->regs_8bit = pmap_create();
1417 	isa->types = pmap_create();
1418 	isa->tv_ent = pmap_create();
1419 	isa->out = file_handle;
1421 	ia32_build_16bit_reg_map(isa->regs_16bit);
1422 	ia32_build_8bit_reg_map(isa->regs_8bit);
1424 	/* patch register names of x87 registers */
	/* NOTE(review): mutates the global register table; another reason this
	 * init is not reentrant (cf. the 'inited' flag above) */
1426 	ia32_st_regs[0].name = "st";
1427 	ia32_st_regs[1].name = "st(1)";
1428 	ia32_st_regs[2].name = "st(2)";
1429 	ia32_st_regs[3].name = "st(3)";
1430 	ia32_st_regs[4].name = "st(4)";
1431 	ia32_st_regs[5].name = "st(5)";
1432 	ia32_st_regs[6].name = "st(6)";
1433 	ia32_st_regs[7].name = "st(7)";
1437 	isa->name_obst = xmalloc(sizeof(*isa->name_obst));
1438 	obstack_init(isa->name_obst);
1439 	isa->name_obst_size = 0;
1442 	ia32_handle_intrinsics();
1443 	ia32_switch_section(NULL, NO_SECTION);
	/* we emit Intel-syntax assembly, tell GAS so */
1444 	fprintf(isa->out, "\t.intel_syntax\n");
1454  * Closes the output file and frees the ISA structure.
1456 static void ia32_done(void *self) {
1457 	ia32_isa_t *isa = self;
1459 	/* emit now all global declarations */
1460 	ia32_gen_decls(isa->out);
	/* tear down the per-isa lookup tables created in ia32_init() */
1462 	pmap_destroy(isa->regs_16bit);
1463 	pmap_destroy(isa->regs_8bit);
1464 	pmap_destroy(isa->tv_ent);
1465 	pmap_destroy(isa->types);
1468 	//printf("name obst size = %d bytes\n", isa->name_obst_size);
	/* release everything ever allocated on the name obstack */
1469 	obstack_free(isa->name_obst, NULL);
1477  * Return the number of register classes for this architecture.
1478  * We report always these:
1479  * - the general purpose registers
1480  * - the SSE floating point register set
1481  * - the virtual floating point registers
1483 static int ia32_get_n_reg_class(const void *self) {
1488  * Return the register class for index i.
1490 static const arch_register_class_t *ia32_get_reg_class(const void *self, int i) {
1491 	assert(i >= 0 && i < 3 && "Invalid ia32 register class requested.");
	/* presumably index 0 -> gp, 1 -> xmm, 2 -> vfp (the selecting
	 * conditions are not visible here) -- TODO confirm */
1493 	return &ia32_reg_classes[CLASS_ia32_gp];
1495 	return &ia32_reg_classes[CLASS_ia32_xmm];
1497 	return &ia32_reg_classes[CLASS_ia32_vfp];
1501  * Get the register class which shall be used to store a value of a given mode.
1502  * @param self The this pointer.
1503  * @param mode The mode in question.
1504  * @return A register class which can hold values of the given mode.
1506 const arch_register_class_t *ia32_get_reg_class_for_mode(const void *self, const ir_mode *mode) {
1507 	const ia32_isa_t *isa = self;
1508 	if (mode_is_float(mode)) {
	/* floats live in xmm registers when SSE2 is used, otherwise in the
	 * virtual x87 class */
1509 	return USE_SSE2(isa) ? &ia32_reg_classes[CLASS_ia32_xmm] : &ia32_reg_classes[CLASS_ia32_vfp];
	/* all non-float values go to the general purpose class */
1512 	return &ia32_reg_classes[CLASS_ia32_gp];
1516  * Get the ABI restrictions for procedure calls.
1517  * @param self The this pointer.
1518  * @param method_type The type of the method (procedure) in question.
1519  * @param abi The abi object to be modified
1521 static void ia32_get_call_abi(const void *self, ir_type *method_type, be_abi_call_t *abi) {
1522 	const ia32_isa_t *isa = self;
1525 	unsigned cc = get_method_calling_convention(method_type);
1526 	int n = get_method_n_params(method_type);
1529 	int i, ignore_1, ignore_2;
1531 	const arch_register_t *reg;
1532 	be_abi_call_flags_t call_flags = be_abi_call_get_flags(abi);
	/* P6-style cores get sequential stores instead of push sequences */
1534 	unsigned use_push = !IS_P6_ARCH(isa->opt_arch);
1536 	/* set abi flags for calls */
1537 	call_flags.bits.left_to_right = 0; /* always last arg first on stack */
1538 	call_flags.bits.store_args_sequential = use_push;
1539 	/* call_flags.bits.try_omit_fp not changed: can handle both settings */
1540 	call_flags.bits.fp_free = 0; /* the frame pointer is fixed in IA32 */
1541 	call_flags.bits.call_has_imm = 1; /* IA32 calls can have immediate address */
1543 	/* set stack parameter passing style */
1544 	be_abi_call_set_flags(abi, call_flags, &ia32_abi_callbacks);
1546 	/* collect the mode for each type */
1547 	modes = alloca(n * sizeof(modes[0]));
1549 	for (i = 0; i < n; i++) {
1550 	tp = get_method_param_type(method_type, i);
1551 	modes[i] = get_type_mode(tp);
1554 	/* set register parameters */
1555 	if (cc & cc_reg_param) {
1556 	/* determine the number of parameters passed via registers */
1557 	biggest_n = ia32_get_n_regparam_class(n, modes, &ignore_1, &ignore_2);
1559 	/* loop over all parameters and set the register requirements */
1560 	for (i = 0; i <= biggest_n; i++) {
1561 	reg = ia32_get_RegParam_reg(n, modes, i, cc);
1562 	assert(reg && "kaputt");
1563 	be_abi_call_param_reg(abi, i, reg);
1570 	/* set stack parameters */
1571 	for (i = stack_idx; i < n; i++) {
1572 	be_abi_call_param_stack(abi, i, 1, 0, 0);
1576 	/* set return registers */
1577 	n = get_method_n_ress(method_type);
1579 	assert(n <= 2 && "more than two results not supported");
1581 	/* In case of 64bit returns, we will have two 32bit values */
1583 	tp = get_method_res_type(method_type, 0);
1584 	mode = get_type_mode(tp);
1586 	assert(!mode_is_float(mode) && "two FP results not supported");
1588 	tp = get_method_res_type(method_type, 1);
1589 	mode = get_type_mode(tp);
1591 	assert(!mode_is_float(mode) && "two FP results not supported");
	/* two 32bit integer results: result 0 in EAX, result 1 in EDX */
1593 	be_abi_call_res_reg(abi, 0, &ia32_gp_regs[REG_EAX]);
1594 	be_abi_call_res_reg(abi, 1, &ia32_gp_regs[REG_EDX]);
1597 	const arch_register_t *reg;
1599 	tp = get_method_res_type(method_type, 0);
1600 	assert(is_atomic_type(tp));
1601 	mode = get_type_mode(tp);
	/* single result: FP in XMM0/VF0 depending on the FP unit, int in EAX */
1603 	reg = mode_is_float(mode) ?
1604 	(USE_SSE2(isa) ? &ia32_xmm_regs[REG_XMM0] : &ia32_vfp_regs[REG_VF0]) :
1605 	&ia32_gp_regs[REG_EAX];
1607 	be_abi_call_res_reg(abi, 0, reg);
/* All nodes share the single global ia32_irn_ops instance. */
1612 static const void *ia32_get_irn_ops(const arch_irn_handler_t *self, const ir_node *irn) {
1613 	return &ia32_irn_ops;
1616 const arch_irn_handler_t ia32_irn_handler = {
1620 const arch_irn_handler_t *ia32_get_irn_handler(const void *self) {
1621 	return &ia32_irn_handler;
/* Scheduling predicate: 1 for real ia32 nodes; -1 otherwise
 * (presumably "let the generic scheduler decide" -- TODO confirm). */
1624 int ia32_to_appear_in_schedule(void *block_env, const ir_node *irn) {
1625 	return is_ia32_irn(irn) ? 1 : -1;
1629  * Initializes the code generator interface.
1631 static const arch_code_generator_if_t *ia32_get_code_generator_if(void *self) {
1632 	return &ia32_code_gen_if;
1636  * Returns the estimated execution time of an ia32 irn.
1638 static sched_timestep_t ia32_sched_exectime(void *env, const ir_node *irn) {
1639 	const arch_env_t *arch_env = env;
	/* non-ia32 nodes get a flat default cost of 1 */
1640 	return is_ia32_irn(irn) ? ia32_get_op_estimated_cost(arch_get_irn_ops(arch_env, irn), irn) : 1;
/* Writable copy of the generic selector, patched with ia32 callbacks. */
1643 list_sched_selector_t ia32_sched_selector;
1646  * Returns the reg_pressure scheduler with to_appear_in_schedule() overloaded
1648 static const list_sched_selector_t *ia32_get_list_sched_selector(const void *self, list_sched_selector_t *selector) {
	/* start from the provided selector, then override the two hooks */
1649 	memcpy(&ia32_sched_selector, selector, sizeof(ia32_sched_selector));
1650 	ia32_sched_selector.exectime = ia32_sched_exectime;
1651 	ia32_sched_selector.to_appear_in_schedule = ia32_to_appear_in_schedule;
1652 	return &ia32_sched_selector;
1656  * Returns the necessary byte alignment for storing a register of given class.
1658 static int ia32_get_reg_class_alignment(const void *self, const arch_register_class_t *cls) {
1659 	ir_mode *mode = arch_register_class_mode(cls);
1660 	int bytes = get_mode_size_bytes(mode);
	/* float registers wider than 8 bytes get special alignment
	 * (the returned value is not visible in this excerpt) */
1662 	if (mode_is_float(mode) && bytes > 8)
/* Context handed to ia32_create_intrinsic_fkt via the backend params. */
1667 static ia32_intrinsic_env_t intrinsic_env = { NULL, NULL };
1670  * Returns the libFirm configuration parameter for this backend.
1672 static const backend_params *ia32_get_libfirm_params(void) {
	/* architecture-dependent lowering parameters (see per-field comments) */
1673 	static const arch_dep_params_t ad = {
1674 	1, /* also use subs */
1675 	4, /* maximum shifts */
1676 	31, /* maximum shift amount */
1678 	1, /* allow Mulhs */
1679 	1, /* allow Mulus */
1680 	32 /* Mulh allowed up to 32 bit */
1682 	static backend_params p = {
1683 	NULL, /* no additional opcodes */
1684 	NULL, /* will be set later */
1685 	1, /* need dword lowering */
1686 	ia32_create_intrinsic_fkt,
1687 	&intrinsic_env, /* context for ia32_create_intrinsic_fkt */
1695 /* instruction set architectures. */
/* Maps the -march style option strings to arch_* enum values; several
 * spellings alias the same architecture. */
1696 static const lc_opt_enum_int_items_t arch_items[] = {
1697 	{ "386", arch_i386, },
1698 	{ "486", arch_i486, },
1699 	{ "pentium", arch_pentium, },
1700 	{ "586", arch_pentium, },
1701 	{ "pentiumpro", arch_pentium_pro, },
1702 	{ "686", arch_pentium_pro, },
1703 	{ "pentiummmx", arch_pentium_mmx, },
1704 	{ "pentium2", arch_pentium_2, },
1705 	{ "p2", arch_pentium_2, },
1706 	{ "pentium3", arch_pentium_3, },
1707 	{ "p3", arch_pentium_3, },
1708 	{ "pentium4", arch_pentium_4, },
1709 	{ "p4", arch_pentium_4, },
1710 	{ "pentiumm", arch_pentium_m, },
1711 	{ "pm", arch_pentium_m, },
1712 	{ "core", arch_core, },
1714 	{ "athlon", arch_athlon, },
1715 	{ "athlon64", arch_athlon_64, },
1716 	{ "opteron", arch_opteron, },
/* The option variables below write straight into ia32_isa_template, so
 * they must be parsed before ia32_init() copies the template. */
1720 static lc_opt_enum_int_var_t arch_var = {
1721 	&ia32_isa_template.arch, arch_items
1724 static lc_opt_enum_int_var_t opt_arch_var = {
1725 	&ia32_isa_template.opt_arch, arch_items
1728 static const lc_opt_enum_int_items_t fp_unit_items[] = {
1730 	{ "sse2", fp_sse2 },
1734 static lc_opt_enum_int_var_t fp_unit_var = {
1735 	&ia32_isa_template.fp_kind, fp_unit_items
1738 static const lc_opt_enum_int_items_t gas_items[] = {
1739 	{ "linux", ASM_LINUX_GAS },
1740 	{ "mingw", ASM_MINGW_GAS },
1744 static lc_opt_enum_int_var_t gas_var = {
1745 	(int *)&asm_flavour, gas_items
/* Command-line option table for the "ia32" option group. */
1748 static const lc_opt_table_entry_t ia32_options[] = {
1749 	LC_OPT_ENT_ENUM_INT("arch", "select the instruction architecture", &arch_var),
1750 	LC_OPT_ENT_ENUM_INT("opt", "optimize for instruction architecture", &opt_arch_var),
1751 	LC_OPT_ENT_ENUM_INT("fpunit", "select the floating point unit", &fp_unit_var),
1752 	LC_OPT_ENT_NEGBIT("noaddrmode", "do not use address mode", &ia32_isa_template.opt, IA32_OPT_DOAM),
1753 	LC_OPT_ENT_NEGBIT("nolea", "do not optimize for LEAs", &ia32_isa_template.opt, IA32_OPT_LEA),
1754 	LC_OPT_ENT_NEGBIT("noplacecnst", "do not place constants", &ia32_isa_template.opt, IA32_OPT_PLACECNST),
1755 	LC_OPT_ENT_NEGBIT("noimmop", "no operations with immediates", &ia32_isa_template.opt, IA32_OPT_IMMOPS),
1756 	LC_OPT_ENT_NEGBIT("noextbb", "do not use extended basic block scheduling", &ia32_isa_template.opt, IA32_OPT_EXTBB),
1757 	LC_OPT_ENT_ENUM_INT("gasmode", "set the GAS compatibility mode", &gas_var),
1762  * Register command line options for the ia32 backend.
1766  * ia32-arch=arch create instruction for arch
1767  * ia32-opt=arch optimize for run on arch
1768  * ia32-fpunit=unit select floating point unit (x87 or SSE2)
1769  * ia32-incdec optimize for inc/dec
1770  * ia32-noaddrmode do not use address mode
1771  * ia32-nolea do not optimize for LEAs
1772  * ia32-noplacecnst do not place constants,
1773  * ia32-noimmop no operations with immediates
1774  * ia32-noextbb do not use extended basic block scheduling
1775  * ia32-gasmode set the GAS compatibility mode
1777 static void ia32_register_options(lc_opt_entry_t *ent)
	/* attach the ia32_options table to the "ia32" option sub-group */
1779 	lc_opt_entry_t *be_grp_ia32 = lc_opt_get_grp(ent, "ia32");
1780 	lc_opt_add_table(be_grp_ia32, ia32_options);
1782 #endif /* WITH_LIBCORE */
/* The public ISA interface of the ia32 backend: the vtable through which
 * the generic backend driver talks to this backend. */
1784 const arch_isa_if_t ia32_isa_if = {
1787 	ia32_get_n_reg_class,
1789 	ia32_get_reg_class_for_mode,
1791 	ia32_get_irn_handler,
1792 	ia32_get_code_generator_if,
1793 	ia32_get_list_sched_selector,
1794 	ia32_get_reg_class_alignment,
1795 	ia32_get_libfirm_params,
1797 	ia32_register_options