2 * This is the main ia32 firm backend driver.
3 * @author Christian Wuerdig
20 #include <libcore/lc_opts.h>
21 #include <libcore/lc_opts_enum.h>
22 #endif /* WITH_LIBCORE */
26 #include "pseudo_irg.h"
30 #include "iredges_t.h"
38 #include "../beabi.h" /* the general register allocator interface */
39 #include "../benode_t.h"
40 #include "../belower.h"
41 #include "../besched_t.h"
44 #include "bearch_ia32_t.h"
46 #include "ia32_new_nodes.h" /* ia32 nodes interface */
47 #include "gen_ia32_regalloc_if.h" /* the generated interface (register type and class defenitions) */
48 #include "ia32_gen_decls.h" /* interface declaration emitter */
49 #include "ia32_transform.h"
50 #include "ia32_emitter.h"
51 #include "ia32_map_regs.h"
52 #include "ia32_optimize.h"
54 #include "ia32_dbg_stat.h"
55 #include "ia32_finish.h"
56 #include "ia32_util.h"
#define DEBUG_MODULE "firm.be.ia32.isa"
/* Fallback register map for non-ia32 nodes; filled/read by
 * ia32_set_irn_reg()/ia32_get_irn_reg() via ia32_set_firm_reg()/ia32_get_firm_reg(). */
static set *cur_reg_set = NULL;
/* This libfirm vintage has no is_Start() predicate, so define one via the opcode. */
#define is_Start(irn) (get_irn_opcode(irn) == iro_Start)
66 /* Creates the unique per irg GP NoReg node. */
67 ir_node *ia32_new_NoReg_gp(ia32_code_gen_t *cg) {
68 return be_abi_get_callee_save_irn(cg->birg->abi, &ia32_gp_regs[REG_GP_NOREG]);
71 /* Creates the unique per irg FP NoReg node. */
72 ir_node *ia32_new_NoReg_fp(ia32_code_gen_t *cg) {
73 return be_abi_get_callee_save_irn(cg->birg->abi,
74 USE_SSE2(cg) ? &ia32_xmm_regs[REG_XMM_NOREG] : &ia32_vfp_regs[REG_VFP_NOREG]);
77 /**************************************************
80 * _ __ ___ __ _ __ _| | | ___ ___ _| |_
81 * | '__/ _ \/ _` | / _` | | |/ _ \ / __| | | _|
82 * | | | __/ (_| | | (_| | | | (_) | (__ | | |
83 * |_| \___|\__, | \__,_|_|_|\___/ \___| |_|_|
86 **************************************************/
88 static ir_node *my_skip_proj(const ir_node *n) {
/**
 * Return register requirements for an ia32 node.
 * If the node returns a tuple (mode_T) then the proj's
 * will be asked for this information.
 *
 * NOTE(review): several source lines are missing in this copy
 * (unbalanced braces, missing return statements) — restore from VCS.
 */
static const arch_register_req_t *ia32_get_irn_reg_req(const void *self, arch_register_req_t *req, const ir_node *irn, int pos) {
	const ia32_irn_ops_t *ops = self;
	const ia32_register_req_t *irn_req;
	long node_pos = pos == -1 ? 0 : pos;  /* pos < 0 requests an OUT requirement */
	ir_mode *mode = is_Block(irn) ? NULL : get_irn_mode(irn);
	FIRM_DBG_REGISTER(firm_dbg_module_t *mod, DEBUG_MODULE);

	/* Blocks and mode_M/mode_X nodes carry no register */
	if (is_Block(irn) || mode == mode_M || mode == mode_X) {
		DBG((mod, LEVEL_1, "ignoring Block, mode_M, mode_X node %+F\n", irn));

	/* a mode_T node itself has no OUT requirement; its Projs are asked instead */
	if (mode == mode_T && pos < 0) {
		DBG((mod, LEVEL_1, "ignoring request OUT requirements for node %+F\n", irn));

	DBG((mod, LEVEL_1, "get requirements at pos %d for %+F ... ", pos, irn));

	/* a Proj is translated to the corresponding out position of its predecessor */
		node_pos = ia32_translate_proj_pos(irn);
		irn = my_skip_proj(irn);

		DB((mod, LEVEL_1, "skipping Proj, going to %+F at pos %d ... ", irn, node_pos));

	if (is_ia32_irn(irn)) {
			irn_req = get_ia32_in_req(irn, pos);       /* IN requirement ... */
			irn_req = get_ia32_out_req(irn, node_pos); /* ... or OUT requirement (branch lines missing) */

		DB((mod, LEVEL_1, "returning reqs for %+F at pos %d\n", irn, pos));

		memcpy(req, &(irn_req->req), sizeof(*req));

		/* propagate a "must be the same register as input same_pos" constraint */
		if (arch_register_req_is(&(irn_req->req), should_be_same)) {
			assert(irn_req->same_pos >= 0 && "should be same constraint for in -> out NYI");
			req->other_same = get_irn_n(irn, irn_req->same_pos);

		/* propagate a "must differ from the register of input different_pos" constraint */
		if (arch_register_req_is(&(irn_req->req), should_be_different)) {
			assert(irn_req->different_pos >= 0 && "should be different constraint for in -> out NYI");
			req->other_different = get_irn_n(irn, irn_req->different_pos);

		/* treat Unknowns like Const with default requirements */
		if (is_Unknown(irn)) {
			DB((mod, LEVEL_1, "returning UKNWN reqs for %+F\n", irn));
			if (mode_is_float(mode)) {
				if (USE_SSE2(ops->cg))
					memcpy(req, &(ia32_default_req_ia32_xmm_xmm_UKNWN), sizeof(*req));
					memcpy(req, &(ia32_default_req_ia32_vfp_vfp_UKNWN), sizeof(*req)); /* else-branch; 'else' line missing */
			else if (mode_is_int(mode) || mode_is_reference(mode))
				memcpy(req, &(ia32_default_req_ia32_gp_gp_UKNWN), sizeof(*req));
			else if (mode == mode_T || mode == mode_M) {
				DBG((mod, LEVEL_1, "ignoring Unknown node %+F\n", irn));

			assert(0 && "unsupported Unknown-Mode");

		DB((mod, LEVEL_1, "returning NULL for %+F (not ia32)\n", irn));
/**
 * Records the register assigned to @p irn: ia32 nodes get it stored into
 * their register-slot array, other nodes go to the cur_reg_set fallback.
 * NOTE(review): lines are missing in this copy (unbalanced braces,
 * missing 'pos' declaration) — restore from VCS.
 */
static void ia32_set_irn_reg(const void *self, ir_node *irn, const arch_register_t *reg) {
	const ia32_irn_ops_t *ops = self;

	/* mode_X (control flow) nodes carry no register */
	if (get_irn_mode(irn) == mode_X) {

	DBG((ops->cg->mod, LEVEL_1, "ia32 assigned register %s to node %+F\n", reg->name, irn));

	/* a Proj denotes one particular result of its predecessor */
		pos = ia32_translate_proj_pos(irn);
		irn = my_skip_proj(irn);

	if (is_ia32_irn(irn)) {
		const arch_register_t **slots;
		slots = get_ia32_slots(irn);
		/* non-ia32 node: remember the register in the global fallback set */
		ia32_set_firm_reg(irn, reg, cur_reg_set);
/**
 * Returns the register previously assigned to @p irn (counterpart of
 * ia32_set_irn_reg); NULL for register-less nodes.
 * NOTE(review): lines are missing in this copy (unbalanced braces,
 * missing 'pos' declaration, missing return) — restore from VCS.
 */
static const arch_register_t *ia32_get_irn_reg(const void *self, const ir_node *irn) {
	const arch_register_t *reg = NULL;

	/* mode_X (control flow) nodes carry no register */
	if (get_irn_mode(irn) == mode_X) {

		pos = ia32_translate_proj_pos(irn);
		irn = my_skip_proj(irn);

	if (is_ia32_irn(irn)) {
		const arch_register_t **slots;
		slots = get_ia32_slots(irn);
		/* non-ia32 node: look the register up in the global fallback set */
		reg = ia32_get_firm_reg(irn, cur_reg_set);
/**
 * Classifies @p irn for the backend by OR-ing arch_irn_class_* flags
 * (branch/const/load/store/reload on top of 'normal').
 * NOTE(review): the condition lines for the branch and load
 * classifications are missing in this copy — restore from VCS.
 */
static arch_irn_class_t ia32_classify(const void *self, const ir_node *irn) {
	arch_irn_class_t classification = arch_irn_class_normal;

	irn = my_skip_proj(irn);

	/* (condition line missing here) */
		classification |= arch_irn_class_branch;

	if (! is_ia32_irn(irn))
		return classification & ~arch_irn_class_normal;

	if (is_ia32_Cnst(irn))
		classification |= arch_irn_class_const;

	/* (condition line missing here — presumably an is_ia32_Ld() test) */
		classification |= arch_irn_class_load;

	if (is_ia32_St(irn) || is_ia32_Store8Bit(irn))
		classification |= arch_irn_class_store;

	if (is_ia32_got_reload(irn))
		classification |= arch_irn_class_reload;

	return classification;
/**
 * Returns the arch flags of @p irn. Stack Projs of Push/Pop/AddSP report
 * arch_irn_flags_modify_sp; ia32 nodes return their stored flags;
 * everything else is ignored by the allocator.
 * NOTE(review): the is_Proj() guard and several closing braces are
 * missing in this copy — restore from VCS.
 */
static arch_irn_flags_t ia32_get_flags(const void *self, const ir_node *irn) {
	ir_node *pred = get_Proj_pred(irn);

	if(is_ia32_Push(pred) && get_Proj_proj(irn) == pn_ia32_Push_stack) {
		return arch_irn_flags_modify_sp;
	if(is_ia32_Pop(pred) && get_Proj_proj(irn) == pn_ia32_Pop_stack) {
		return arch_irn_flags_modify_sp;
	if(is_ia32_AddSP(pred) && get_Proj_proj(irn) == pn_ia32_AddSP_stack) {
		return arch_irn_flags_modify_sp;

	irn = my_skip_proj(irn);
	if (is_ia32_irn(irn))
		return get_ia32_flags(irn);

	/* non-ia32 nodes: not interesting to the register allocator */
	return arch_irn_flags_ignore;
285 static entity *ia32_get_frame_entity(const void *self, const ir_node *irn) {
286 return is_ia32_irn(irn) ? get_ia32_frame_ent(irn) : NULL;
289 static void ia32_set_frame_entity(const void *self, ir_node *irn, entity *ent) {
290 set_ia32_frame_ent(irn, ent);
/**
 * Applies the stack bias @p bias to a frame-entity-using node: for
 * immediate (ia32_Normal) nodes the constant is set directly, otherwise
 * the bias is added to the address-mode offset.
 * NOTE(review): lines are missing in this copy (the buffer declaration,
 * the Pop special-case body, braces) — restore from VCS.
 */
static void ia32_set_frame_offset(const void *self, ir_node *irn, int bias) {
	const ia32_irn_ops_t *ops = self;

	if (get_ia32_frame_ent(irn)) {
		ia32_am_flavour_t am_flav = get_ia32_am_flavour(irn);

		/* Pop nodes modify the stack pointer before reading the destination
		 * address, so fix this here
		 */
		if(is_ia32_Pop(irn)) {

		DBG((ops->cg->mod, LEVEL_1, "stack biased %+F with %d\n", irn, bias));

		/* render the bias as a decimal string for the attribute setters */
		snprintf(buf, sizeof(buf), "%d", bias);

		if (get_ia32_op_type(irn) == ia32_Normal) {
			set_ia32_cnst(irn, buf);
			/* address-mode node: accumulate into the am offset instead */
			add_ia32_am_offs(irn, buf);
			set_ia32_am_flavour(irn, am_flav);
/**
 * Returns the stack-pointer delta caused by @p irn (queried through the
 * stack Proj of a Push/Pop).
 * NOTE(review): the return statements (and likely a default return 0)
 * are missing in this copy — restore from VCS.
 */
static int ia32_get_sp_bias(const void *self, const ir_node *irn) {
	int proj = get_Proj_proj(irn);
	ir_node *pred = get_Proj_pred(irn);

	if(is_ia32_Push(pred) && proj == 0)
	else if(is_ia32_Pop(pred) && proj == 1)
	/* NOTE(review): the enclosing 'typedef struct ... ia32_abi_env_t'
	 * header and trailer lines are missing in this copy. The members are
	 * used by the ABI callbacks below. */
	be_abi_call_flags_bits_t flags;  /* call flags, e.g. try_omit_fp */
	const arch_isa_t *isa;           /* ISA handle, provides sp/bp registers */
	const arch_env_t *aenv;          /* arch environment for register assignment */
343 static void *ia32_abi_init(const be_abi_call_t *call, const arch_env_t *aenv, ir_graph *irg)
345 ia32_abi_env_t *env = xmalloc(sizeof(env[0]));
346 be_abi_call_flags_t fl = be_abi_call_get_flags(call);
347 env->flags = fl.bits;
350 env->isa = aenv->isa;
355 * Put all registers which are saved by the prologue/epilogue in a set.
357 * @param self The callback object.
358 * @param s The result set.
360 static void ia32_abi_dont_save_regs(void *self, pset *s)
362 ia32_abi_env_t *env = self;
363 if(env->flags.try_omit_fp)
364 pset_insert_ptr(s, env->isa->bp);
/**
 * Generate the routine prologue.
 *
 * @param self The callback object.
 * @param mem A pointer to the mem node. Update this if you define new memory.
 * @param reg_map A map mapping all callee_save/ignore/parameter registers to their defining nodes.
 *
 * @return The register which shall be used as a stack frame base.
 *
 * All nodes which define registers in @p reg_map must keep @p reg_map current.
 *
 * NOTE(review): lines are missing in this copy (opening brace, the 'push'
 * declaration, the return statements, closing braces) — restore from VCS.
 */
static const arch_register_t *ia32_abi_prologue(void *self, ir_node **mem, pmap *reg_map)
	ia32_abi_env_t *env = self;

	/* only build the ebp-frame setup when the frame pointer is not omitted */
	if (! env->flags.try_omit_fp) {
		ir_node *bl = get_irg_start_block(env->irg);
		ir_node *curr_sp = be_abi_reg_map_get(reg_map, env->isa->sp);
		ir_node *curr_bp = be_abi_reg_map_get(reg_map, env->isa->bp);

		/* push ebp */
		push = new_rd_ia32_Push(NULL, env->irg, bl, curr_sp, curr_bp, *mem);
		curr_sp = new_r_Proj(env->irg, bl, push, get_irn_mode(curr_sp), pn_ia32_Push_stack);
		*mem = new_r_Proj(env->irg, bl, push, mode_M, pn_ia32_Push_M);

		/* the push must have SP out register */
		arch_set_irn_register(env->aenv, curr_sp, env->isa->sp);
		set_ia32_flags(push, arch_irn_flags_ignore);

		/* move esp to ebp */
		curr_bp = be_new_Copy(env->isa->bp->reg_class, env->irg, bl, curr_sp);
		be_set_constr_single_reg(curr_bp, BE_OUT_POS(0), env->isa->bp);
		arch_set_irn_register(env->aenv, curr_bp, env->isa->bp);
		be_node_set_flags(curr_bp, BE_OUT_POS(0), arch_irn_flags_ignore);

		/* beware: the copy must be done before any other sp use */
		curr_sp = be_new_CopyKeep_single(env->isa->sp->reg_class, env->irg, bl, curr_sp, curr_bp, get_irn_mode(curr_sp));
		be_set_constr_single_reg(curr_sp, BE_OUT_POS(0), env->isa->sp);
		arch_set_irn_register(env->aenv, curr_sp, env->isa->sp);
		be_node_set_flags(curr_sp, BE_OUT_POS(0), arch_irn_flags_ignore);

		be_abi_reg_map_set(reg_map, env->isa->sp, curr_sp);
		be_abi_reg_map_set(reg_map, env->isa->bp, curr_bp);
/**
 * Generate the routine epilogue.
 * @param self The callback object.
 * @param bl The block for the epilog
 * @param mem A pointer to the mem node. Update this if you define new memory.
 * @param reg_map A map mapping all callee_save/ignore/parameter registers to their defining nodes.
 * @return The register which shall be used as a stack frame base.
 *
 * All nodes which define registers in @p reg_map must keep @p reg_map current.
 *
 * NOTE(review): lines are missing in this copy (opening brace, the
 * 'leave'/'pop' declarations, the else-branches, closing braces) — restore
 * from VCS.
 */
static void ia32_abi_epilogue(void *self, ir_node *bl, ir_node **mem, pmap *reg_map)
	ia32_abi_env_t *env = self;
	ir_node *curr_sp = be_abi_reg_map_get(reg_map, env->isa->sp);
	ir_node *curr_bp = be_abi_reg_map_get(reg_map, env->isa->bp);

	if (env->flags.try_omit_fp) {
		/* simply remove the stack frame here */
		curr_sp = be_new_IncSP(env->isa->sp, env->irg, bl, curr_sp, BE_STACK_FRAME_SIZE_SHRINK);
		add_irn_dep(curr_sp, *mem);
		/* (else-branch — frame pointer was used) */
		const ia32_isa_t *isa = (ia32_isa_t *)env->isa;
		ir_mode *mode_bp = env->isa->bp->reg_class->mode;

		/* gcc always emits a leave at the end of a routine */
		if (1 || ARCH_AMD(isa->opt_arch)) {
			/* leave destroys the frame: restores ebp and esp in one instruction */
			leave = new_rd_ia32_Leave(NULL, env->irg, bl, curr_sp, curr_bp);
			set_ia32_flags(leave, arch_irn_flags_ignore);
			curr_bp = new_r_Proj(current_ir_graph, bl, leave, mode_bp, pn_ia32_Leave_frame);
			curr_sp = new_r_Proj(current_ir_graph, bl, leave, get_irn_mode(curr_sp), pn_ia32_Leave_stack);
			*mem = new_r_Proj(current_ir_graph, bl, leave, mode_M, pn_ia32_Leave_M);

			/* copy ebp to esp */
			curr_sp = be_new_SetSP(env->isa->sp, env->irg, bl, curr_sp, curr_bp, *mem);

			/* pop ebp */
			pop = new_rd_ia32_Pop(NULL, env->irg, bl, curr_sp, *mem);
			set_ia32_flags(pop, arch_irn_flags_ignore);
			curr_bp = new_r_Proj(current_ir_graph, bl, pop, mode_bp, pn_ia32_Pop_res);
			curr_sp = new_r_Proj(current_ir_graph, bl, pop, get_irn_mode(curr_sp), pn_ia32_Pop_stack);
			*mem = new_r_Proj(current_ir_graph, bl, pop, mode_M, pn_ia32_Pop_M);

	arch_set_irn_register(env->aenv, curr_sp, env->isa->sp);
	arch_set_irn_register(env->aenv, curr_bp, env->isa->bp);

	be_abi_reg_map_set(reg_map, env->isa->sp, curr_sp);
	be_abi_reg_map_set(reg_map, env->isa->bp, curr_bp);
/**
 * Produces the type which sits between the stack args and the locals on the stack.
 * it will contain the return address and space to store the old base pointer.
 * @return The Firm type modeling the ABI between type.
 *
 * The two type variants (with/without saved ebp) are built lazily on first
 * call and cached in static variables.
 * NOTE(review): lines are missing in this copy (opening brace, #undef
 * IDENT, closing braces) — restore from VCS.
 */
static ir_type *ia32_abi_get_between_type(void *self)
#define IDENT(s) new_id_from_chars(s, sizeof(s)-1)
	static ir_type *omit_fp_between_type = NULL;
	static ir_type *between_type = NULL;

	ia32_abi_env_t *env = self;

	/* lazily build both layouts on the first call */
	if ( !between_type) {
		entity *ret_addr_ent;
		entity *omit_fp_ret_addr_ent;

		ir_type *old_bp_type = new_type_primitive(IDENT("bp"), mode_P);
		ir_type *ret_addr_type = new_type_primitive(IDENT("return_addr"), mode_P);

		/* layout with frame pointer: [old ebp][return address] */
		between_type = new_type_struct(IDENT("ia32_between_type"));
		old_bp_ent = new_entity(between_type, IDENT("old_bp"), old_bp_type);
		ret_addr_ent = new_entity(between_type, IDENT("ret_addr"), ret_addr_type);

		set_entity_offset_bytes(old_bp_ent, 0);
		set_entity_offset_bytes(ret_addr_ent, get_type_size_bytes(old_bp_type));
		set_type_size_bytes(between_type, get_type_size_bytes(old_bp_type) + get_type_size_bytes(ret_addr_type));
		set_type_state(between_type, layout_fixed);

		/* layout without frame pointer: only the return address */
		omit_fp_between_type = new_type_struct(IDENT("ia32_between_type_omit_fp"));
		omit_fp_ret_addr_ent = new_entity(omit_fp_between_type, IDENT("ret_addr"), ret_addr_type);

		set_entity_offset_bytes(omit_fp_ret_addr_ent, 0);
		set_type_size_bytes(omit_fp_between_type, get_type_size_bytes(ret_addr_type));
		set_type_state(omit_fp_between_type, layout_fixed);

	return env->flags.try_omit_fp ? omit_fp_between_type : between_type;
/**
 * Get the estimated cycle count for @p irn.
 *
 * @param self The this pointer.
 * @param irn The node.
 *
 * @return The estimated cycle count for this operation
 *
 * NOTE(review): lines are missing in this copy (the 'cost' declaration,
 * the CopyB cost assignments, the return statement, braces) — restore
 * from VCS.
 */
static int ia32_get_op_estimated_cost(const void *self, const ir_node *irn)
	ia32_op_type_t op_tp;
	const ia32_irn_ops_t *ops = self;

	assert(is_ia32_irn(irn));

	cost = get_ia32_latency(irn);
	op_tp = get_ia32_op_type(irn);

	if (is_ia32_CopyB(irn)) {
		/* (cost assignment line missing) */
		if (ARCH_INTEL(ops->cg->arch))
	/* size-known block copy: cost scales with the byte count */
	else if (is_ia32_CopyB_i(irn)) {
		int size = get_tarval_long(get_ia32_Immop_tarval(irn));
		cost = 20 + (int)ceil((4/3) * size);
		if (ARCH_INTEL(ops->cg->arch))

	/* in case of address mode operations add additional cycles */
	else if (op_tp == ia32_AddrModeD || op_tp == ia32_AddrModeS) {
		/*
		In case of stack access add 5 cycles (we assume stack is in cache),
		other memory operations cost 20 cycles.
		*/
		cost += is_ia32_use_frame(irn) ? 5 : 20;
/**
 * Returns the inverse operation if @p irn, recalculating the argument at position @p i.
 *
 * @param irn The original operation
 * @param i Index of the argument we want the inverse operation to yield
 * @param inverse struct to be filled with the resulting inverse op
 * @param obstack The obstack to use for allocation of the returned nodes array
 * @return The inverse operation or NULL if operation invertible
 *
 * NOTE(review): many lines are missing in this copy (irg/mode/pnc
 * declarations, 'return NULL' statements, cost assignments, case labels,
 * braces) — restore from VCS.
 */
static arch_inverse_t *ia32_get_inverse(const void *self, const ir_node *irn, int i, arch_inverse_t *inverse, struct obstack *obst) {
	ir_node *block, *noreg, *nomem;

	/* we cannot invert non-ia32 irns */
	if (! is_ia32_irn(irn))

	/* operand must always be a real operand (not base, index or mem) */
	if (i != 2 && i != 3)

	/* we don't invert address mode operations */
	if (get_ia32_op_type(irn) != ia32_Normal)

	irg = get_irn_irg(irn);
	block = get_nodes_block(irn);
	mode = get_ia32_res_mode(irn);
	noreg = get_irn_n(irn, 0);  /* input 0 is the base register slot (NoReg here) */
	nomem = new_r_NoMem(irg);

	/* initialize structure */
	inverse->nodes = obstack_alloc(obst, 2 * sizeof(inverse->nodes[0]));

	switch (get_ia32_irn_opcode(irn)) {
		/* (case iro_ia32_Add — label line missing) */
		if (get_ia32_immop_type(irn) == ia32_ImmConst) {
			/* we have an add with a const here */
			/* invers == add with negated const */
			inverse->nodes[0] = new_rd_ia32_Add(NULL, irg, block, noreg, noreg, get_irn_n(irn, i), noreg, nomem);
			pnc = pn_ia32_Add_res;
			copy_ia32_Immop_attr(inverse->nodes[0], (ir_node *)irn);
			set_ia32_Immop_tarval(inverse->nodes[0], tarval_neg(get_ia32_Immop_tarval(irn)));
			set_ia32_commutative(inverse->nodes[0]);
		else if (get_ia32_immop_type(irn) == ia32_ImmSymConst) {
			/* we have an add with a symconst here */
			/* invers == sub with const */
			inverse->nodes[0] = new_rd_ia32_Sub(NULL, irg, block, noreg, noreg, get_irn_n(irn, i), noreg, nomem);
			pnc = pn_ia32_Sub_res;
			copy_ia32_Immop_attr(inverse->nodes[0], (ir_node *)irn);
			/* normal add: inverse == sub */
			ir_node *proj = ia32_get_res_proj(irn);
			inverse->nodes[0] = new_rd_ia32_Sub(NULL, irg, block, noreg, noreg, proj, get_irn_n(irn, i ^ 1), nomem);
			pnc = pn_ia32_Sub_res;
		/* (case iro_ia32_Sub — label line missing) */
		if (get_ia32_immop_type(irn) != ia32_ImmNone) {
			/* we have a sub with a const/symconst here */
			/* invers == add with this const */
			inverse->nodes[0] = new_rd_ia32_Add(NULL, irg, block, noreg, noreg, get_irn_n(irn, i), noreg, nomem);
			pnc = pn_ia32_Add_res;
			inverse->costs += (get_ia32_immop_type(irn) == ia32_ImmSymConst) ? 5 : 1;
			copy_ia32_Immop_attr(inverse->nodes[0], (ir_node *)irn);
			/* normal sub: which inverse depends on whether i is minuend or subtrahend */
			ir_node *proj = ia32_get_res_proj(irn);
			inverse->nodes[0] = new_rd_ia32_Add(NULL, irg, block, noreg, noreg, proj, get_irn_n(irn, 3), nomem);
			inverse->nodes[0] = new_rd_ia32_Sub(NULL, irg, block, noreg, noreg, get_irn_n(irn, 2), proj, nomem);
			pnc = pn_ia32_Sub_res;
		/* (case iro_ia32_Eor — label line missing) */
		if (get_ia32_immop_type(irn) != ia32_ImmNone) {
			/* xor with const: inverse = xor */
			inverse->nodes[0] = new_rd_ia32_Eor(NULL, irg, block, noreg, noreg, get_irn_n(irn, i), noreg, nomem);
			pnc = pn_ia32_Eor_res;
			inverse->costs += (get_ia32_immop_type(irn) == ia32_ImmSymConst) ? 5 : 1;
			copy_ia32_Immop_attr(inverse->nodes[0], (ir_node *)irn);
			/* normal xor: xor of result and other operand restores the argument */
			inverse->nodes[0] = new_rd_ia32_Eor(NULL, irg, block, noreg, noreg, (ir_node *)irn, get_irn_n(irn, i), nomem);
			pnc = pn_ia32_Eor_res;
		/* (case iro_ia32_Not — label line missing); Not is its own inverse */
			ir_node *proj = ia32_get_res_proj(irn);
			inverse->nodes[0] = new_rd_ia32_Not(NULL, irg, block, noreg, noreg, proj, nomem);
			pnc = pn_ia32_Not_res;
	case iro_ia32_Minus: {
		/* Minus is its own inverse */
		ir_node *proj = ia32_get_res_proj(irn);
		inverse->nodes[0] = new_rd_ia32_Minus(NULL, irg, block, noreg, noreg, proj, nomem);
		pnc = pn_ia32_Minus_res;
	/* default: */
	/* inverse operation not supported */

	set_ia32_res_mode(inverse->nodes[0], mode);
	inverse->nodes[1] = new_r_Proj(irg, block, inverse->nodes[0], mode, pnc);
/**
 * Check if irn can load it's operand at position i from memory (source addressmode).
 * @param self Pointer to irn ops itself
 * @param irn The irn to be checked
 * @param i The operands position
 * @return Non-Zero if operand can be loaded
 *
 * NOTE(review): the 'return 0;' / 'return 1;' lines and closing brace are
 * missing in this copy — restore from VCS.
 */
static int ia32_possible_memory_operand(const void *self, const ir_node *irn, unsigned int i) {
	if (! is_ia32_irn(irn) ||                            /* must be an ia32 irn */
		get_irn_arity(irn) != 5 ||                       /* must be a binary operation */
		get_ia32_op_type(irn) != ia32_Normal ||          /* must not already be a addressmode irn */
		! (get_ia32_am_support(irn) & ia32_am_Source) || /* must be capable of source addressmode */
		(i != 2 && i != 3) ||                            /* a "real" operand position must be requested */
		(i == 2 && ! is_ia32_commutative(irn)) ||        /* if first operand requested irn must be commutative */
		is_ia32_use_frame(irn))                          /* must not already use frame */
/**
 * Rewrites @p irn so that operand @p i is loaded from the spill slot of
 * @p spill via source address mode, instead of from a register.
 * NOTE(review): lines are missing in this copy (the 'if (i == 2)' guard
 * around the swap, surrounding braces) — restore from VCS.
 */
static void ia32_perform_memory_operand(const void *self, ir_node *irn, ir_node *spill, unsigned int i) {
	assert(ia32_possible_memory_operand(self, irn, i) && "Cannot perform memory operand change");

	/* swap the two real operands so the memory operand sits at position 3 */
		ir_node *tmp = get_irn_n(irn, 3);
		set_irn_n(irn, 3, get_irn_n(irn, 2));
		set_irn_n(irn, 2, tmp);

	/* turn the node into a source-address-mode frame access */
	set_ia32_am_support(irn, ia32_am_Source);
	set_ia32_op_type(irn, ia32_AddrModeS);
	set_ia32_am_flavour(irn, ia32_B);
	set_ia32_ls_mode(irn, get_irn_mode(get_irn_n(irn, i)));
	//TODO this will fail, if spill is a PhiM (give PhiMs entities?)
	set_ia32_frame_ent(irn, be_get_frame_entity(spill));
	set_ia32_use_frame(irn);
	set_ia32_got_reload(irn);

	/* base = frame pointer, memory input = the spill */
	set_irn_n(irn, 0, get_irg_frame(get_irn_irg(irn)));
	set_irn_n(irn, 4, spill);

	/*
	Input at position one is index register, which is NoReg.
	We would need cg object to get a real noreg, but we cannot
	*/
	set_irn_n(irn, 3, get_irn_n(irn, 1));

	//FIXME DBG_OPT_AM_S(reload, irn);
/* ABI callback table handed to the be_abi module.
 * NOTE(review): the init/done/prologue/epilogue entries are on lines
 * missing from this copy — restore from VCS. */
static const be_abi_callbacks_t ia32_abi_callbacks = {
	ia32_abi_get_between_type,
	ia32_abi_dont_save_regs,
/* fill register allocator interface */

/* Node-ops vtable for the register allocator.
 * NOTE(review): several entries (set/get reg, classify, flags, sp bias,
 * inverse, and the trailing members/brace) are on lines missing from this
 * copy — restore from VCS. */
static const arch_irn_ops_if_t ia32_irn_ops_if = {
	ia32_get_irn_reg_req,
	ia32_get_frame_entity,
	ia32_set_frame_entity,
	ia32_set_frame_offset,
	ia32_get_op_estimated_cost,
	ia32_possible_memory_operand,
	ia32_perform_memory_operand,
/* Wrapper object pairing the vtable with the code generator.
 * NOTE(review): initializer body and closing brace are missing here. */
ia32_irn_ops_t ia32_irn_ops = {
783 /**************************************************
786 * ___ ___ __| | ___ __ _ ___ _ __ _| |_
787 * / __/ _ \ / _` |/ _ \/ _` |/ _ \ '_ \ | | _|
788 * | (_| (_) | (_| | __/ (_| | __/ | | | | | |
789 * \___\___/ \__,_|\___|\__, |\___|_| |_| |_|_|
792 **************************************************/
/**
 * Removes the nodes collected in cg->kill_conv by rerouting all their
 * users to the underlying operand (input 2 of the Proj predecessor).
 * NOTE(review): the 'irn' declaration and closing braces are missing in
 * this copy — restore from VCS.
 */
static void ia32_kill_convs(ia32_code_gen_t *cg) {

	/* BEWARE: the Projs are inserted in the set */
	foreach_nodeset(cg->kill_conv, irn) {
		ir_node *in = get_irn_n(get_Proj_pred(irn), 2);
		edges_reroute(irn, in, cg->birg->irg);
/**
 * Transform the Thread Local Store base.
 *
 * Replaces the graph's TLS node with an ia32 LdTls node.
 * NOTE(review): the NULL guard, the exchange/set call and closing braces
 * are on lines missing from this copy — restore from VCS.
 */
static void transform_tls(ir_graph *irg) {
	ir_node *irn = get_irg_tls(irg);
		dbg_info *dbg = get_irn_dbg_info(irn);
		ir_node *blk = get_nodes_block(irn);

		newn = new_rd_ia32_LdTls(dbg, irg, blk, get_irn_mode(irn));
/**
 * Transforms the standard firm graph into
 * an ia32 firm graph (constants/psi trees first, then all remaining
 * nodes, finally the address-mode optimization).
 * NOTE(review): lines are missing in this copy (the dump guards,
 * ia32_kill_convs call, closing brace) — restore from VCS.
 */
static void ia32_prepare_graph(void *self) {
	ia32_code_gen_t *cg = self;
	dom_front_info_t *dom;
	DEBUG_ONLY(firm_dbg_module_t *old_mod = cg->mod;)

	/* temporarily switch the debug module for this phase */
	FIRM_DBG_REGISTER(cg->mod, "firm.be.ia32.transform");

	/* 1st: transform constants and psi condition trees */
	ia32_pre_transform_phase(cg);

	/* 2nd: transform all remaining nodes */
	ia32_register_transformers();
	dom = be_compute_dominance_frontiers(cg->irg);

	cg->kill_conv = new_nodeset(5);
	transform_tls(cg->irg);
	irg_walk_blkwise_graph(cg->irg, NULL, ia32_transform_node, cg);
	del_nodeset(cg->kill_conv);

	be_free_dominance_frontiers(dom);

	be_dump(cg->irg, "-transformed", dump_ir_block_graph_sched);

	/* 3rd: optimize address mode */
	FIRM_DBG_REGISTER(cg->mod, "firm.be.ia32.am");
	ia32_optimize_addressmode(cg);

	be_dump(cg->irg, "-am", dump_ir_block_graph_sched);

	/* restore the previous debug module */
	DEBUG_ONLY(cg->mod = old_mod;)
/**
 * Dummy functions for hooks we don't need but which must be filled.
 *
 * FIX(review): the (empty) body brace was lost in this copy of the file.
 */
static void ia32_before_sched(void *self) {
}
/**
 * Recursively removes @p irn from the schedule and kills it (including
 * predecessors that become unused), stopping at memory edges so stores
 * are never accidentally removed.
 * NOTE(review): lines are missing in this copy (declarations of i/mode/
 * mem_proj, several returns, the sched_remove call, braces) — restore
 * from VCS.
 */
static void remove_unused_nodes(ir_node *irn, bitset_t *already_visited) {
	mode = get_irn_mode(irn);

	/* check if we already saw this node or the node has more than one user */
	if (bitset_contains_irn(already_visited, irn) || get_irn_n_edges(irn) > 1)

	/* mark irn visited */
	bitset_add_irn(already_visited, irn);

	/* non-Tuple nodes with one user: ok, return */
	if (get_irn_n_edges(irn) >= 1 && mode != mode_T)

	/* tuple node has one user which is not the mem proj-> ok */
	if (mode == mode_T && get_irn_n_edges(irn) == 1) {
		mem_proj = ia32_get_proj_for_mode(irn, mode_M);

	for (i = get_irn_arity(irn) - 1; i >= 0; i--) {
		ir_node *pred = get_irn_n(irn, i);

		/* do not follow memory edges or we will accidentally remove stores */
		if (is_Proj(pred) && get_irn_mode(pred) == mode_M)

		/* detach the predecessor */
		set_irn_n(irn, i, new_Bad());

		/*
		The current node is about to be removed: if the predecessor
		has only this node as user, it need to be removed as well.
		*/
		if (get_irn_n_edges(pred) <= 1)
			remove_unused_nodes(pred, already_visited);

	/* finally take the node out of the schedule */
	if (sched_is_scheduled(irn))
914 static void remove_unused_loads_walker(ir_node *irn, void *env) {
915 bitset_t *already_visited = env;
916 if (is_ia32_Ld(irn) && ! bitset_contains_irn(already_visited, irn))
917 remove_unused_nodes(irn, env);
/**
 * Called before the register allocator.
 * Calculate a block schedule here. We need it for the x87
 * simulator and the emitter.
 *
 * NOTE(review): closing brace is missing in this copy — restore from VCS.
 */
static void ia32_before_ra(void *self) {
	ia32_code_gen_t *cg = self;
	bitset_t *already_visited = bitset_irg_malloc(cg->irg);

	cg->blk_sched = sched_create_block_schedule(cg->irg);

	/*
	There are sometimes unused loads, only pinned by memory.
	We need to remove those Loads and all other nodes which won't be used
	after removing the Load from schedule.
	*/
	irg_walk_graph(cg->irg, remove_unused_loads_walker, NULL, already_visited);
	bitset_free(already_visited);
/**
 * Transforms a be node into a Load.
 *
 * Replaces a be Reload node by an ia32 Load (xLoad/vfld for floats,
 * Load for gp) reading from the node's frame entity, keeping the
 * schedule position and register assignment.
 * NOTE(review): lines are missing in this copy (sched_remove, exchange,
 * closing braces) — restore from VCS.
 */
static void transform_to_Load(ia32_transform_env_t *env) {
	ir_node *irn = env->irn;
	entity *ent = be_get_frame_entity(irn);
	ir_mode *mode = env->mode;
	ir_node *noreg = ia32_new_NoReg_gp(env->cg);
	ir_node *nomem = new_rd_NoMem(env->irg);
	ir_node *sched_point = NULL;
	ir_node *ptr = get_irn_n(irn, 0);
	ir_node *mem = be_is_Reload(irn) ? get_irn_n(irn, 1) : nomem;
	ir_node *new_op, *proj;
	const arch_register_t *reg;

	/* remember where to splice the new node into the schedule */
	if (sched_is_scheduled(irn)) {
		sched_point = sched_prev(irn);

	if (mode_is_float(mode)) {
		if (USE_SSE2(env->cg))
			new_op = new_rd_ia32_xLoad(env->dbg, env->irg, env->block, ptr, noreg, mem);
			new_op = new_rd_ia32_vfld(env->dbg, env->irg, env->block, ptr, noreg, mem); /* else-branch; 'else' line missing */
		new_op = new_rd_ia32_Load(env->dbg, env->irg, env->block, ptr, noreg, mem);

	/* the Load reads from the spill slot on the frame */
	set_ia32_am_support(new_op, ia32_am_Source);
	set_ia32_op_type(new_op, ia32_AddrModeS);
	set_ia32_am_flavour(new_op, ia32_B);
	set_ia32_ls_mode(new_op, mode);
	set_ia32_frame_ent(new_op, ent);
	set_ia32_use_frame(new_op);

	DBG_OPT_RELOAD2LD(irn, new_op);

	proj = new_rd_Proj(env->dbg, env->irg, env->block, new_op, mode, pn_Load_res);

		sched_add_after(sched_point, new_op);
		sched_add_after(new_op, proj);

	/* copy the register from the old node to the new Load */
	reg = arch_get_irn_register(env->cg->arch_env, irn);
	arch_set_irn_register(env->cg->arch_env, new_op, reg);

	SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env->cg, irn));
/**
 * Transforms a be node into a Store.
 *
 * Replaces a be Spill node by an ia32 Store (xStore/vfst for floats,
 * Store8Bit/Store for gp) writing to the node's frame entity, keeping
 * the schedule position.
 * NOTE(review): lines are missing in this copy (sched_remove, closing
 * braces) — restore from VCS.
 */
static void transform_to_Store(ia32_transform_env_t *env) {
	ir_node *irn = env->irn;
	entity *ent = be_get_frame_entity(irn);
	ir_mode *mode = env->mode;
	ir_node *noreg = ia32_new_NoReg_gp(env->cg);
	ir_node *nomem = new_rd_NoMem(env->irg);
	ir_node *ptr = get_irn_n(irn, 0);
	ir_node *val = get_irn_n(irn, 1);
	ir_node *new_op, *proj;
	ir_node *sched_point = NULL;

	if (sched_is_scheduled(irn)) {
		sched_point = sched_prev(irn);

	if (mode_is_float(mode)) {
		if (USE_SSE2(env->cg))
			new_op = new_rd_ia32_xStore(env->dbg, env->irg, env->block, ptr, noreg, val, nomem);
			new_op = new_rd_ia32_vfst(env->dbg, env->irg, env->block, ptr, noreg, val, nomem); /* else-branch; 'else' line missing */
	else if (get_mode_size_bits(mode) == 8) {
		new_op = new_rd_ia32_Store8Bit(env->dbg, env->irg, env->block, ptr, noreg, val, nomem);
		new_op = new_rd_ia32_Store(env->dbg, env->irg, env->block, ptr, noreg, val, nomem);

	/* the Store writes into the spill slot on the frame */
	set_ia32_am_support(new_op, ia32_am_Dest);
	set_ia32_op_type(new_op, ia32_AddrModeD);
	set_ia32_am_flavour(new_op, ia32_B);
	set_ia32_ls_mode(new_op, mode);
	set_ia32_frame_ent(new_op, ent);
	set_ia32_use_frame(new_op);

	DBG_OPT_SPILL2ST(irn, new_op);

	proj = new_rd_Proj(env->dbg, env->irg, env->block, new_op, mode_M, pn_ia32_Store_M);

		sched_add_after(sched_point, new_op);

	SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env->cg, irn));

	exchange(irn, proj);
/**
 * Builds an ia32 Push that reads from frame entity @p ent (plus optional
 * string @p offset) and schedules it before @p schedpoint.
 * NOTE(review): lines are missing in this copy (the offset guard,
 * 'return push;' and the closing brace) — restore from VCS.
 */
static ir_node *create_push(ia32_transform_env_t *env, ir_node *schedpoint, ir_node *sp, ir_node *mem, entity *ent, const char *offset) {
	ir_node *noreg = ia32_new_NoReg_gp(env->cg);

	ir_node *push = new_rd_ia32_Push(env->dbg, env->irg, env->block, sp, noreg, mem);

	/* source operand is the frame slot of @p ent */
	set_ia32_frame_ent(push, ent);
	set_ia32_use_frame(push);
	set_ia32_op_type(push, ia32_AddrModeS);
	set_ia32_am_flavour(push, ia32_B);
	set_ia32_ls_mode(push, mode_Is);

	add_ia32_am_offs(push, offset);

	sched_add_before(schedpoint, push);
/**
 * Builds an ia32 Pop that writes into frame entity @p ent (plus optional
 * string @p offset) and schedules it before @p schedpoint.
 * NOTE(review): lines are missing in this copy (the offset guard,
 * 'return pop;' and the closing brace) — restore from VCS.
 */
static ir_node *create_pop(ia32_transform_env_t *env, ir_node *schedpoint, ir_node *sp, entity *ent, const char *offset) {
	ir_node *pop = new_rd_ia32_Pop(env->dbg, env->irg, env->block, sp, new_NoMem());

	/* destination operand is the frame slot of @p ent */
	set_ia32_frame_ent(pop, ent);
	set_ia32_use_frame(pop);
	set_ia32_op_type(pop, ia32_AddrModeD);
	set_ia32_am_flavour(pop, ia32_B);
	set_ia32_ls_mode(pop, mode_Is);

	add_ia32_am_offs(pop, offset);

	sched_add_before(schedpoint, pop);
/**
 * Creates a Proj of @p pred at @p pos carrying the stack pointer (mode
 * and register are copied from @p oldsp) and schedules it before
 * @p schedpoint.
 * NOTE(review): the 'sp' declaration, 'return sp;' and the closing brace
 * are missing in this copy — restore from VCS.
 */
static ir_node* create_spproj(ia32_transform_env_t *env, ir_node *pred, int pos, ir_node *schedpoint, const ir_node *oldsp) {
	ir_mode *spmode = get_irn_mode(oldsp);
	const arch_register_t *spreg = arch_get_irn_register(env->cg->arch_env, oldsp);

	sp = new_rd_Proj(env->dbg, env->irg, env->block, pred, spmode, pos);
	arch_set_irn_register(env->cg->arch_env, sp, spreg);
	sched_add_before(schedpoint, sp);
/**
 * Lowers a be MemPerm node into a cascade of Push (all inputs) followed
 * by Pop (all outputs, in reverse), then redirects the memory Projs to
 * the corresponding Pops.
 * NOTE(review): many lines are missing in this copy (declarations of
 * arity/i/push/pop/pops[], 64-bit guards, the sched_remove/kill of the
 * MemPerm, braces) — restore from VCS.
 */
static void transform_MemPerm(ia32_transform_env_t *env) {
	/*
	 * Transform memperm, currently we do this the ugly way and produce
	 * push/pop into/from memory cascades. This is possible without using
	 */
	ir_node *node = env->irn;
	ir_node *sp = get_irn_n(node, 0);
	const ir_edge_t *edge;
	const ir_edge_t *next;

	arity = be_get_MemPerm_entity_arity(node);
	pops = alloca(arity * sizeof(pops[0]));

	/* push phase: save every input entity on the stack */
	for(i = 0; i < arity; ++i) {
		entity *ent = be_get_MemPerm_in_entity(node, i);
		ir_type *enttype = get_entity_type(ent);
		int entbits = get_type_size_bits(enttype);
		ir_node *mem = get_irn_n(node, i + 1);

		assert( (entbits == 32 || entbits == 64) && "spillslot on x86 should be 32 or 64 bit");

		push = create_push(env, node, sp, mem, ent, NULL);
		sp = create_spproj(env, push, 0, node, sp);
		// add another push after the first one
		push = create_push(env, node, sp, mem, ent, "4");
		sp = create_spproj(env, push, 0, node, sp);

		/* detach the consumed input */
		set_irn_n(node, i, new_Bad());

	/* pop phase: restore into the output entities, in reverse order */
	for(i = arity - 1; i >= 0; --i) {
		entity *ent = be_get_MemPerm_out_entity(node, i);
		ir_type *enttype = get_entity_type(ent);
		int entbits = get_type_size_bits(enttype);

		assert( (entbits == 32 || entbits == 64) && "spillslot on x86 should be 32 or 64 bit");

		pop = create_pop(env, node, sp, ent, NULL);
		// add another pop after the first one
		sp = create_spproj(env, pop, 1, node, sp);
		pop = create_pop(env, node, sp, ent, "4");
		sp = create_spproj(env, pop, 1, node, sp);

	// exchange memprojs
	foreach_out_edge_safe(node, edge, next) {
		ir_node *proj = get_edge_src_irn(edge);
		int p = get_Proj_proj(proj);

		set_Proj_pred(proj, pops[p]);
		set_Proj_proj(proj, 3);
/**
 * Fix the mode of Spill/Reload
 *
 * NOTE(review): the body of this function is almost entirely missing in
 * this copy (only the float test survived) — restore from VCS.
 */
static ir_mode *fix_spill_mode(ia32_code_gen_t *cg, ir_mode *mode)
	if (mode_is_float(mode)) {
/**
 * Block-Walker: Calls the transform functions Spill and Reload.
 *
 * Walks the schedule of @p block backwards and rewrites be Reload, Spill
 * and MemPerm nodes into their ia32 equivalents.
 * NOTE(review): lines are missing in this copy (tenv.block/cg/irn
 * assignments, closing braces) — restore from VCS.
 */
static void ia32_after_ra_walker(ir_node *block, void *env) {
	ir_node *node, *prev;
	ia32_code_gen_t *cg = env;
	ia32_transform_env_t tenv;

	tenv.irg = current_ir_graph;
	DEBUG_ONLY(tenv.mod = cg->mod;)

	/* beware: the schedule is changed here */
	for (node = sched_last(block); !sched_is_begin(node); node = prev) {
		prev = sched_prev(node);
		if (be_is_Reload(node)) {
			/* we always reload the whole register */
			tenv.dbg = get_irn_dbg_info(node);
			tenv.mode = fix_spill_mode(cg, get_irn_mode(node));
			transform_to_Load(&tenv);
		else if (be_is_Spill(node)) {
			ir_node *spillval = get_irn_n(node, be_pos_Spill_val);
			/* we always spill the whole register */
			tenv.dbg = get_irn_dbg_info(node);
			tenv.mode = fix_spill_mode(cg, get_irn_mode(spillval));
			transform_to_Store(&tenv);
		else if(be_is_MemPerm(node)) {
			tenv.dbg = get_irn_dbg_info(node);
			transform_MemPerm(&tenv);
/*
 * Code generator hook run right after register allocation: lowers the
 * remaining backend nodes per block via ia32_after_ra_walker, then runs
 * ia32_finish_irg. Must run before stack biasing (see comment below).
 */
1225 * We transform Spill and Reload here. This needs to be done before
1226 * stack biasing otherwise we would miss the corrected offset for these nodes.
1228 * If x87 instruction should be emitted, run the x87 simulator and patch
1229 * the virtual instructions. This must obviously be done after register allocation.
1231 static void ia32_after_ra(void *self) {
1232 ia32_code_gen_t *cg = self;
1233 ir_graph *irg = cg->irg;
1235 irg_block_walk_graph(irg, NULL, ia32_after_ra_walker, self);
1237 ia32_finish_irg(irg, cg);
/*
 * Final graph pass before emitting: if x87 code was generated (or its
 * simulation is forced), replace the virtual fp instructions/registers via
 * the x87 stack simulator, then run the ia32 peephole optimizer.
 */
1241 * Last touchups for the graph before emit
1243 static void ia32_finish(void *self) {
1244 ia32_code_gen_t *cg = self;
1245 ir_graph *irg = cg->irg;
1247 /* if we do x87 code generation, rewrite all the virtual instructions and registers */
1248 if (cg->used_fp == fp_x87 || cg->force_sim) {
1249 x87_simulate_graph(cg->arch_env, irg, cg->blk_sched);
1252 ia32_peephole_optimization(irg, cg);
/*
 * Last code generator hook: emits the assembly for the irg to the ISA's
 * output file and tears down the per-irg code generator state.
 * NOTE(review): the lines detaching the cg from the isa and freeing cg
 * itself are elided in this listing.
 */
1256 * Emits the code, closes the output file and frees
1257 * the code generator interface.
1259 static void ia32_codegen(void *self) {
1260 ia32_code_gen_t *cg = self;
1261 ir_graph *irg = cg->irg;
1263 ia32_gen_routine(cg->isa->out, irg, cg);
1267 /* remove it from the isa */
1270 /* de-allocate code generator */
1271 del_set(cg->reg_set);
/* Forward declaration: ia32_cg_init is referenced by the vtable below but
 * defined after it. */
1275 static void *ia32_cg_init(const be_irg_t *birg);
/*
 * Code generator interface vtable: the hooks the generic backend driver
 * calls in phase order (init .. emit). NULL slots mean "no action for this
 * phase"; some entries of the initializer are elided in this listing.
 */
1277 static const arch_code_generator_if_t ia32_code_gen_if = {
1279 	NULL, /* before abi introduce hook */
1281 	ia32_before_sched, /* before scheduling hook */
1282 	ia32_before_ra, /* before register allocation hook */
1283 	ia32_after_ra, /* after register allocation hook */
1284 	ia32_finish, /* called before codegen */
1285 	ia32_codegen /* emit && done */
/*
 * Allocates and initializes the per-irg ia32 code generator object.
 * Copies the arch/opt settings from the ISA for quick access, recycles the
 * ISA's name obstack between irgs, and publishes the new cg through the
 * file-level globals (cur_reg_set, ia32_irn_ops.cg) — so only one code
 * generator can be active at a time.
 */
1289 * Initializes a IA32 code generator.
1291 static void *ia32_cg_init(const be_irg_t *birg) {
1292 ia32_isa_t *isa = (ia32_isa_t *)birg->main_env->arch_env->isa;
1293 ia32_code_gen_t *cg = xcalloc(1, sizeof(*cg));
1295 cg->impl = &ia32_code_gen_if;
1296 cg->irg = birg->irg;
1297 cg->reg_set = new_set(ia32_cmp_irn_reg_assoc, 1024);
1298 cg->arch_env = birg->main_env->arch_env;
1301 cg->blk_sched = NULL;
1302 cg->fp_to_gp = NULL;
1303 cg->gp_to_fp = NULL;
1304 cg->fp_kind = isa->fp_kind;
	/* used_fp starts at fp_none; it is raised to x87/sse2 while transforming */
1305 cg->used_fp = fp_none;
1306 cg->dump = (birg->main_env->options->dump_flags & DUMP_BE) ? 1 : 0;
1308 FIRM_DBG_REGISTER(cg->mod, "firm.be.ia32.cg");
1310 /* copy optimizations from isa for easier access */
1312 cg->arch = isa->arch;
1313 cg->opt_arch = isa->opt_arch;
	/* reset the name obstack of the previous irg instead of leaking it */
1319 if (isa->name_obst_size) {
1320 //printf("freed %d bytes from name obst\n", isa->name_obst_size);
1321 isa->name_obst_size = 0;
1322 obstack_free(isa->name_obst, NULL);
1323 obstack_init(isa->name_obst);
1327 cur_reg_set = cg->reg_set;
1329 ia32_irn_ops.cg = cg;
1331 return (arch_code_generator_t *)cg;
1336 /*****************************************************************
1337 * ____ _ _ _____ _____
1338 * | _ \ | | | | |_ _|/ ____| /\
1339 * | |_) | __ _ ___| | _____ _ __ __| | | | | (___ / \
1340 * | _ < / _` |/ __| |/ / _ \ '_ \ / _` | | | \___ \ / /\ \
1341 * | |_) | (_| | (__| < __/ | | | (_| | _| |_ ____) / ____ \
1342 * |____/ \__,_|\___|_|\_\___|_| |_|\__,_| |_____|_____/_/ \_\
1344 *****************************************************************/
/*
 * Tarval printing configuration used to make constant output follow
 * C/GCC integer syntax. NOTE(review): the initializer fields of mo_integer
 * are elided in this listing.
 */
1347 * Set output modes for GCC
1349 static const tarval_mode_info mo_integer = {
1356 * set the tarval output mode to C-semantics
/*
 * Registers mo_integer as the output option for every integer-like mode so
 * that tarvals are printed with C semantics in the emitted assembly.
 * Fix: the original registered get_modeIu() twice (once in the middle of
 * the list and again as the last entry); the redundant second registration
 * has been dropped — re-registering the same mode/option pair is a no-op.
 */
1358 static void set_tarval_output_modes(void)
1360 set_tarval_mode_output_option(get_modeLs(), &mo_integer);
1361 set_tarval_mode_output_option(get_modeLu(), &mo_integer);
1362 set_tarval_mode_output_option(get_modeIs(), &mo_integer);
1363 set_tarval_mode_output_option(get_modeIu(), &mo_integer);
1364 set_tarval_mode_output_option(get_modeHs(), &mo_integer);
1365 set_tarval_mode_output_option(get_modeHu(), &mo_integer);
1366 set_tarval_mode_output_option(get_modeBs(), &mo_integer);
1367 set_tarval_mode_output_option(get_modeBu(), &mo_integer);
1368 set_tarval_mode_output_option(get_modeC(), &mo_integer);
1369 set_tarval_mode_output_option(get_modeU(), &mo_integer);
/*
 * Default ISA settings, copied into each new ia32_isa_t by ia32_init().
 * The libcore command-line options below (arch_var, fp_unit_var, ...) write
 * directly into this template, so options must be parsed before ia32_init
 * copies it. Some initializer entries are elided in this listing.
 */
1375 * The template that generates a new ISA object.
1376 * Note that this template can be changed by command line
1379 static ia32_isa_t ia32_isa_template = {
1381 	&ia32_isa_if, /* isa interface implementation */
1382 	&ia32_gp_regs[REG_ESP], /* stack pointer register */
1383 	&ia32_gp_regs[REG_EBP], /* base pointer register */
1384 	-1, /* stack direction */
1386 	NULL, /* 16bit register names */
1387 	NULL, /* 8bit register names */
1391 	IA32_OPT_INCDEC | /* optimize add 1, sub 1 into inc/dec default: on */
1392 	IA32_OPT_DOAM | /* optimize address mode default: on */
1393 	IA32_OPT_LEA | /* optimize for LEAs default: on */
1394 	IA32_OPT_PLACECNST | /* place constants immediately before instructions, default: on */
1395 	IA32_OPT_IMMOPS | /* operations can use immediates, default: on */
1396 	IA32_OPT_EXTBB), /* use extended basic block scheduling, default: on */
1397 	arch_pentium_4, /* instruction architecture */
1398 	arch_pentium_4, /* optimize for architecture */
1399 	fp_sse2, /* use sse2 unit */
1400 	NULL, /* current code generator */
1402 	NULL, /* name obstack */
1403 	0 /* name obst size */
/*
 * Creates the ia32 ISA object from the template, applies CPU-dependent
 * adjustments, builds register maps, and writes the assembly prologue to
 * the given output file. Guarded by a static 'inited' flag (singleton);
 * the guard's early-return lines are elided in this listing.
 */
1408 * Initializes the backend ISA.
1410 static void *ia32_init(FILE *file_handle) {
1411 static int inited = 0;
1417 set_tarval_output_modes();
1419 isa = xmalloc(sizeof(*isa));
1420 memcpy(isa, &ia32_isa_template, sizeof(*isa));
1422 ia32_register_init(isa);
1423 ia32_create_opcodes();
	/* fall back to x87 on CPUs that predate SSE2 support */
1425 if ((ARCH_INTEL(isa->arch) && isa->arch < arch_pentium_4) ||
1426 (ARCH_AMD(isa->arch) && isa->arch < arch_athlon))
1427 /* no SSE2 for these cpu's */
1428 isa->fp_kind = fp_x87;
1430 if (ARCH_INTEL(isa->opt_arch) && isa->opt_arch >= arch_pentium_4) {
1431 /* Pentium 4 don't like inc and dec instructions */
1432 isa->opt &= ~IA32_OPT_INCDEC;
	/* lookup tables owned by the isa; released again in ia32_done() */
1435 isa->regs_16bit = pmap_create();
1436 isa->regs_8bit = pmap_create();
1437 isa->types = pmap_create();
1438 isa->tv_ent = pmap_create();
1439 isa->out = file_handle;
1441 ia32_build_16bit_reg_map(isa->regs_16bit);
1442 ia32_build_8bit_reg_map(isa->regs_8bit);
1444 /* patch register names of x87 registers */
1446 ia32_st_regs[0].name = "st";
1447 ia32_st_regs[1].name = "st(1)";
1448 ia32_st_regs[2].name = "st(2)";
1449 ia32_st_regs[3].name = "st(3)";
1450 ia32_st_regs[4].name = "st(4)";
1451 ia32_st_regs[5].name = "st(5)";
1452 ia32_st_regs[6].name = "st(6)";
1453 ia32_st_regs[7].name = "st(7)";
1457 isa->name_obst = xmalloc(sizeof(*isa->name_obst));
1458 obstack_init(isa->name_obst);
1459 isa->name_obst_size = 0;
1462 ia32_handle_intrinsics();
1463 ia32_switch_section(NULL, NO_SECTION);
	/* the backend emits Intel-syntax assembly */
1464 fprintf(isa->out, "\t.intel_syntax\n");
/*
 * ISA destructor counterpart to ia32_init(): flushes the collected global
 * declarations to the output and destroys the pmaps and the name obstack.
 * NOTE(review): the free of the isa object itself is elided in this listing.
 */
1474 * Closes the output file and frees the ISA structure.
1476 static void ia32_done(void *self) {
1477 ia32_isa_t *isa = self;
1479 /* emit now all global declarations */
1480 ia32_gen_decls(isa->out);
1482 pmap_destroy(isa->regs_16bit);
1483 pmap_destroy(isa->regs_8bit);
1484 pmap_destroy(isa->tv_ent);
1485 pmap_destroy(isa->types);
1488 //printf("name obst size = %d bytes\n", isa->name_obst_size);
1489 obstack_free(isa->name_obst, NULL);
/*
 * Reports the fixed number of ia32 register classes (gp, xmm, vfp);
 * the return statement is elided in this listing.
 */
1497 * Return the number of register classes for this architecture.
1498 * We report always these:
1499 * - the general purpose registers
1500 * - the SSE floating point register set
1501 * - the virtual floating point registers
1503 static int ia32_get_n_reg_class(const void *self) {
/*
 * Maps the class index (0..2) to gp/xmm/vfp; the branching between the
 * three return statements is elided in this listing.
 */
1508 * Return the register class for index i.
1510 static const arch_register_class_t *ia32_get_reg_class(const void *self, int i) {
1511 assert(i >= 0 && i < 3 && "Invalid ia32 register class requested.");
1513 return &ia32_reg_classes[CLASS_ia32_gp];
1515 return &ia32_reg_classes[CLASS_ia32_xmm];
1517 return &ia32_reg_classes[CLASS_ia32_vfp];
/*
 * Float modes go to xmm when SSE2 is in use, otherwise to the virtual fp
 * class; everything else is handled in the gp class.
 */
1521 * Get the register class which shall be used to store a value of a given mode.
1522 * @param self The this pointer.
1523 * @param mode The mode in question.
1524 * @return A register class which can hold values of the given mode.
1526 const arch_register_class_t *ia32_get_reg_class_for_mode(const void *self, const ir_mode *mode) {
1527 const ia32_isa_t *isa = self;
1528 if (mode_is_float(mode)) {
1529 return USE_SSE2(isa) ? &ia32_reg_classes[CLASS_ia32_xmm] : &ia32_reg_classes[CLASS_ia32_vfp];
1532 return &ia32_reg_classes[CLASS_ia32_gp];
/*
 * Configures the be_abi_call_t for a given method type: call flags,
 * register vs. stack parameter passing, and the result register(s).
 * Convention visible here: results in EAX (or EAX:EDX for a 64bit pair,
 * XMM0/VF0 for float results); stack args pushed right-to-left.
 * NOTE(review): several declaration lines, loop braces and the n==2/n==1
 * branch structure are elided in this listing.
 */
1536 * Get the ABI restrictions for procedure calls.
1537 * @param self The this pointer.
1538 * @param method_type The type of the method (procedure) in question.
1539 * @param abi The abi object to be modified
1541 static void ia32_get_call_abi(const void *self, ir_type *method_type, be_abi_call_t *abi) {
1542 const ia32_isa_t *isa = self;
1545 unsigned cc = get_method_calling_convention(method_type);
1546 int n = get_method_n_params(method_type);
1549 int i, ignore_1, ignore_2;
1551 const arch_register_t *reg;
1552 be_abi_call_flags_t call_flags = be_abi_call_get_flags(abi);
	/* P6-class cores get mov-based argument stores instead of push */
1554 unsigned use_push = !IS_P6_ARCH(isa->opt_arch);
1556 /* set abi flags for calls */
1557 call_flags.bits.left_to_right = 0; /* always last arg first on stack */
1558 call_flags.bits.store_args_sequential = use_push;
1559 /* call_flags.bits.try_omit_fp not changed: can handle both settings */
1560 call_flags.bits.fp_free = 0; /* the frame pointer is fixed in IA32 */
1561 call_flags.bits.call_has_imm = 1; /* IA32 calls can have immediate address */
1563 /* set stack parameter passing style */
1564 be_abi_call_set_flags(abi, call_flags, &ia32_abi_callbacks);
1566 /* collect the mode for each type */
1567 modes = alloca(n * sizeof(modes[0]));
1569 for (i = 0; i < n; i++) {
1570 tp = get_method_param_type(method_type, i);
1571 modes[i] = get_type_mode(tp);
1574 /* set register parameters */
1575 if (cc & cc_reg_param) {
1576 /* determine the number of parameters passed via registers */
1577 biggest_n = ia32_get_n_regparam_class(n, modes, &ignore_1, &ignore_2);
1579 /* loop over all parameters and set the register requirements */
1580 for (i = 0; i <= biggest_n; i++) {
1581 reg = ia32_get_RegParam_reg(n, modes, i, cc);
1582 assert(reg && "kaputt");
1583 be_abi_call_param_reg(abi, i, reg);
1590 /* set stack parameters */
1591 for (i = stack_idx; i < n; i++) {
1592 be_abi_call_param_stack(abi, i, 1, 0, 0);
1596 /* set return registers */
1597 n = get_method_n_ress(method_type);
1599 assert(n <= 2 && "more than two results not supported");
1601 /* In case of 64bit returns, we will have two 32bit values */
1603 tp = get_method_res_type(method_type, 0);
1604 mode = get_type_mode(tp);
1606 assert(!mode_is_float(mode) && "two FP results not supported");
1608 tp = get_method_res_type(method_type, 1);
1609 mode = get_type_mode(tp);
1611 assert(!mode_is_float(mode) && "two FP results not supported");
	/* 64bit result pair: low word in EAX, high word in EDX */
1613 be_abi_call_res_reg(abi, 0, &ia32_gp_regs[REG_EAX]);
1614 be_abi_call_res_reg(abi, 1, &ia32_gp_regs[REG_EDX]);
1617 const arch_register_t *reg;
1619 tp = get_method_res_type(method_type, 0);
1620 assert(is_atomic_type(tp));
1621 mode = get_type_mode(tp);
1623 reg = mode_is_float(mode) ?
1624 (USE_SSE2(isa) ? &ia32_xmm_regs[REG_XMM0] : &ia32_vfp_regs[REG_VF0]) :
1625 &ia32_gp_regs[REG_EAX];
1627 be_abi_call_res_reg(abi, 0, reg);
/* Every node gets the same irn ops object; the handler vtable's entries
 * are elided in this listing. */
1632 static const void *ia32_get_irn_ops(const arch_irn_handler_t *self, const ir_node *irn) {
1633 return &ia32_irn_ops;
1636 const arch_irn_handler_t ia32_irn_handler = {
/* Accessor returning the single, static irn handler above. */
1640 const arch_irn_handler_t *ia32_get_irn_handler(const void *self) {
1641 return &ia32_irn_handler;
/* Scheduler predicate: 1 for real ia32 nodes, -1 (= "don't care" to the
 * list scheduler, presumably — TODO confirm) for everything else. */
1644 int ia32_to_appear_in_schedule(void *block_env, const ir_node *irn) {
1645 return is_ia32_irn(irn) ? 1 : -1;
/* Returns the static code generator vtable defined above. */
1649 * Initializes the code generator interface.
1651 static const arch_code_generator_if_t *ia32_get_code_generator_if(void *self) {
1652 return &ia32_code_gen_if;
/* Cost callback for the list scheduler: ia32 nodes use the backend's cost
 * estimate, all other nodes count as 1 time step. */
1656 * Returns the estimated execution time of an ia32 irn.
1658 static sched_timestep_t ia32_sched_exectime(void *env, const ir_node *irn) {
1659 const arch_env_t *arch_env = env;
1660 return is_ia32_irn(irn) ? ia32_get_op_estimated_cost(arch_get_irn_ops(arch_env, irn), irn) : 1;
/* Mutable copy of the generic selector, patched with ia32 callbacks below.
 * File-scope so the returned pointer stays valid after the call. */
1663 list_sched_selector_t ia32_sched_selector;
/*
 * Clones the provided scheduler selector and overrides its exectime and
 * to_appear_in_schedule hooks with the ia32-specific implementations.
 */
1666 * Returns the reg_pressure scheduler with to_appear_in_schedule() overloaded
1668 static const list_sched_selector_t *ia32_get_list_sched_selector(const void *self, list_sched_selector_t *selector) {
1669 memcpy(&ia32_sched_selector, selector, sizeof(ia32_sched_selector));
1670 ia32_sched_selector.exectime = ia32_sched_exectime;
1671 ia32_sched_selector.to_appear_in_schedule = ia32_to_appear_in_schedule;
1672 return &ia32_sched_selector;
/*
 * Spill-slot alignment for a register class, derived from the class mode's
 * byte size. NOTE(review): the return statements (including the special
 * case for wide float modes) are elided in this listing.
 */
1676 * Returns the necessary byte alignment for storing a register of given class.
1678 static int ia32_get_reg_class_alignment(const void *self, const arch_register_class_t *cls) {
1679 ir_mode *mode = arch_register_class_mode(cls);
1680 int bytes = get_mode_size_bytes(mode);
1682 if (mode_is_float(mode) && bytes > 8)
/* Context handed to ia32_create_intrinsic_fkt via backend_params below. */
1687 static ia32_intrinsic_env_t intrinsic_env = { NULL, NULL };
/*
 * Returns the static backend parameter block telling libFirm's generic
 * optimizations what the ia32 backend supports (Mulh, dword lowering,
 * intrinsic creation). Some lines of the function are elided here.
 */
1690 * Returns the libFirm configuration parameter for this backend.
1692 static const backend_params *ia32_get_libfirm_params(void) {
1693 static const arch_dep_params_t ad = {
1694 	1, /* also use subs */
1695 	4, /* maximum shifts */
1696 	31, /* maximum shift amount */
1698 	1, /* allow Mulhs */
1699 	1, /* allow Mulus */
1700 	32 /* Mulh allowed up to 32 bit */
1702 static backend_params p = {
1703 	NULL, /* no additional opcodes */
1704 	NULL, /* will be set later */
1705 	1, /* need dword lowering */
1706 	ia32_create_intrinsic_fkt,
1707 	&intrinsic_env, /* context for ia32_create_intrinsic_fkt */
/*
 * libcore command-line option tables. The enum/bit option variables write
 * straight into ia32_isa_template (and asm_flavour), so parsing must happen
 * before ia32_init() copies the template. Several terminator entries of the
 * tables are elided in this listing.
 */
1715 /* instruction set architectures. */
1716 static const lc_opt_enum_int_items_t arch_items[] = {
1717 	{ "386", arch_i386, },
1718 	{ "486", arch_i486, },
1719 	{ "pentium", arch_pentium, },
1720 	{ "586", arch_pentium, },
1721 	{ "pentiumpro", arch_pentium_pro, },
1722 	{ "686", arch_pentium_pro, },
1723 	{ "pentiummmx", arch_pentium_mmx, },
1724 	{ "pentium2", arch_pentium_2, },
1725 	{ "p2", arch_pentium_2, },
1726 	{ "pentium3", arch_pentium_3, },
1727 	{ "p3", arch_pentium_3, },
1728 	{ "pentium4", arch_pentium_4, },
1729 	{ "p4", arch_pentium_4, },
1730 	{ "pentiumm", arch_pentium_m, },
1731 	{ "pm", arch_pentium_m, },
1732 	{ "core", arch_core, },
1734 	{ "athlon", arch_athlon, },
1735 	{ "athlon64", arch_athlon_64, },
1736 	{ "opteron", arch_opteron, },
/* -ia32-arch: which instruction set to generate for */
1740 static lc_opt_enum_int_var_t arch_var = {
1741 	&ia32_isa_template.arch, arch_items
/* -ia32-opt: which microarchitecture to tune for */
1744 static lc_opt_enum_int_var_t opt_arch_var = {
1745 	&ia32_isa_template.opt_arch, arch_items
1748 static const lc_opt_enum_int_items_t fp_unit_items[] = {
1750 	{ "sse2", fp_sse2 },
1754 static lc_opt_enum_int_var_t fp_unit_var = {
1755 	&ia32_isa_template.fp_kind, fp_unit_items
1758 static const lc_opt_enum_int_items_t gas_items[] = {
1759 	{ "linux", ASM_LINUX_GAS },
1760 	{ "mingw", ASM_MINGW_GAS },
1764 static lc_opt_enum_int_var_t gas_var = {
1765 	(int *)&asm_flavour, gas_items
1768 static const lc_opt_table_entry_t ia32_options[] = {
1769 	LC_OPT_ENT_ENUM_INT("arch", "select the instruction architecture", &arch_var),
1770 	LC_OPT_ENT_ENUM_INT("opt", "optimize for instruction architecture", &opt_arch_var),
1771 	LC_OPT_ENT_ENUM_INT("fpunit", "select the floating point unit", &fp_unit_var),
1772 	LC_OPT_ENT_NEGBIT("noaddrmode", "do not use address mode", &ia32_isa_template.opt, IA32_OPT_DOAM),
1773 	LC_OPT_ENT_NEGBIT("nolea", "do not optimize for LEAs", &ia32_isa_template.opt, IA32_OPT_LEA),
1774 	LC_OPT_ENT_NEGBIT("noplacecnst", "do not place constants", &ia32_isa_template.opt, IA32_OPT_PLACECNST),
1775 	LC_OPT_ENT_NEGBIT("noimmop", "no operations with immediates", &ia32_isa_template.opt, IA32_OPT_IMMOPS),
1776 	LC_OPT_ENT_NEGBIT("noextbb", "do not use extended basic block scheduling", &ia32_isa_template.opt, IA32_OPT_EXTBB),
1777 	LC_OPT_ENT_ENUM_INT("gasmode", "set the GAS compatibility mode", &gas_var),
/*
 * Hooks the ia32 option table into the backend's "ia32" option group so the
 * flags listed below become available on the command line.
 */
1782 * Register command line options for the ia32 backend.
1786 * ia32-arch=arch create instruction for arch
1787 * ia32-opt=arch optimize for run on arch
1788 * ia32-fpunit=unit select floating point unit (x87 or SSE2)
1789 * ia32-incdec optimize for inc/dec
1790 * ia32-noaddrmode do not use address mode
1791 * ia32-nolea do not optimize for LEAs
1792 * ia32-noplacecnst do not place constants,
1793 * ia32-noimmop no operations with immediates
1794 * ia32-noextbb do not use extended basic block scheduling
1795 * ia32-gasmode set the GAS compatibility mode
1797 static void ia32_register_options(lc_opt_entry_t *ent)
1799 lc_opt_entry_t *be_grp_ia32 = lc_opt_get_grp(ent, "ia32");
1800 lc_opt_add_table(be_grp_ia32, ia32_options);
1802 #endif /* WITH_LIBCORE */
1804 const arch_isa_if_t ia32_isa_if = {
1807 ia32_get_n_reg_class,
1809 ia32_get_reg_class_for_mode,
1811 ia32_get_irn_handler,
1812 ia32_get_code_generator_if,
1813 ia32_get_list_sched_selector,
1814 ia32_get_reg_class_alignment,
1815 ia32_get_libfirm_params,
1817 ia32_register_options