2 * This is the main ia32 firm backend driver.
3 * @author Christian Wuerdig
20 #include <libcore/lc_opts.h>
21 #include <libcore/lc_opts_enum.h>
22 #endif /* WITH_LIBCORE */
26 #include "pseudo_irg.h"
30 #include "iredges_t.h"
38 #include "../beabi.h" /* the general register allocator interface */
39 #include "../benode_t.h"
40 #include "../belower.h"
41 #include "../besched_t.h"
44 #include "../beirgmod.h"
45 #include "bearch_ia32_t.h"
47 #include "ia32_new_nodes.h" /* ia32 nodes interface */
48 #include "gen_ia32_regalloc_if.h" /* the generated interface (register type and class definitions) */
49 #include "ia32_gen_decls.h" /* interface declaration emitter */
50 #include "ia32_transform.h"
51 #include "ia32_emitter.h"
52 #include "ia32_map_regs.h"
53 #include "ia32_optimize.h"
55 #include "ia32_dbg_stat.h"
56 #include "ia32_finish.h"
57 #include "ia32_util.h"
59 #define DEBUG_MODULE "firm.be.ia32.isa"
62 static set *cur_reg_set = NULL;
65 #define is_Start(irn) (get_irn_opcode(irn) == iro_Start)
67 /* Creates the unique per irg GP NoReg node. */
68 ir_node *ia32_new_NoReg_gp(ia32_code_gen_t *cg) {
/* The NoReg placeholder lives in the ABI's callee-save map, so it is unique per irg. */
69 return be_abi_get_callee_save_irn(cg->birg->abi, &ia32_gp_regs[REG_GP_NOREG]);
72 /* Creates the unique per irg FP NoReg node. */
73 ir_node *ia32_new_NoReg_fp(ia32_code_gen_t *cg) {
/* SSE2 targets use the XMM NoReg, all others the virtual x87 (vfp) NoReg. */
74 return be_abi_get_callee_save_irn(cg->birg->abi,
75 USE_SSE2(cg) ? &ia32_xmm_regs[REG_XMM_NOREG] : &ia32_vfp_regs[REG_VFP_NOREG]);
78 /**************************************************
81 * _ __ ___ __ _ __ _| | | ___ ___ _| |_
82 * | '__/ _ \/ _` | / _` | | |/ _ \ / __| | | _|
83 * | | | __/ (_| | | (_| | | | (_) | (__ | | |
84 * |_| \___|\__, | \__,_|_|_|\___/ \___| |_|_|
87 **************************************************/
/* Presumably skips over Proj nodes to the producing node — body not visible
   in this excerpt; confirm against the full file. */
89 static ir_node *my_skip_proj(const ir_node *n) {
97 * Return register requirements for an ia32 node.
98 * If the node returns a tuple (mode_T) then the proj's
99 * will be asked for this information.
101 static const arch_register_req_t *ia32_get_irn_reg_req(const void *self, arch_register_req_t *req, const ir_node *irn, int pos) {
102 const ia32_irn_ops_t *ops = self;
103 const ia32_register_req_t *irn_req;
104 long node_pos = pos == -1 ? 0 : pos;
105 ir_mode *mode = is_Block(irn) ? NULL : get_irn_mode(irn);
106 FIRM_DBG_REGISTER(firm_dbg_module_t *mod, DEBUG_MODULE);
/* Blocks and memory/control-flow values never carry registers. */
108 if (is_Block(irn) || mode == mode_M || mode == mode_X) {
109 DBG((mod, LEVEL_1, "ignoring Block, mode_M, mode_X node %+F\n", irn));
/* OUT requirements of a whole mode_T tuple are answered via its Projs instead. */
113 if (mode == mode_T && pos < 0) {
114 DBG((mod, LEVEL_1, "ignoring request OUT requirements for node %+F\n", irn));
118 DBG((mod, LEVEL_1, "get requirements at pos %d for %+F ... ", pos, irn));
/* For Projs, map the proj number onto the out slot of the producer. */
122 node_pos = ia32_translate_proj_pos(irn);
128 irn = my_skip_proj(irn);
130 DB((mod, LEVEL_1, "skipping Proj, going to %+F at pos %d ... ", irn, node_pos));
133 if (is_ia32_irn(irn)) {
/* In-requirements are indexed by pos, out-requirements by node_pos. */
135 irn_req = get_ia32_in_req(irn, pos);
138 irn_req = get_ia32_out_req(irn, node_pos);
141 DB((mod, LEVEL_1, "returning reqs for %+F at pos %d\n", irn, pos));
143 memcpy(req, &(irn_req->req), sizeof(*req));
/* Propagate same/different register constraints by resolving the operand index
   stored in the ia32 requirement into the actual node. */
145 if (arch_register_req_is(&(irn_req->req), should_be_same)) {
146 assert(irn_req->same_pos >= 0 && "should be same constraint for in -> out NYI");
147 req->other_same = get_irn_n(irn, irn_req->same_pos);
150 if (arch_register_req_is(&(irn_req->req), should_be_different)) {
151 assert(irn_req->different_pos >= 0 && "should be different constraint for in -> out NYI");
152 req->other_different = get_irn_n(irn, irn_req->different_pos);
156 /* treat Unknowns like Const with default requirements */
157 if (is_Unknown(irn)) {
158 DB((mod, LEVEL_1, "returning UKNWN reqs for %+F\n", irn));
159 if (mode_is_float(mode)) {
160 if (USE_SSE2(ops->cg))
161 memcpy(req, &(ia32_default_req_ia32_xmm_xmm_UKNWN), sizeof(*req));
163 memcpy(req, &(ia32_default_req_ia32_vfp_vfp_UKNWN), sizeof(*req));
165 else if (mode_is_int(mode) || mode_is_reference(mode))
166 memcpy(req, &(ia32_default_req_ia32_gp_gp_UKNWN), sizeof(*req));
167 else if (mode == mode_T || mode == mode_M) {
168 DBG((mod, LEVEL_1, "ignoring Unknown node %+F\n", irn));
172 assert(0 && "unsupported Unknown-Mode");
/* Non-ia32, non-Unknown nodes have no backend requirements. */
175 DB((mod, LEVEL_1, "returning NULL for %+F (not ia32)\n", irn));
/**
 * Records the register allocated for irn: ia32 nodes store it in their
 * slot array, all other nodes go into the cur_reg_set side table.
 */
183 static void ia32_set_irn_reg(const void *self, ir_node *irn, const arch_register_t *reg) {
185 const ia32_irn_ops_t *ops = self;
/* Control-flow values (mode_X) never get a register. */
187 if (get_irn_mode(irn) == mode_X) {
191 DBG((ops->cg->mod, LEVEL_1, "ia32 assigned register %s to node %+F\n", reg->name, irn));
/* A Proj addresses the out slot of its producer. */
194 pos = ia32_translate_proj_pos(irn);
195 irn = my_skip_proj(irn);
198 if (is_ia32_irn(irn)) {
199 const arch_register_t **slots;
201 slots = get_ia32_slots(irn);
/* Non-ia32 (e.g. be_) nodes: remember the register in the global set. */
205 ia32_set_firm_reg(irn, reg, cur_reg_set);
/**
 * Returns the register assigned to irn, mirroring ia32_set_irn_reg:
 * ia32 nodes read their slot array, others query cur_reg_set.
 */
209 static const arch_register_t *ia32_get_irn_reg(const void *self, const ir_node *irn) {
211 const arch_register_t *reg = NULL;
/* Control-flow values (mode_X) carry no register. */
215 if (get_irn_mode(irn) == mode_X) {
219 pos = ia32_translate_proj_pos(irn);
220 irn = my_skip_proj(irn);
223 if (is_ia32_irn(irn)) {
224 const arch_register_t **slots;
225 slots = get_ia32_slots(irn);
229 reg = ia32_get_firm_reg(irn, cur_reg_set);
/**
 * Classifies irn for the register allocator (branch/const/load/store/reload
 * flags are OR-ed together on top of the normal class).
 */
235 static arch_irn_class_t ia32_classify(const void *self, const ir_node *irn) {
236 arch_irn_class_t classification = arch_irn_class_normal;
238 irn = my_skip_proj(irn);
241 classification |= arch_irn_class_branch;
/* Non-ia32 nodes keep only the classification bits gathered so far. */
243 if (! is_ia32_irn(irn))
244 return classification & ~arch_irn_class_normal;
246 if (is_ia32_Cnst(irn))
247 classification |= arch_irn_class_const;
250 classification |= arch_irn_class_load;
252 if (is_ia32_St(irn) || is_ia32_Store8Bit(irn))
253 classification |= arch_irn_class_store;
/* Nodes turned into loads by the reload-to-AM optimization count as reloads. */
255 if (is_ia32_got_reload(irn))
256 classification |= arch_irn_class_reload;
258 return classification;
/**
 * Returns the backend flags of irn. Stack-pointer Projs of Push/Pop/AddSP
 * are reported as modifying the stack pointer; other ia32 nodes return
 * their stored flags, everything else is ignored by the allocator.
 */
261 static arch_irn_flags_t ia32_get_flags(const void *self, const ir_node *irn) {
264 ir_node *pred = get_Proj_pred(irn);
265 if(is_ia32_Push(pred) && get_Proj_proj(irn) == pn_ia32_Push_stack) {
266 return arch_irn_flags_modify_sp;
268 if(is_ia32_Pop(pred) && get_Proj_proj(irn) == pn_ia32_Pop_stack) {
269 return arch_irn_flags_modify_sp;
271 if(is_ia32_AddSP(pred) && get_Proj_proj(irn) == pn_ia32_AddSP_stack) {
272 return arch_irn_flags_modify_sp;
276 irn = my_skip_proj(irn);
277 if (is_ia32_irn(irn))
278 return get_ia32_flags(irn);
281 return arch_irn_flags_ignore;
/** Returns the frame entity of an ia32 node, NULL for all other nodes. */
286 static entity *ia32_get_frame_entity(const void *self, const ir_node *irn) {
287 return is_ia32_irn(irn) ? get_ia32_frame_ent(irn) : NULL;
/** Sets the frame entity of irn (callers ensure irn is an ia32 node). */
290 static void ia32_set_frame_entity(const void *self, ir_node *irn, entity *ent) {
291 set_ia32_frame_ent(irn, ent);
/**
 * Applies the stack bias to a frame-entity-using node by patching its
 * immediate (ia32_Normal) or its address-mode offset.
 */
294 static void ia32_set_frame_offset(const void *self, ir_node *irn, int bias) {
296 const ia32_irn_ops_t *ops = self;
298 if (get_ia32_frame_ent(irn)) {
299 ia32_am_flavour_t am_flav = get_ia32_am_flavour(irn);
301 /* Pop nodes modify the stack pointer before calculating the destination
302 * address, so fix this here
304 if(is_ia32_Pop(irn)) {
308 DBG((ops->cg->mod, LEVEL_1, "stack biased %+F with %d\n", irn, bias));
/* The offset is carried as a decimal string in the node attributes. */
310 snprintf(buf, sizeof(buf), "%d", bias);
312 if (get_ia32_op_type(irn) == ia32_Normal) {
313 set_ia32_cnst(irn, buf);
315 add_ia32_am_offs(irn, buf);
317 set_ia32_am_flavour(irn, am_flav);
/**
 * Returns the stack-pointer delta caused by irn. Stack Projs of Push
 * (proj 0) and Pop (proj 1) are the interesting cases; the returned
 * amounts are in the elided lines — confirm against the full file.
 */
322 static int ia32_get_sp_bias(const void *self, const ir_node *irn) {
324 int proj = get_Proj_proj(irn);
325 ir_node *pred = get_Proj_pred(irn);
327 if(is_ia32_Push(pred) && proj == 0)
329 if(is_ia32_Pop(pred) && proj == 1)
337 be_abi_call_flags_bits_t flags;
338 const arch_isa_t *isa;
339 const arch_env_t *aenv;
/**
 * Allocates and initializes the ABI callback environment for one irg
 * (stores the call flags and the ISA for later prologue/epilogue use).
 */
343 static void *ia32_abi_init(const be_abi_call_t *call, const arch_env_t *aenv, ir_graph *irg)
345 ia32_abi_env_t *env = xmalloc(sizeof(env[0]));
346 be_abi_call_flags_t fl = be_abi_call_get_flags(call);
347 env->flags = fl.bits;
350 env->isa = aenv->isa;
355 * Put all registers which are saved by the prologue/epilogue in a set.
357 * @param self The callback object.
358 * @param s The result set.
360 static void ia32_abi_dont_save_regs(void *self, pset *s)
362 ia32_abi_env_t *env = self;
/* When the frame pointer is omitted, ebp is a free register and must not
   be treated as callee-saved. */
363 if(env->flags.try_omit_fp)
364 pset_insert_ptr(s, env->isa->bp);
368 * Generate the routine prologue.
370 * @param self The callback object.
371 * @param mem A pointer to the mem node. Update this if you define new memory.
372 * @param reg_map A map mapping all callee_save/ignore/parameter registers to their defining nodes.
374 * @return The register which shall be used as a stack frame base.
376 * All nodes which define registers in @p reg_map must keep @p reg_map current.
378 static const arch_register_t *ia32_abi_prologue(void *self, ir_node **mem, pmap *reg_map)
380 ia32_abi_env_t *env = self;
382 if (! env->flags.try_omit_fp) {
383 ir_node *bl = get_irg_start_block(env->irg);
384 ir_node *curr_sp = be_abi_reg_map_get(reg_map, env->isa->sp);
385 ir_node *curr_bp = be_abi_reg_map_get(reg_map, env->isa->bp);
/* Emit "push ebp" to save the old frame pointer. */
389 push = new_rd_ia32_Push(NULL, env->irg, bl, curr_sp, curr_bp, *mem);
390 curr_sp = new_r_Proj(env->irg, bl, push, get_irn_mode(curr_sp), pn_ia32_Push_stack);
391 *mem = new_r_Proj(env->irg, bl, push, mode_M, pn_ia32_Push_M);
393 /* the push must have SP out register */
394 arch_set_irn_register(env->aenv, curr_sp, env->isa->sp);
395 set_ia32_flags(push, arch_irn_flags_ignore);
397 /* move esp to ebp */
398 curr_bp = be_new_Copy(env->isa->bp->reg_class, env->irg, bl, curr_sp);
399 be_set_constr_single_reg(curr_bp, BE_OUT_POS(0), env->isa->bp);
400 arch_set_irn_register(env->aenv, curr_bp, env->isa->bp);
401 be_node_set_flags(curr_bp, BE_OUT_POS(0), arch_irn_flags_ignore);
403 /* beware: the copy must be done before any other sp use */
404 curr_sp = be_new_CopyKeep_single(env->isa->sp->reg_class, env->irg, bl, curr_sp, curr_bp, get_irn_mode(curr_sp));
405 be_set_constr_single_reg(curr_sp, BE_OUT_POS(0), env->isa->sp);
406 arch_set_irn_register(env->aenv, curr_sp, env->isa->sp);
407 be_node_set_flags(curr_sp, BE_OUT_POS(0), arch_irn_flags_ignore);
409 be_abi_reg_map_set(reg_map, env->isa->sp, curr_sp);
410 be_abi_reg_map_set(reg_map, env->isa->bp, curr_bp);
419 * Generate the routine epilogue.
420 * @param self The callback object.
421 * @param bl The block for the epilog
422 * @param mem A pointer to the mem node. Update this if you define new memory.
423 * @param reg_map A map mapping all callee_save/ignore/parameter registers to their defining nodes.
424 * @return The register which shall be used as a stack frame base.
426 * All nodes which define registers in @p reg_map must keep @p reg_map current.
428 static void ia32_abi_epilogue(void *self, ir_node *bl, ir_node **mem, pmap *reg_map)
430 ia32_abi_env_t *env = self;
431 ir_node *curr_sp = be_abi_reg_map_get(reg_map, env->isa->sp);
432 ir_node *curr_bp = be_abi_reg_map_get(reg_map, env->isa->bp);
434 if (env->flags.try_omit_fp) {
435 /* simply remove the stack frame here */
436 curr_sp = be_new_IncSP(env->isa->sp, env->irg, bl, curr_sp, BE_STACK_FRAME_SIZE_SHRINK);
437 add_irn_dep(curr_sp, *mem);
440 const ia32_isa_t *isa = (ia32_isa_t *)env->isa;
441 ir_mode *mode_bp = env->isa->bp->reg_class->mode;
443 /* gcc always emits a leave at the end of a routine */
/* NOTE(review): the "1 ||" makes this condition always true, so the Leave
 * branch is taken unconditionally and the mov+pop variant below is dead.
 * Presumably intentional (see the gcc comment) — confirm before removing. */
444 if (1 || ARCH_AMD(isa->opt_arch)) {
448 leave = new_rd_ia32_Leave(NULL, env->irg, bl, curr_sp, curr_bp);
449 set_ia32_flags(leave, arch_irn_flags_ignore);
450 curr_bp = new_r_Proj(current_ir_graph, bl, leave, mode_bp, pn_ia32_Leave_frame);
451 curr_sp = new_r_Proj(current_ir_graph, bl, leave, get_irn_mode(curr_sp), pn_ia32_Leave_stack);
452 *mem = new_r_Proj(current_ir_graph, bl, leave, mode_M, pn_ia32_Leave_M);
457 /* copy ebp to esp */
458 curr_sp = be_new_SetSP(env->isa->sp, env->irg, bl, curr_sp, curr_bp, *mem);
/* ... then "pop ebp" to restore the caller's frame pointer. */
461 pop = new_rd_ia32_Pop(NULL, env->irg, bl, curr_sp, *mem);
462 set_ia32_flags(pop, arch_irn_flags_ignore);
463 curr_bp = new_r_Proj(current_ir_graph, bl, pop, mode_bp, pn_ia32_Pop_res);
464 curr_sp = new_r_Proj(current_ir_graph, bl, pop, get_irn_mode(curr_sp), pn_ia32_Pop_stack);
465 *mem = new_r_Proj(current_ir_graph, bl, pop, mode_M, pn_ia32_Pop_M);
467 arch_set_irn_register(env->aenv, curr_sp, env->isa->sp);
468 arch_set_irn_register(env->aenv, curr_bp, env->isa->bp);
471 be_abi_reg_map_set(reg_map, env->isa->sp, curr_sp);
472 be_abi_reg_map_set(reg_map, env->isa->bp, curr_bp);
476 * Produces the type which sits between the stack args and the locals on the stack.
477 * it will contain the return address and space to store the old base pointer.
478 * @return The Firm type modeling the ABI between type.
480 static ir_type *ia32_abi_get_between_type(void *self)
482 #define IDENT(s) new_id_from_chars(s, sizeof(s)-1)
/* Both types are built once and cached for all irgs (function-static). */
483 static ir_type *omit_fp_between_type = NULL;
484 static ir_type *between_type = NULL;
486 ia32_abi_env_t *env = self;
488 if ( !between_type) {
490 entity *ret_addr_ent;
491 entity *omit_fp_ret_addr_ent;
493 ir_type *old_bp_type = new_type_primitive(IDENT("bp"), mode_P);
494 ir_type *ret_addr_type = new_type_primitive(IDENT("return_addr"), mode_P);
/* With a frame pointer: [old ebp][return address]. */
496 between_type = new_type_struct(IDENT("ia32_between_type"));
497 old_bp_ent = new_entity(between_type, IDENT("old_bp"), old_bp_type);
498 ret_addr_ent = new_entity(between_type, IDENT("ret_addr"), ret_addr_type);
500 set_entity_offset_bytes(old_bp_ent, 0);
501 set_entity_offset_bytes(ret_addr_ent, get_type_size_bytes(old_bp_type));
502 set_type_size_bytes(between_type, get_type_size_bytes(old_bp_type) + get_type_size_bytes(ret_addr_type));
503 set_type_state(between_type, layout_fixed);
/* Frame pointer omitted: only the return address sits between args and locals. */
505 omit_fp_between_type = new_type_struct(IDENT("ia32_between_type_omit_fp"));
506 omit_fp_ret_addr_ent = new_entity(omit_fp_between_type, IDENT("ret_addr"), ret_addr_type);
508 set_entity_offset_bytes(omit_fp_ret_addr_ent, 0);
509 set_type_size_bytes(omit_fp_between_type, get_type_size_bytes(ret_addr_type));
510 set_type_state(omit_fp_between_type, layout_fixed);
513 return env->flags.try_omit_fp ? omit_fp_between_type : between_type;
518 * Get the estimated cycle count for @p irn.
520 * @param self The this pointer.
521 * @param irn The node.
523 * @return The estimated cycle count for this operation
525 static int ia32_get_op_estimated_cost(const void *self, const ir_node *irn)
528 ia32_op_type_t op_tp;
529 const ia32_irn_ops_t *ops = self;
534 assert(is_ia32_irn(irn));
536 cost = get_ia32_latency(irn);
537 op_tp = get_ia32_op_type(irn);
539 if (is_ia32_CopyB(irn)) {
541 if (ARCH_INTEL(ops->cg->arch))
544 else if (is_ia32_CopyB_i(irn)) {
545 int size = get_tarval_long(get_ia32_Immop_tarval(irn));
/* NOTE(review): (4/3) is integer division and evaluates to 1, so this is
 * effectively 20 + size. The intent was almost certainly (4.0/3) * size;
 * fix together with the elided lines of this function. */
546 cost = 20 + (int)ceil((4/3) * size);
547 if (ARCH_INTEL(ops->cg->arch))
550 /* in case of address mode operations add additional cycles */
551 else if (op_tp == ia32_AddrModeD || op_tp == ia32_AddrModeS) {
553 In case of stack access add 5 cycles (we assume stack is in cache),
554 other memory operations cost 20 cycles.
556 cost += is_ia32_use_frame(irn) ? 5 : 20;
563 * Returns the inverse operation if @p irn, recalculating the argument at position @p i.
565 * @param irn The original operation
566 * @param i Index of the argument we want the inverse operation to yield
567 * @param inverse struct to be filled with the resulting inverse op
568 * @param obstack The obstack to use for allocation of the returned nodes array
569 * @return The inverse operation or NULL if the operation is not invertible
571 static arch_inverse_t *ia32_get_inverse(const void *self, const ir_node *irn, int i, arch_inverse_t *inverse, struct obstack *obst) {
574 ir_node *block, *noreg, *nomem;
577 /* we cannot invert non-ia32 irns */
578 if (! is_ia32_irn(irn))
581 /* operand must always be a real operand (not base, index or mem) */
582 if (i != 2 && i != 3)
585 /* we don't invert address mode operations */
586 if (get_ia32_op_type(irn) != ia32_Normal)
589 irg = get_irn_irg(irn);
590 block = get_nodes_block(irn);
591 mode = get_ia32_res_mode(irn);
592 noreg = get_irn_n(irn, 0);
593 nomem = new_r_NoMem(irg);
595 /* initialize structure */
596 inverse->nodes = obstack_alloc(obst, 2 * sizeof(inverse->nodes[0]));
600 switch (get_ia32_irn_opcode(irn)) {
602 if (get_ia32_immop_type(irn) == ia32_ImmConst) {
603 /* we have an add with a const here */
604 /* inverse == add with negated const */
605 inverse->nodes[0] = new_rd_ia32_Add(NULL, irg, block, noreg, noreg, get_irn_n(irn, i), noreg, nomem);
606 pnc = pn_ia32_Add_res;
608 copy_ia32_Immop_attr(inverse->nodes[0], (ir_node *)irn);
609 set_ia32_Immop_tarval(inverse->nodes[0], tarval_neg(get_ia32_Immop_tarval(irn)));
610 set_ia32_commutative(inverse->nodes[0]);
612 else if (get_ia32_immop_type(irn) == ia32_ImmSymConst) {
613 /* we have an add with a symconst here */
614 /* inverse == sub with const */
615 inverse->nodes[0] = new_rd_ia32_Sub(NULL, irg, block, noreg, noreg, get_irn_n(irn, i), noreg, nomem);
616 pnc = pn_ia32_Sub_res;
618 copy_ia32_Immop_attr(inverse->nodes[0], (ir_node *)irn);
621 /* normal add: inverse == sub */
/* Reconstruct the operand as (result - other operand); i ^ 1 flips
   between the two real operand positions 2 and 3. */
622 ir_node *proj = ia32_get_res_proj(irn);
625 inverse->nodes[0] = new_rd_ia32_Sub(NULL, irg, block, noreg, noreg, proj, get_irn_n(irn, i ^ 1), nomem);
626 pnc = pn_ia32_Sub_res;
631 if (get_ia32_immop_type(irn) != ia32_ImmNone) {
632 /* we have a sub with a const/symconst here */
633 /* inverse == add with this const */
634 inverse->nodes[0] = new_rd_ia32_Add(NULL, irg, block, noreg, noreg, get_irn_n(irn, i), noreg, nomem);
635 pnc = pn_ia32_Add_res;
636 inverse->costs += (get_ia32_immop_type(irn) == ia32_ImmSymConst) ? 5 : 1;
637 copy_ia32_Immop_attr(inverse->nodes[0], (ir_node *)irn);
/* normal sub: minuend = result + subtrahend, subtrahend = minuend - result */
641 ir_node *proj = ia32_get_res_proj(irn);
645 inverse->nodes[0] = new_rd_ia32_Add(NULL, irg, block, noreg, noreg, proj, get_irn_n(irn, 3), nomem);
648 inverse->nodes[0] = new_rd_ia32_Sub(NULL, irg, block, noreg, noreg, get_irn_n(irn, 2), proj, nomem);
650 pnc = pn_ia32_Sub_res;
655 if (get_ia32_immop_type(irn) != ia32_ImmNone) {
656 /* xor with const: inverse = xor */
657 inverse->nodes[0] = new_rd_ia32_Eor(NULL, irg, block, noreg, noreg, get_irn_n(irn, i), noreg, nomem);
658 pnc = pn_ia32_Eor_res;
659 inverse->costs += (get_ia32_immop_type(irn) == ia32_ImmSymConst) ? 5 : 1;
660 copy_ia32_Immop_attr(inverse->nodes[0], (ir_node *)irn);
/* plain xor is its own inverse: operand = result ^ other operand */
664 inverse->nodes[0] = new_rd_ia32_Eor(NULL, irg, block, noreg, noreg, (ir_node *)irn, get_irn_n(irn, i), nomem);
665 pnc = pn_ia32_Eor_res;
/* Not and Minus are self-inverse: apply them again to the result. */
670 ir_node *proj = ia32_get_res_proj(irn);
673 inverse->nodes[0] = new_rd_ia32_Not(NULL, irg, block, noreg, noreg, proj, nomem);
674 pnc = pn_ia32_Not_res;
678 case iro_ia32_Minus: {
679 ir_node *proj = ia32_get_res_proj(irn);
682 inverse->nodes[0] = new_rd_ia32_Minus(NULL, irg, block, noreg, noreg, proj, nomem);
683 pnc = pn_ia32_Minus_res;
688 /* inverse operation not supported */
692 set_ia32_res_mode(inverse->nodes[0], mode);
693 inverse->nodes[1] = new_r_Proj(irg, block, inverse->nodes[0], mode, pnc);
699 * Check if irn can load its operand at position i from memory (source addressmode).
700 * @param self Pointer to irn ops itself
701 * @param irn The irn to be checked
702 * @param i The operands position
703 * @return Non-Zero if operand can be loaded
705 static int ia32_possible_memory_operand(const void *self, const ir_node *irn, unsigned int i) {
706 if (! is_ia32_irn(irn) || /* must be an ia32 irn */
707 get_irn_arity(irn) != 5 || /* must be a binary operation */
708 get_ia32_op_type(irn) != ia32_Normal || /* must not already be a addressmode irn */
709 ! (get_ia32_am_support(irn) & ia32_am_Source) || /* must be capable of source addressmode */
710 (i != 2 && i != 3) || /* a "real" operand position must be requested */
711 (i == 2 && ! is_ia32_commutative(irn)) || /* if first operand requested irn must be commutative */
712 is_ia32_use_frame(irn)) /* must not already use frame */
/**
 * Folds the reload of operand i into irn by switching it to source
 * address mode reading from the spill slot's frame entity.
 */
718 static void ia32_perform_memory_operand(const void *self, ir_node *irn, ir_node *spill, unsigned int i) {
719 assert(ia32_possible_memory_operand(self, irn, i) && "Cannot perform memory operand change");
/* If operand 2 was requested, swap the (commutative) operands so the
   memory operand ends up in position 3. */
722 ir_node *tmp = get_irn_n(irn, 3);
723 set_irn_n(irn, 3, get_irn_n(irn, 2));
724 set_irn_n(irn, 2, tmp);
727 set_ia32_am_support(irn, ia32_am_Source);
728 set_ia32_op_type(irn, ia32_AddrModeS);
729 set_ia32_am_flavour(irn, ia32_B);
730 set_ia32_ls_mode(irn, get_irn_mode(get_irn_n(irn, i)))
731 //TODO this will fail, if spill is a PhiM (give PhiMs entities?)
732 set_ia32_frame_ent(irn, be_get_frame_entity(spill));
733 set_ia32_use_frame(irn);
734 set_ia32_got_reload(irn);
/* base = frame pointer, mem input = the spill value */
736 set_irn_n(irn, 0, get_irg_frame(get_irn_irg(irn)));
737 set_irn_n(irn, 4, spill);
740 Input at position one is index register, which is NoReg.
741 We would need cg object to get a real noreg, but we cannot
744 set_irn_n(irn, 3, get_irn_n(irn, 1));
746 //FIXME DBG_OPT_AM_S(reload, irn);
/* ABI callback table handed to the generic backend (entries partially
   elided in this excerpt). */
749 static const be_abi_callbacks_t ia32_abi_callbacks = {
752 ia32_abi_get_between_type,
753 ia32_abi_dont_save_regs,
758 /* fill register allocator interface */
760 static const arch_irn_ops_if_t ia32_irn_ops_if = {
761 ia32_get_irn_reg_req,
766 ia32_get_frame_entity,
767 ia32_set_frame_entity,
768 ia32_set_frame_offset,
771 ia32_get_op_estimated_cost,
772 ia32_possible_memory_operand,
773 ia32_perform_memory_operand,
776 ia32_irn_ops_t ia32_irn_ops = {
783 /**************************************************
786 * ___ ___ __| | ___ __ _ ___ _ __ _| |_
787 * / __/ _ \ / _` |/ _ \/ _` |/ _ \ '_ \ | | _|
788 * | (_| (_) | (_| | __/ (_| | __/ | | | | | |
789 * \___\___/ \__,_|\___|\__, |\___|_| |_| |_|_|
792 **************************************************/
/**
 * Removes superfluous Conv nodes collected in cg->kill_conv by rerouting
 * their users directly to the Conv's value input.
 */
794 static void ia32_kill_convs(ia32_code_gen_t *cg) {
797 /* BEWARE: the Projs are inserted in the set */
798 foreach_nodeset(cg->kill_conv, irn) {
799 ir_node *in = get_irn_n(get_Proj_pred(irn), 2);
800 edges_reroute(irn, in, cg->birg->irg);
805 * Transform the Thread Local Store base.
807 static void transform_tls(ir_graph *irg) {
808 ir_node *irn = get_irg_tls(irg);
811 dbg_info *dbg = get_irn_dbg_info(irn);
812 ir_node *blk = get_nodes_block(irn);
/* Replace the generic TLS base with the ia32 LdTls (segment-register load). */
814 newn = new_rd_ia32_LdTls(dbg, irg, blk, get_irn_mode(irn));
821 * Transforms the standard firm graph into
824 static void ia32_prepare_graph(void *self) {
825 ia32_code_gen_t *cg = self;
826 dom_front_info_t *dom;
827 DEBUG_ONLY(firm_dbg_module_t *old_mod = cg->mod;)
829 FIRM_DBG_REGISTER(cg->mod, "firm.be.ia32.transform");
831 /* 1st: transform constants and psi condition trees */
832 ia32_pre_transform_phase(cg);
834 /* 2nd: transform all remaining nodes */
835 ia32_register_transformers();
836 dom = be_compute_dominance_frontiers(cg->irg);
/* kill_conv collects Convs made superfluous by the transformation walk. */
838 cg->kill_conv = new_nodeset(5);
839 transform_tls(cg->irg);
840 irg_walk_blkwise_graph(cg->irg, NULL, ia32_transform_node, cg);
842 del_nodeset(cg->kill_conv);
844 be_free_dominance_frontiers(dom);
847 be_dump(cg->irg, "-transformed", dump_ir_block_graph_sched);
849 /* 3rd: optimize address mode */
850 FIRM_DBG_REGISTER(cg->mod, "firm.be.ia32.am");
851 ia32_optimize_addressmode(cg);
854 be_dump(cg->irg, "-am", dump_ir_block_graph_sched);
/* restore the debug module changed above */
856 DEBUG_ONLY(cg->mod = old_mod;)
860 * Dummy functions for hooks we don't need but which must be filled.
862 static void ia32_before_sched(void *self) {
/**
 * Recursively disconnects irn (and any predecessor that becomes userless)
 * from the graph by rewiring its inputs to Bad, so dead loads and their
 * address computations disappear. Memory edges are deliberately not
 * followed so stores stay alive.
 */
865 static void remove_unused_nodes(ir_node *irn, bitset_t *already_visited) {
873 mode = get_irn_mode(irn);
875 /* check if we already saw this node or the node has more than one user */
876 if (bitset_contains_irn(already_visited, irn) || get_irn_n_edges(irn) > 1)
879 /* mark irn visited */
880 bitset_add_irn(already_visited, irn);
882 /* non-Tuple nodes with one user: ok, return */
883 if (get_irn_n_edges(irn) >= 1 && mode != mode_T)
886 /* tuple node has one user which is not the mem proj-> ok */
887 if (mode == mode_T && get_irn_n_edges(irn) == 1) {
888 mem_proj = ia32_get_proj_for_mode(irn, mode_M);
893 for (i = get_irn_arity(irn) - 1; i >= 0; i--) {
894 ir_node *pred = get_irn_n(irn, i);
896 /* do not follow memory edges or we will accidentally remove stores */
897 if (is_Proj(pred) && get_irn_mode(pred) == mode_M)
900 set_irn_n(irn, i, new_Bad());
903 The current node is about to be removed: if the predecessor
904 has only this node as user, it need to be removed as well.
906 if (get_irn_n_edges(pred) <= 1)
907 remove_unused_nodes(pred, already_visited);
/* a scheduled node: also detach its remaining inputs */
910 if (sched_is_scheduled(irn)) {
911 set_irn_n(irn, 0, new_Bad());
912 set_irn_n(irn, 1, new_Bad());
913 set_irn_n(irn, 2, new_Bad());
/** Walker: start dead-node removal at every not-yet-visited ia32 Load. */
918 static void remove_unused_loads_walker(ir_node *irn, void *env) {
919 bitset_t *already_visited = env;
920 if (is_ia32_Ld(irn) && ! bitset_contains_irn(already_visited, irn))
921 remove_unused_nodes(irn, env);
925 * Called before the register allocator.
926 * Calculate a block schedule here. We need it for the x87
927 * simulator and the emitter.
929 static void ia32_before_ra(void *self) {
930 ia32_code_gen_t *cg = self;
931 bitset_t *already_visited = bitset_irg_malloc(cg->irg);
935 There are sometimes unused loads, only pinned by memory.
936 We need to remove those Loads and all other nodes which won't be used
937 after removing the Load from schedule.
939 irg_walk_graph(cg->irg, remove_unused_loads_walker, NULL, already_visited);
940 bitset_free(already_visited);
945 * Transforms a be node into a Load.
947 static void transform_to_Load(ia32_transform_env_t *env) {
948 ir_node *irn = env->irn;
949 entity *ent = be_get_frame_entity(irn);
950 ir_mode *mode = env->mode;
951 ir_node *noreg = ia32_new_NoReg_gp(env->cg);
952 ir_node *nomem = new_rd_NoMem(env->irg);
953 ir_node *sched_point = NULL;
954 ir_node *ptr = get_irn_n(irn, 0);
955 ir_node *mem = be_is_Reload(irn) ? get_irn_n(irn, 1) : nomem;
956 ir_node *new_op, *proj;
957 const arch_register_t *reg;
/* remember where to splice the new Load into the schedule */
959 if (sched_is_scheduled(irn)) {
960 sched_point = sched_prev(irn);
/* pick the load flavour: SSE2 xLoad, x87 vfld, or integer Load */
963 if (mode_is_float(mode)) {
964 if (USE_SSE2(env->cg))
965 new_op = new_rd_ia32_xLoad(env->dbg, env->irg, env->block, ptr, noreg, mem);
967 new_op = new_rd_ia32_vfld(env->dbg, env->irg, env->block, ptr, noreg, mem);
970 new_op = new_rd_ia32_Load(env->dbg, env->irg, env->block, ptr, noreg, mem);
/* the Load reads from the spill slot's frame entity */
973 set_ia32_am_support(new_op, ia32_am_Source);
974 set_ia32_op_type(new_op, ia32_AddrModeS);
975 set_ia32_am_flavour(new_op, ia32_B);
976 set_ia32_ls_mode(new_op, mode);
977 set_ia32_frame_ent(new_op, ent);
978 set_ia32_use_frame(new_op);
980 DBG_OPT_RELOAD2LD(irn, new_op);
982 proj = new_rd_Proj(env->dbg, env->irg, env->block, new_op, mode, pn_Load_res);
985 sched_add_after(sched_point, new_op);
986 sched_add_after(new_op, proj);
991 /* copy the register from the old node to the new Load */
992 reg = arch_get_irn_register(env->cg->arch_env, irn);
993 arch_set_irn_register(env->cg->arch_env, new_op, reg);
995 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env->cg, irn));
1001 * Transforms a be node into a Store.
1003 static void transform_to_Store(ia32_transform_env_t *env) {
1004 ir_node *irn = env->irn;
1005 entity *ent = be_get_frame_entity(irn);
1006 ir_mode *mode = env->mode;
1007 ir_node *noreg = ia32_new_NoReg_gp(env->cg);
1008 ir_node *nomem = new_rd_NoMem(env->irg);
1009 ir_node *ptr = get_irn_n(irn, 0);
1010 ir_node *val = get_irn_n(irn, 1);
1011 ir_node *new_op, *proj;
1012 ir_node *sched_point = NULL;
1014 if (sched_is_scheduled(irn)) {
1015 sched_point = sched_prev(irn);
/* pick the store flavour: SSE2 xStore, x87 vfst, 8-bit or full Store */
1018 if (mode_is_float(mode)) {
1019 if (USE_SSE2(env->cg))
1020 new_op = new_rd_ia32_xStore(env->dbg, env->irg, env->block, ptr, noreg, val, nomem);
1022 new_op = new_rd_ia32_vfst(env->dbg, env->irg, env->block, ptr, noreg, val, nomem);
1024 else if (get_mode_size_bits(mode) == 8) {
1025 new_op = new_rd_ia32_Store8Bit(env->dbg, env->irg, env->block, ptr, noreg, val, nomem);
1028 new_op = new_rd_ia32_Store(env->dbg, env->irg, env->block, ptr, noreg, val, nomem);
/* the Store writes to the spill slot's frame entity */
1031 set_ia32_am_support(new_op, ia32_am_Dest);
1032 set_ia32_op_type(new_op, ia32_AddrModeD);
1033 set_ia32_am_flavour(new_op, ia32_B);
1034 set_ia32_ls_mode(new_op, mode);
1035 set_ia32_frame_ent(new_op, ent);
1036 set_ia32_use_frame(new_op);
1038 DBG_OPT_SPILL2ST(irn, new_op);
1040 proj = new_rd_Proj(env->dbg, env->irg, env->block, new_op, mode_M, pn_ia32_Store_M);
1043 sched_add_after(sched_point, new_op);
1047 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env->cg, irn));
/* replace the Spill by the memory Proj of the new Store */
1049 exchange(irn, proj);
/**
 * Builds an ia32 Push reading a 32-bit word from frame entity ent
 * (plus optional string offset) and schedules it before schedpoint.
 * Used by transform_MemPerm.
 */
1052 static ir_node *create_push(ia32_transform_env_t *env, ir_node *schedpoint, ir_node *sp, ir_node *mem, entity *ent, const char *offset) {
1053 ir_node *noreg = ia32_new_NoReg_gp(env->cg);
1055 ir_node *push = new_rd_ia32_Push(env->dbg, env->irg, env->block, sp, noreg, mem);
1057 set_ia32_frame_ent(push, ent);
1058 set_ia32_use_frame(push);
1059 set_ia32_op_type(push, ia32_AddrModeS);
1060 set_ia32_am_flavour(push, ia32_B);
1061 set_ia32_ls_mode(push, mode_Is);
1063 add_ia32_am_offs(push, offset);
1065 sched_add_before(schedpoint, push);
/**
 * Builds an ia32 Pop writing a 32-bit word to frame entity ent
 * (plus optional string offset) and schedules it before schedpoint.
 * Counterpart of create_push for transform_MemPerm.
 */
1069 static ir_node *create_pop(ia32_transform_env_t *env, ir_node *schedpoint, ir_node *sp, entity *ent, const char *offset) {
1070 ir_node *pop = new_rd_ia32_Pop(env->dbg, env->irg, env->block, sp, new_NoMem());
1072 set_ia32_frame_ent(pop, ent);
1073 set_ia32_use_frame(pop);
1074 set_ia32_op_type(pop, ia32_AddrModeD);
1075 set_ia32_am_flavour(pop, ia32_B);
1076 set_ia32_ls_mode(pop, mode_Is);
1078 add_ia32_am_offs(pop, offset);
1080 sched_add_before(schedpoint, pop);
/**
 * Creates the stack-pointer Proj (out pos) of pred, gives it the same
 * register as oldsp and schedules it before schedpoint.
 */
1085 static ir_node* create_spproj(ia32_transform_env_t *env, ir_node *pred, int pos, ir_node *schedpoint, const ir_node *oldsp) {
1086 ir_mode *spmode = get_irn_mode(oldsp);
1087 const arch_register_t *spreg = arch_get_irn_register(env->cg->arch_env, oldsp);
1090 sp = new_rd_Proj(env->dbg, env->irg, env->block, pred, spmode, pos);
1091 arch_set_irn_register(env->cg->arch_env, sp, spreg);
1092 sched_add_before(schedpoint, sp);
1097 static void transform_MemPerm(ia32_transform_env_t *env) {
1099 * Transform memperm, currently we do this the ugly way and produce
1100 * push/pop into/from memory cascades. This is possible without using
1103 ir_node *node = env->irn;
1105 ir_node *sp = get_irn_n(node, 0);
1106 const ir_edge_t *edge;
1107 const ir_edge_t *next;
1110 arity = be_get_MemPerm_entity_arity(node);
1111 pops = alloca(arity * sizeof(pops[0]));
/* Phase 1: push every in-entity onto the stack (two pushes for 64 bit). */
1114 for(i = 0; i < arity; ++i) {
1115 entity *ent = be_get_MemPerm_in_entity(node, i);
1116 ir_type *enttype = get_entity_type(ent);
1117 int entbits = get_type_size_bits(enttype);
1118 ir_node *mem = get_irn_n(node, i + 1);
1121 assert( (entbits == 32 || entbits == 64) && "spillslot on x86 should be 32 or 64 bit");
1123 push = create_push(env, node, sp, mem, ent, NULL);
1124 sp = create_spproj(env, push, 0, node, sp);
1126 // add another push after the first one
1127 push = create_push(env, node, sp, mem, ent, "4");
1128 sp = create_spproj(env, push, 0, node, sp);
1131 set_irn_n(node, i, new_Bad());
/* Phase 2: pop into the out-entities in reverse order (stack discipline). */
1135 for(i = arity - 1; i >= 0; --i) {
1136 entity *ent = be_get_MemPerm_out_entity(node, i);
1137 ir_type *enttype = get_entity_type(ent);
1138 int entbits = get_type_size_bits(enttype);
1142 assert( (entbits == 32 || entbits == 64) && "spillslot on x86 should be 32 or 64 bit");
1144 pop = create_pop(env, node, sp, ent, NULL);
1146 // add another pop after the first one
1147 sp = create_spproj(env, pop, 1, node, sp);
1148 pop = create_pop(env, node, sp, ent, "4");
1151 sp = create_spproj(env, pop, 1, node, sp);
1157 // exchange memprojs
1158 foreach_out_edge_safe(node, edge, next) {
1159 ir_node *proj = get_edge_src_irn(edge);
1160 int p = get_Proj_proj(proj);
1164 set_Proj_pred(proj, pops[p]);
1165 set_Proj_proj(proj, 3);
/* finally detach the now-dead MemPerm from all its inputs */
1169 arity = get_irn_arity(node);
1170 for(i = 0; i < arity; ++i) {
1171 set_irn_n(node, i, new_Bad());
1177 * Fix the mode of Spill/Reload
1179 static ir_mode *fix_spill_mode(ia32_code_gen_t *cg, ir_mode *mode)
/* For floats the spill mode presumably depends on SSE2 vs x87 — the
   relevant lines are elided here; confirm against the full file. */
1181 if (mode_is_float(mode)) {
1193 * Block-Walker: Calls the transform functions Spill and Reload.
1195 static void ia32_after_ra_walker(ir_node *block, void *env) {
1196 ir_node *node, *prev;
1197 ia32_code_gen_t *cg = env;
1198 ia32_transform_env_t tenv;
1201 tenv.irg = current_ir_graph;
1203 DEBUG_ONLY(tenv.mod = cg->mod;)
1205 /* beware: the schedule is changed here */
/* walk backwards so 'prev' stays valid while nodes are replaced */
1206 for (node = sched_last(block); !sched_is_begin(node); node = prev) {
1207 prev = sched_prev(node);
1208 if (be_is_Reload(node)) {
1209 /* we always reload the whole register */
1210 tenv.dbg = get_irn_dbg_info(node);
1212 tenv.mode = fix_spill_mode(cg, get_irn_mode(node));
1213 transform_to_Load(&tenv);
1215 else if (be_is_Spill(node)) {
1216 ir_node *spillval = get_irn_n(node, be_pos_Spill_val);
1217 /* we always spill the whole register */
1218 tenv.dbg = get_irn_dbg_info(node);
1220 tenv.mode = fix_spill_mode(cg, get_irn_mode(spillval));
1221 transform_to_Store(&tenv);
1223 else if(be_is_MemPerm(node)) {
1224 tenv.dbg = get_irn_dbg_info(node);
1226 transform_MemPerm(&tenv);
1232 * We transform Spill and Reload here. This needs to be done before
1233 * stack biasing otherwise we would miss the corrected offset for these nodes.
1235 * If x87 instruction should be emitted, run the x87 simulator and patch
1236 * the virtual instructions. This must obviously be done after register allocation.
/* After-register-allocation hook of the code generator interface.
 * @param self  the ia32_code_gen_t (passed as void* per the be interface) */
1238 static void ia32_after_ra(void *self) {
1239 	ia32_code_gen_t *cg = self;
1240 	ir_graph *irg = cg->irg;
	/* lower Spill/Reload/MemPerm in every block, then run the ia32
	 * finishing pass (register constraints, AM fixups, ...) */
1242 	irg_block_walk_graph(irg, NULL, ia32_after_ra_walker, cg);
1244 	ia32_finish_irg(irg, cg);
1248 * Last touchups for the graph before emit
/* Finishing hook: cleans empty blocks, computes the block schedule and,
 * when x87 code is required, replaces the virtual FP instructions with
 * real stack-register ones.
 * @param self  the ia32_code_gen_t */
1250 static void ia32_finish(void *self) {
1251 	ia32_code_gen_t *cg = self;
1252 	ir_graph *irg = cg->irg;
1254 	// Matze: disabled for now, as the irextbb algo sometimes returns extbb in
1255 	// the wrong order if the graph has critical edges
	/* NOTE(review): the comment above says "disabled", yet the call below
	 * is active -- either the comment is stale or the call was re-enabled;
	 * confirm which is intended. */
1256 	be_remove_empty_blocks(irg);
1258 	cg->blk_sched = sched_create_block_schedule(cg->irg, cg->birg->execfreqs);
1260 	/* if we do x87 code generation, rewrite all the virtual instructions and registers */
1261 	if (cg->used_fp == fp_x87 || cg->force_sim) {
1262 		x87_simulate_graph(cg->arch_env, irg, cg->blk_sched);
	/* peephole pass runs last so it sees the final instruction selection */
1265 	ia32_peephole_optimization(irg, cg);
1269 * Emits the code, closes the output file and frees
1270 * the code generator interface.
/* Final hook of the code generator: emit assembly for the irg, then tear
 * down the per-irg code generator state.
 * @param self  the ia32_code_gen_t; freed (partially visible) afterwards */
1272 static void ia32_codegen(void *self) {
1273 	ia32_code_gen_t *cg = self;
1274 	ir_graph *irg = cg->irg;
	/* write the routine to the isa's output stream */
1276 	ia32_gen_routine(cg->isa->out, irg, cg);
1280 	/* remove it from the isa */
1283 	/* de-allocate code generator */
1284 	del_set(cg->reg_set);
/* forward declaration: the vtable below needs the constructor */
1288 static void *ia32_cg_init(const be_irg_t *birg);
/* Code generator interface vtable: maps the generic backend hooks onto the
 * ia32 implementations above (some slots elided in this view). */
1290 static const arch_code_generator_if_t ia32_code_gen_if = {
1292 	NULL,                /* before abi introduce hook */
1294 	ia32_before_sched,   /* before scheduling hook */
1295 	ia32_before_ra,      /* before register allocation hook */
1296 	ia32_after_ra,       /* after register allocation hook */
1297 	ia32_finish,         /* called before codegen */
1298 	ia32_codegen         /* emit && done */
1302 * Initializes a IA32 code generator.
/* Allocates and initialises the per-irg ia32_code_gen_t.
 * @param birg  the backend irg to generate code for
 * @return the new code generator, cast to arch_code_generator_t* */
1304 static void *ia32_cg_init(const be_irg_t *birg) {
1305 	ia32_isa_t      *isa = (ia32_isa_t *)birg->main_env->arch_env->isa;
1306 	ia32_code_gen_t *cg  = xcalloc(1, sizeof(*cg));
1308 	cg->impl      = &ia32_code_gen_if;
1309 	cg->irg       = birg->irg;
1310 	cg->reg_set   = new_set(ia32_cmp_irn_reg_assoc, 1024);
1311 	cg->arch_env  = birg->main_env->arch_env;
1314 	cg->blk_sched = NULL;
1315 	cg->fp_to_gp  = NULL;
1316 	cg->gp_to_fp  = NULL;
	/* fp_kind was decided once in ia32_init() from the -march selection */
1317 	cg->fp_kind   = isa->fp_kind;
1318 	cg->used_fp   = fp_none;
1319 	cg->dump      = (birg->main_env->options->dump_flags & DUMP_BE) ? 1 : 0;
1321 	FIRM_DBG_REGISTER(cg->mod, "firm.be.ia32.cg");
1323 	/* copy optimizations from isa for easier access */
1325 	cg->arch     = isa->arch;
1326 	cg->opt_arch = isa->opt_arch;
	/* the name obstack holds per-irg node names; recycle it between irgs
	 * so memory does not grow with the number of compiled graphs */
1332 	if (isa->name_obst_size) {
1333 		//printf("freed %d bytes from name obst\n", isa->name_obst_size);
1334 		isa->name_obst_size = 0;
1335 		obstack_free(isa->name_obst, NULL);
1336 		obstack_init(isa->name_obst);
	/* NOTE(review): cur_reg_set and ia32_irn_ops.cg are module-level state;
	 * this makes code generation non-reentrant -- only one cg at a time. */
1340 	cur_reg_set = cg->reg_set;
1342 	ia32_irn_ops.cg = cg;
1344 	return (arch_code_generator_t *)cg;
1349 /*****************************************************************
1350 * ____ _ _ _____ _____
1351 * | _ \ | | | | |_ _|/ ____| /\
1352 * | |_) | __ _ ___| | _____ _ __ __| | | | | (___ / \
1353 * | _ < / _` |/ __| |/ / _ \ '_ \ / _` | | | \___ \ / /\ \
1354 * | |_) | (_| | (__| < __/ | | | (_| | _| |_ ____) / ____ \
1355 * |____/ \__,_|\___|_|\_\___|_| |_|\__,_| |_____|_____/_/ \_\
1357 *****************************************************************/
1360 * Set output modes for GCC
/* tarval printer hooks: render integer tarvals with C semantics so the
 * emitted assembly matches what GCC/GAS expect (details elided here). */
1362 static const tarval_mode_info mo_integer = {
1369 * set the tarval output mode to C-semantics
/* Installs mo_integer as the printer for every integer-like mode. */
1371 static void set_tarval_output_modes(void)
1373 	set_tarval_mode_output_option(get_modeLs(), &mo_integer);
1374 	set_tarval_mode_output_option(get_modeLu(), &mo_integer);
1375 	set_tarval_mode_output_option(get_modeIs(), &mo_integer);
1376 	set_tarval_mode_output_option(get_modeIu(), &mo_integer);
1377 	set_tarval_mode_output_option(get_modeHs(), &mo_integer);
1378 	set_tarval_mode_output_option(get_modeHu(), &mo_integer);
1379 	set_tarval_mode_output_option(get_modeBs(), &mo_integer);
1380 	set_tarval_mode_output_option(get_modeBu(), &mo_integer);
1381 	set_tarval_mode_output_option(get_modeC(), &mo_integer);
1382 	set_tarval_mode_output_option(get_modeU(), &mo_integer);
	/* NOTE(review): get_modeIu() was already handled above (line 1376);
	 * this second call is redundant and can be removed. */
1383 	set_tarval_mode_output_option(get_modeIu(), &mo_integer);
1388 * The template that generates a new ISA object.
1389 * Note that this template can be changed by command line
/* ...arguments: the libcore option variables below point directly into
 * this struct (arch, opt_arch, fp_kind, opt), so ia32_init() must copy it
 * AFTER option parsing. */
1392 static ia32_isa_t ia32_isa_template = {
1394 	&ia32_isa_if,            /* isa interface implementation */
1395 	&ia32_gp_regs[REG_ESP],  /* stack pointer register */
1396 	&ia32_gp_regs[REG_EBP],  /* base pointer register */
1397 	-1,                      /* stack direction */
1398 	NULL,                    /* main environment */
1400 	NULL,                    /* 16bit register names */
1401 	NULL,                    /* 8bit register names */
	/* default optimisation mask: everything on */
1405 	IA32_OPT_INCDEC    |     /* optimize add 1, sub 1 into inc/dec          default: on */
1406 	IA32_OPT_DOAM      |     /* optimize address mode                       default: on */
1407 	IA32_OPT_LEA       |     /* optimize for LEAs                           default: on */
1408 	IA32_OPT_PLACECNST |     /* place constants immediately before instructions, default: on */
1409 	IA32_OPT_IMMOPS    |     /* operations can use immediates,              default: on */
1410 	IA32_OPT_EXTBB),         /* use extended basic block scheduling,        default: on */
1411 	arch_pentium_4,          /* instruction architecture */
1412 	arch_pentium_4,          /* optimize for architecture */
1413 	fp_sse2,                 /* use sse2 unit */
1414 	NULL,                    /* current code generator */
1415 	NULL,                    /* output file */
1417 	NULL,                    /* name obstack */
1418 	0                        /* name obst size */
1423 * Initializes the backend ISA.
/* Creates the singleton ia32 ISA object from the (option-patched) template,
 * registers opcodes/registers, sanitises the arch/fp-unit selection and
 * prepares the output stream.
 * @param file_handle  assembly output stream, stored in isa->out */
1425 static void *ia32_init(FILE *file_handle) {
	/* `inited` guards against double initialisation (check itself elided) */
1426 	static int inited = 0;
1432 	set_tarval_output_modes();
1434 	isa = xmalloc(sizeof(*isa));
1435 	memcpy(isa, &ia32_isa_template, sizeof(*isa));
1437 	ia32_register_init(isa);
1438 	ia32_create_opcodes();
	/* SSE2 only exists from Pentium 4 (Intel) / Athlon (AMD) on; fall back
	 * to x87 for older CPUs regardless of the requested fp unit */
1440 	if ((ARCH_INTEL(isa->arch) && isa->arch < arch_pentium_4) ||
1441 	    (ARCH_AMD(isa->arch) && isa->arch < arch_athlon))
1442 		/* no SSE2 for these CPUs */
1443 		isa->fp_kind = fp_x87;
1445 	if (ARCH_INTEL(isa->opt_arch) && isa->opt_arch >= arch_pentium_4) {
1446 		/* Pentium 4 and later dislike inc and dec instructions */
1447 		isa->opt &= ~IA32_OPT_INCDEC;
1450 	isa->regs_16bit = pmap_create();
1451 	isa->regs_8bit  = pmap_create();
1452 	isa->types      = pmap_create();
1453 	isa->tv_ent     = pmap_create();
1454 	isa->out        = file_handle;
1456 	ia32_build_16bit_reg_map(isa->regs_16bit);
1457 	ia32_build_8bit_reg_map(isa->regs_8bit);
1459 	/* patch register names of x87 registers */
	/* the generated register table uses internal names; GAS wants st/st(N) */
1461 	ia32_st_regs[0].name = "st";
1462 	ia32_st_regs[1].name = "st(1)";
1463 	ia32_st_regs[2].name = "st(2)";
1464 	ia32_st_regs[3].name = "st(3)";
1465 	ia32_st_regs[4].name = "st(4)";
1466 	ia32_st_regs[5].name = "st(5)";
1467 	ia32_st_regs[6].name = "st(6)";
1468 	ia32_st_regs[7].name = "st(7)";
	/* obstack for assembling node names; recycled per irg in ia32_cg_init */
1472 	isa->name_obst = xmalloc(sizeof(*isa->name_obst));
1473 	obstack_init(isa->name_obst);
1474 	isa->name_obst_size = 0;
1477 	ia32_handle_intrinsics();
1478 	ia32_switch_section(NULL, NO_SECTION);
	/* the emitter produces Intel syntax, tell GAS so */
1479 	fprintf(isa->out, "\t.intel_syntax\n");
1489 * Closes the output file and frees the ISA structure.
/* ISA destructor: flushes the global declarations to the output and frees
 * all maps/obstacks owned by the isa (final free of `self` elided here). */
1491 static void ia32_done(void *self) {
1492 	ia32_isa_t *isa = self;
1494 	/* emit now all global declarations */
1495 	ia32_gen_decls(isa->out, isa->arch_isa.main_env);
1497 	pmap_destroy(isa->regs_16bit);
1498 	pmap_destroy(isa->regs_8bit);
1499 	pmap_destroy(isa->tv_ent);
1500 	pmap_destroy(isa->types);
1503 	//printf("name obst size = %d bytes\n", isa->name_obst_size);
1504 	obstack_free(isa->name_obst, NULL);
1512 * Return the number of register classes for this architecture.
1513 * We report always these:
1514 *  - the general purpose registers
1515 *  - the SSE floating point register set
1516 *  - the virtual floating point registers
1518 static int ia32_get_n_reg_class(const void *self) {
1523 * Return the register class for index i.
/* Index mapping: 0 -> gp, 1 -> xmm (SSE), 2 -> vfp (virtual x87); must stay
 * in sync with ia32_get_n_reg_class() above. The selecting if/else lines
 * are elided in this view. */
1525 static const arch_register_class_t *ia32_get_reg_class(const void *self, int i) {
1526 	assert(i >= 0 && i < 3 && "Invalid ia32 register class requested.");
1528 		return &ia32_reg_classes[CLASS_ia32_gp];
1530 		return &ia32_reg_classes[CLASS_ia32_xmm];
1532 		return &ia32_reg_classes[CLASS_ia32_vfp];
1536 * Get the register class which shall be used to store a value of a given mode.
1537 * @param self The this pointer.
1538 * @param mode The mode in question.
1539 * @return A register class which can hold values of the given mode.
1541 const arch_register_class_t *ia32_get_reg_class_for_mode(const void *self, const ir_mode *mode) {
1542 	const ia32_isa_t *isa = self;
	/* floats go to xmm when SSE2 is active, otherwise to the virtual x87
	 * registers; everything else (int, pointer, ...) is general purpose */
1543 	if (mode_is_float(mode)) {
1544 		return USE_SSE2(isa) ? &ia32_reg_classes[CLASS_ia32_xmm] : &ia32_reg_classes[CLASS_ia32_vfp];
1547 		return &ia32_reg_classes[CLASS_ia32_gp];
1551 * Get the ABI restrictions for procedure calls.
1552 * @param self        The this pointer.
1553 * @param method_type The type of the method (procedure) in question.
1554 * @param abi         The abi object to be modified
1556 static void ia32_get_call_abi(const void *self, ir_type *method_type, be_abi_call_t *abi) {
1557 	const ia32_isa_t *isa = self;
1560 	unsigned  cc        = get_method_calling_convention(method_type);
1561 	int       n         = get_method_n_params(method_type);
1564 	int i, ignore_1, ignore_2;
1566 	const arch_register_t *reg;
1567 	be_abi_call_flags_t call_flags = be_abi_call_get_flags(abi);
	/* on P6-class CPUs mov [esp+x] is preferred over push (store scheduling) */
1569 	unsigned use_push = !IS_P6_ARCH(isa->opt_arch);
1571 	/* set abi flags for calls */
1572 	call_flags.bits.left_to_right         = 0;  /* always last arg first on stack */
1573 	call_flags.bits.store_args_sequential = use_push;
1574 	/* call_flags.bits.try_omit_fp                 not changed: can handle both settings */
1575 	call_flags.bits.fp_free               = 0;  /* the frame pointer is fixed in IA32 */
1576 	call_flags.bits.call_has_imm          = 1;  /* IA32 calls can have immediate address */
1578 	/* set stack parameter passing style */
1579 	be_abi_call_set_flags(abi, call_flags, &ia32_abi_callbacks);
1581 	/* collect the mode for each type */
1582 	modes = alloca(n * sizeof(modes[0]));
1584 	for (i = 0; i < n; i++) {
1585 		tp       = get_method_param_type(method_type, i);
1586 		modes[i] = get_type_mode(tp);
1589 	/* set register parameters */
1590 	if (cc & cc_reg_param) {
1591 		/* determine the number of parameters passed via registers */
1592 		biggest_n = ia32_get_n_regparam_class(n, modes, &ignore_1, &ignore_2);
1594 		/* loop over all parameters and set the register requirements */
1595 		for (i = 0; i <= biggest_n; i++) {
1596 			reg = ia32_get_RegParam_reg(n, modes, i, cc);
1597 			assert(reg && "kaputt");
1598 			be_abi_call_param_reg(abi, i, reg);
1605 	/* set stack parameters */
	/* NOTE(review): the initialisation of stack_idx is elided in this view;
	 * presumably it is 0 or biggest_n+1 depending on cc_reg_param -- confirm. */
1606 	for (i = stack_idx; i < n; i++) {
1607 		be_abi_call_param_stack(abi, i, 1, 0, 0);
1611 	/* set return registers */
1612 	n = get_method_n_ress(method_type);
1614 	assert(n <= 2 && "more than two results not supported");
1616 	/* In case of 64bit returns, we will have two 32bit values */
	/* two results: must be an integer pair (e.g. 64bit value) in EAX:EDX */
1618 		tp   = get_method_res_type(method_type, 0);
1619 		mode = get_type_mode(tp);
1621 		assert(!mode_is_float(mode) && "two FP results not supported");
1623 		tp   = get_method_res_type(method_type, 1);
1624 		mode = get_type_mode(tp);
1626 		assert(!mode_is_float(mode) && "two FP results not supported");
1628 		be_abi_call_res_reg(abi, 0, &ia32_gp_regs[REG_EAX]);
1629 		be_abi_call_res_reg(abi, 1, &ia32_gp_regs[REG_EDX]);
	/* single result: floats in XMM0 (SSE2) or virtual st0, ints in EAX */
1632 		const arch_register_t *reg;
1634 		tp   = get_method_res_type(method_type, 0);
1635 		assert(is_atomic_type(tp));
1636 		mode = get_type_mode(tp);
1638 		reg = mode_is_float(mode) ?
1639 			(USE_SSE2(isa) ? &ia32_xmm_regs[REG_XMM0] : &ia32_vfp_regs[REG_VF0]) :
1640 			&ia32_gp_regs[REG_EAX];
1642 		be_abi_call_res_reg(abi, 0, reg);
/* Returns the node operations implementation; the same singleton serves
 * every node (its cg field is patched per-irg in ia32_cg_init). */
1647 static const void *ia32_get_irn_ops(const arch_irn_handler_t *self, const ir_node *irn) {
1648 	return &ia32_irn_ops;
/* handler vtable (initialiser elided in this view) */
1651 const arch_irn_handler_t ia32_irn_handler = {
1655 const arch_irn_handler_t *ia32_get_irn_handler(const void *self) {
1656 	return &ia32_irn_handler;
/* Scheduler callback: only real ia32 nodes appear in the final schedule;
 * -1 tells the list scheduler to decide for non-ia32 (generic be) nodes. */
1659 int ia32_to_appear_in_schedule(void *block_env, const ir_node *irn) {
1660 	return is_ia32_irn(irn) ? 1 : -1;
1664 * Initializes the code generator interface.
/* Accessor for the static code generator vtable defined above. */
1666 static const arch_code_generator_if_t *ia32_get_code_generator_if(void *self) {
1667 	return &ia32_code_gen_if;
1671 * Returns the estimated execution time of an ia32 irn.
/* Cost callback for the list scheduler; generic nodes get a flat cost of 1. */
1673 static sched_timestep_t ia32_sched_exectime(void *env, const ir_node *irn) {
1674 	const arch_env_t *arch_env = env;
1675 	return is_ia32_irn(irn) ? ia32_get_op_estimated_cost(arch_get_irn_ops(arch_env, irn), irn) : 1;
/* mutable copy of the generic selector, patched with ia32 callbacks below */
1678 list_sched_selector_t ia32_sched_selector;
1681 * Returns the reg_pressure scheduler with to_appear_in_schedule() overloaded
/* Clones the caller-provided selector and overrides the exectime and
 * to_appear_in_schedule hooks with the ia32-specific versions.
 * NOTE(review): writes to a module-level variable -- not reentrant. */
1683 static const list_sched_selector_t *ia32_get_list_sched_selector(const void *self, list_sched_selector_t *selector) {
1684 	memcpy(&ia32_sched_selector, selector, sizeof(ia32_sched_selector));
1685 	ia32_sched_selector.exectime              = ia32_sched_exectime;
1686 	ia32_sched_selector.to_appear_in_schedule = ia32_to_appear_in_schedule;
1687 	return &ia32_sched_selector;
1691 * Returns the necessary byte alignment for storing a register of given class.
/* Derives the spill-slot alignment from the class's register mode; the
 * actual return statements are elided here.  The visible special case
 * presumably caps wide FP registers (x87 80/96-bit) at 16-byte alignment
 * -- confirm against the missing lines. */
1693 static int ia32_get_reg_class_alignment(const void *self, const arch_register_class_t *cls) {
1694 	ir_mode *mode = arch_register_class_mode(cls);
1695 	int bytes     = get_mode_size_bytes(mode);
1697 	if (mode_is_float(mode) && bytes > 8)
/* shared context for the intrinsic lowering callback; fields filled later */
1702 static ia32_intrinsic_env_t intrinsic_env = { NULL, NULL };
1705 * Returns the libFirm configuration parameter for this backend.
/* Provides the middle-end with ia32 strength-reduction limits and the
 * dword-lowering/intrinsic hooks (tail of the struct elided here). */
1707 static const backend_params *ia32_get_libfirm_params(void) {
1708 	static const arch_dep_params_t ad = {
1709 		1, /* also use subs */
1710 		4, /* maximum shifts */
1711 		31, /* maximum shift amount */
1713 		1, /* allow Mulhs */
1714 		1, /* allow Mulus */
1715 		32 /* Mulh allowed up to 32 bit */
1717 	static backend_params p = {
1718 		NULL, /* no additional opcodes */
1719 		NULL, /* will be set later */
1720 		1,    /* need dword lowering */
1721 		ia32_create_intrinsic_fkt,
1722 		&intrinsic_env, /* context for ia32_create_intrinsic_fkt */
1730 /* instruction set architectures. */
/* libcore option tables: the *_var entries point straight into
 * ia32_isa_template, so command-line parsing mutates the template before
 * ia32_init() copies it (several closing braces elided in this view). */
1731 static const lc_opt_enum_int_items_t arch_items[] = {
1732 	{ "386",        arch_i386, },
1733 	{ "486",        arch_i486, },
1734 	{ "pentium",    arch_pentium, },
1735 	{ "586",        arch_pentium, },
1736 	{ "pentiumpro", arch_pentium_pro, },
1737 	{ "686",        arch_pentium_pro, },
1738 	{ "pentiummmx", arch_pentium_mmx, },
1739 	{ "pentium2",   arch_pentium_2, },
1740 	{ "p2",         arch_pentium_2, },
1741 	{ "pentium3",   arch_pentium_3, },
1742 	{ "p3",         arch_pentium_3, },
1743 	{ "pentium4",   arch_pentium_4, },
1744 	{ "p4",         arch_pentium_4, },
1745 	{ "pentiumm",   arch_pentium_m, },
1746 	{ "pm",         arch_pentium_m, },
1747 	{ "core",       arch_core, },
1749 	{ "athlon",     arch_athlon, },
1750 	{ "athlon64",   arch_athlon_64, },
1751 	{ "opteron",    arch_opteron, },
1755 static lc_opt_enum_int_var_t arch_var = {
1756 	&ia32_isa_template.arch, arch_items
1759 static lc_opt_enum_int_var_t opt_arch_var = {
1760 	&ia32_isa_template.opt_arch, arch_items
1763 static const lc_opt_enum_int_items_t fp_unit_items[] = {
1765 	{ "sse2", fp_sse2 },
1769 static lc_opt_enum_int_var_t fp_unit_var = {
1770 	&ia32_isa_template.fp_kind, fp_unit_items
1773 static const lc_opt_enum_int_items_t gas_items[] = {
1774 	{ "linux",   ASM_LINUX_GAS },
1775 	{ "mingw",   ASM_MINGW_GAS },
1779 static lc_opt_enum_int_var_t gas_var = {
1780 	(int *)&asm_flavour, gas_items
1783 static const lc_opt_table_entry_t ia32_options[] = {
1784 	LC_OPT_ENT_ENUM_INT("arch",      "select the instruction architecture", &arch_var),
1785 	LC_OPT_ENT_ENUM_INT("opt",       "optimize for instruction architecture", &opt_arch_var),
1786 	LC_OPT_ENT_ENUM_INT("fpunit",    "select the floating point unit", &fp_unit_var),
1787 	LC_OPT_ENT_NEGBIT("noaddrmode",  "do not use address mode", &ia32_isa_template.opt, IA32_OPT_DOAM),
1788 	LC_OPT_ENT_NEGBIT("nolea",       "do not optimize for LEAs", &ia32_isa_template.opt, IA32_OPT_LEA),
1789 	LC_OPT_ENT_NEGBIT("noplacecnst", "do not place constants", &ia32_isa_template.opt, IA32_OPT_PLACECNST),
1790 	LC_OPT_ENT_NEGBIT("noimmop",     "no operations with immediates", &ia32_isa_template.opt, IA32_OPT_IMMOPS),
1791 	LC_OPT_ENT_NEGBIT("noextbb",     "do not use extended basic block scheduling", &ia32_isa_template.opt, IA32_OPT_EXTBB),
1792 	LC_OPT_ENT_ENUM_INT("gasmode",   "set the GAS compatibility mode", &gas_var),
1797 * Register command line options for the ia32 backend.
1801 * ia32-arch=arch    create instruction for arch
1802 * ia32-opt=arch     optimize for run on arch
1803 * ia32-fpunit=unit  select floating point unit (x87 or SSE2)
1804 * ia32-incdec       optimize for inc/dec
1805 * ia32-noaddrmode   do not use address mode
1806 * ia32-nolea        do not optimize for LEAs
1807 * ia32-noplacecnst  do not place constants,
1808 * ia32-noimmop      no operations with immediates
1809 * ia32-noextbb      do not use extended basic block scheduling
1810 * ia32-gasmode      set the GAS compatibility mode
/* Hooks the ia32_options table into the "ia32" option group under `ent`. */
1812 static void ia32_register_options(lc_opt_entry_t *ent)
1814 	lc_opt_entry_t *be_grp_ia32 = lc_opt_get_grp(ent, "ia32");
1815 	lc_opt_add_table(be_grp_ia32, ia32_options);
1817 #endif  /* WITH_LIBCORE */
1817 #endif /* WITH_LIBCORE */
1819 const arch_isa_if_t ia32_isa_if = {
1822 ia32_get_n_reg_class,
1824 ia32_get_reg_class_for_mode,
1826 ia32_get_irn_handler,
1827 ia32_get_code_generator_if,
1828 ia32_get_list_sched_selector,
1829 ia32_get_reg_class_alignment,
1830 ia32_get_libfirm_params,
1832 ia32_register_options