2 * This is the main ia32 firm backend driver.
3 * @author Christian Wuerdig
20 #include <libcore/lc_opts.h>
21 #include <libcore/lc_opts_enum.h>
22 #endif /* WITH_LIBCORE */
26 #include "pseudo_irg.h"
30 #include "iredges_t.h"
39 #include "../beabi.h" /* the general register allocator interface */
40 #include "../benode_t.h"
41 #include "../belower.h"
42 #include "../besched_t.h"
45 #include "../beirgmod.h"
46 #include "../be_dbgout.h"
47 #include "../beblocksched.h"
48 #include "../bemachine.h"
49 #include "../beilpsched.h"
51 #include "bearch_ia32_t.h"
53 #include "ia32_new_nodes.h" /* ia32 nodes interface */
54 #include "gen_ia32_regalloc_if.h" /* the generated interface (register type and class defenitions) */
55 #include "gen_ia32_machine.h"
56 #include "ia32_gen_decls.h" /* interface declaration emitter */
57 #include "ia32_transform.h"
58 #include "ia32_emitter.h"
59 #include "ia32_map_regs.h"
60 #include "ia32_optimize.h"
62 #include "ia32_dbg_stat.h"
63 #include "ia32_finish.h"
64 #include "ia32_util.h"
66 #define DEBUG_MODULE "firm.be.ia32.isa"
69 static set *cur_reg_set = NULL;
71 /* Creates the unique per irg GP NoReg node. */
72 ir_node *ia32_new_NoReg_gp(ia32_code_gen_t *cg) {
/* The GP NoReg is modeled as a callee-save irn owned by the ABI object. */
73 	return be_abi_get_callee_save_irn(cg->birg->abi, &ia32_gp_regs[REG_GP_NOREG]);
76 /* Creates the unique per irg FP NoReg node. */
77 ir_node *ia32_new_NoReg_fp(ia32_code_gen_t *cg) {
/* The FP NoReg lives in the xmm class when SSE2 is used, else in the vfp class. */
78 	return be_abi_get_callee_save_irn(cg->birg->abi,
79 		USE_SSE2(cg) ? &ia32_xmm_regs[REG_XMM_NOREG] : &ia32_vfp_regs[REG_VFP_NOREG]);
83 * Returns gp_noreg or fp_noreg, depending on input requirements.
85 ir_node *ia32_get_admissible_noreg(ia32_code_gen_t *cg, ir_node *irn, int pos) {
86 	arch_register_req_t req;
87 	const arch_register_req_t *p_req;
/* Query the register requirement of irn's operand at pos to pick the class. */
89 	p_req = arch_get_register_req(cg->arch_env, &req, irn, pos);
90 	assert(p_req && "Missing register requirements");
91 	if (p_req->cls == &ia32_reg_classes[CLASS_ia32_gp])
92 		return ia32_new_NoReg_gp(cg);
/* Any non-gp requirement falls through to the FP NoReg. */
94 	return ia32_new_NoReg_fp(cg);
97 /**************************************************
100 * _ __ ___ __ _ __ _| | | ___ ___ _| |_
101 * | '__/ _ \/ _` | / _` | | |/ _ \ / __| | | _|
102 * | | | __/ (_| | | (_| | | | (_) | (__ | | |
103 * |_| \___|\__, | \__,_|_|_|\___/ \___| |_|_|
106 **************************************************/
109 * Return register requirements for an ia32 node.
110 * If the node returns a tuple (mode_T) then the proj's
111 * will be asked for this information.
113 static const arch_register_req_t *ia32_get_irn_reg_req(const void *self, arch_register_req_t *req, const ir_node *irn, int pos) {
114 	const ia32_irn_ops_t      *ops = self;
115 	const ia32_register_req_t *irn_req;
116 	long                       node_pos = pos == -1 ? 0 : pos;
117 	ir_mode                   *mode     = is_Block(irn) ? NULL : get_irn_mode(irn);
118 	FIRM_DBG_REGISTER(firm_dbg_module_t *mod, DEBUG_MODULE);
/* Blocks and control-flow (mode_X) nodes carry no register requirements. */
120 	if (is_Block(irn) || mode == mode_X) {
121 		DBG((mod, LEVEL_1, "ignoring Block, mode_M, mode_X node %+F\n", irn));
/* A tuple node itself has no OUT requirement; the Projs are queried instead. */
125 	if (mode == mode_T && pos < 0) {
126 		DBG((mod, LEVEL_1, "ignoring request OUT requirements for node %+F\n", irn));
130 	DBG((mod, LEVEL_1, "get requirements at pos %d for %+F ... ", pos, irn));
137 		DBG((mod, LEVEL_1, "ignoring request IN requirements for node %+F\n", irn));
/* For OUT requests on a Proj, the Proj number selects the result slot. */
141 		node_pos = (pos == -1) ? get_Proj_proj(irn) : pos;
142 		irn      = skip_Proj_const(irn);
144 		DB((mod, LEVEL_1, "skipping Proj, going to %+F at pos %d ... ", irn, node_pos));
147 	if (is_ia32_irn(irn)) {
/* pos >= 0 selects an IN requirement, otherwise the OUT slot node_pos. */
148 		irn_req = (pos >= 0) ? get_ia32_in_req(irn, pos) : get_ia32_out_req(irn, node_pos);
149 		if (irn_req == NULL) {
150 			/* no requirements */
154 		DB((mod, LEVEL_1, "returning reqs for %+F at pos %d\n", irn, pos));
156 		memcpy(req, &(irn_req->req), sizeof(*req));
/* Propagate "should be same" constraints: record the tied input node. */
158 		if (arch_register_req_is(&(irn_req->req), should_be_same)) {
159 			assert(irn_req->same_pos >= 0 && "should be same constraint for in -> out NYI");
160 			req->other_same = get_irn_n(irn, irn_req->same_pos);
/* Propagate "should be different" constraints likewise. */
163 		if (arch_register_req_is(&(irn_req->req), should_be_different)) {
164 			assert(irn_req->different_pos >= 0 && "should be different constraint for in -> out NYI");
165 			req->other_different = get_irn_n(irn, irn_req->different_pos);
169 		/* treat Unknowns like Const with default requirements */
170 		if (is_Unknown(irn)) {
171 			DB((mod, LEVEL_1, "returning UKNWN reqs for %+F\n", irn));
172 			if (mode_is_float(mode)) {
173 				if (USE_SSE2(ops->cg))
174 					memcpy(req, &(ia32_default_req_ia32_xmm_xmm_UKNWN), sizeof(*req));
176 					memcpy(req, &(ia32_default_req_ia32_vfp_vfp_UKNWN), sizeof(*req));
178 			else if (mode_is_int(mode) || mode_is_reference(mode))
179 				memcpy(req, &(ia32_default_req_ia32_gp_gp_UKNWN), sizeof(*req));
180 			else if (mode == mode_T || mode == mode_M) {
181 				DBG((mod, LEVEL_1, "ignoring Unknown node %+F\n", irn));
185 				assert(0 && "unsupported Unknown-Mode");
188 			DB((mod, LEVEL_1, "returning NULL for %+F (not ia32)\n", irn));
/* Record the allocated register reg for node irn (Projs are skipped first). */
196 static void ia32_set_irn_reg(const void *self, ir_node *irn, const arch_register_t *reg) {
198 	const ia32_irn_ops_t *ops = self;
/* Control-flow nodes (mode_X) never get a register. */
200 	if (get_irn_mode(irn) == mode_X) {
204 	DBG((ops->cg->mod, LEVEL_1, "ia32 assigned register %s to node %+F\n", reg->name, irn));
/* For a Proj, store the register in the corresponding result slot of the pred. */
207 		pos = get_Proj_proj(irn);
208 		irn = skip_Proj(irn);
211 	if (is_ia32_irn(irn)) {
212 		const arch_register_t **slots;
214 		slots      = get_ia32_slots(irn);
/* Non-ia32 (generic backend) nodes keep their register in cur_reg_set. */
218 		ia32_set_firm_reg(irn, reg, cur_reg_set);
/* Look up the register assigned to irn; counterpart of ia32_set_irn_reg. */
222 static const arch_register_t *ia32_get_irn_reg(const void *self, const ir_node *irn) {
224 	const arch_register_t *reg = NULL;
/* Control-flow nodes carry no register. */
228 	if (get_irn_mode(irn) == mode_X) {
232 		pos = get_Proj_proj(irn);
233 		irn = skip_Proj_const(irn);
236 	if (is_ia32_irn(irn)) {
237 		const arch_register_t **slots;
238 		slots = get_ia32_slots(irn);
/* Non-ia32 nodes: fall back to the firm-register map. */
242 		reg = ia32_get_firm_reg(irn, cur_reg_set);
/* Classify irn as a bitset of arch_irn_class_* flags (branch/const/load/store/reload). */
248 static arch_irn_class_t ia32_classify(const void *self, const ir_node *irn) {
249 	arch_irn_class_t classification = arch_irn_class_normal;
251 	irn = skip_Proj_const(irn);
254 		classification |= arch_irn_class_branch;
/* Non-ia32 nodes are not "normal" backend instructions. */
256 	if (! is_ia32_irn(irn))
257 		return classification & ~arch_irn_class_normal;
259 	if (is_ia32_Cnst(irn))
260 		classification |= arch_irn_class_const;
263 		classification |= arch_irn_class_load;
265 	if (is_ia32_St(irn) || is_ia32_Store8Bit(irn))
266 		classification |= arch_irn_class_store;
/* Nodes converted into source-addressmode form count as reloads. */
268 	if (is_ia32_got_reload(irn))
269 		classification |= arch_irn_class_reload;
271 	return classification;
/* Compute the arch flags for irn, merging per-output flags (via the Proj) with node flags. */
274 static arch_irn_flags_t ia32_get_flags(const void *self, const ir_node *irn) {
275 	arch_irn_flags_t flags;
276 	ir_node          *pred = is_Proj(irn) && mode_is_datab(get_irn_mode(irn)) ? get_Proj_pred(irn) : NULL;
279 		flags = arch_irn_flags_ignore;
281 		/* pred is only set, if we have a Proj */
282 		flags = pred && is_ia32_irn(pred) ? get_ia32_out_flags(pred, get_Proj_proj(irn)) : arch_irn_flags_none;
284 	irn = skip_Proj_const(irn);
285 	if (is_ia32_irn(irn))
286 		flags |= get_ia32_flags(irn);
293 * The IA32 ABI callback object.
296 be_abi_call_flags_bits_t flags; /**< The call flags. */
297 const arch_isa_t *isa; /**< The ISA handle. */
298 const arch_env_t *aenv; /**< The architecture environment. */
299 ir_graph *irg; /**< The associated graph. */
/* Return the frame entity attached to an ia32 node, or NULL for foreign nodes. */
302 static ir_entity *ia32_get_frame_entity(const void *self, const ir_node *irn) {
303 	return is_ia32_irn(irn) ? get_ia32_frame_ent(irn) : NULL;
/* Attach frame entity ent to irn (caller guarantees irn is an ia32 node). */
306 static void ia32_set_frame_entity(const void *self, ir_node *irn, ir_entity *ent) {
307 	set_ia32_frame_ent(irn, ent);
/* Apply the final stack-frame bias to a frame-using node's addressmode offset. */
310 static void ia32_set_frame_offset(const void *self, ir_node *irn, int bias) {
311 	const ia32_irn_ops_t *ops = self;
313 	if (get_ia32_frame_ent(irn)) {
314 		if(is_ia32_Pop(irn)) {
315 			int omit_fp = be_abi_omit_fp(ops->cg->birg->abi);
317 			/* Pop nodes modify the stack pointer before calculating the destination
318 			 * address, so fix this here
324 		DBG((ops->cg->mod, LEVEL_1, "stack biased %+F with %d\n", irn, bias));
326 		if (get_ia32_op_type(irn) == ia32_Normal) {
327 			// Matze: When does this case happen?
/* Normal (non-addressmode) nodes encode the bias as an immediate constant string. */
329 			snprintf(buf, sizeof(buf), "%d", bias);
330 			set_ia32_cnst(irn, buf);
/* Addressmode nodes fold the bias into the am offset instead. */
332 			ia32_am_flavour_t am_flav = get_ia32_am_flavour(irn);
334 			set_ia32_am_flavour(irn, am_flav);
336 			add_ia32_am_offs_int(irn, bias);
/* Return the stack-pointer change caused by irn (Push/Pop stack Projs). */
341 static int ia32_get_sp_bias(const void *self, const ir_node *irn) {
343 		long proj = get_Proj_proj(irn);
344 		ir_node *pred = get_Proj_pred(irn);
/* A Push's stack result moves SP down, a Pop's moves it up.
 * NOTE(review): the returned magnitudes are on lines not visible here — confirm. */
346 		if (is_ia32_Push(pred) && proj == pn_ia32_Push_stack)
348 		if (is_ia32_Pop(pred) && proj == pn_ia32_Pop_stack)
356 * Put all registers which are saved by the prologue/epilogue in a set.
358 * @param self  The callback object.
359 * @param s     The result set.
361 static void ia32_abi_dont_save_regs(void *self, pset *s)
363 	ia32_abi_env_t *env = self;
/* When the frame pointer is omitted, ebp need not be saved by the generic code. */
364 	if(env->flags.try_omit_fp)
365 		pset_insert_ptr(s, env->isa->bp);
369 * Generate the routine prologue.
371 * @param self    The callback object.
372 * @param mem     A pointer to the mem node. Update this if you define new memory.
373 * @param reg_map A map mapping all callee_save/ignore/parameter registers to their defining nodes.
375 * @return        The register which shall be used as a stack frame base.
377 * All nodes which define registers in @p reg_map must keep @p reg_map current.
379 static const arch_register_t *ia32_abi_prologue(void *self, ir_node **mem, pmap *reg_map)
381 	ia32_abi_env_t *env = self;
383 	if (! env->flags.try_omit_fp) {
384 		ir_node *bl      = get_irg_start_block(env->irg);
385 		ir_node *curr_sp = be_abi_reg_map_get(reg_map, env->isa->sp);
386 		ir_node *curr_bp = be_abi_reg_map_get(reg_map, env->isa->bp);
387 		ir_node *noreg = be_abi_reg_map_get(reg_map, &ia32_gp_regs[REG_GP_NOREG]);
/* push ebp: save the old base pointer on the stack. */
391 		push    = new_rd_ia32_Push(NULL, env->irg, bl, noreg, noreg, curr_bp, curr_sp, *mem);
392 		curr_sp = new_r_Proj(env->irg, bl, push, get_irn_mode(curr_sp), pn_ia32_Push_stack);
393 		*mem    = new_r_Proj(env->irg, bl, push, mode_M, pn_ia32_Push_M);
395 		/* the push must have SP out register */
396 		arch_set_irn_register(env->aenv, curr_sp, env->isa->sp);
397 		set_ia32_flags(push, arch_irn_flags_ignore);
399 		/* move esp to ebp */
400 		curr_bp  = be_new_Copy(env->isa->bp->reg_class, env->irg, bl, curr_sp);
401 		be_set_constr_single_reg(curr_bp, BE_OUT_POS(0), env->isa->bp);
402 		arch_set_irn_register(env->aenv, curr_bp, env->isa->bp);
403 		be_node_set_flags(curr_bp, BE_OUT_POS(0), arch_irn_flags_ignore);
405 		/* beware: the copy must be done before any other sp use */
406 		curr_sp = be_new_CopyKeep_single(env->isa->sp->reg_class, env->irg, bl, curr_sp, curr_bp, get_irn_mode(curr_sp));
407 		be_set_constr_single_reg(curr_sp, BE_OUT_POS(0), env->isa->sp);
408 		arch_set_irn_register(env->aenv, curr_sp, env->isa->sp);
409 		be_node_set_flags(curr_sp, BE_OUT_POS(0), arch_irn_flags_ignore);
/* Publish the new sp/bp definitions so later ABI code uses them. */
411 		be_abi_reg_map_set(reg_map, env->isa->sp, curr_sp);
412 		be_abi_reg_map_set(reg_map, env->isa->bp, curr_bp);
421 * Generate the routine epilogue.
422 * @param self    The callback object.
423 * @param bl      The block for the epilog
424 * @param mem     A pointer to the mem node. Update this if you define new memory.
425 * @param reg_map A map mapping all callee_save/ignore/parameter registers to their defining nodes.
426 * @return        The register which shall be used as a stack frame base.
428 * All nodes which define registers in @p reg_map must keep @p reg_map current.
430 static void ia32_abi_epilogue(void *self, ir_node *bl, ir_node **mem, pmap *reg_map)
432 	ia32_abi_env_t *env     = self;
433 	ir_node        *curr_sp = be_abi_reg_map_get(reg_map, env->isa->sp);
434 	ir_node        *curr_bp = be_abi_reg_map_get(reg_map, env->isa->bp);
436 	if (env->flags.try_omit_fp) {
437 		/* simply remove the stack frame here */
438 		curr_sp = be_new_IncSP(env->isa->sp, env->irg, bl, curr_sp, BE_STACK_FRAME_SIZE_SHRINK);
439 		add_irn_dep(curr_sp, *mem);
442 		const ia32_isa_t *isa     = (ia32_isa_t *)env->isa;
443 		ir_mode          *mode_bp = env->isa->bp->reg_class->mode;
445 		/* gcc always emits a leave at the end of a routine */
446 		if (1 || ARCH_AMD(isa->opt_arch)) {
/* leave: restores ebp and esp in one instruction. */
450 			leave   = new_rd_ia32_Leave(NULL, env->irg, bl, curr_sp, curr_bp);
451 			set_ia32_flags(leave, arch_irn_flags_ignore);
452 			curr_bp = new_r_Proj(current_ir_graph, bl, leave, mode_bp, pn_ia32_Leave_frame);
453 			curr_sp = new_r_Proj(current_ir_graph, bl, leave, get_irn_mode(curr_sp), pn_ia32_Leave_stack);
454 			*mem    = new_r_Proj(current_ir_graph, bl, leave, mode_M, pn_ia32_Leave_M);
/* Alternative (dead branch, see the "1 ||" above): mov esp,ebp then pop ebp. */
457 			ir_node *noreg = be_abi_reg_map_get(reg_map, &ia32_gp_regs[REG_GP_NOREG]);
460 			/* copy ebp to esp */
461 			curr_sp = be_new_SetSP(env->isa->sp, env->irg, bl, curr_sp, curr_bp, *mem);
464 			pop     = new_rd_ia32_Pop(NULL, env->irg, bl, noreg, noreg, curr_sp, *mem);
465 			set_ia32_flags(pop, arch_irn_flags_ignore);
466 			curr_bp = new_r_Proj(current_ir_graph, bl, pop, mode_bp, pn_ia32_Pop_res);
467 			curr_sp = new_r_Proj(current_ir_graph, bl, pop, get_irn_mode(curr_sp), pn_ia32_Pop_stack);
468 			*mem    = new_r_Proj(current_ir_graph, bl, pop, mode_M, pn_ia32_Pop_M);
470 		arch_set_irn_register(env->aenv, curr_sp, env->isa->sp);
471 		arch_set_irn_register(env->aenv, curr_bp, env->isa->bp);
/* Publish the restored sp/bp so the Return uses the right definitions. */
474 	be_abi_reg_map_set(reg_map, env->isa->sp, curr_sp);
475 	be_abi_reg_map_set(reg_map, env->isa->bp, curr_bp);
479 * Initialize the callback object.
480 * @param call The call object.
481 * @param aenv The architecture environment.
482 * @param irg  The graph with the method.
483 * @return Some pointer. This pointer is passed to all other callback functions as self object.
485 static void *ia32_abi_init(const be_abi_call_t *call, const arch_env_t *aenv, ir_graph *irg)
/* Allocates the env freed later in ia32_abi_done; ownership passes to the caller. */
487 	ia32_abi_env_t *env    = xmalloc(sizeof(env[0]));
488 	be_abi_call_flags_t fl = be_abi_call_get_flags(call);
489 	env->flags = fl.bits;
492 	env->isa   = aenv->isa;
497 * Destroy the callback object.
498 * @param self The callback object.
/* NOTE(review): body not visible here — presumably frees the env allocated in
 * ia32_abi_init; confirm against the full source. */
500 static void ia32_abi_done(void *self) {
505 * Produces the type which sits between the stack args and the locals on the stack.
506 * it will contain the return address and space to store the old base pointer.
507 * @return The Firm type modeling the ABI between type.
509 static ir_type *ia32_abi_get_between_type(void *self)
511 #define IDENT(s) 	new_id_from_chars(s, sizeof(s)-1)
/* Both types are built lazily once and shared by all graphs. */
512 	static ir_type *omit_fp_between_type = NULL;
513 	static ir_type *between_type         = NULL;
515 	ia32_abi_env_t *env = self;
517 	if (! between_type) {
518 		ir_entity *old_bp_ent;
519 		ir_entity *ret_addr_ent;
520 		ir_entity *omit_fp_ret_addr_ent;
522 		ir_type *old_bp_type   = new_type_primitive(IDENT("bp"), mode_P);
523 		ir_type *ret_addr_type = new_type_primitive(IDENT("return_addr"), mode_P);
/* With a frame pointer: [old_bp][ret_addr] between args and locals. */
525 		between_type           = new_type_struct(IDENT("ia32_between_type"));
526 		old_bp_ent             = new_entity(between_type, IDENT("old_bp"), old_bp_type);
527 		ret_addr_ent           = new_entity(between_type, IDENT("ret_addr"), ret_addr_type);
529 		set_entity_offset(old_bp_ent, 0);
530 		set_entity_offset(ret_addr_ent, get_type_size_bytes(old_bp_type));
531 		set_type_size_bytes(between_type, get_type_size_bytes(old_bp_type) + get_type_size_bytes(ret_addr_type));
532 		set_type_state(between_type, layout_fixed);
/* Without a frame pointer: only the return address remains. */
534 		omit_fp_between_type = new_type_struct(IDENT("ia32_between_type_omit_fp"));
535 		omit_fp_ret_addr_ent = new_entity(omit_fp_between_type, IDENT("ret_addr"), ret_addr_type);
537 		set_entity_offset(omit_fp_ret_addr_ent, 0);
538 		set_type_size_bytes(omit_fp_between_type, get_type_size_bytes(ret_addr_type));
539 		set_type_state(omit_fp_between_type, layout_fixed);
542 	return env->flags.try_omit_fp ? omit_fp_between_type : between_type;
547 * Get the estimated cycle count for @p irn.
549 * @param self The this pointer.
550 * @param irn  The node.
552 * @return     The estimated cycle count for this operation
554 static int ia32_get_op_estimated_cost(const void *self, const ir_node *irn)
557 	ia32_op_type_t op_tp;
558 	const ia32_irn_ops_t *ops = self;
562 	if (!is_ia32_irn(irn))
565 	assert(is_ia32_irn(irn));
/* Start from the node's intrinsic latency, then adjust below. */
567 	cost  = get_ia32_latency(irn);
568 	op_tp = get_ia32_op_type(irn);
570 	if (is_ia32_CopyB(irn)) {
572 		if (ARCH_INTEL(ops->cg->arch))
/* CopyB with known size: base cost plus ~4/3 cycles per byte. */
575 	else if (is_ia32_CopyB_i(irn)) {
576 		int size = get_tarval_long(get_ia32_Immop_tarval(irn));
577 		cost     = 20 + (int)ceil((4/3) * size);
578 		if (ARCH_INTEL(ops->cg->arch))
581 	/* in case of address mode operations add additional cycles */
582 	else if (op_tp == ia32_AddrModeD || op_tp == ia32_AddrModeS) {
584 			In case of stack access add 5 cycles (we assume stack is in cache),
585 			other memory operations cost 20 cycles.
587 		cost += is_ia32_use_frame(irn) ? 5 : 20;
594 * Returns the inverse operation if @p irn, recalculating the argument at position @p i.
596 * @param irn     The original operation
597 * @param i       Index of the argument we want the inverse operation to yield
598 * @param inverse struct to be filled with the resulting inverse op
599 * @param obstack The obstack to use for allocation of the returned nodes array
600 * @return        The inverse operation or NULL if operation invertible
602 static arch_inverse_t *ia32_get_inverse(const void *self, const ir_node *irn, int i, arch_inverse_t *inverse, struct obstack *obst) {
606 	ir_node  *block, *noreg, *nomem;
609 	/* we cannot invert non-ia32 irns */
610 	if (! is_ia32_irn(irn))
613 	/* operand must always be a real operand (not base, index or mem) */
614 	if (i != 2 && i != 3)
617 	/* we don't invert address mode operations */
618 	if (get_ia32_op_type(irn) != ia32_Normal)
621 	irg      = get_irn_irg(irn);
622 	block    = get_nodes_block(irn);
623 	mode     = get_ia32_res_mode(irn);
624 	irn_mode = get_irn_mode(irn);
625 	noreg    = get_irn_n(irn, 0);
626 	nomem    = new_r_NoMem(irg);
627 	dbg      = get_irn_dbg_info(irn);
629 	/* initialize structure */
630 	inverse->nodes = obstack_alloc(obst, 2 * sizeof(inverse->nodes[0]));
634 	switch (get_ia32_irn_opcode(irn)) {
636 			if (get_ia32_immop_type(irn) == ia32_ImmConst) {
637 				/* we have an add with a const here */
638 				/* invers == add with negated const */
639 				inverse->nodes[0] = new_rd_ia32_Add(dbg, irg, block, noreg, noreg, get_irn_n(irn, i), noreg, nomem, irn_mode);
641 				copy_ia32_Immop_attr(inverse->nodes[0], (ir_node *)irn);
642 				set_ia32_Immop_tarval(inverse->nodes[0], tarval_neg(get_ia32_Immop_tarval(irn)));
643 				set_ia32_commutative(inverse->nodes[0]);
645 			else if (get_ia32_immop_type(irn) == ia32_ImmSymConst) {
646 				/* we have an add with a symconst here */
647 				/* invers == sub with const */
648 				inverse->nodes[0] = new_rd_ia32_Sub(dbg, irg, block, noreg, noreg, get_irn_n(irn, i), noreg, nomem, irn_mode);
650 				copy_ia32_Immop_attr(inverse->nodes[0], (ir_node *)irn);
653 				/* normal add: inverse == sub */
654 				ir_node *proj = ia32_get_res_proj(irn);
657 				inverse->nodes[0] = new_rd_ia32_Sub(dbg, irg, block, noreg, noreg, proj, get_irn_n(irn, i ^ 1), nomem, irn_mode);
662 			if (get_ia32_immop_type(irn) != ia32_ImmNone) {
663 				/* we have a sub with a const/symconst here */
664 				/* invers == add with this const */
665 				inverse->nodes[0] = new_rd_ia32_Add(dbg, irg, block, noreg, noreg, get_irn_n(irn, i), noreg, nomem, irn_mode);
666 				inverse->costs    += (get_ia32_immop_type(irn) == ia32_ImmSymConst) ? 5 : 1;
667 				copy_ia32_Immop_attr(inverse->nodes[0], (ir_node *)irn);
/* Normal sub: which operand is recomputed decides add vs. reversed sub. */
671 				ir_node *proj = ia32_get_res_proj(irn);
675 					inverse->nodes[0] = new_rd_ia32_Add(dbg, irg, block, noreg, noreg, proj, get_irn_n(irn, 3), nomem, irn_mode);
678 					inverse->nodes[0] = new_rd_ia32_Sub(dbg, irg, block, noreg, noreg, get_irn_n(irn, 2), proj, nomem, irn_mode);
684 			if (get_ia32_immop_type(irn) != ia32_ImmNone) {
685 				/* xor with const: inverse = xor */
686 				inverse->nodes[0] = new_rd_ia32_Eor(dbg, irg, block, noreg, noreg, get_irn_n(irn, i), noreg, nomem, irn_mode);
687 				inverse->costs    += (get_ia32_immop_type(irn) == ia32_ImmSymConst) ? 5 : 1;
688 				copy_ia32_Immop_attr(inverse->nodes[0], (ir_node *)irn);
/* xor is self-inverse: result xor other-operand recreates the input. */
692 				inverse->nodes[0] = new_rd_ia32_Eor(dbg, irg, block, noreg, noreg, (ir_node *)irn, get_irn_n(irn, i), nomem, irn_mode);
/* Not is self-inverse. */
697 			ir_node *proj = ia32_get_res_proj(irn);
700 			inverse->nodes[0] = new_rd_ia32_Not(dbg, irg, block, noreg, noreg, proj, nomem, irn_mode);
/* Minus is self-inverse. */
704 		case iro_ia32_Minus: {
705 			ir_node *proj = ia32_get_res_proj(irn);
708 			inverse->nodes[0] = new_rd_ia32_Minus(dbg, irg, block, noreg, noreg, proj, nomem, irn_mode);
713 			/* inverse operation not supported */
717 	set_ia32_res_mode(inverse->nodes[0], mode);
723 * Get the mode that should be used for spilling value node
725 static ir_mode *get_spill_mode(ia32_code_gen_t *cg, const ir_node *node)
727 	ir_mode *mode = get_irn_mode(node);
/* Float values may need a wider spill mode than their value mode
 * (NOTE(review): the float branch's result lines are not visible here). */
728 	if (mode_is_float(mode)) {
730 		// super exact spilling...
747 * Checks whether an addressmode reload for a node with mode mode is compatible
748 * with a spillslot of mode spill_mode
750 static int ia32_is_spillmode_compatible(const ir_mode *mode, const ir_mode *spillmode)
/* Float reloads must match the spill mode exactly; reading a float slot
 * with a different mode would reinterpret the bits. */
752 	if(mode_is_float(mode)) {
753 		return mode == spillmode;
760 * Check if irn can load its operand at position i from memory (source addressmode).
761 * @param self   Pointer to irn ops itself
762 * @param irn    The irn to be checked
763 * @param i      The operands position
764 * @return Non-Zero if operand can be loaded
766 static int ia32_possible_memory_operand(const void *self, const ir_node *irn, unsigned int i) {
767 	const ia32_irn_ops_t *ops = self;
768 	ia32_code_gen_t      *cg  = ops->cg;
769 	ir_node *op        = get_irn_n(irn, i);
770 	const ir_mode *mode = get_irn_mode(op);
771 	const ir_mode *spillmode = get_spill_mode(cg, op);
/* All conditions must hold; any failure rejects the transformation. */
773 	if (! is_ia32_irn(irn)                            ||  /* must be an ia32 irn */
774 		get_irn_arity(irn) != 5                       ||  /* must be a binary operation */
775 		get_ia32_op_type(irn) != ia32_Normal          ||  /* must not already be a addressmode irn */
776 		! (get_ia32_am_support(irn) & ia32_am_Source) ||  /* must be capable of source addressmode */
777 		! ia32_is_spillmode_compatible(mode, spillmode) ||
778 		(i != 2 && i != 3)                            ||  /* a "real" operand position must be requested */
779 		(i == 2 && ! is_ia32_commutative(irn))        ||  /* if first operand requested irn must be commutative */
780 		is_ia32_use_frame(irn))                           /* must not already use frame */
/* Rewrite irn in place so that operand i is loaded from the spill slot
 * via source addressmode instead of a register (folds the reload). */
786 static void ia32_perform_memory_operand(const void *self, ir_node *irn, ir_node *spill, unsigned int i) {
787 	const ia32_irn_ops_t *ops = self;
788 	ia32_code_gen_t      *cg  = ops->cg;
790 	assert(ia32_possible_memory_operand(self, irn, i) && "Cannot perform memory operand change");
/* If operand 2 was requested, swap it into position 3 (irn is commutative,
 * checked above) so the memory operand always sits at the right slot. */
793 		ir_node *tmp = get_irn_n(irn, 3);
794 		set_irn_n(irn, 3, get_irn_n(irn, 2));
795 		set_irn_n(irn, 2, tmp);
798 	set_ia32_am_support(irn, ia32_am_Source);
799 	set_ia32_op_type(irn, ia32_AddrModeS);
800 	set_ia32_am_flavour(irn, ia32_B);
801 	set_ia32_ls_mode(irn, get_irn_mode(get_irn_n(irn, i)));
802 	set_ia32_use_frame(irn);
803 	set_ia32_got_reload(irn);
/* base = frame pointer, the replaced operand becomes NoReg, mem input = spill. */
805 	set_irn_n(irn, 0, get_irg_frame(get_irn_irg(irn)));
806 	set_irn_n(irn, 3, ia32_get_admissible_noreg(cg, irn, 3));
807 	set_irn_n(irn, 4, spill);
809 	//FIXME DBG_OPT_AM_S(reload, irn);
812 static const be_abi_callbacks_t ia32_abi_callbacks = {
815 ia32_abi_get_between_type,
816 ia32_abi_dont_save_regs,
821 /* fill register allocator interface */
823 static const arch_irn_ops_if_t ia32_irn_ops_if = {
824 ia32_get_irn_reg_req,
829 ia32_get_frame_entity,
830 ia32_set_frame_entity,
831 ia32_set_frame_offset,
834 ia32_get_op_estimated_cost,
835 ia32_possible_memory_operand,
836 ia32_perform_memory_operand,
839 ia32_irn_ops_t ia32_irn_ops = {
846 /**************************************************
849 * ___ ___ __| | ___ __ _ ___ _ __ _| |_
850 * / __/ _ \ / _` |/ _ \/ _` |/ _ \ '_ \ | | _|
851 * | (_| (_) | (_| | __/ (_| | __/ | | | | | |
852 * \___\___/ \__,_|\___|\__, |\___|_| |_| |_|_|
855 **************************************************/
/* Bypass all Conv nodes collected in cg->kill_conv by rerouting their users
 * to the Conv's operand. */
857 static void ia32_kill_convs(ia32_code_gen_t *cg) {
860 	foreach_nodeset(cg->kill_conv, irn) {
861 		ir_node *in = get_irn_n(irn, 2);
862 		edges_reroute(irn, in, cg->birg->irg);
867 * Transform the Thread Local Store base.
869 static void transform_tls(ir_graph *irg) {
870 	ir_node *irn = get_irg_tls(irg);
873 		dbg_info *dbg = get_irn_dbg_info(irn);
874 		ir_node  *blk = get_nodes_block(irn);
/* Replace the generic TLS base with the ia32 LdTls node. */
876 		newn = new_rd_ia32_LdTls(dbg, irg, blk, get_irn_mode(irn));
883 * Transforms the standard firm graph into
886 static void ia32_prepare_graph(void *self) {
887 	ia32_code_gen_t *cg = self;
888 	DEBUG_ONLY(firm_dbg_module_t *old_mod = cg->mod;)
890 	FIRM_DBG_REGISTER(cg->mod, "firm.be.ia32.transform");
892 	/* 1st: transform constants and psi condition trees */
893 	ia32_pre_transform_phase(cg);
895 	/* 2nd: transform all remaining nodes */
896 	ia32_register_transformers();
898 	cg->kill_conv = new_nodeset(5);
899 	transform_tls(cg->irg);
/* Rebuild the edge information before the blockwise walk. */
900 	edges_deactivate(cg->irg);
901 	edges_activate(cg->irg);
902 	irg_walk_blkwise_graph(cg->irg, NULL, ia32_transform_node, cg);
904 	del_nodeset(cg->kill_conv);
907 		be_dump(cg->irg, "-transformed", dump_ir_block_graph_sched);
909 	/* 3rd: optimize address mode */
910 	FIRM_DBG_REGISTER(cg->mod, "firm.be.ia32.am");
911 	ia32_optimize_addressmode(cg);
914 		be_dump(cg->irg, "-am", dump_ir_block_graph_sched);
/* Restore the debug module that was active before this phase. */
916 	DEBUG_ONLY(cg->mod = old_mod;)
920 * Dummy functions for hooks we don't need but which must be filled.
922 static void ia32_before_sched(void *self) {
/* Recursively remove irn and any predecessors that become dead, rerouting a
 * surviving memory Proj past the removed node so stores are not lost. */
925 static void remove_unused_nodes(ir_node *irn, bitset_t *already_visited) {
928 	ir_node *mem_proj = NULL;
933 	mode = get_irn_mode(irn);
935 	/* check if we already saw this node or the node has more than one user */
936 	if (bitset_contains_irn(already_visited, irn) || get_irn_n_edges(irn) > 1) {
940 	/* mark irn visited */
941 	bitset_add_irn(already_visited, irn);
943 	/* non-Tuple nodes with one user: ok, return */
944 	if (get_irn_n_edges(irn) >= 1 && mode != mode_T) {
948 	/* tuple node has one user which is not the mem proj-> ok */
949 	if (mode == mode_T && get_irn_n_edges(irn) == 1) {
950 		mem_proj = ia32_get_proj_for_mode(irn, mode_M);
951 		if (mem_proj == NULL) {
956 	arity = get_irn_arity(irn);
957 	for (i = 0; i < arity; ++i) {
958 		ir_node *pred = get_irn_n(irn, i);
960 		/* do not follow memory edges or we will accidentally remove stores */
961 		if (get_irn_mode(pred) == mode_M) {
962 			if(mem_proj != NULL) {
963 				edges_reroute(mem_proj, pred, get_irn_irg(mem_proj));
969 		set_irn_n(irn, i, new_Bad());
972 			The current node is about to be removed: if the predecessor
973 			has only this node as user, it need to be removed as well.
975 		if (get_irn_n_edges(pred) <= 1)
976 			remove_unused_nodes(pred, already_visited);
979 	// we need to set the preds to Bad again to also get the memory edges
980 	arity = get_irn_arity(irn);
981 	for (i = 0; i < arity; ++i) {
982 		set_irn_n(irn, i, new_Bad());
985 	if (sched_is_scheduled(irn)) {
/* Graph walker: start the dead-node removal at each not-yet-visited ia32 Load. */
990 static void remove_unused_loads_walker(ir_node *irn, void *env) {
991 	bitset_t *already_visited = env;
992 	if (is_ia32_Ld(irn) && ! bitset_contains_irn(already_visited, irn))
993 		remove_unused_nodes(irn, env);
997 * Called before the register allocator.
998 * Calculate a block schedule here. We need it for the x87
999 * simulator and the emitter.
1001 static void ia32_before_ra(void *self) {
1002 	ia32_code_gen_t *cg              = self;
1003 	bitset_t        *already_visited = bitset_irg_alloca(cg->irg);
1006 		Handle special case:
1007 		There are sometimes unused loads, only pinned by memory.
1008 		We need to remove those Loads and all other nodes which won't be used
1009 		after removing the Load from schedule.
1011 	irg_walk_graph(cg->irg, NULL, remove_unused_loads_walker, already_visited);
1016 * Transforms a be_Reload into a ia32 Load.
1018 static void transform_to_Load(ia32_transform_env_t *env) {
1019 	ir_node *irn         = env->irn;
1020 	ir_entity *ent       = be_get_frame_entity(irn);
1021 	ir_mode *mode        = get_irn_mode(irn);
1022 	ir_mode *spillmode   = get_spill_mode(env->cg, irn);
1023 	ir_node *noreg       = ia32_new_NoReg_gp(env->cg);
1024 	ir_node *sched_point = NULL;
1025 	ir_node *ptr         = get_irg_frame(env->irg);
1026 	ir_node *mem         = get_irn_n(irn, be_pos_Reload_mem);
1027 	ir_node *new_op, *proj;
1028 	const arch_register_t *reg;
/* Remember the schedule position so the new Load replaces the Reload in place. */
1030 	if (sched_is_scheduled(irn)) {
1031 		sched_point = sched_prev(irn);
/* Pick the load flavor by spill mode: SSE2 xLoad, x87 vfld, or plain gp Load. */
1034 	if (mode_is_float(spillmode)) {
1035 		if (USE_SSE2(env->cg))
1036 			new_op = new_rd_ia32_xLoad(env->dbg, env->irg, env->block, ptr, noreg, mem);
1038 			new_op = new_rd_ia32_vfld(env->dbg, env->irg, env->block, ptr, noreg, mem);
1041 		new_op = new_rd_ia32_Load(env->dbg, env->irg, env->block, ptr, noreg, mem);
1043 	set_ia32_am_support(new_op, ia32_am_Source);
1044 	set_ia32_op_type(new_op, ia32_AddrModeS);
1045 	set_ia32_am_flavour(new_op, ia32_B);
1046 	set_ia32_ls_mode(new_op, spillmode);
1047 	set_ia32_frame_ent(new_op, ent);
1048 	set_ia32_use_frame(new_op);
1050 	DBG_OPT_RELOAD2LD(irn, new_op);
1052 	proj = new_rd_Proj(env->dbg, env->irg, env->block, new_op, mode, pn_ia32_Load_res);
1055 		sched_add_after(sched_point, new_op);
1056 		sched_add_after(new_op, proj);
1061 	/* copy the register from the old node to the new Load */
1062 	reg = arch_get_irn_register(env->cg->arch_env, irn);
1063 	arch_set_irn_register(env->cg->arch_env, new_op, reg);
1065 	SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env->cg, irn));
1067 	exchange(irn, proj);
1071 * Transforms a be_Spill node into a ia32 Store.
1073 static void transform_to_Store(ia32_transform_env_t *env) {
1074 	ir_node *irn   = env->irn;
1075 	ir_entity *ent = be_get_frame_entity(irn);
1076 	const ir_node *spillval = get_irn_n(irn, be_pos_Spill_val);
1077 	ir_mode *mode  = get_spill_mode(env->cg, spillval);
1078 	ir_node *noreg = ia32_new_NoReg_gp(env->cg);
1079 	ir_node *nomem = new_rd_NoMem(env->irg);
1080 	ir_node *ptr   = get_irg_frame(env->irg);
1081 	ir_node *val   = get_irn_n(irn, be_pos_Spill_val);
1083 	ir_node *sched_point = NULL;
1085 	if (sched_is_scheduled(irn)) {
1086 		sched_point = sched_prev(irn);
/* Pick the store flavor: SSE2 xStore, x87 vfst, 8-bit Store8Bit, or plain Store. */
1089 	if (mode_is_float(mode)) {
1090 		if (USE_SSE2(env->cg))
1091 			store = new_rd_ia32_xStore(env->dbg, env->irg, env->block, ptr, noreg, val, nomem);
1093 			store = new_rd_ia32_vfst(env->dbg, env->irg, env->block, ptr, noreg, val, nomem);
1095 	else if (get_mode_size_bits(mode) == 8) {
1096 		store = new_rd_ia32_Store8Bit(env->dbg, env->irg, env->block, ptr, noreg, val, nomem);
1099 		store = new_rd_ia32_Store(env->dbg, env->irg, env->block, ptr, noreg, val, nomem);
1102 	set_ia32_am_support(store, ia32_am_Dest);
1103 	set_ia32_op_type(store, ia32_AddrModeD);
1104 	set_ia32_am_flavour(store, ia32_B);
1105 	set_ia32_ls_mode(store, mode);
1106 	set_ia32_frame_ent(store, ent);
1107 	set_ia32_use_frame(store);
1109 	DBG_OPT_SPILL2ST(irn, store);
1110 	SET_IA32_ORIG_NODE(store, ia32_get_old_node_name(env->cg, irn));
1113 		sched_add_after(sched_point, store);
1117 	exchange(irn, store);
/* Build an ia32 Push reading 32 bits from the frame entity ent, scheduled
 * before schedpoint; used by the MemPerm lowering. */
1120 static ir_node *create_push(ia32_transform_env_t *env, ir_node *schedpoint, ir_node *sp, ir_node *mem, ir_entity *ent) {
1121 	ir_node *noreg = ia32_new_NoReg_gp(env->cg);
1122 	ir_node *frame = get_irg_frame(env->irg);
1124 	ir_node *push = new_rd_ia32_Push(env->dbg, env->irg, env->block, frame, noreg, noreg, sp, mem);
1126 	set_ia32_frame_ent(push, ent);
1127 	set_ia32_use_frame(push);
1128 	set_ia32_op_type(push, ia32_AddrModeS);
1129 	set_ia32_am_flavour(push, ia32_B);
1130 	set_ia32_ls_mode(push, mode_Is);
1132 	sched_add_before(schedpoint, push);
/* Build an ia32 Pop writing 32 bits to the frame entity ent, scheduled
 * before schedpoint; counterpart of create_push in the MemPerm lowering. */
1136 static ir_node *create_pop(ia32_transform_env_t *env, ir_node *schedpoint, ir_node *sp, ir_entity *ent) {
1137 	ir_node *noreg = ia32_new_NoReg_gp(env->cg);
1138 	ir_node *frame = get_irg_frame(env->irg);
1140 	ir_node *pop = new_rd_ia32_Pop(env->dbg, env->irg, env->block, frame, noreg, sp, new_NoMem());
1142 	set_ia32_frame_ent(pop, ent);
1143 	set_ia32_use_frame(pop);
1144 	set_ia32_op_type(pop, ia32_AddrModeD);
1145 	set_ia32_am_flavour(pop, ia32_am_OB);
1146 	set_ia32_ls_mode(pop, mode_Is);
1148 	sched_add_before(schedpoint, pop);
/* Create the stack-pointer Proj of pred at result pos, pin it to esp and
 * schedule it before schedpoint. */
1153 static ir_node* create_spproj(ia32_transform_env_t *env, ir_node *pred, int pos, ir_node *schedpoint) {
1154 	ir_mode *spmode = mode_Iu;
1155 	const arch_register_t *spreg = &ia32_gp_regs[REG_ESP];
1158 	sp = new_rd_Proj(env->dbg, env->irg, env->block, pred, spmode, pos);
1159 	arch_set_irn_register(env->cg->arch_env, sp, spreg);
1160 	sched_add_before(schedpoint, sp);
1166 * Transform memperm, currently we do this the ugly way and produce
1167 * push/pop into/from memory cascades. This is possible without using
1170 static void transform_MemPerm(ia32_transform_env_t *env) {
1171 	ir_node         *node = env->irn;
1173 	ir_node         *sp = be_abi_get_ignore_irn(env->cg->birg->abi, &ia32_gp_regs[REG_ESP]);
1174 	const ir_edge_t *edge;
1175 	const ir_edge_t *next;
1178 	arity = be_get_MemPerm_entity_arity(node);
1179 	pops = alloca(arity * sizeof(pops[0]));
/* Phase 1: push every input entity onto the stack (64-bit slots need two pushes). */
1182 	for(i = 0; i < arity; ++i) {
1183 		ir_entity *ent = be_get_MemPerm_in_entity(node, i);
1184 		ir_type *enttype = get_entity_type(ent);
1185 		int entbits = get_type_size_bits(enttype);
1186 		ir_node *mem = get_irn_n(node, i + 1);
1189 		assert( (entbits == 32 || entbits == 64) && "spillslot on x86 should be 32 or 64 bit");
1191 		push = create_push(env, node, sp, mem, ent);
1192 		sp = create_spproj(env, push, 0, node);
1194 			// add another push after the first one
1195 			push = create_push(env, node, sp, mem, ent);
1196 			add_ia32_am_offs_int(push, 4);
1197 			sp = create_spproj(env, push, 0, node);
1200 		set_irn_n(node, i, new_Bad());
/* Phase 2: pop in reverse order into the output entities, realizing the permutation. */
1204 	for(i = arity - 1; i >= 0; --i) {
1205 		ir_entity *ent = be_get_MemPerm_out_entity(node, i);
1206 		ir_type *enttype = get_entity_type(ent);
1207 		int entbits = get_type_size_bits(enttype);
1211 		assert( (entbits == 32 || entbits == 64) && "spillslot on x86 should be 32 or 64 bit");
1213 		pop = create_pop(env, node, sp, ent);
1215 			// add another pop after the first one
1216 			sp = create_spproj(env, pop, 1, node);
1217 			pop = create_pop(env, node, sp, ent);
1218 			add_ia32_am_offs_int(pop, 4);
1220 		sp = create_spproj(env, pop, 1, node);
1225 	// exchange memprojs
1226 	foreach_out_edge_safe(node, edge, next) {
1227 		ir_node *proj = get_edge_src_irn(edge);
1228 		int p = get_Proj_proj(proj);
1232 		set_Proj_pred(proj, pops[p]);
1233 		set_Proj_proj(proj, 3);
/* Finally detach the MemPerm from all its inputs. */
1237 	arity = get_irn_arity(node);
1238 	for(i = 0; i < arity; ++i) {
1239 		set_irn_n(node, i, new_Bad());
1245 * Block-Walker: Calls the transform functions Spill and Reload.
/*
 * Walks the block's schedule backwards (so newly inserted nodes are not
 * revisited) and lowers the generic backend nodes into ia32 equivalents:
 * Reload -> Load, Spill -> Store, MemPerm -> push/pop cascade.
 */
1247 static void ia32_after_ra_walker(ir_node *block, void *env) {
1248 ir_node *node, *prev;
1249 ia32_code_gen_t *cg = env;
1250 ia32_transform_env_t tenv;
1253 tenv.irg = current_ir_graph;
1255 DEBUG_ONLY(tenv.mod = cg->mod;)
1257 /* beware: the schedule is changed here */
1258 for (node = sched_last(block); !sched_is_begin(node); node = prev) {
/* fetch prev first: the transforms may remove/replace the current node */
1259 prev = sched_prev(node);
1260 tenv.dbg = get_irn_dbg_info(node);
1262 tenv.mode = get_irn_mode(node);
1264 if (be_is_Reload(node)) {
1265 transform_to_Load(&tenv);
1266 } else if (be_is_Spill(node)) {
1267 transform_to_Store(&tenv);
1268 } else if(be_is_MemPerm(node)) {
1269 transform_MemPerm(&tenv);
1275 * We transform Spill and Reload here. This needs to be done before
1276 * stack biasing otherwise we would miss the corrected offset for these nodes.
/* After-register-allocation hook: lower Spill/Reload/MemPerm, then run the
 * ia32 finishing pass (register/flag fixups) on the graph. */
1278 static void ia32_after_ra(void *self) {
1279 ia32_code_gen_t *cg = self;
1280 ir_graph *irg = cg->irg;
1282 irg_block_walk_graph(irg, NULL, ia32_after_ra_walker, cg);
1284 ia32_finish_irg(irg, cg);
1288 * Last touchups for the graph before emit: x87 simulation to replace the
1289 * virtual with real x87 instructions, creating a block schedule and peephole
1292 static void ia32_finish(void *self) {
1293 ia32_code_gen_t *cg = self;
1294 ir_graph *irg = cg->irg;
1296 /* if we do x87 code generation, rewrite all the virtual instructions and registers */
1297 if (cg->used_fp == fp_x87 || cg->force_sim) {
1298 x87_simulate_graph(cg->arch_env, cg->birg);
1301 /* create block schedule, this also removes empty blocks which might
1302 * produce critical edges */
1303 cg->blk_sched = be_create_block_schedule(irg, cg->birg->exec_freq);
1305 /* do peephole optimisations */
1306 ia32_peephole_optimization(irg, cg);
1310 * Emits the code, closes the output file and frees
1311 * the code generator interface.
1313 static void ia32_codegen(void *self) {
1314 ia32_code_gen_t *cg = self;
1315 ir_graph *irg = cg->irg;
/* emit assembly for the whole routine into the isa's output file */
1317 ia32_gen_routine(cg->isa->out, irg, cg);
1321 /* remove it from the isa */
1324 /* de-allocate code generator */
1325 del_set(cg->reg_set);
/* forward declaration needed by the vtable below */
1329 static void *ia32_cg_init(be_irg_t *birg);
/** Code generator interface: hooks called by the generic backend driver
 *  at the corresponding phases of code generation. */
1331 static const arch_code_generator_if_t ia32_code_gen_if = {
1333 NULL, /* before abi introduce hook */
1336 ia32_before_sched, /* before scheduling hook */
1337 ia32_before_ra, /* before register allocation hook */
1338 ia32_after_ra, /* after register allocation hook */
1339 ia32_finish, /* called before codegen */
1340 ia32_codegen /* emit && done */
1344 * Initializes a IA32 code generator.
/*
 * Allocates and fills a fresh ia32_code_gen_t for the given be_irg,
 * copying the relevant settings (arch, opt_arch, fp_kind) from the isa.
 * Also resets the isa's per-irg name obstack and publishes the new cg
 * in the global irn_ops and cur_reg_set.
 */
1346 static void *ia32_cg_init(be_irg_t *birg) {
1347 ia32_isa_t *isa = (ia32_isa_t *)birg->main_env->arch_env->isa;
1348 ia32_code_gen_t *cg = xcalloc(1, sizeof(*cg));
1350 cg->impl = &ia32_code_gen_if;
1351 cg->irg = birg->irg;
1352 cg->reg_set = new_set(ia32_cmp_irn_reg_assoc, 1024);
1353 cg->arch_env = birg->main_env->arch_env;
1356 cg->blk_sched = NULL;
1357 cg->fp_to_gp = NULL;
1358 cg->gp_to_fp = NULL;
1359 cg->fp_kind = isa->fp_kind;
1360 cg->used_fp = fp_none;
1361 cg->dump = (birg->main_env->options->dump_flags & DUMP_BE) ? 1 : 0;
1363 FIRM_DBG_REGISTER(cg->mod, "firm.be.ia32.cg");
1365 /* copy optimizations from isa for easier access */
1367 cg->arch = isa->arch;
1368 cg->opt_arch = isa->opt_arch;
/* recycle the name obstack: names are only valid per irg */
1374 if (isa->name_obst) {
1375 obstack_free(isa->name_obst, NULL);
1376 obstack_init(isa->name_obst);
1380 cur_reg_set = cg->reg_set;
/* NOTE: global state — only one code generator can be active at a time */
1382 ia32_irn_ops.cg = cg;
1384 return (arch_code_generator_t *)cg;
1389 /*****************************************************************
1390 * ____ _ _ _____ _____
1391 * | _ \ | | | | |_ _|/ ____| /\
1392 * | |_) | __ _ ___| | _____ _ __ __| | | | | (___ / \
1393 * | _ < / _` |/ __| |/ / _ \ '_ \ / _` | | | \___ \ / /\ \
1394 * | |_) | (_| | (__| < __/ | | | (_| | _| |_ ____) / ____ \
1395 * |____/ \__,_|\___|_|\_\___|_| |_|\__,_| |_____|_____/_/ \_\
1397 *****************************************************************/
1400 * Set output modes for GCC
1402 static const tarval_mode_info mo_integer = {
1409 * set the tarval output mode of all integer modes to decimal
1411 static void set_tarval_output_modes(void)
/* iterate over all modes registered in the program and attach the decimal
 * output option to every integer mode */
1415 for (i = get_irp_n_modes() - 1; i >= 0; --i) {
1416 ir_mode *mode = get_irp_mode(i);
1418 if (mode_is_int(mode))
1419 set_tarval_mode_output_option(mode, &mo_integer);
1425 * The template that generates a new ISA object.
1426 * Note that this template can be changed by command line
/* Defaults: all optimizations on, target/optimize for Pentium 4, SSE2 FP.
 * The libcore option table below patches these fields before ia32_init()
 * memcpy's the template into the live isa object. */
1429 static ia32_isa_t ia32_isa_template = {
1431 &ia32_isa_if, /* isa interface implementation */
1432 &ia32_gp_regs[REG_ESP], /* stack pointer register */
1433 &ia32_gp_regs[REG_EBP], /* base pointer register */
1434 -1, /* stack direction */
1435 NULL, /* main environment */
1437 NULL, /* 16bit register names */
1438 NULL, /* 8bit register names */
1442 IA32_OPT_INCDEC | /* optimize add 1, sub 1 into inc/dec default: on */
1443 IA32_OPT_DOAM | /* optimize address mode default: on */
1444 IA32_OPT_LEA | /* optimize for LEAs default: on */
1445 IA32_OPT_PLACECNST | /* place constants immediately before instructions, default: on */
1446 IA32_OPT_IMMOPS | /* operations can use immediates, default: on */
1447 IA32_OPT_EXTBB | /* use extended basic block scheduling, default: on */
1448 IA32_OPT_PUSHARGS), /* create pushs for function argument passing, default: on */
1449 arch_pentium_4, /* instruction architecture */
1450 arch_pentium_4, /* optimize for architecture */
1451 fp_sse2, /* use sse2 unit */
1452 NULL, /* current code generator */
1453 NULL, /* output file */
1455 NULL, /* name obstack */
1456 0 /* name obst size */
1461 * Initializes the backend ISA.
/*
 * Creates the isa object from the (option-patched) template, registers
 * registers/opcodes, sanitizes fp_kind and opt flags for the selected CPU,
 * builds the register-name maps and writes the assembly file prologue.
 * @param file_handle  open output stream the assembly is emitted to
 */
1463 static void *ia32_init(FILE *file_handle) {
1464 static int inited = 0;
1470 set_tarval_output_modes();
1472 isa = xmalloc(sizeof(*isa));
1473 memcpy(isa, &ia32_isa_template, sizeof(*isa));
1475 ia32_register_init(isa);
1476 ia32_create_opcodes();
1477 ia32_register_copy_attr_func();
1479 if ((ARCH_INTEL(isa->arch) && isa->arch < arch_pentium_4) ||
1480 (ARCH_AMD(isa->arch) && isa->arch < arch_athlon))
1481 /* no SSE2 for these cpu's */
1482 isa->fp_kind = fp_x87;
1484 if (ARCH_INTEL(isa->opt_arch) && isa->opt_arch >= arch_pentium_4) {
1485 /* Pentium 4 doesn't like inc and dec instructions */
1486 isa->opt &= ~IA32_OPT_INCDEC;
1489 isa->regs_16bit = pmap_create();
1490 isa->regs_8bit = pmap_create();
1491 isa->types = pmap_create();
1492 isa->tv_ent = pmap_create();
1493 isa->out = file_handle;
1494 isa->cpu = ia32_init_machine_description();
1496 ia32_build_16bit_reg_map(isa->regs_16bit);
1497 ia32_build_8bit_reg_map(isa->regs_8bit);
1499 /* patch register names of x87 registers */
1500 ia32_st_regs[0].name = "st";
1501 ia32_st_regs[1].name = "st(1)";
1502 ia32_st_regs[2].name = "st(2)";
1503 ia32_st_regs[3].name = "st(3)";
1504 ia32_st_regs[4].name = "st(4)";
1505 ia32_st_regs[5].name = "st(5)";
1506 ia32_st_regs[6].name = "st(6)";
1507 ia32_st_regs[7].name = "st(7)";
1510 isa->name_obst = xmalloc(sizeof(*isa->name_obst));
1511 obstack_init(isa->name_obst);
1514 ia32_handle_intrinsics();
1515 ia32_switch_section(isa->out, NO_SECTION);
1516 fprintf(isa->out, "\t.intel_syntax\n");
1518 /* needed for the debug support */
1519 ia32_switch_section(isa->out, SECTION_TEXT);
1520 fprintf(isa->out, ".Ltext0:\n");
1530 * Closes the output file and frees the ISA structure.
/* Tear-down counterpart of ia32_init(): emit pending global declarations,
 * then release all isa-owned maps and the name obstack. */
1532 static void ia32_done(void *self) {
1533 ia32_isa_t *isa = self;
1535 /* emit now all global declarations */
1536 ia32_gen_decls(isa->out, isa->arch_isa.main_env);
1538 pmap_destroy(isa->regs_16bit);
1539 pmap_destroy(isa->regs_8bit);
1540 pmap_destroy(isa->tv_ent);
1541 pmap_destroy(isa->types);
1544 obstack_free(isa->name_obst, NULL);
1552 * Return the number of register classes for this architecture.
1553 * We report always these:
1554 * - the general purpose registers
1555 * - the SSE floating point register set
1556 * - the virtual floating point registers
1558 static int ia32_get_n_reg_class(const void *self) {
1563 * Return the register class for index i.
/* index mapping: 0 -> gp, 1 -> xmm (SSE), 2 -> vfp (virtual x87) */
1565 static const arch_register_class_t *ia32_get_reg_class(const void *self, int i) {
1566 assert(i >= 0 && i < 3 && "Invalid ia32 register class requested.");
1568 return &ia32_reg_classes[CLASS_ia32_gp];
1570 return &ia32_reg_classes[CLASS_ia32_xmm];
1572 return &ia32_reg_classes[CLASS_ia32_vfp];
1576 * Get the register class which shall be used to store a value of a given mode.
1577 * @param self The this pointer.
1578 * @param mode The mode in question.
1579 * @return A register class which can hold values of the given mode.
1581 const arch_register_class_t *ia32_get_reg_class_for_mode(const void *self, const ir_mode *mode) {
1582 const ia32_isa_t *isa = self;
1583 if (mode_is_float(mode)) {
/* float values live in xmm registers when SSE2 is enabled, else in the
 * virtual x87 class */
1584 return USE_SSE2(isa) ? &ia32_reg_classes[CLASS_ia32_xmm] : &ia32_reg_classes[CLASS_ia32_vfp];
1587 return &ia32_reg_classes[CLASS_ia32_gp];
1591 * Get the ABI restrictions for procedure calls.
1592 * @param self The this pointer.
1593 * @param method_type The type of the method (procedure) in question.
1594 * @param abi The abi object to be modified
1596 static void ia32_get_call_abi(const void *self, ir_type *method_type, be_abi_call_t *abi) {
1597 const ia32_isa_t *isa = self;
1600 unsigned cc = get_method_calling_convention(method_type);
1601 int n = get_method_n_params(method_type);
1604 int i, ignore_1, ignore_2;
1606 const arch_register_t *reg;
1607 be_abi_call_flags_t call_flags = be_abi_call_get_flags(abi);
/* on P6-class CPUs mov+sub is preferred over push for argument stores */
1609 unsigned use_push = !IS_P6_ARCH(isa->opt_arch);
1611 /* set abi flags for calls */
1612 call_flags.bits.left_to_right = 0; /* always last arg first on stack */
1613 call_flags.bits.store_args_sequential = use_push;
1614 /* call_flags.bits.try_omit_fp not changed: can handle both settings */
1615 call_flags.bits.fp_free = 0; /* the frame pointer is fixed in IA32 */
1616 call_flags.bits.call_has_imm = 1; /* IA32 calls can have immediate address */
1618 /* set stack parameter passing style */
1619 be_abi_call_set_flags(abi, call_flags, &ia32_abi_callbacks);
1621 /* collect the mode for each type */
1622 modes = alloca(n * sizeof(modes[0]));
1624 for (i = 0; i < n; i++) {
1625 tp = get_method_param_type(method_type, i);
1626 modes[i] = get_type_mode(tp);
1629 /* set register parameters */
1630 if (cc & cc_reg_param) {
1631 /* determine the number of parameters passed via registers */
1632 biggest_n = ia32_get_n_regparam_class(n, modes, &ignore_1, &ignore_2);
1634 /* loop over all parameters and set the register requirements */
1635 for (i = 0; i <= biggest_n; i++) {
1636 reg = ia32_get_RegParam_reg(n, modes, i, cc);
1637 assert(reg && "kaputt");
1638 be_abi_call_param_reg(abi, i, reg);
1645 /* set stack parameters */
1646 for (i = stack_idx; i < n; i++) {
1647 /* parameters on the stack are 32 bit aligned */
1648 be_abi_call_param_stack(abi, i, 4, 0, 0);
1652 /* set return registers */
1653 n = get_method_n_ress(method_type);
1655 assert(n <= 2 && "more than two results not supported");
1657 /* In case of 64bit returns, we will have two 32bit values */
1659 tp = get_method_res_type(method_type, 0);
1660 mode = get_type_mode(tp);
1662 assert(!mode_is_float(mode) && "two FP results not supported");
1664 tp = get_method_res_type(method_type, 1);
1665 mode = get_type_mode(tp);
1667 assert(!mode_is_float(mode) && "mixed INT, FP results not supported");
/* 64bit integer results are returned in the EDX:EAX pair */
1669 be_abi_call_res_reg(abi, 0, &ia32_gp_regs[REG_EAX]);
1670 be_abi_call_res_reg(abi, 1, &ia32_gp_regs[REG_EDX]);
1673 const arch_register_t *reg;
1675 tp = get_method_res_type(method_type, 0);
1676 assert(is_atomic_type(tp));
1677 mode = get_type_mode(tp);
/* single result: float in virtual st0, integer in EAX */
1679 reg = mode_is_float(mode) ? &ia32_vfp_regs[REG_VF0] : &ia32_gp_regs[REG_EAX];
1681 be_abi_call_res_reg(abi, 0, reg);
/** Returns the node operations for any node — ia32 uses one global set. */
1686 static const void *ia32_get_irn_ops(const arch_irn_handler_t *self, const ir_node *irn) {
1687 return &ia32_irn_ops;
1690 const arch_irn_handler_t ia32_irn_handler = {
1694 const arch_irn_handler_t *ia32_get_irn_handler(const void *self) {
1695 return &ia32_irn_handler;
/** Scheduler callback: 1 for ia32 nodes (schedule them), -1 for all others
 *  (let the default decision apply). */
1698 int ia32_to_appear_in_schedule(void *block_env, const ir_node *irn) {
1699 return is_ia32_irn(irn) ? 1 : -1;
1703 * Initializes the code generator interface.
1705 static const arch_code_generator_if_t *ia32_get_code_generator_if(void *self) {
1706 return &ia32_code_gen_if;
1710 * Returns the estimated execution time of an ia32 irn.
/* non-ia32 nodes get a flat cost of 1 */
1712 static sched_timestep_t ia32_sched_exectime(void *env, const ir_node *irn) {
1713 const arch_env_t *arch_env = env;
1714 return is_ia32_irn(irn) ? ia32_get_op_estimated_cost(arch_get_irn_ops(arch_env, irn), irn) : 1;
/* writable copy of the generic selector with ia32-specific overrides */
1717 list_sched_selector_t ia32_sched_selector;
1720 * Returns the reg_pressure scheduler with to_appear_in_schedule() overloaded
1722 static const list_sched_selector_t *ia32_get_list_sched_selector(const void *self, list_sched_selector_t *selector) {
1723 memcpy(&ia32_sched_selector, selector, sizeof(ia32_sched_selector));
1724 ia32_sched_selector.exectime = ia32_sched_exectime;
1725 ia32_sched_selector.to_appear_in_schedule = ia32_to_appear_in_schedule;
1726 return &ia32_sched_selector;
1729 static const ilp_sched_selector_t *ia32_get_ilp_sched_selector(const void *self) {
1734 * Returns the necessary byte alignment for storing a register of given class.
1736 static int ia32_get_reg_class_alignment(const void *self, const arch_register_class_t *cls) {
1737 ir_mode *mode = arch_register_class_mode(cls);
1738 int bytes = get_mode_size_bytes(mode);
/* wide FP registers (SSE/x87) are capped — the following (sampled-out)
 * code presumably clamps the alignment; confirm against the full source */
1740 if (mode_is_float(mode) && bytes > 8)
/**
 * Returns the execution units allowed for @p irn, used by the ILP
 * scheduler: ia32 nodes carry their own unit set; backend Call/Return
 * nodes map to the branch units, Barriers and other be nodes to
 * dummy/other unit sets.
 */
1745 static const be_execution_unit_t ***ia32_get_allowed_execution_units(const void *self, const ir_node *irn) {
1746 static const be_execution_unit_t *_allowed_units_BRANCH[] = {
1747 &ia32_execution_units_BRANCH[IA32_EXECUNIT_TP_BRANCH_BRANCH1],
1748 &ia32_execution_units_BRANCH[IA32_EXECUNIT_TP_BRANCH_BRANCH2],
1751 static const be_execution_unit_t *_allowed_units_ALU[] = {
1752 &ia32_execution_units_ALU[IA32_EXECUNIT_TP_ALU_ALU1],
1753 &ia32_execution_units_ALU[IA32_EXECUNIT_TP_ALU_ALU2],
1754 &ia32_execution_units_ALU[IA32_EXECUNIT_TP_ALU_ALU3],
1755 &ia32_execution_units_ALU[IA32_EXECUNIT_TP_ALU_ALU4],
1758 static const be_execution_unit_t *_allowed_units_DUMMY[] = {
1759 &ia32_execution_units_DUMMY[IA32_EXECUNIT_TP_DUMMY_DUMMY1],
1760 &ia32_execution_units_DUMMY[IA32_EXECUNIT_TP_DUMMY_DUMMY2],
1761 &ia32_execution_units_DUMMY[IA32_EXECUNIT_TP_DUMMY_DUMMY3],
1762 &ia32_execution_units_DUMMY[IA32_EXECUNIT_TP_DUMMY_DUMMY4],
1765 static const be_execution_unit_t **_units_callret[] = {
1766 _allowed_units_BRANCH,
1769 static const be_execution_unit_t **_units_other[] = {
1773 static const be_execution_unit_t **_units_dummy[] = {
1774 _allowed_units_DUMMY,
1777 const be_execution_unit_t ***ret;
1779 if (is_ia32_irn(irn)) {
1780 ret = get_ia32_exec_units(irn);
1782 else if (is_be_node(irn)) {
1783 if (be_is_Call(irn) || be_is_Return(irn)) {
1784 ret = _units_callret;
1786 else if (be_is_Barrier(irn)) {
1803 static const be_machine_t *ia32_get_machine(const void *self) {
1804 const ia32_isa_t *isa = self;
1809 * Allows or disallows the creation of Psi nodes for the given Phi nodes.
1810 * @return 1 if allowed, 0 otherwise
1812 static int ia32_is_psi_allowed(ir_node *sel, ir_node *phi_list, int i, int j)
1814 ir_node *cmp, *cmp_a, *phi;
1817 /* we don't want long long an floating point Psi */
1818 #define IS_BAD_PSI_MODE(mode) (mode_is_float(mode) || get_mode_size_bits(mode) > 32)
1820 if (get_irn_mode(sel) != mode_b)
1823 cmp = get_Proj_pred(sel);
1824 cmp_a = get_Cmp_left(cmp);
1825 mode = get_irn_mode(cmp_a);
1827 if (IS_BAD_PSI_MODE(mode))
1830 /* check the Phi nodes */
1831 for (phi = phi_list; phi; phi = get_irn_link(phi)) {
1832 ir_node *pred_i = get_irn_n(phi, i);
1833 ir_node *pred_j = get_irn_n(phi, j);
1834 ir_mode *mode_i = get_irn_mode(pred_i);
1835 ir_mode *mode_j = get_irn_mode(pred_j);
1837 if (IS_BAD_PSI_MODE(mode_i) || IS_BAD_PSI_MODE(mode_j))
1841 #undef IS_BAD_PSI_MODE
/** Context object handed to ia32_create_intrinsic_fkt; entities are
 *  created lazily, hence all NULL here. */
1846 static ia32_intrinsic_env_t intrinsic_env = {
1847 NULL, /**< the irg, these entities belong to */
1848 NULL, /**< entity for first div operand (move into FPU) */
1849 NULL, /**< entity for second div operand (move into FPU) */
1850 NULL, /**< entity for converts ll -> d */
1851 NULL, /**< entity for converts d -> ll */
1855 * Returns the libFirm configuration parameter for this backend.
1857 static const backend_params *ia32_get_libfirm_params(void) {
1858 static const opt_if_conv_info_t ifconv = {
1859 4, /* maxdepth, doesn't matter for Psi-conversion */
1860 ia32_is_psi_allowed /* allows or disallows Psi creation for given selector */
1862 static const arch_dep_params_t ad = {
1863 1, /* also use subs */
1864 4, /* maximum shifts */
1865 31, /* maximum shift amount */
1867 1, /* allow Mulhs */
1868 1, /* allow Mulus */
1869 32 /* Mulh allowed up to 32 bit */
1871 static backend_params p = {
1872 NULL, /* no additional opcodes */
1873 NULL, /* will be set later */
1874 1, /* need dword lowering */
1875 ia32_create_intrinsic_fkt,
1876 &intrinsic_env, /* context for ia32_create_intrinsic_fkt */
1877 NULL, /* will be set later */
1881 p.if_conv_info = &ifconv;
1886 /* instruction set architectures. */
/* Command line values for -b ia32-arch / ia32-opt; several aliases map
 * to the same architecture constant. */
1887 static const lc_opt_enum_int_items_t arch_items[] = {
1888 { "386", arch_i386, },
1889 { "486", arch_i486, },
1890 { "pentium", arch_pentium, },
1891 { "586", arch_pentium, },
1892 { "pentiumpro", arch_pentium_pro, },
1893 { "686", arch_pentium_pro, },
1894 { "pentiummmx", arch_pentium_mmx, },
1895 { "pentium2", arch_pentium_2, },
1896 { "p2", arch_pentium_2, },
1897 { "pentium3", arch_pentium_3, },
1898 { "p3", arch_pentium_3, },
1899 { "pentium4", arch_pentium_4, },
1900 { "p4", arch_pentium_4, },
1901 { "pentiumm", arch_pentium_m, },
1902 { "pm", arch_pentium_m, },
1903 { "core", arch_core, },
1905 { "athlon", arch_athlon, },
1906 { "athlon64", arch_athlon_64, },
1907 { "opteron", arch_opteron, },
/* the option variables below patch ia32_isa_template directly, so options
 * must be parsed before ia32_init() copies the template */
1911 static lc_opt_enum_int_var_t arch_var = {
1912 &ia32_isa_template.arch, arch_items
1915 static lc_opt_enum_int_var_t opt_arch_var = {
1916 &ia32_isa_template.opt_arch, arch_items
1919 static const lc_opt_enum_int_items_t fp_unit_items[] = {
1921 { "sse2", fp_sse2 },
1925 static lc_opt_enum_int_var_t fp_unit_var = {
1926 &ia32_isa_template.fp_kind, fp_unit_items
1929 static const lc_opt_enum_int_items_t gas_items[] = {
1930 { "linux", ASM_LINUX_GAS },
1931 { "mingw", ASM_MINGW_GAS },
1935 static lc_opt_enum_int_var_t gas_var = {
1936 (int *)&asm_flavour, gas_items
1939 static const lc_opt_table_entry_t ia32_options[] = {
1940 LC_OPT_ENT_ENUM_INT("arch", "select the instruction architecture", &arch_var),
1941 LC_OPT_ENT_ENUM_INT("opt", "optimize for instruction architecture", &opt_arch_var),
1942 LC_OPT_ENT_ENUM_INT("fpunit", "select the floating point unit", &fp_unit_var),
1943 LC_OPT_ENT_NEGBIT("noaddrmode", "do not use address mode", &ia32_isa_template.opt, IA32_OPT_DOAM),
1944 LC_OPT_ENT_NEGBIT("nolea", "do not optimize for LEAs", &ia32_isa_template.opt, IA32_OPT_LEA),
1945 LC_OPT_ENT_NEGBIT("noplacecnst", "do not place constants", &ia32_isa_template.opt, IA32_OPT_PLACECNST),
1946 LC_OPT_ENT_NEGBIT("noimmop", "no operations with immediates", &ia32_isa_template.opt, IA32_OPT_IMMOPS),
1947 LC_OPT_ENT_NEGBIT("noextbb", "do not use extended basic block scheduling", &ia32_isa_template.opt, IA32_OPT_EXTBB),
1948 LC_OPT_ENT_NEGBIT("nopushargs", "do not create pushs for function arguments", &ia32_isa_template.opt, IA32_OPT_PUSHARGS),
1949 LC_OPT_ENT_ENUM_INT("gasmode", "set the GAS compatibility mode", &gas_var),
1954 * Register command line options for the ia32 backend.
1958 * ia32-arch=arch create instruction for arch
1959 * ia32-opt=arch optimize for run on arch
1960 * ia32-fpunit=unit select floating point unit (x87 or SSE2)
1961 * ia32-incdec optimize for inc/dec
1962 * ia32-noaddrmode do not use address mode
1963 * ia32-nolea do not optimize for LEAs
1964 * ia32-noplacecnst do not place constants,
1965 * ia32-noimmop no operations with immediates
1966 * ia32-noextbb do not use extended basic block scheduling
1967 * ia32-nopushargs do not create pushs for function argument passing
1968 * ia32-gasmode set the GAS compatibility mode
1970 static void ia32_register_options(lc_opt_entry_t *ent)
/* attach the ia32 option table under the backend's "ia32" option group */
1972 lc_opt_entry_t *be_grp_ia32 = lc_opt_get_grp(ent, "ia32");
1973 lc_opt_add_table(be_grp_ia32, ia32_options);
1975 #endif /* WITH_LIBCORE */
/** The public ISA interface of the ia32 backend: function table consumed
 *  by the generic backend driver. */
1977 const arch_isa_if_t ia32_isa_if = {
1980 ia32_get_n_reg_class,
1982 ia32_get_reg_class_for_mode,
1984 ia32_get_irn_handler,
1985 ia32_get_code_generator_if,
1986 ia32_get_list_sched_selector,
1987 ia32_get_ilp_sched_selector,
1988 ia32_get_reg_class_alignment,
1989 ia32_get_libfirm_params,
1990 ia32_get_allowed_execution_units,
1993 ia32_register_options