2 * This is the main ia32 firm backend driver.
3 * @author Christian Wuerdig
20 #include <libcore/lc_opts.h>
21 #include <libcore/lc_opts_enum.h>
22 #endif /* WITH_LIBCORE */
26 #include "pseudo_irg.h"
30 #include "iredges_t.h"
39 #include "../beabi.h" /* the general register allocator interface */
40 #include "../benode_t.h"
41 #include "../belower.h"
42 #include "../besched_t.h"
45 #include "../beirgmod.h"
46 #include "../be_dbgout.h"
47 #include "../beblocksched.h"
48 #include "../bemachine.h"
49 #include "../beilpsched.h"
50 #include "../bespillslots.h"
52 #include "bearch_ia32_t.h"
54 #include "ia32_new_nodes.h" /* ia32 nodes interface */
55 #include "gen_ia32_regalloc_if.h" /* the generated interface (register type and class defenitions) */
56 #include "gen_ia32_machine.h"
57 #include "ia32_gen_decls.h" /* interface declaration emitter */
58 #include "ia32_transform.h"
59 #include "ia32_emitter.h"
60 #include "ia32_map_regs.h"
61 #include "ia32_optimize.h"
63 #include "ia32_dbg_stat.h"
64 #include "ia32_finish.h"
65 #include "ia32_util.h"
67 #define DEBUG_MODULE "firm.be.ia32.isa"
/* Per-irg scratch register map consumed by ia32_set_firm_reg/ia32_get_firm_reg
 * below; NULL until the code generator installs one. */
70 static set *cur_reg_set = NULL;
72 /* Creates the unique per irg GP NoReg node. */
/* Returns the graph's shared general-purpose "no register" placeholder,
 * materialized via the ABI callee-save mechanism. */
73 ir_node *ia32_new_NoReg_gp(ia32_code_gen_t *cg) {
74 return be_abi_get_callee_save_irn(cg->birg->abi, &ia32_gp_regs[REG_GP_NOREG]);
77 /* Creates the unique per irg FP NoReg node. */
/* Picks the xmm placeholder when SSE2 is in use, the virtual x87 one
 * otherwise. */
78 ir_node *ia32_new_NoReg_fp(ia32_code_gen_t *cg) {
79 return be_abi_get_callee_save_irn(cg->birg->abi,
80 USE_SSE2(cg) ? &ia32_xmm_regs[REG_XMM_NOREG] : &ia32_vfp_regs[REG_VFP_NOREG]);
84 * Returns gp_noreg or fp_noreg, depending in input requirements.
/* Queries the register requirements of irn at position pos and hands out
 * the matching NoReg node: GP class -> gp_noreg, anything else -> fp_noreg.
 * NOTE(review): this excerpt is line-sampled; some original lines are
 * elided here. Code kept byte-identical. */
86 ir_node *ia32_get_admissible_noreg(ia32_code_gen_t *cg, ir_node *irn, int pos) {
87 arch_register_req_t req;
88 const arch_register_req_t *p_req;
90 p_req = arch_get_register_req(cg->arch_env, &req, irn, pos);
91 assert(p_req && "Missing register requirements");
92 if (p_req->cls == &ia32_reg_classes[CLASS_ia32_gp])
93 return ia32_new_NoReg_gp(cg);
95 return ia32_new_NoReg_fp(cg);
98 /**************************************************
101 * _ __ ___ __ _ __ _| | | ___ ___ _| |_
102 * | '__/ _ \/ _` | / _` | | |/ _ \ / __| | | _|
103 * | | | __/ (_| | | (_| | | | (_) | (__ | | |
104 * |_| \___|\__, | \__,_|_|_|\___/ \___| |_|_|
107 **************************************************/
110 * Return register requirements for an ia32 node.
111 * If the node returns a tuple (mode_T) then the proj's
112 * will be asked for this information.
/* @param self The ia32_irn_ops_t callback object.
 * @param req  Out-parameter filled with the requirements (via memcpy below).
 * @param irn  The node (Projs are skipped to the producing node).
 * @param pos  >= 0: IN requirement at that position; -1: OUT requirement.
 * NOTE(review): excerpt is line-sampled — several returns/braces are
 * elided between the visible lines; code kept byte-identical. */
114 static const arch_register_req_t *ia32_get_irn_reg_req(const void *self, arch_register_req_t *req, const ir_node *irn, int pos) {
115 const ia32_irn_ops_t *ops = self;
116 const ia32_register_req_t *irn_req;
117 long node_pos = pos == -1 ? 0 : pos;
118 ir_mode *mode = is_Block(irn) ? NULL : get_irn_mode(irn);
119 FIRM_DBG_REGISTER(firm_dbg_module_t *mod, DEBUG_MODULE);
/* Blocks and control-flow (mode_X) nodes carry no register requirements. */
121 if (is_Block(irn) || mode == mode_X) {
122 DBG((mod, LEVEL_1, "ignoring Block, mode_M, mode_X node %+F\n", irn));
/* Tuple results are only meaningful through their Projs. */
126 if (mode == mode_T && pos < 0) {
127 DBG((mod, LEVEL_1, "ignoring request OUT requirements for node %+F\n", irn));
131 DBG((mod, LEVEL_1, "get requirements at pos %d for %+F ... ", pos, irn));
138 DBG((mod, LEVEL_1, "ignoring request IN requirements for node %+F\n", irn));
/* For a Proj, redirect the query to the predecessor's OUT slot. */
142 node_pos = (pos == -1) ? get_Proj_proj(irn) : pos;
143 irn = skip_Proj_const(irn);
145 DB((mod, LEVEL_1, "skipping Proj, going to %+F at pos %d ... ", irn, node_pos));
148 if (is_ia32_irn(irn)) {
149 irn_req = (pos >= 0) ? get_ia32_in_req(irn, pos) : get_ia32_out_req(irn, node_pos);
150 if (irn_req == NULL) {
151 /* no requirements */
155 DB((mod, LEVEL_1, "returning reqs for %+F at pos %d\n", irn, pos));
157 memcpy(req, &(irn_req->req), sizeof(*req));
/* Propagate should-be-same / should-be-different constraints by
 * resolving the referenced operand nodes. */
159 if (arch_register_req_is(&(irn_req->req), should_be_same)) {
160 assert(irn_req->same_pos >= 0 && "should be same constraint for in -> out NYI");
161 req->other_same = get_irn_n(irn, irn_req->same_pos);
164 if (arch_register_req_is(&(irn_req->req), should_be_different)) {
165 assert(irn_req->different_pos >= 0 && "should be different constraint for in -> out NYI");
166 req->other_different = get_irn_n(irn, irn_req->different_pos);
170 /* treat Unknowns like Const with default requirements */
171 if (is_Unknown(irn)) {
172 DB((mod, LEVEL_1, "returning UKNWN reqs for %+F\n", irn));
173 if (mode_is_float(mode)) {
174 if (USE_SSE2(ops->cg))
175 memcpy(req, &(ia32_default_req_ia32_xmm_xmm_UKNWN), sizeof(*req));
177 memcpy(req, &(ia32_default_req_ia32_vfp_vfp_UKNWN), sizeof(*req));
179 else if (mode_is_int(mode) || mode_is_reference(mode))
180 memcpy(req, &(ia32_default_req_ia32_gp_gp_UKNWN), sizeof(*req));
181 else if (mode == mode_T || mode == mode_M) {
182 DBG((mod, LEVEL_1, "ignoring Unknown node %+F\n", irn));
186 assert(0 && "unsupported Unknown-Mode");
/* Non-ia32 nodes: no backend-specific requirements. */
189 DB((mod, LEVEL_1, "returning NULL for %+F (not ia32)\n", irn));
/* Records the register assigned to irn. ia32 nodes store it in their own
 * slot array; other nodes go through the generic cur_reg_set map.
 * Projs are skipped to the producing node first.
 * NOTE(review): excerpt is line-sampled; code kept byte-identical. */
197 static void ia32_set_irn_reg(const void *self, ir_node *irn, const arch_register_t *reg) {
199 const ia32_irn_ops_t *ops = self;
/* Control-flow nodes never carry a data register. */
201 if (get_irn_mode(irn) == mode_X) {
205 DBG((ops->cg->mod, LEVEL_1, "ia32 assigned register %s to node %+F\n", reg->name, irn));
208 pos = get_Proj_proj(irn);
209 irn = skip_Proj(irn);
212 if (is_ia32_irn(irn)) {
213 const arch_register_t **slots;
215 slots = get_ia32_slots(irn);
219 ia32_set_firm_reg(irn, reg, cur_reg_set);
/* Counterpart of ia32_set_irn_reg: looks up the register assigned to irn,
 * from the node's slot array for ia32 nodes or from cur_reg_set otherwise.
 * Returns NULL if nothing was assigned (e.g. mode_X nodes).
 * NOTE(review): excerpt is line-sampled; code kept byte-identical. */
223 static const arch_register_t *ia32_get_irn_reg(const void *self, const ir_node *irn) {
225 const arch_register_t *reg = NULL;
229 if (get_irn_mode(irn) == mode_X) {
233 pos = get_Proj_proj(irn);
234 irn = skip_Proj_const(irn);
237 if (is_ia32_irn(irn)) {
238 const arch_register_t **slots;
239 slots = get_ia32_slots(irn);
243 reg = ia32_get_firm_reg(irn, cur_reg_set);
/* Classifies irn for the generic backend (branch/const/load/store/reload
 * flags OR-ed onto arch_irn_class_normal). Non-ia32 nodes lose the
 * "normal" bit before returning. */
249 static arch_irn_class_t ia32_classify(const void *self, const ir_node *irn) {
250 arch_irn_class_t classification = arch_irn_class_normal;
252 irn = skip_Proj_const(irn);
255 classification |= arch_irn_class_branch;
257 if (! is_ia32_irn(irn))
258 return classification & ~arch_irn_class_normal;
260 if (is_ia32_Cnst(irn))
261 classification |= arch_irn_class_const;
264 classification |= arch_irn_class_load;
266 if (is_ia32_St(irn) || is_ia32_Store8Bit(irn))
267 classification |= arch_irn_class_store;
269 if (is_ia32_got_reload(irn))
270 classification |= arch_irn_class_reload;
272 return classification;
/* Returns the backend flags of irn. For a datab-mode Proj the OUT flags of
 * the producing ia32 node are used; the node's own ia32 flags are OR-ed in
 * after skipping the Proj.
 * NOTE(review): excerpt is line-sampled; code kept byte-identical. */
275 static arch_irn_flags_t ia32_get_flags(const void *self, const ir_node *irn) {
276 arch_irn_flags_t flags;
277 ir_node *pred = is_Proj(irn) && mode_is_datab(get_irn_mode(irn)) ? get_Proj_pred(irn) : NULL;
280 flags = arch_irn_flags_ignore;
282 /* pred is only set, if we have a Proj */
283 flags = pred && is_ia32_irn(pred) ? get_ia32_out_flags(pred, get_Proj_proj(irn)) : arch_irn_flags_none;
285 irn = skip_Proj_const(irn);
286 if (is_ia32_irn(irn))
287 flags |= get_ia32_flags(irn);
294 * The IA32 ABI callback object.
/* State shared by the ABI callbacks below (prologue/epilogue/between-type);
 * allocated in ia32_abi_init. NOTE(review): the struct's opening/closing
 * lines are elided in this excerpt. */
297 be_abi_call_flags_bits_t flags; /**< The call flags. */
298 const arch_isa_t *isa; /**< The ISA handle. */
299 const arch_env_t *aenv; /**< The architecture environment. */
300 ir_graph *irg; /**< The associated graph. */
/* Returns the frame entity attached to an ia32 node, NULL for others. */
303 static ir_entity *ia32_get_frame_entity(const void *self, const ir_node *irn) {
304 return is_ia32_irn(irn) ? get_ia32_frame_ent(irn) : NULL;
/* Attaches frame entity ent to irn (assumed to be an ia32 node). */
307 static void ia32_set_frame_entity(const void *self, ir_node *irn, ir_entity *ent) {
308 set_ia32_frame_ent(irn, ent);
/* Adds the stack bias to a frame-using node's address-mode offset.
 * Pop nodes get special treatment because they adjust the stack pointer
 * before computing their destination address.
 * NOTE(review): excerpt is line-sampled — the Pop adjustment and parts of
 * the control flow are elided; code kept byte-identical. */
311 static void ia32_set_frame_offset(const void *self, ir_node *irn, int bias) {
312 const ia32_irn_ops_t *ops = self;
314 if (get_ia32_frame_ent(irn)) {
315 if(is_ia32_Pop(irn)) {
316 int omit_fp = be_abi_omit_fp(ops->cg->birg->abi);
318 /* Pop nodes modify the stack pointer before calculating the destination
319 * address, so fix this here
325 DBG((ops->cg->mod, LEVEL_1, "stack biased %+F with %d\n", irn, bias));
327 if (get_ia32_op_type(irn) == ia32_Normal) {
328 // Matze: When does this case happen?
330 snprintf(buf, sizeof(buf), "%d", bias);
331 set_ia32_cnst(irn, buf);
/* Address-mode node: fold the bias into the am offset instead. */
333 ia32_am_flavour_t am_flav = get_ia32_am_flavour(irn);
335 set_ia32_am_flavour(irn, am_flav);
337 add_ia32_am_offs_int(irn, bias);
/* Reports how a node changes the stack pointer: recognizes the stack-out
 * Proj of ia32 Push/Pop nodes. NOTE(review): the returned bias values are
 * elided in this excerpt; code kept byte-identical. */
342 static int ia32_get_sp_bias(const void *self, const ir_node *irn) {
344 long proj = get_Proj_proj(irn);
345 ir_node *pred = get_Proj_pred(irn);
347 if (is_ia32_Push(pred) && proj == pn_ia32_Push_stack)
349 if (is_ia32_Pop(pred) && proj == pn_ia32_Pop_stack)
357 * Put all registers which are saved by the prologue/epilogue in a set.
359 * @param self The callback object.
360 * @param s The result set.
/* When the frame pointer is omitted, ebp is free for allocation and must
 * not be treated as callee-saved here. */
362 static void ia32_abi_dont_save_regs(void *self, pset *s)
364 ia32_abi_env_t *env = self;
365 if(env->flags.try_omit_fp)
366 pset_insert_ptr(s, env->isa->bp);
370 * Generate the routine prologue.
372 * @param self The callback object.
373 * @param mem A pointer to the mem node. Update this if you define new memory.
374 * @param reg_map A map mapping all callee_save/ignore/parameter registers to their defining nodes.
376 * @return The register which shall be used as a stack frame base.
378 * All nodes which define registers in @p reg_map must keep @p reg_map current.
/* Emits the classic "push ebp; mov esp, ebp" sequence when the frame
 * pointer is not omitted. NOTE(review): excerpt is line-sampled — returns
 * and some declarations are elided; code kept byte-identical. */
380 static const arch_register_t *ia32_abi_prologue(void *self, ir_node **mem, pmap *reg_map)
382 ia32_abi_env_t *env = self;
384 if (! env->flags.try_omit_fp) {
385 ir_node *bl = get_irg_start_block(env->irg);
386 ir_node *curr_sp = be_abi_reg_map_get(reg_map, env->isa->sp);
387 ir_node *curr_bp = be_abi_reg_map_get(reg_map, env->isa->bp);
388 ir_node *noreg = be_abi_reg_map_get(reg_map, &ia32_gp_regs[REG_GP_NOREG]);
/* push ebp: saves the old frame pointer and yields the new esp. */
392 push = new_rd_ia32_Push(NULL, env->irg, bl, noreg, noreg, curr_bp, curr_sp, *mem);
393 curr_sp = new_r_Proj(env->irg, bl, push, get_irn_mode(curr_sp), pn_ia32_Push_stack);
394 *mem = new_r_Proj(env->irg, bl, push, mode_M, pn_ia32_Push_M);
396 /* the push must have SP out register */
397 arch_set_irn_register(env->aenv, curr_sp, env->isa->sp);
398 set_ia32_flags(push, arch_irn_flags_ignore);
400 /* move esp to ebp */
401 curr_bp = be_new_Copy(env->isa->bp->reg_class, env->irg, bl, curr_sp);
402 be_set_constr_single_reg(curr_bp, BE_OUT_POS(0), env->isa->bp);
403 arch_set_irn_register(env->aenv, curr_bp, env->isa->bp);
404 be_node_set_flags(curr_bp, BE_OUT_POS(0), arch_irn_flags_ignore);
406 /* beware: the copy must be done before any other sp use */
407 curr_sp = be_new_CopyKeep_single(env->isa->sp->reg_class, env->irg, bl, curr_sp, curr_bp, get_irn_mode(curr_sp));
408 be_set_constr_single_reg(curr_sp, BE_OUT_POS(0), env->isa->sp);
409 arch_set_irn_register(env->aenv, curr_sp, env->isa->sp);
410 be_node_set_flags(curr_sp, BE_OUT_POS(0), arch_irn_flags_ignore);
412 be_abi_reg_map_set(reg_map, env->isa->sp, curr_sp);
413 be_abi_reg_map_set(reg_map, env->isa->bp, curr_bp);
422 * Generate the routine epilogue.
423 * @param self The callback object.
424 * @param bl The block for the epilog
425 * @param mem A pointer to the mem node. Update this if you define new memory.
426 * @param reg_map A map mapping all callee_save/ignore/parameter registers to their defining nodes.
427 * @return The register which shall be used as a stack frame base.
429 * All nodes which define registers in @p reg_map must keep @p reg_map current.
/* Three strategies: plain IncSP when the frame pointer is omitted,
 * a Leave instruction on the "gcc always emits leave" path, or an explicit
 * mov ebp->esp followed by pop ebp.
 * NOTE(review): excerpt is line-sampled; code kept byte-identical. */
431 static void ia32_abi_epilogue(void *self, ir_node *bl, ir_node **mem, pmap *reg_map)
433 ia32_abi_env_t *env = self;
434 ir_node *curr_sp = be_abi_reg_map_get(reg_map, env->isa->sp);
435 ir_node *curr_bp = be_abi_reg_map_get(reg_map, env->isa->bp);
437 if (env->flags.try_omit_fp) {
438 /* simply remove the stack frame here */
439 curr_sp = be_new_IncSP(env->isa->sp, env->irg, bl, curr_sp, BE_STACK_FRAME_SIZE_SHRINK);
440 add_irn_dep(curr_sp, *mem);
443 const ia32_isa_t *isa = (ia32_isa_t *)env->isa;
444 ir_mode *mode_bp = env->isa->bp->reg_class->mode;
446 /* gcc always emits a leave at the end of a routine */
/* NOTE(review): "1 ||" makes the Leave branch unconditional; the Pop
 * branch below is dead. Presumably intentional per the comment above —
 * verify before removing. */
447 if (1 || ARCH_AMD(isa->opt_arch)) {
451 leave = new_rd_ia32_Leave(NULL, env->irg, bl, curr_sp, curr_bp);
452 set_ia32_flags(leave, arch_irn_flags_ignore);
453 curr_bp = new_r_Proj(current_ir_graph, bl, leave, mode_bp, pn_ia32_Leave_frame);
454 curr_sp = new_r_Proj(current_ir_graph, bl, leave, get_irn_mode(curr_sp), pn_ia32_Leave_stack);
455 *mem = new_r_Proj(current_ir_graph, bl, leave, mode_M, pn_ia32_Leave_M);
458 ir_node *noreg = be_abi_reg_map_get(reg_map, &ia32_gp_regs[REG_GP_NOREG]);
461 /* copy ebp to esp */
462 curr_sp = be_new_SetSP(env->isa->sp, env->irg, bl, curr_sp, curr_bp, *mem);
/* pop ebp: restores the caller's frame pointer. */
465 pop = new_rd_ia32_Pop(NULL, env->irg, bl, noreg, noreg, curr_sp, *mem);
466 set_ia32_flags(pop, arch_irn_flags_ignore);
467 curr_bp = new_r_Proj(current_ir_graph, bl, pop, mode_bp, pn_ia32_Pop_res);
468 curr_sp = new_r_Proj(current_ir_graph, bl, pop, get_irn_mode(curr_sp), pn_ia32_Pop_stack);
469 *mem = new_r_Proj(current_ir_graph, bl, pop, mode_M, pn_ia32_Pop_M);
471 arch_set_irn_register(env->aenv, curr_sp, env->isa->sp);
472 arch_set_irn_register(env->aenv, curr_bp, env->isa->bp);
475 be_abi_reg_map_set(reg_map, env->isa->sp, curr_sp);
476 be_abi_reg_map_set(reg_map, env->isa->bp, curr_bp);
480 * Initialize the callback object.
481 * @param call The call object.
482 * @param aenv The architecture environment.
483 * @param irg The graph with the method.
484 * @return Some pointer. This pointer is passed to all other callback functions as self object.
/* Allocates the ia32_abi_env_t used as "self" by the ABI callbacks.
 * Ownership: freed in ia32_abi_done. NOTE(review): the assignments of
 * env->aenv/env->irg and the return are elided in this excerpt. */
486 static void *ia32_abi_init(const be_abi_call_t *call, const arch_env_t *aenv, ir_graph *irg)
488 ia32_abi_env_t *env = xmalloc(sizeof(env[0]));
489 be_abi_call_flags_t fl = be_abi_call_get_flags(call);
490 env->flags = fl.bits;
493 env->isa = aenv->isa;
498 * Destroy the callback object.
499 * @param self The callback object.
/* Releases the env allocated in ia32_abi_init (body elided in excerpt). */
501 static void ia32_abi_done(void *self) {
506 * Produces the type which sits between the stack args and the locals on the stack.
507 * it will contain the return address and space to store the old base pointer.
508 * @return The Firm type modeling the ABI between type.
/* Lazily builds two singleton struct types: one with old-bp + return
 * address (frame pointer kept) and one with only the return address
 * (frame pointer omitted), then returns the variant matching the flags. */
510 static ir_type *ia32_abi_get_between_type(void *self)
512 #define IDENT(s) new_id_from_chars(s, sizeof(s)-1)
513 static ir_type *omit_fp_between_type = NULL;
514 static ir_type *between_type = NULL;
516 ia32_abi_env_t *env = self;
518 if (! between_type) {
519 ir_entity *old_bp_ent;
520 ir_entity *ret_addr_ent;
521 ir_entity *omit_fp_ret_addr_ent;
523 ir_type *old_bp_type = new_type_primitive(IDENT("bp"), mode_P);
524 ir_type *ret_addr_type = new_type_primitive(IDENT("return_addr"), mode_P);
526 between_type = new_type_struct(IDENT("ia32_between_type"));
527 old_bp_ent = new_entity(between_type, IDENT("old_bp"), old_bp_type);
528 ret_addr_ent = new_entity(between_type, IDENT("ret_addr"), ret_addr_type);
530 set_entity_offset(old_bp_ent, 0);
531 set_entity_offset(ret_addr_ent, get_type_size_bytes(old_bp_type));
532 set_type_size_bytes(between_type, get_type_size_bytes(old_bp_type) + get_type_size_bytes(ret_addr_type));
533 set_type_state(between_type, layout_fixed);
535 omit_fp_between_type = new_type_struct(IDENT("ia32_between_type_omit_fp"));
536 omit_fp_ret_addr_ent = new_entity(omit_fp_between_type, IDENT("ret_addr"), ret_addr_type);
538 set_entity_offset(omit_fp_ret_addr_ent, 0);
539 set_type_size_bytes(omit_fp_between_type, get_type_size_bytes(ret_addr_type));
540 set_type_state(omit_fp_between_type, layout_fixed);
543 return env->flags.try_omit_fp ? omit_fp_between_type : between_type;
548 * Get the estimated cycle count for @p irn.
550 * @param self The this pointer.
551 * @param irn The node.
553 * @return The estimated cycle count for this operation
/* Base cost comes from the node's latency; CopyB and address-mode
 * operations receive extra cycles. NOTE(review): excerpt is line-sampled —
 * several cost assignments/returns are elided; code kept byte-identical. */
555 static int ia32_get_op_estimated_cost(const void *self, const ir_node *irn)
558 ia32_op_type_t op_tp;
559 const ia32_irn_ops_t *ops = self;
563 if (!is_ia32_irn(irn))
566 assert(is_ia32_irn(irn));
568 cost = get_ia32_latency(irn);
569 op_tp = get_ia32_op_type(irn);
571 if (is_ia32_CopyB(irn)) {
573 if (ARCH_INTEL(ops->cg->arch))
576 else if (is_ia32_CopyB_i(irn)) {
577 int size = get_tarval_long(get_ia32_Immop_tarval(irn));
/* NOTE(review): (4/3) is integer division == 1, so ceil() is a no-op here;
 * likely meant 4.0/3.0 — confirm against upstream before changing. */
578 cost = 20 + (int)ceil((4/3) * size);
579 if (ARCH_INTEL(ops->cg->arch))
582 /* in case of address mode operations add additional cycles */
583 else if (op_tp == ia32_AddrModeD || op_tp == ia32_AddrModeS) {
585 In case of stack access add 5 cycles (we assume stack is in cache),
586 other memory operations cost 20 cycles.
588 cost += is_ia32_use_frame(irn) ? 5 : 20;
595 * Returns the inverse operation if @p irn, recalculating the argument at position @p i.
597 * @param irn The original operation
598 * @param i Index of the argument we want the inverse operation to yield
599 * @param inverse struct to be filled with the resulting inverse op
600 * @param obstack The obstack to use for allocation of the returned nodes array
601 * @return The inverse operation or NULL if operation invertible
/* Supported opcodes (per the switch below): Add, Sub, Eor (xor), Not,
 * Minus. Immediate forms invert via the constant; register forms rebuild
 * the operand from the result Proj. Operand positions 2 and 3 are the
 * "real" operands; 0/1/last are base/index/mem of the address mode.
 * NOTE(review): excerpt is line-sampled — case labels, returns and cost
 * assignments are partially elided; code kept byte-identical. */
603 static arch_inverse_t *ia32_get_inverse(const void *self, const ir_node *irn, int i, arch_inverse_t *inverse, struct obstack *obst) {
607 ir_node *block, *noreg, *nomem;
610 /* we cannot invert non-ia32 irns */
611 if (! is_ia32_irn(irn))
614 /* operand must always be a real operand (not base, index or mem) */
615 if (i != 2 && i != 3)
618 /* we don't invert address mode operations */
619 if (get_ia32_op_type(irn) != ia32_Normal)
622 irg = get_irn_irg(irn);
623 block = get_nodes_block(irn);
624 mode = get_ia32_res_mode(irn);
625 irn_mode = get_irn_mode(irn);
626 noreg = get_irn_n(irn, 0);
627 nomem = new_r_NoMem(irg);
628 dbg = get_irn_dbg_info(irn);
630 /* initialize structure */
631 inverse->nodes = obstack_alloc(obst, 2 * sizeof(inverse->nodes[0]));
635 switch (get_ia32_irn_opcode(irn)) {
637 if (get_ia32_immop_type(irn) == ia32_ImmConst) {
638 /* we have an add with a const here */
639 /* invers == add with negated const */
640 inverse->nodes[0] = new_rd_ia32_Add(dbg, irg, block, noreg, noreg, get_irn_n(irn, i), noreg, nomem, irn_mode);
642 copy_ia32_Immop_attr(inverse->nodes[0], (ir_node *)irn);
643 set_ia32_Immop_tarval(inverse->nodes[0], tarval_neg(get_ia32_Immop_tarval(irn)));
644 set_ia32_commutative(inverse->nodes[0]);
646 else if (get_ia32_immop_type(irn) == ia32_ImmSymConst) {
647 /* we have an add with a symconst here */
648 /* invers == sub with const */
649 inverse->nodes[0] = new_rd_ia32_Sub(dbg, irg, block, noreg, noreg, get_irn_n(irn, i), noreg, nomem, irn_mode);
651 copy_ia32_Immop_attr(inverse->nodes[0], (ir_node *)irn);
654 /* normal add: inverse == sub */
655 ir_node *proj = ia32_get_res_proj(irn);
658 inverse->nodes[0] = new_rd_ia32_Sub(dbg, irg, block, noreg, noreg, proj, get_irn_n(irn, i ^ 1), nomem, irn_mode);
/* Sub: immediate form inverts to an Add with the same constant;
 * register form depends on which operand (minuend/subtrahend) is wanted. */
663 if (get_ia32_immop_type(irn) != ia32_ImmNone) {
664 /* we have a sub with a const/symconst here */
665 /* invers == add with this const */
666 inverse->nodes[0] = new_rd_ia32_Add(dbg, irg, block, noreg, noreg, get_irn_n(irn, i), noreg, nomem, irn_mode);
667 inverse->costs += (get_ia32_immop_type(irn) == ia32_ImmSymConst) ? 5 : 1;
668 copy_ia32_Immop_attr(inverse->nodes[0], (ir_node *)irn);
672 ir_node *proj = ia32_get_res_proj(irn);
676 inverse->nodes[0] = new_rd_ia32_Add(dbg, irg, block, noreg, noreg, proj, get_irn_n(irn, 3), nomem, irn_mode);
679 inverse->nodes[0] = new_rd_ia32_Sub(dbg, irg, block, noreg, noreg, get_irn_n(irn, 2), proj, nomem, irn_mode);
/* Eor (xor) is its own inverse. */
685 if (get_ia32_immop_type(irn) != ia32_ImmNone) {
686 /* xor with const: inverse = xor */
687 inverse->nodes[0] = new_rd_ia32_Eor(dbg, irg, block, noreg, noreg, get_irn_n(irn, i), noreg, nomem, irn_mode);
688 inverse->costs += (get_ia32_immop_type(irn) == ia32_ImmSymConst) ? 5 : 1;
689 copy_ia32_Immop_attr(inverse->nodes[0], (ir_node *)irn);
693 inverse->nodes[0] = new_rd_ia32_Eor(dbg, irg, block, noreg, noreg, (ir_node *)irn, get_irn_n(irn, i), nomem, irn_mode);
/* Not and Minus are involutions: apply the same operation to the result. */
698 ir_node *proj = ia32_get_res_proj(irn);
701 inverse->nodes[0] = new_rd_ia32_Not(dbg, irg, block, noreg, noreg, proj, nomem, irn_mode);
705 case iro_ia32_Minus: {
706 ir_node *proj = ia32_get_res_proj(irn);
709 inverse->nodes[0] = new_rd_ia32_Minus(dbg, irg, block, noreg, noreg, proj, nomem, irn_mode);
714 /* inverse operation not supported */
718 set_ia32_res_mode(inverse->nodes[0], mode);
724 * Get the mode that should be used for spilling value node
/* NOTE(review): the float and non-float return values are elided in this
 * excerpt; only the float check is visible. Code kept byte-identical. */
726 static ir_mode *get_spill_mode(ia32_code_gen_t *cg, const ir_node *node)
728 ir_mode *mode = get_irn_mode(node);
729 if (mode_is_float(mode)) {
731 // super exact spilling...
748 * Checks wether an addressmode reload for a node with mode mode is compatible
749 * with a spillslot of mode spill_mode
/* Float modes must match exactly; the non-float return is elided in this
 * excerpt. */
751 static int ia32_is_spillmode_compatible(const ir_mode *mode, const ir_mode *spillmode)
753 if(mode_is_float(mode)) {
754 return mode == spillmode;
761 * Check if irn can load it's operand at position i from memory (source addressmode).
762 * @param self Pointer to irn ops itself
763 * @param irn The irn to be checked
764 * @param i The operands position
765 * @return Non-Zero if operand can be loaded
/* A single conjunction of disqualifiers; each condition is annotated
 * inline. Operand layout assumed: 0=base, 1=index, 2/3=real operands,
 * 4=mem (hence arity 5 for binary ops). */
767 static int ia32_possible_memory_operand(const void *self, const ir_node *irn, unsigned int i) {
768 const ia32_irn_ops_t *ops = self;
769 ia32_code_gen_t *cg = ops->cg;
770 ir_node *op = get_irn_n(irn, i);
771 const ir_mode *mode = get_irn_mode(op);
772 const ir_mode *spillmode = get_spill_mode(cg, op);
774 if (! is_ia32_irn(irn) || /* must be an ia32 irn */
775 get_irn_arity(irn) != 5 || /* must be a binary operation */
776 get_ia32_op_type(irn) != ia32_Normal || /* must not already be a addressmode irn */
777 ! (get_ia32_am_support(irn) & ia32_am_Source) || /* must be capable of source addressmode */
778 ! ia32_is_spillmode_compatible(mode, spillmode) ||
779 (i != 2 && i != 3) || /* a "real" operand position must be requested */
780 (i == 2 && ! is_ia32_commutative(irn)) || /* if first operand requested irn must be commutative */
781 is_ia32_use_frame(irn)) /* must not already use frame */
/* Rewrites irn so that operand i is loaded from the spill slot via source
 * address mode: operands are swapped into position 3 if needed, the frame
 * becomes the base (in 0), NoReg fills the vacated operand (in 3) and the
 * spill provides memory (in 4).
 * NOTE(review): the condition guarding the swap is elided in this excerpt;
 * code kept byte-identical. */
787 static void ia32_perform_memory_operand(const void *self, ir_node *irn, ir_node *spill, unsigned int i) {
788 const ia32_irn_ops_t *ops = self;
789 ia32_code_gen_t *cg = ops->cg;
791 assert(ia32_possible_memory_operand(self, irn, i) && "Cannot perform memory operand change");
794 ir_node *tmp = get_irn_n(irn, 3);
795 set_irn_n(irn, 3, get_irn_n(irn, 2));
796 set_irn_n(irn, 2, tmp);
799 set_ia32_am_support(irn, ia32_am_Source);
800 set_ia32_op_type(irn, ia32_AddrModeS);
801 set_ia32_am_flavour(irn, ia32_B);
802 set_ia32_ls_mode(irn, get_irn_mode(get_irn_n(irn, i)));
803 set_ia32_use_frame(irn);
804 set_ia32_got_reload(irn);
806 set_irn_n(irn, 0, get_irg_frame(get_irn_irg(irn)));
807 set_irn_n(irn, 3, ia32_get_admissible_noreg(cg, irn, 3));
808 set_irn_n(irn, 4, spill);
810 //FIXME DBG_OPT_AM_S(reload, irn);
/* Wire-up tables handed to the generic backend. Member order must match
 * the corresponding struct declarations in the be headers.
 * NOTE(review): several initializer entries are elided in this excerpt. */
813 static const be_abi_callbacks_t ia32_abi_callbacks = {
816 ia32_abi_get_between_type,
817 ia32_abi_dont_save_regs,
822 /* fill register allocator interface */
824 static const arch_irn_ops_if_t ia32_irn_ops_if = {
825 ia32_get_irn_reg_req,
830 ia32_get_frame_entity,
831 ia32_set_frame_entity,
832 ia32_set_frame_offset,
835 ia32_get_op_estimated_cost,
836 ia32_possible_memory_operand,
837 ia32_perform_memory_operand,
840 ia32_irn_ops_t ia32_irn_ops = {
847 /**************************************************
850 * ___ ___ __| | ___ __ _ ___ _ __ _| |_
851 * / __/ _ \ / _` |/ _ \/ _` |/ _ \ '_ \ | | _|
852 * | (_| (_) | (_| | __/ (_| | __/ | | | | | |
853 * \___\___/ \__,_|\___|\__, |\___|_| |_| |_|_|
856 **************************************************/
/* Bypasses every Conv collected in cg->kill_conv by rerouting its users
 * to the Conv's data input (operand 2). */
858 static void ia32_kill_convs(ia32_code_gen_t *cg) {
861 foreach_nodeset(cg->kill_conv, irn) {
862 ir_node *in = get_irn_n(irn, 2);
863 edges_reroute(irn, in, cg->birg->irg);
868 * Transform the Thread Local Store base.
/* Replaces the graph's TLS base node with an ia32 LdTls instruction.
 * NOTE(review): the guard on irn and the exchange call are elided in this
 * excerpt; code kept byte-identical. */
870 static void transform_tls(ir_graph *irg) {
871 ir_node *irn = get_irg_tls(irg);
874 dbg_info *dbg = get_irn_dbg_info(irn);
875 ir_node *blk = get_nodes_block(irn);
877 newn = new_rd_ia32_LdTls(dbg, irg, blk, get_irn_mode(irn));
884 * Transforms the standard firm graph into
/* Code generator hook: runs the three ia32 lowering phases —
 * (1) constants/psi trees, (2) blockwise node transformation with Conv
 * killing and TLS rewriting, (3) address-mode optimization. Dumps the
 * graph after phases 2 and 3 for debugging. */
887 static void ia32_prepare_graph(void *self) {
888 ia32_code_gen_t *cg = self;
889 DEBUG_ONLY(firm_dbg_module_t *old_mod = cg->mod;)
891 FIRM_DBG_REGISTER(cg->mod, "firm.be.ia32.transform");
893 /* 1st: transform constants and psi condition trees */
894 ia32_pre_transform_phase(cg);
896 /* 2nd: transform all remaining nodes */
897 ia32_register_transformers();
899 cg->kill_conv = new_nodeset(5);
900 transform_tls(cg->irg);
/* Rebuild the out-edge information before walking. */
901 edges_deactivate(cg->irg);
902 edges_activate(cg->irg);
903 irg_walk_blkwise_graph(cg->irg, NULL, ia32_transform_node, cg);
905 del_nodeset(cg->kill_conv);
908 be_dump(cg->irg, "-transformed", dump_ir_block_graph_sched);
910 /* 3rd: optimize address mode */
911 FIRM_DBG_REGISTER(cg->mod, "firm.be.ia32.am");
912 ia32_optimize_addressmode(cg);
915 be_dump(cg->irg, "-am", dump_ir_block_graph_sched);
917 DEBUG_ONLY(cg->mod = old_mod;)
921 * Dummy functions for hooks we don't need but which must be filled.
/* Intentionally empty: required by the code generator interface. */
923 static void ia32_before_sched(void *self) {
/* Recursively removes a dead node (typically an unused Load) from the
 * graph and the schedule: reroutes a surviving memory Proj to the node's
 * memory predecessor, Bads out all inputs, and recurses into predecessors
 * that thereby lose their last user.
 * NOTE(review): excerpt is line-sampled — the early returns and the final
 * sched_remove are elided; code kept byte-identical. */
926 static void remove_unused_nodes(ir_node *irn, bitset_t *already_visited) {
929 ir_node *mem_proj = NULL;
934 mode = get_irn_mode(irn);
936 /* check if we already saw this node or the node has more than one user */
937 if (bitset_contains_irn(already_visited, irn) || get_irn_n_edges(irn) > 1) {
941 /* mark irn visited */
942 bitset_add_irn(already_visited, irn);
944 /* non-Tuple nodes with one user: ok, return */
945 if (get_irn_n_edges(irn) >= 1 && mode != mode_T) {
949 /* tuple node has one user which is not the mem proj-> ok */
950 if (mode == mode_T && get_irn_n_edges(irn) == 1) {
951 mem_proj = ia32_get_proj_for_mode(irn, mode_M);
952 if (mem_proj == NULL) {
957 arity = get_irn_arity(irn);
958 for (i = 0; i < arity; ++i) {
959 ir_node *pred = get_irn_n(irn, i);
961 /* do not follow memory edges or we will accidentally remove stores */
962 if (get_irn_mode(pred) == mode_M) {
963 if(mem_proj != NULL) {
964 edges_reroute(mem_proj, pred, get_irn_irg(mem_proj));
970 set_irn_n(irn, i, new_Bad());
973 The current node is about to be removed: if the predecessor
974 has only this node as user, it need to be removed as well.
976 if (get_irn_n_edges(pred) <= 1)
977 remove_unused_nodes(pred, already_visited);
980 // we need to set the presd to Bad again to also get the memory edges
981 arity = get_irn_arity(irn);
982 for (i = 0; i < arity; ++i) {
983 set_irn_n(irn, i, new_Bad());
986 if (sched_is_scheduled(irn)) {
/* Graph walker callback: starts dead-node removal at every ia32 Load not
 * yet visited; env carries the shared visited bitset. */
991 static void remove_unused_loads_walker(ir_node *irn, void *env) {
992 bitset_t *already_visited = env;
993 if (is_ia32_Ld(irn) && ! bitset_contains_irn(already_visited, irn))
994 remove_unused_nodes(irn, env);
998 * Called before the register allocator.
999 * Calculate a block schedule here. We need it for the x87
1000 * simulator and the emitter.
/* Also prunes Loads that are only pinned by memory (see walker above);
 * the block-schedule computation mentioned in the comment is elided in
 * this excerpt. */
1002 static void ia32_before_ra(void *self) {
1003 ia32_code_gen_t *cg = self;
1004 bitset_t *already_visited = bitset_irg_alloca(cg->irg);
1007 Handle special case:
1008 There are sometimes unused loads, only pinned by memory.
1009 We need to remove those Loads and all other nodes which won't be used
1010 after removing the Load from schedule.
1012 irg_walk_graph(cg->irg, NULL, remove_unused_loads_walker, already_visited);
1017 * Transforms a be_Reload into a ia32 Load.
/* Picks xLoad (SSE2) / vfld (x87) / Load (integer) by the spill mode,
 * addresses the frame entity via base+offset address mode, splices the new
 * node into the schedule in place of the Reload, and transfers the
 * assigned register. */
1019 static void transform_to_Load(ia32_transform_env_t *env) {
1020 ir_node *irn = env->irn;
1021 ir_entity *ent = be_get_frame_entity(irn);
1022 ir_mode *mode = get_irn_mode(irn);
1023 ir_mode *spillmode = get_spill_mode(env->cg, irn);
1024 ir_node *noreg = ia32_new_NoReg_gp(env->cg);
1025 ir_node *sched_point = NULL;
1026 ir_node *ptr = get_irg_frame(env->irg);
1027 ir_node *mem = get_irn_n(irn, be_pos_Reload_mem);
1028 ir_node *new_op, *proj;
1029 const arch_register_t *reg;
1031 if (sched_is_scheduled(irn)) {
1032 sched_point = sched_prev(irn);
1035 if (mode_is_float(spillmode)) {
1036 if (USE_SSE2(env->cg))
1037 new_op = new_rd_ia32_xLoad(env->dbg, env->irg, env->block, ptr, noreg, mem);
1039 new_op = new_rd_ia32_vfld(env->dbg, env->irg, env->block, ptr, noreg, mem);
1042 new_op = new_rd_ia32_Load(env->dbg, env->irg, env->block, ptr, noreg, mem);
1044 set_ia32_am_support(new_op, ia32_am_Source);
1045 set_ia32_op_type(new_op, ia32_AddrModeS);
1046 set_ia32_am_flavour(new_op, ia32_B);
1047 set_ia32_ls_mode(new_op, spillmode);
1048 set_ia32_frame_ent(new_op, ent);
1049 set_ia32_use_frame(new_op);
1051 DBG_OPT_RELOAD2LD(irn, new_op);
1053 proj = new_rd_Proj(env->dbg, env->irg, env->block, new_op, mode, pn_ia32_Load_res);
/* Keep the schedule position of the replaced Reload. */
1056 sched_add_after(sched_point, new_op);
1057 sched_add_after(new_op, proj);
1062 /* copy the register from the old node to the new Load */
1063 reg = arch_get_irn_register(env->cg->arch_env, irn);
1064 arch_set_irn_register(env->cg->arch_env, new_op, reg);
1066 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env->cg, irn));
1068 exchange(irn, proj);
1072 * Transforms a be_Spill node into a ia32 Store.
/* Mirror of transform_to_Load: chooses xStore/vfst/Store8Bit/Store by the
 * spilled value's mode, targets the frame entity with destination address
 * mode, and replaces the Spill in graph and schedule. */
1074 static void transform_to_Store(ia32_transform_env_t *env) {
1075 ir_node *irn = env->irn;
1076 ir_entity *ent = be_get_frame_entity(irn);
1077 const ir_node *spillval = get_irn_n(irn, be_pos_Spill_val);
1078 ir_mode *mode = get_spill_mode(env->cg, spillval);
1079 ir_node *noreg = ia32_new_NoReg_gp(env->cg);
1080 ir_node *nomem = new_rd_NoMem(env->irg);
1081 ir_node *ptr = get_irg_frame(env->irg);
1082 ir_node *val = get_irn_n(irn, be_pos_Spill_val);
1084 ir_node *sched_point = NULL;
1086 if (sched_is_scheduled(irn)) {
1087 sched_point = sched_prev(irn);
1090 if (mode_is_float(mode)) {
1091 if (USE_SSE2(env->cg))
1092 store = new_rd_ia32_xStore(env->dbg, env->irg, env->block, ptr, noreg, val, nomem);
1094 store = new_rd_ia32_vfst(env->dbg, env->irg, env->block, ptr, noreg, val, nomem);
1096 else if (get_mode_size_bits(mode) == 8) {
1097 store = new_rd_ia32_Store8Bit(env->dbg, env->irg, env->block, ptr, noreg, val, nomem);
1100 store = new_rd_ia32_Store(env->dbg, env->irg, env->block, ptr, noreg, val, nomem);
1103 set_ia32_am_support(store, ia32_am_Dest);
1104 set_ia32_op_type(store, ia32_AddrModeD);
1105 set_ia32_am_flavour(store, ia32_B);
1106 set_ia32_ls_mode(store, mode);
1107 set_ia32_frame_ent(store, ent);
1108 set_ia32_use_frame(store);
1110 DBG_OPT_SPILL2ST(irn, store);
1111 SET_IA32_ORIG_NODE(store, ia32_get_old_node_name(env->cg, irn));
1114 sched_add_after(sched_point, store);
1118 exchange(irn, store);
/* MemPerm helper: builds an ia32 Push that reads a 32-bit slot of frame
 * entity ent and schedules it before schedpoint. Returns the push node
 * (return statement elided in this excerpt). */
1121 static ir_node *create_push(ia32_transform_env_t *env, ir_node *schedpoint, ir_node *sp, ir_node *mem, ir_entity *ent) {
1122 ir_node *noreg = ia32_new_NoReg_gp(env->cg);
1123 ir_node *frame = get_irg_frame(env->irg);
1125 ir_node *push = new_rd_ia32_Push(env->dbg, env->irg, env->block, frame, noreg, noreg, sp, mem);
1127 set_ia32_frame_ent(push, ent);
1128 set_ia32_use_frame(push);
1129 set_ia32_op_type(push, ia32_AddrModeS);
1130 set_ia32_am_flavour(push, ia32_B);
1131 set_ia32_ls_mode(push, mode_Is);
1133 sched_add_before(schedpoint, push);
/* MemPerm helper: builds an ia32 Pop that writes a 32-bit slot of frame
 * entity ent and schedules it before schedpoint. Returns the pop node
 * (return statement elided in this excerpt). */
1137 static ir_node *create_pop(ia32_transform_env_t *env, ir_node *schedpoint, ir_node *sp, ir_entity *ent) {
1138 ir_node *noreg = ia32_new_NoReg_gp(env->cg);
1139 ir_node *frame = get_irg_frame(env->irg);
1141 ir_node *pop = new_rd_ia32_Pop(env->dbg, env->irg, env->block, frame, noreg, sp, new_NoMem());
1143 set_ia32_frame_ent(pop, ent);
1144 set_ia32_use_frame(pop);
1145 set_ia32_op_type(pop, ia32_AddrModeD);
1146 set_ia32_am_flavour(pop, ia32_am_OB);
1147 set_ia32_ls_mode(pop, mode_Is);
1149 sched_add_before(schedpoint, pop);
/* MemPerm helper: creates the stack-pointer Proj of a Push/Pop, pins it
 * to esp and schedules it before schedpoint. */
1154 static ir_node* create_spproj(ia32_transform_env_t *env, ir_node *pred, int pos, ir_node *schedpoint) {
1155 ir_mode *spmode = mode_Iu;
1156 const arch_register_t *spreg = &ia32_gp_regs[REG_ESP];
1159 sp = new_rd_Proj(env->dbg, env->irg, env->block, pred, spmode, pos);
1160 arch_set_irn_register(env->cg->arch_env, sp, spreg);
1161 sched_add_before(schedpoint, sp);
1167 * Transform memperm, currently we do this the ugly way and produce
1168 * push/pop into/from memory cascades. This is possible without using
/* Pushes every input entity onto the stack (two pushes for 64-bit slots),
 * then pops into the output entities in reverse order, finally reroutes
 * the MemPerm's memory Projs to the corresponding Pop and Bads out the
 * old node. NOTE(review): excerpt is line-sampled — the pops[] bookkeeping
 * between the loops is partially elided; code kept byte-identical. */
1171 static void transform_MemPerm(ia32_transform_env_t *env) {
1172 ir_node *node = env->irn;
1174 ir_node *sp = be_abi_get_ignore_irn(env->cg->birg->abi, &ia32_gp_regs[REG_ESP]);
1175 const ir_edge_t *edge;
1176 const ir_edge_t *next;
1179 arity = be_get_MemPerm_entity_arity(node);
1180 pops = alloca(arity * sizeof(pops[0]));
/* Phase 1: push all sources. */
1183 for(i = 0; i < arity; ++i) {
1184 ir_entity *ent = be_get_MemPerm_in_entity(node, i);
1185 ir_type *enttype = get_entity_type(ent);
1186 int entbits = get_type_size_bits(enttype);
1187 ir_node *mem = get_irn_n(node, i + 1);
1190 assert( (entbits == 32 || entbits == 64) && "spillslot on x86 should be 32 or 64 bit");
1192 push = create_push(env, node, sp, mem, ent);
1193 sp = create_spproj(env, push, 0, node);
1195 // add another push after the first one
1196 push = create_push(env, node, sp, mem, ent);
1197 add_ia32_am_offs_int(push, 4);
1198 sp = create_spproj(env, push, 0, node);
1201 set_irn_n(node, i, new_Bad());
/* Phase 2: pop into all destinations, in reverse. */
1205 for(i = arity - 1; i >= 0; --i) {
1206 ir_entity *ent = be_get_MemPerm_out_entity(node, i);
1207 ir_type *enttype = get_entity_type(ent);
1208 int entbits = get_type_size_bits(enttype);
1212 assert( (entbits == 32 || entbits == 64) && "spillslot on x86 should be 32 or 64 bit");
1214 pop = create_pop(env, node, sp, ent);
1216 // add another pop after the first one
1217 sp = create_spproj(env, pop, 1, node);
1218 pop = create_pop(env, node, sp, ent);
1219 add_ia32_am_offs_int(pop, 4);
1221 sp = create_spproj(env, pop, 1, node);
1226 // exchange memprojs
1227 foreach_out_edge_safe(node, edge, next) {
1228 ir_node *proj = get_edge_src_irn(edge);
1229 int p = get_Proj_proj(proj);
1233 set_Proj_pred(proj, pops[p]);
1234 set_Proj_proj(proj, 3);
/* Detach the dead MemPerm from all predecessors. */
1238 arity = get_irn_arity(node);
1239 for(i = 0; i < arity; ++i) {
1240 set_irn_n(node, i, new_Bad());
1246 * Block-Walker: Calls the transform functions Spill and Reload.
1248 static void ia32_after_ra_walker(ir_node *block, void *env) {
1249 ir_node *node, *prev;
1250 ia32_code_gen_t *cg = env;
1251 ia32_transform_env_t tenv;
1254 tenv.irg = current_ir_graph;
1256 DEBUG_ONLY(tenv.mod = cg->mod;)
1258 /* beware: the schedule is changed here */
/* walk backwards and remember the predecessor first, because the current
 * node may be replaced/removed by the transformation */
1259 for (node = sched_last(block); !sched_is_begin(node); node = prev) {
1260 prev = sched_prev(node);
1261 tenv.dbg = get_irn_dbg_info(node);
1263 tenv.mode = get_irn_mode(node);
/* lower the backend's spill/reload/memperm pseudo nodes to real ia32 ops */
1265 if (be_is_Reload(node)) {
1266 transform_to_Load(&tenv);
1267 } else if (be_is_Spill(node)) {
1268 transform_to_Store(&tenv);
1269 } else if(be_is_MemPerm(node)) {
1270 transform_MemPerm(&tenv);
1276 * Collects nodes that need frame entities assigned.
1278 static void ia32_collect_frame_entity_nodes(ir_node *node, void *data)
1280 be_fec_env_t *env = data;
1282 if (be_is_Reload(node) && be_get_frame_entity(node) == NULL) {
1283 const ir_mode *mode = get_irn_mode(node);
1284 int align = get_mode_size_bytes(mode);
1285 be_node_needs_frame_entity(env, node, mode, align);
1286 } else if(is_ia32_irn(node) && get_ia32_frame_ent(node) == NULL) {
1287 if (is_ia32_Load(node)) {
1288 const ir_mode *mode = get_ia32_ls_mode(node);
1289 int align = get_mode_size_bytes(mode);
1290 be_node_needs_frame_entity(env, node, mode, align);
1291 } else if (is_ia32_vfild(node)) {
1292 const ir_mode *mode = get_ia32_ls_mode(node);
1294 be_node_needs_frame_entity(env, node, mode, align);
1300 * We transform Spill and Reload here. This needs to be done before
1301 * stack biasing otherwise we would miss the corrected offset for these nodes.
1303 static void ia32_after_ra(void *self) {
1304 ia32_code_gen_t *cg = self;
1305 ir_graph *irg = cg->irg;
1306 be_fec_env_t *fec_env = be_new_frame_entity_coalescer(cg->birg);
1308 /* create and coalesce frame entities */
1309 irg_walk_graph(irg, NULL, ia32_collect_frame_entity_nodes, fec_env);
1310 be_assign_entities(fec_env);
1311 be_free_frame_entity_coalescer(fec_env);
1313 irg_block_walk_graph(irg, NULL, ia32_after_ra_walker, cg);
1315 ia32_finish_irg(irg, cg);
1319 * Last touchups for the graph before emit: x87 simulation to replace the
1320 * virtual with real x87 instructions, creating a block schedule and peephole
1323 static void ia32_finish(void *self) {
1324 ia32_code_gen_t *cg = self;
1325 ir_graph *irg = cg->irg;
1327 /* if we do x87 code generation, rewrite all the virtual instructions and registers */
1328 if (cg->used_fp == fp_x87 || cg->force_sim) {
1329 x87_simulate_graph(cg->arch_env, cg->birg);
1332 /* create block schedule, this also removes empty blocks which might
1333 * produce critical edges */
1334 cg->blk_sched = be_create_block_schedule(irg, cg->birg->exec_freq);
1336 /* do peephole optimisations */
1337 ia32_peephole_optimization(irg, cg);
1341 * Emits the code, closes the output file and frees
1342 * the code generator interface.
1344 static void ia32_codegen(void *self) {
1345 ia32_code_gen_t *cg = self;
1346 ir_graph *irg = cg->irg;
/* emit the assembly for this irg to the isa's output file */
1348 ia32_gen_routine(cg->isa->out, irg, cg);
1352 /* remove it from the isa */
1355 /* de-allocate code generator */
1356 del_set(cg->reg_set);
/* forward declaration, the init function references the vtable below */
1360 static void *ia32_cg_init(be_irg_t *birg);
/**
 * The ia32 code generator interface: hooks called by the generic backend
 * driver at the respective phases of code generation.
 */
1362 static const arch_code_generator_if_t ia32_code_gen_if = {
1364 NULL, /* before abi introduce hook */
1367 ia32_before_sched, /* before scheduling hook */
1368 ia32_before_ra, /* before register allocation hook */
1369 ia32_after_ra, /* after register allocation hook */
1370 ia32_finish, /* called before codegen */
1371 ia32_codegen /* emit && done */
1375 * Initializes a IA32 code generator.
1377 static void *ia32_cg_init(be_irg_t *birg) {
1378 ia32_isa_t *isa = (ia32_isa_t *)birg->main_env->arch_env->isa;
1379 ia32_code_gen_t *cg = xcalloc(1, sizeof(*cg));
1381 cg->impl = &ia32_code_gen_if;
1382 cg->irg = birg->irg;
1383 cg->reg_set = new_set(ia32_cmp_irn_reg_assoc, 1024);
1384 cg->arch_env = birg->main_env->arch_env;
1387 cg->blk_sched = NULL;
1388 cg->fp_kind = isa->fp_kind;
1389 cg->used_fp = fp_none;
1390 cg->dump = (birg->main_env->options->dump_flags & DUMP_BE) ? 1 : 0;
1392 FIRM_DBG_REGISTER(cg->mod, "firm.be.ia32.cg");
1394 /* copy optimizations from isa for easier access */
1396 cg->arch = isa->arch;
1397 cg->opt_arch = isa->opt_arch;
/* recycle the per-irg name obstack: free old contents, re-init for reuse */
1403 if (isa->name_obst) {
1404 obstack_free(isa->name_obst, NULL);
1405 obstack_init(isa->name_obst);
/* NOTE(review): cur_reg_set/ia32_irn_ops.cg are file-global state, so only
 * one code generator can be active at a time */
1409 cur_reg_set = cg->reg_set;
1411 ia32_irn_ops.cg = cg;
1413 return (arch_code_generator_t *)cg;
1418 /*****************************************************************
1419 * ____ _ _ _____ _____
1420 * | _ \ | | | | |_ _|/ ____| /\
1421 * | |_) | __ _ ___| | _____ _ __ __| | | | | (___ / \
1422 * | _ < / _` |/ __| |/ / _ \ '_ \ / _` | | | \___ \ / /\ \
1423 * | |_) | (_| | (__| < __/ | | | (_| | _| |_ ____) / ____ \
1424 * |____/ \__,_|\___|_|\_\___|_| |_|\__,_| |_____|_____/_/ \_\
1426 *****************************************************************/
1429 * Set output modes for GCC
1431 static const tarval_mode_info mo_integer = {
/**
1438 * set the tarval output mode of all integer modes to decimal
 */
1440 static void set_tarval_output_modes(void)
/* iterate backwards over all modes registered in the program */
1444 for (i = get_irp_n_modes() - 1; i >= 0; --i) {
1445 ir_mode *mode = get_irp_mode(i);
1447 if (mode_is_int(mode))
1448 set_tarval_mode_output_option(mode, &mo_integer);
1454 * The template that generates a new ISA object.
1455 * Note that this template can be changed by command line
1458 static ia32_isa_t ia32_isa_template = {
1460 &ia32_isa_if, /* isa interface implementation */
1461 &ia32_gp_regs[REG_ESP], /* stack pointer register */
1462 &ia32_gp_regs[REG_EBP], /* base pointer register */
1463 -1, /* stack direction */
1464 NULL, /* main environment */
1466 NULL, /* 16bit register names */
1467 NULL, /* 8bit register names */
/* default optimization flags; each can be disabled via command line */
1471 IA32_OPT_INCDEC | /* optimize add 1, sub 1 into inc/dec default: on */
1472 IA32_OPT_DOAM | /* optimize address mode default: on */
1473 IA32_OPT_LEA | /* optimize for LEAs default: on */
1474 IA32_OPT_PLACECNST | /* place constants immediately before instructions, default: on */
1475 IA32_OPT_IMMOPS | /* operations can use immediates, default: on */
1476 IA32_OPT_EXTBB | /* use extended basic block scheduling, default: on */
1477 IA32_OPT_PUSHARGS), /* create pushs for function argument passing, default: on */
1478 arch_pentium_4, /* instruction architecture */
1479 arch_pentium_4, /* optimize for architecture */
1480 fp_sse2, /* use sse2 unit */
1481 NULL, /* current code generator */
1482 NULL, /* output file */
1484 NULL, /* name obstack */
1485 0 /* name obst size */
1490 * Initializes the backend ISA.
1492 static void *ia32_init(FILE *file_handle) {
/* guard so opcodes/intrinsics are only created once per process */
1493 static int inited = 0;
1499 set_tarval_output_modes();
/* the template carries the command-line configured defaults */
1501 isa = xmalloc(sizeof(*isa));
1502 memcpy(isa, &ia32_isa_template, sizeof(*isa));
1504 ia32_register_init(isa);
1505 ia32_create_opcodes();
1506 ia32_register_copy_attr_func();
1508 if ((ARCH_INTEL(isa->arch) && isa->arch < arch_pentium_4) ||
1509 (ARCH_AMD(isa->arch) && isa->arch < arch_athlon))
1510 /* no SSE2 for these CPUs */
1511 isa->fp_kind = fp_x87;
1513 if (ARCH_INTEL(isa->opt_arch) && isa->opt_arch >= arch_pentium_4) {
1514 /* Pentium 4 and later don't like inc and dec instructions */
1515 isa->opt &= ~IA32_OPT_INCDEC;
1518 isa->regs_16bit = pmap_create();
1519 isa->regs_8bit = pmap_create();
1520 isa->types = pmap_create();
1521 isa->tv_ent = pmap_create();
1522 isa->out = file_handle;
1523 isa->cpu = ia32_init_machine_description();
1525 ia32_build_16bit_reg_map(isa->regs_16bit);
1526 ia32_build_8bit_reg_map(isa->regs_8bit);
1528 /* patch register names of x87 registers */
1529 ia32_st_regs[0].name = "st";
1530 ia32_st_regs[1].name = "st(1)";
1531 ia32_st_regs[2].name = "st(2)";
1532 ia32_st_regs[3].name = "st(3)";
1533 ia32_st_regs[4].name = "st(4)";
1534 ia32_st_regs[5].name = "st(5)";
1535 ia32_st_regs[6].name = "st(6)";
1536 ia32_st_regs[7].name = "st(7)";
/* obstack for unique names, freed in ia32_done() */
1539 isa->name_obst = xmalloc(sizeof(*isa->name_obst));
1540 obstack_init(isa->name_obst);
1543 ia32_handle_intrinsics();
1544 ia32_switch_section(isa->out, NO_SECTION);
1545 fprintf(isa->out, "\t.intel_syntax\n");
1547 /* needed for the debug support */
1548 ia32_switch_section(isa->out, SECTION_TEXT);
1549 fprintf(isa->out, ".Ltext0:\n");
1559 * Closes the output file and frees the ISA structure.
1561 static void ia32_done(void *self) {
1562 ia32_isa_t *isa = self;
1564 /* emit now all global declarations */
1565 ia32_gen_decls(isa->out, isa->arch_isa.main_env);
/* release the register-name and type/tarval caches built in ia32_init() */
1567 pmap_destroy(isa->regs_16bit);
1568 pmap_destroy(isa->regs_8bit);
1569 pmap_destroy(isa->tv_ent);
1570 pmap_destroy(isa->types);
1573 obstack_free(isa->name_obst, NULL);
1581 * Return the number of register classes for this architecture.
1582 * We report always these:
1583 * - the general purpose registers
1584 * - the SSE floating point register set
1585 * - the virtual floating point registers
1587 static int ia32_get_n_reg_class(const void *self) {
/**
1592 * Return the register class for index i.
 * Index order matches the three classes listed above: 0 = gp, 1 = xmm, 2 = vfp.
 */
1594 static const arch_register_class_t *ia32_get_reg_class(const void *self, int i) {
1595 assert(i >= 0 && i < 3 && "Invalid ia32 register class requested.");
1597 return &ia32_reg_classes[CLASS_ia32_gp];
1599 return &ia32_reg_classes[CLASS_ia32_xmm];
1601 return &ia32_reg_classes[CLASS_ia32_vfp];
1605 * Get the register class which shall be used to store a value of a given mode.
1606 * @param self The this pointer.
1607 * @param mode The mode in question.
1608 * @return A register class which can hold values of the given mode.
1610 const arch_register_class_t *ia32_get_reg_class_for_mode(const void *self, const ir_mode *mode) {
1611 const ia32_isa_t *isa = self;
/* floats go to xmm when SSE2 is enabled, otherwise to the virtual x87 regs;
 * everything else (ints, pointers) goes to the gp class */
1612 if (mode_is_float(mode)) {
1613 return USE_SSE2(isa) ? &ia32_reg_classes[CLASS_ia32_xmm] : &ia32_reg_classes[CLASS_ia32_vfp];
1616 return &ia32_reg_classes[CLASS_ia32_gp];
1620 * Get the ABI restrictions for procedure calls.
1621 * @param self The this pointer.
1622 * @param method_type The type of the method (procedure) in question.
1623 * @param abi The abi object to be modified
1625 static void ia32_get_call_abi(const void *self, ir_type *method_type, be_abi_call_t *abi) {
1626 const ia32_isa_t *isa = self;
1629 unsigned cc = get_method_calling_convention(method_type);
1630 int n = get_method_n_params(method_type);
1633 int i, ignore_1, ignore_2;
1635 const arch_register_t *reg;
1636 be_abi_call_flags_t call_flags = be_abi_call_get_flags(abi);
/* on P6 cores mov-to-stack sequences beat push sequences */
1638 unsigned use_push = !IS_P6_ARCH(isa->opt_arch);
1640 /* set abi flags for calls */
1641 call_flags.bits.left_to_right = 0; /* always last arg first on stack */
1642 call_flags.bits.store_args_sequential = use_push;
1643 /* call_flags.bits.try_omit_fp not changed: can handle both settings */
1644 call_flags.bits.fp_free = 0; /* the frame pointer is fixed in IA32 */
1645 call_flags.bits.call_has_imm = 1; /* IA32 calls can have immediate address */
1647 /* set stack parameter passing style */
1648 be_abi_call_set_flags(abi, call_flags, &ia32_abi_callbacks);
1650 /* collect the mode for each type */
1651 modes = alloca(n * sizeof(modes[0]));
1653 for (i = 0; i < n; i++) {
1654 tp = get_method_param_type(method_type, i);
1655 modes[i] = get_type_mode(tp);
1658 /* set register parameters */
1659 if (cc & cc_reg_param) {
1660 /* determine the number of parameters passed via registers */
1661 biggest_n = ia32_get_n_regparam_class(n, modes, &ignore_1, &ignore_2);
1663 /* loop over all parameters and set the register requirements */
1664 for (i = 0; i <= biggest_n; i++) {
1665 reg = ia32_get_RegParam_reg(n, modes, i, cc);
1666 assert(reg && "kaputt");
1667 be_abi_call_param_reg(abi, i, reg);
1674 /* set stack parameters */
1675 for (i = stack_idx; i < n; i++) {
1676 /* parameters on the stack are 32 bit aligned */
1677 be_abi_call_param_stack(abi, i, 4, 0, 0);
1681 /* set return registers */
1682 n = get_method_n_ress(method_type);
1684 assert(n <= 2 && "more than two results not supported");
1686 /* In case of 64bit returns, we will have two 32bit values */
1688 tp = get_method_res_type(method_type, 0);
1689 mode = get_type_mode(tp);
1691 assert(!mode_is_float(mode) && "two FP results not supported");
1693 tp = get_method_res_type(method_type, 1);
1694 mode = get_type_mode(tp);
1696 assert(!mode_is_float(mode) && "mixed INT, FP results not supported");
/* 64-bit integer results are returned in the EAX:EDX pair */
1698 be_abi_call_res_reg(abi, 0, &ia32_gp_regs[REG_EAX]);
1699 be_abi_call_res_reg(abi, 1, &ia32_gp_regs[REG_EDX]);
1702 const arch_register_t *reg;
1704 tp = get_method_res_type(method_type, 0);
1705 assert(is_atomic_type(tp));
1706 mode = get_type_mode(tp);
/* single result: floats in virtual FP reg 0 (x87 st0), ints in EAX */
1708 reg = mode_is_float(mode) ? &ia32_vfp_regs[REG_VF0] : &ia32_gp_regs[REG_EAX];
1710 be_abi_call_res_reg(abi, 0, reg);
/** Returns the ia32 node operations for any node (one shared ops table). */
1715 static const void *ia32_get_irn_ops(const arch_irn_handler_t *self, const ir_node *irn) {
1716 return &ia32_irn_ops;
/** The ia32 irn handler, dispatching to ia32_get_irn_ops. */
1719 const arch_irn_handler_t ia32_irn_handler = {
1723 const arch_irn_handler_t *ia32_get_irn_handler(const void *self) {
1724 return &ia32_irn_handler;
/** Scheduler predicate: only ia32 nodes appear in the schedule (-1 = don't care). */
1727 int ia32_to_appear_in_schedule(void *block_env, const ir_node *irn) {
1728 return is_ia32_irn(irn) ? 1 : -1;
1732 * Initializes the code generator interface.
1734 static const arch_code_generator_if_t *ia32_get_code_generator_if(void *self) {
1735 return &ia32_code_gen_if;
/**
1739 * Returns the estimated execution time of an ia32 irn.
 * Non-ia32 nodes get a default cost of 1.
 */
1741 static sched_timestep_t ia32_sched_exectime(void *env, const ir_node *irn) {
1742 const arch_env_t *arch_env = env;
1743 return is_ia32_irn(irn) ? ia32_get_op_estimated_cost(arch_get_irn_ops(arch_env, irn), irn) : 1;
/* mutable copy of the generic selector with ia32-specific callbacks patched in */
1746 list_sched_selector_t ia32_sched_selector;
/**
1749 * Returns the reg_pressure scheduler with to_appear_in_schedule() overloaded
 */
1751 static const list_sched_selector_t *ia32_get_list_sched_selector(const void *self, list_sched_selector_t *selector) {
1752 memcpy(&ia32_sched_selector, selector, sizeof(ia32_sched_selector));
1753 ia32_sched_selector.exectime = ia32_sched_exectime;
1754 ia32_sched_selector.to_appear_in_schedule = ia32_to_appear_in_schedule;
1755 return &ia32_sched_selector;
/** Returns the ILP scheduler selector (body not visible in this excerpt). */
1758 static const ilp_sched_selector_t *ia32_get_ilp_sched_selector(const void *self) {
/**
1763 * Returns the necessary byte alignment for storing a register of given class.
 * NOTE(review): large FP registers appear to get a stricter alignment than
 * their plain byte size — confirm against the non-excerpted return statements.
 */
1765 static int ia32_get_reg_class_alignment(const void *self, const arch_register_class_t *cls) {
1766 ir_mode *mode = arch_register_class_mode(cls);
1767 int bytes = get_mode_size_bytes(mode);
1769 if (mode_is_float(mode) && bytes > 8)
/**
 * Returns the allowed execution units for the given node: ia32 nodes carry
 * their own unit lists, backend nodes are classified here (calls/returns
 * use the branch units, barriers the dummy units).
 */
1774 static const be_execution_unit_t ***ia32_get_allowed_execution_units(const void *self, const ir_node *irn) {
1775 static const be_execution_unit_t *_allowed_units_BRANCH[] = {
1776 &ia32_execution_units_BRANCH[IA32_EXECUNIT_TP_BRANCH_BRANCH1],
1777 &ia32_execution_units_BRANCH[IA32_EXECUNIT_TP_BRANCH_BRANCH2],
1780 static const be_execution_unit_t *_allowed_units_ALU[] = {
1781 &ia32_execution_units_ALU[IA32_EXECUNIT_TP_ALU_ALU1],
1782 &ia32_execution_units_ALU[IA32_EXECUNIT_TP_ALU_ALU2],
1783 &ia32_execution_units_ALU[IA32_EXECUNIT_TP_ALU_ALU3],
1784 &ia32_execution_units_ALU[IA32_EXECUNIT_TP_ALU_ALU4],
1787 static const be_execution_unit_t *_allowed_units_DUMMY[] = {
1788 &ia32_execution_units_DUMMY[IA32_EXECUNIT_TP_DUMMY_DUMMY1],
1789 &ia32_execution_units_DUMMY[IA32_EXECUNIT_TP_DUMMY_DUMMY2],
1790 &ia32_execution_units_DUMMY[IA32_EXECUNIT_TP_DUMMY_DUMMY3],
1791 &ia32_execution_units_DUMMY[IA32_EXECUNIT_TP_DUMMY_DUMMY4],
1794 static const be_execution_unit_t **_units_callret[] = {
1795 _allowed_units_BRANCH,
1798 static const be_execution_unit_t **_units_other[] = {
1802 static const be_execution_unit_t **_units_dummy[] = {
1803 _allowed_units_DUMMY,
1806 const be_execution_unit_t ***ret;
1808 if (is_ia32_irn(irn)) {
1809 ret = get_ia32_exec_units(irn);
1811 else if (is_be_node(irn)) {
1812 if (be_is_Call(irn) || be_is_Return(irn)) {
1813 ret = _units_callret;
1815 else if (be_is_Barrier(irn)) {
1830 * Return the abstract ia32 machine.
1832 static const be_machine_t *ia32_get_machine(const void *self) {
1833 const ia32_isa_t *isa = self;
1838 * Allows or disallows the creation of Psi nodes for the given Phi nodes.
1839 * @return 1 if allowed, 0 otherwise
1841 static int ia32_is_psi_allowed(ir_node *sel, ir_node *phi_list, int i, int j)
1843 ir_node *cmp, *cmp_a, *phi;
1846 /* we don't want long long and floating point Psi */
1847 #define IS_BAD_PSI_MODE(mode) (mode_is_float(mode) || get_mode_size_bits(mode) > 32)
/* the selector must be a boolean Proj of a Cmp */
1849 if (get_irn_mode(sel) != mode_b)
1852 cmp = get_Proj_pred(sel);
1853 cmp_a = get_Cmp_left(cmp);
1854 mode = get_irn_mode(cmp_a);
1856 if (IS_BAD_PSI_MODE(mode))
1859 /* check the Phi nodes */
/* both Phi operands selected by i/j must have Psi-capable modes */
1860 for (phi = phi_list; phi; phi = get_irn_link(phi)) {
1861 ir_node *pred_i = get_irn_n(phi, i);
1862 ir_node *pred_j = get_irn_n(phi, j);
1863 ir_mode *mode_i = get_irn_mode(pred_i);
1864 ir_mode *mode_j = get_irn_mode(pred_j);
1866 if (IS_BAD_PSI_MODE(mode_i) || IS_BAD_PSI_MODE(mode_j))
1870 #undef IS_BAD_PSI_MODE
/** Shared environment for ia32_create_intrinsic_fkt; entities created lazily. */
1875 static ia32_intrinsic_env_t intrinsic_env = {
1876 NULL, /**< the irg, these entities belong to */
1877 NULL, /**< entity for first div operand (move into FPU) */
1878 NULL, /**< entity for second div operand (move into FPU) */
1879 NULL, /**< entity for converts ll -> d */
1880 NULL, /**< entity for converts d -> ll */
1884 * Returns the libFirm configuration parameter for this backend.
1886 static const backend_params *ia32_get_libfirm_params(void) {
1887 static const opt_if_conv_info_t ifconv = {
1888 4, /* maxdepth, doesn't matter for Psi-conversion */
1889 ia32_is_psi_allowed /* allows or disallows Psi creation for given selector */
/* strength-reduction parameters for the architecture-dependent lowering */
1891 static const arch_dep_params_t ad = {
1892 1, /* also use subs */
1893 4, /* maximum shifts */
1894 31, /* maximum shift amount */
1896 1, /* allow Mulhs */
1897 1, /* allow Mulus */
1898 32 /* Mulh allowed up to 32 bit */
1900 static backend_params p = {
1901 NULL, /* no additional opcodes */
1902 NULL, /* will be set later */
1903 1, /* need dword lowering */
1904 ia32_create_intrinsic_fkt,
1905 &intrinsic_env, /* context for ia32_create_intrinsic_fkt */
1906 NULL, /* will be set later */
1910 p.if_conv_info = &ifconv;
1915 /* instruction set architectures. */
1916 static const lc_opt_enum_int_items_t arch_items[] = {
1917 { "386", arch_i386, },
1918 { "486", arch_i486, },
1919 { "pentium", arch_pentium, },
1920 { "586", arch_pentium, },
1921 { "pentiumpro", arch_pentium_pro, },
1922 { "686", arch_pentium_pro, },
1923 { "pentiummmx", arch_pentium_mmx, },
1924 { "pentium2", arch_pentium_2, },
1925 { "p2", arch_pentium_2, },
1926 { "pentium3", arch_pentium_3, },
1927 { "p3", arch_pentium_3, },
1928 { "pentium4", arch_pentium_4, },
1929 { "p4", arch_pentium_4, },
1930 { "pentiumm", arch_pentium_m, },
1931 { "pm", arch_pentium_m, },
1932 { "core", arch_core, },
1934 { "athlon", arch_athlon, },
1935 { "athlon64", arch_athlon_64, },
1936 { "opteron", arch_opteron, },
/* the option variables write straight into the isa template, so they must
 * be parsed before ia32_init() copies the template */
1940 static lc_opt_enum_int_var_t arch_var = {
1941 &ia32_isa_template.arch, arch_items
1944 static lc_opt_enum_int_var_t opt_arch_var = {
1945 &ia32_isa_template.opt_arch, arch_items
1948 static const lc_opt_enum_int_items_t fp_unit_items[] = {
1950 { "sse2", fp_sse2 },
1954 static lc_opt_enum_int_var_t fp_unit_var = {
1955 &ia32_isa_template.fp_kind, fp_unit_items
1958 static const lc_opt_enum_int_items_t gas_items[] = {
1959 { "linux", ASM_LINUX_GAS },
1960 { "mingw", ASM_MINGW_GAS },
1964 static lc_opt_enum_int_var_t gas_var = {
1965 (int *)&asm_flavour, gas_items
/* the ia32 command line option table; see ia32_register_options() below */
1968 static const lc_opt_table_entry_t ia32_options[] = {
1969 LC_OPT_ENT_ENUM_INT("arch", "select the instruction architecture", &arch_var),
1970 LC_OPT_ENT_ENUM_INT("opt", "optimize for instruction architecture", &opt_arch_var),
1971 LC_OPT_ENT_ENUM_INT("fpunit", "select the floating point unit", &fp_unit_var),
1972 LC_OPT_ENT_NEGBIT("noaddrmode", "do not use address mode", &ia32_isa_template.opt, IA32_OPT_DOAM),
1973 LC_OPT_ENT_NEGBIT("nolea", "do not optimize for LEAs", &ia32_isa_template.opt, IA32_OPT_LEA),
1974 LC_OPT_ENT_NEGBIT("noplacecnst", "do not place constants", &ia32_isa_template.opt, IA32_OPT_PLACECNST),
1975 LC_OPT_ENT_NEGBIT("noimmop", "no operations with immediates", &ia32_isa_template.opt, IA32_OPT_IMMOPS),
1976 LC_OPT_ENT_NEGBIT("noextbb", "do not use extended basic block scheduling", &ia32_isa_template.opt, IA32_OPT_EXTBB),
1977 LC_OPT_ENT_NEGBIT("nopushargs", "do not create pushs for function arguments", &ia32_isa_template.opt, IA32_OPT_PUSHARGS),
1978 LC_OPT_ENT_ENUM_INT("gasmode", "set the GAS compatibility mode", &gas_var),
1983 * Register command line options for the ia32 backend.
1987 * ia32-arch=arch create instruction for arch
1988 * ia32-opt=arch optimize for run on arch
1989 * ia32-fpunit=unit select floating point unit (x87 or SSE2)
1990 * ia32-incdec optimize for inc/dec
1991 * ia32-noaddrmode do not use address mode
1992 * ia32-nolea do not optimize for LEAs
1993 * ia32-noplacecnst do not place constants,
1994 * ia32-noimmop no operations with immediates
1995 * ia32-noextbb do not use extended basic block scheduling
1996 * ia32-nopushargs do not create pushs for function argument passing
1997 * ia32-gasmode set the GAS compatibility mode
1999 static void ia32_register_options(lc_opt_entry_t *ent)
/* all options live in the "ia32" group below the backend option root */
2001 lc_opt_entry_t *be_grp_ia32 = lc_opt_get_grp(ent, "ia32");
2002 lc_opt_add_table(be_grp_ia32, ia32_options);
2004 #endif /* WITH_LIBCORE */
2006 const arch_isa_if_t ia32_isa_if = {
2009 ia32_get_n_reg_class,
2011 ia32_get_reg_class_for_mode,
2013 ia32_get_irn_handler,
2014 ia32_get_code_generator_if,
2015 ia32_get_list_sched_selector,
2016 ia32_get_ilp_sched_selector,
2017 ia32_get_reg_class_alignment,
2018 ia32_get_libfirm_params,
2019 ia32_get_allowed_execution_units,
2022 ia32_register_options