- Improved addressmode optimisation for conv nodes
[libfirm] / ir / be / ia32 / ia32_emitter.c
index 62365f9..4cd44f1 100644 (file)
 #include "irargs_t.h"
 #include "irprog_t.h"
 #include "iredges_t.h"
+#include "execfreq.h"
 
 #include "../besched_t.h"
 #include "../benode_t.h"
+#include "../beabi.h"
 #include "../be_dbgout.h"
 
 #include "ia32_emitter.h"
@@ -126,20 +128,6 @@ static void ia32_dump_function_size(FILE *F, const char *name)
  * |_|                                       |_|
  *************************************************************/
 
-static INLINE int be_is_unknown_reg(const arch_register_t *reg) {
-       return \
-               REGS_ARE_EQUAL(reg, &ia32_gp_regs[REG_GP_UKNWN])   || \
-               REGS_ARE_EQUAL(reg, &ia32_xmm_regs[REG_XMM_UKNWN]) || \
-               REGS_ARE_EQUAL(reg, &ia32_vfp_regs[REG_VFP_UKNWN]);
-}
-
-/**
- * returns true if a node has x87 registers
- */
-static INLINE int has_x87_register(const ir_node *n) {
-       return is_irn_machine_user(n, 0);
-}
-
 /* We always pass the ir_node which is a pointer. */
 static int ia32_get_arg_type(const lc_arg_occ_t *occ) {
        return lc_arg_type_ptr;
@@ -163,13 +151,29 @@ static const arch_register_t *get_in_reg(const ir_node *irn, int pos) {
 
        assert(reg && "no in register found");
 
-       /* in case of unknown: just return a register */
-       if (REGS_ARE_EQUAL(reg, &ia32_gp_regs[REG_GP_UKNWN]))
-               reg = &ia32_gp_regs[REG_EAX];
-       else if (REGS_ARE_EQUAL(reg, &ia32_xmm_regs[REG_XMM_UKNWN]))
-               reg = &ia32_xmm_regs[REG_XMM0];
-       else if (REGS_ARE_EQUAL(reg, &ia32_vfp_regs[REG_VFP_UKNWN]))
-               reg = &ia32_vfp_regs[REG_VF0];
+       /* in case of a joker register: just return a valid register */
+       if (arch_register_type_is(reg, joker)) {
+               arch_register_req_t       req;
+               const arch_register_req_t *p_req;
+
+               /* ask for the requirements */
+               p_req = arch_get_register_req(arch_env, &req, irn, pos);
+
+               if (arch_register_req_is(p_req, limited)) {
+                       /* in case of limited requirements: get the first allowed register */
+
+                       bitset_t *bs = bitset_alloca(arch_register_class_n_regs(p_req->cls));
+                       int      idx;
+
+                       p_req->limited(p_req->limited_env, bs);
+                       idx = bitset_next_set(bs, 0);
+                       reg = arch_register_for_index(p_req->cls, idx);
+               }
+               else {
+                       /* otherwise get first register in class */
+                       reg = arch_register_for_index(p_req->cls, 0);
+               }
+       }
 
        return reg;
 }
@@ -556,19 +560,25 @@ const char *ia32_emit_unop(const ir_node *n, ia32_emit_env_t *env) {
 
        switch(get_ia32_op_type(n)) {
                case ia32_Normal:
-                       if (is_ia32_ImmConst(n) || is_ia32_ImmSymConst(n)) {
+                       if (is_ia32_ImmConst(n)) {
                                lc_esnprintf(ia32_get_arg_env(), buf, SNPRINTF_BUF_LEN, "%C", n);
                        }
+                       else if (is_ia32_ImmSymConst(n)) {
+                               lc_esnprintf(ia32_get_arg_env(), buf, SNPRINTF_BUF_LEN, "OFFSET FLAT:%C", n);
+                       }
                        else {
                                if (is_ia32_MulS(n) || is_ia32_Mulh(n)) {
                                        /* MulS and Mulh implicitly multiply by EAX */
                                        lc_esnprintf(ia32_get_arg_env(), buf, SNPRINTF_BUF_LEN, "%4S", n);
-                               }
-                               else
+                               } else if(is_ia32_Push(n)) {
+                                       lc_esnprintf(ia32_get_arg_env(), buf, SNPRINTF_BUF_LEN, "%3S", n);
+                               } else {
                                        lc_esnprintf(ia32_get_arg_env(), buf, SNPRINTF_BUF_LEN, "%1D", n);
+                               }
                        }
                        break;
                case ia32_AddrModeD:
+                       assert(!is_ia32_Push(n));
                        snprintf(buf, SNPRINTF_BUF_LEN, "%s", ia32_emit_am(n, env));
                        break;
                case ia32_AddrModeS:
@@ -576,7 +586,7 @@ const char *ia32_emit_unop(const ir_node *n, ia32_emit_env_t *env) {
                                Mulh is emitted via emit_unop
                                imul [MEM]  means EDX:EAX <- EAX * [MEM]
                        */
-                       assert((is_ia32_Mulh(n) || is_ia32_MulS(n)) && "Only MulS and Mulh can have AM source as unop");
+                       assert((is_ia32_Mulh(n) || is_ia32_MulS(n) || is_ia32_Push(n)) && "Only MulS and Mulh can have AM source as unop");
                        lc_esnprintf(ia32_get_arg_env(), buf, SNPRINTF_BUF_LEN, "%s", ia32_emit_am(n, env));
                        break;
                default:
@@ -610,7 +620,7 @@ const char *ia32_emit_am(const ir_node *n, ia32_emit_env_t *env) {
        /* obstack_free with NULL results in an uninitialized obstack */
        obstack_init(obst);
 
-       p = pointer_size(mode, has_x87_register(n) || is_ia32_GetST0(n) || is_ia32_SetST0(n));
+       p = pointer_size(mode, ia32_has_x87_register(n) || is_ia32_GetST0(n) || is_ia32_SetST0(n));
        if (p)
                obstack_printf(obst, "%s ", p);
 
@@ -645,17 +655,16 @@ const char *ia32_emit_am(const ir_node *n, ia32_emit_env_t *env) {
        }
 
        if (am_flav & ia32_O) {
-               s = get_ia32_am_offs(n);
+               int offs = get_ia32_am_offs_int(n);
 
-               if (s) {
+               if (offs != 0) {
                        /* omit explicit + if there was no base or index */
                        if (! had_output) {
-                               obstack_printf(obst, "[");
-                               if (s[0] == '+')
-                                       s++;
+                               obstack_printf(obst, "[%d", offs);
+                       } else {
+                               obstack_printf(obst, "%+d", offs);
                        }
 
-                       obstack_printf(obst, s);
                        had_output = 1;
                }
        }
@@ -677,7 +686,7 @@ const char *ia32_emit_adr(const ir_node *irn, ia32_emit_env_t *env)
        static char buf[SNPRINTF_BUF_LEN];
        ir_mode    *mode = get_ia32_ls_mode(irn);
        const char *adr  = get_ia32_cnst(irn);
-       const char *pref = pointer_size(mode, has_x87_register(irn));
+       const char *pref = pointer_size(mode, ia32_has_x87_register(irn));
 
        snprintf(buf, SNPRINTF_BUF_LEN, "%s %s", pref ? pref : "", adr);
        return buf;
@@ -1584,6 +1593,7 @@ static void emit_ia32_Conv_I2I(const ir_node *irn, ia32_emit_env_t *emit_env) {
        char               *conv_cmd = NULL;
        ir_mode            *src_mode = get_ia32_src_mode(irn);
        ir_mode            *tgt_mode = get_ia32_tgt_mode(irn);
+       int                signed_mode;
        int n, m;
        char cmd_buf[SNPRINTF_BUF_LEN], cmnt_buf[SNPRINTF_BUF_LEN];
        const arch_register_t *in_reg, *out_reg;
@@ -1591,33 +1601,34 @@ static void emit_ia32_Conv_I2I(const ir_node *irn, ia32_emit_env_t *emit_env) {
        n = get_mode_size_bits(src_mode);
        m = get_mode_size_bits(tgt_mode);
 
-       if (mode_is_signed(n < m ? src_mode : tgt_mode)) {
+       assert(n == 8 || n == 16 || n == 32);
+       assert(m == 8 || m == 16 || m == 32);
+       assert(n != m);
+
+       signed_mode = mode_is_signed(n < m ? src_mode : tgt_mode);
+       if(signed_mode) {
                move_cmd = "movsx";
-               if (n == 8 || m == 8)
-                       conv_cmd = "cbw";
-               else if (n == 16 || m == 16)
-                       conv_cmd = "cwde";
-               else {
-                       printf("%d -> %d unsupported\n", n, m);
-                       assert(0 && "unsupported Conv_I2I");
-               }
        }
 
-        switch(get_ia32_op_type(irn)) {
+       switch(get_ia32_op_type(irn)) {
                case ia32_Normal:
                        in_reg  = get_in_reg(irn, 2);
                        out_reg = get_out_reg(irn, 0);
 
                        if (REGS_ARE_EQUAL(in_reg, &ia32_gp_regs[REG_EAX]) &&
                                REGS_ARE_EQUAL(out_reg, in_reg)                &&
-                               mode_is_signed(n < m ? src_mode : tgt_mode))
+                               signed_mode)
                        {
+                               if (n == 8 || m == 8)
+                                       conv_cmd = "cbw";
+                               else if (n == 16 || m == 16)
+                                       conv_cmd = "cwde";
+
                                /* argument and result are both in EAX and */
                                /* signedness is ok: -> use converts       */
                                lc_esnprintf(env, cmd_buf, SNPRINTF_BUF_LEN, "%s", conv_cmd);
                        }
-                       else if (REGS_ARE_EQUAL(out_reg, in_reg) &&
-                               ! mode_is_signed(n < m ? src_mode : tgt_mode))
+                       else if (REGS_ARE_EQUAL(out_reg, in_reg) &&     ! signed_mode)
                        {
                                /* argument and result are in the same register */
                                /* and signedness is ok: -> use and with mask   */
@@ -1727,7 +1738,7 @@ static void Copy_emitter(const ir_node *irn, ir_node *op, ia32_emit_env_t *emit_
        char cmd_buf[SNPRINTF_BUF_LEN], cmnt_buf[SNPRINTF_BUF_LEN];
 
        if (REGS_ARE_EQUAL(arch_get_irn_register(aenv, irn), arch_get_irn_register(aenv, op)) ||
-               be_is_unknown_reg(arch_get_irn_register(aenv, op)))
+               arch_register_type_is(arch_get_irn_register(aenv, op), virtual))
                return;
 
        if (mode_is_float(get_irn_mode(irn)))
@@ -1764,7 +1775,18 @@ static void emit_be_Perm(const ir_node *irn, ia32_emit_env_t *emit_env) {
        assert(cls1 == cls2 && "Register class mismatch at Perm");
 
        if (cls1 == &ia32_reg_classes[CLASS_ia32_gp]) {
-               lc_esnprintf(ia32_get_arg_env(), cmd_buf, SNPRINTF_BUF_LEN, "xchg %1S, %2S", irn, irn);
+               if(emit_env->isa->opt_arch == arch_athlon) {
+                       // xchg commands are Vector path on athlons and therefore stall the DirectPath pipeline
+                       // it is nearly always beneficial to use the 3 xor trick instead of an xchg
+                       cmnt_buf[0] = 0;
+                       lc_esnprintf(ia32_get_arg_env(), cmd_buf, SNPRINTF_BUF_LEN, "xor %1S, %2S", irn, irn);
+                       IA32_DO_EMIT(irn);
+                       lc_esnprintf(ia32_get_arg_env(), cmd_buf, SNPRINTF_BUF_LEN, "xor %2S, %1S", irn, irn);
+                       IA32_DO_EMIT(irn);
+                       lc_esnprintf(ia32_get_arg_env(), cmd_buf, SNPRINTF_BUF_LEN, "xor %1S, %2S", irn, irn);
+               } else {
+                       lc_esnprintf(ia32_get_arg_env(), cmd_buf, SNPRINTF_BUF_LEN, "xchg %1S, %2S", irn, irn);
+               }
        }
        else if (cls1 == &ia32_reg_classes[CLASS_ia32_xmm]) {
                lc_esnprintf(ia32_get_arg_env(), cmd_buf, SNPRINTF_BUF_LEN,
@@ -1797,7 +1819,7 @@ static void emit_ia32_Const(const ir_node *n, ia32_emit_env_t *env) {
                lc_esnprintf(arg_env, cmd_buf, 256, "mov %1D, OFFSET FLAT:%C ", n, n);
                lc_esnprintf(arg_env, cmnt_buf, 256, "/* Move address of SymConst into register */");
        } else {
-               assert(mode == get_tarval_mode(tv));
+               assert(mode == get_tarval_mode(tv) || (mode_is_reference(get_tarval_mode(tv)) && mode == mode_Iu));
                /* beware: in some rare cases mode is mode_b which has no tarval_null() */
                if (tv == get_tarval_b_false() || tv == get_tarval_null(mode)) {
                        const char *instr = "xor";
@@ -1987,23 +2009,33 @@ static void ia32_register_emitters(void) {
 #undef IA32_EMIT
 }
 
+static const char *last_name = NULL;
 static unsigned last_line = -1;
-static const char *last_file = NULL;
 static unsigned num = -1;
 
+/**
+ * Emit the debug support for node irn.
+ */
 static void ia32_emit_dbg(const ir_node *irn, ia32_emit_env_t *env) {
        dbg_info *db = get_irn_dbg_info(irn);
        unsigned lineno;
        const char *fname = be_retrieve_dbg_info(db, &lineno);
 
-       if (fname && last_line != lineno) {
-               char name[64];
-               FILE *F = env->out;
-
-               snprintf(name, sizeof(name), ".Ld%u", ++num);
-               last_line = lineno;
-               be_dbg_line(env->cg->birg->main_env->db_handle, lineno, name);
-               fprintf(F, "%s:\n", name);
+       if (fname) {
+               if (last_name != fname) {
+                       last_line = -1;
+                       be_dbg_include_begin(env->cg->birg->main_env->db_handle, fname);
+                       last_name = fname;
+               }
+               if (last_line != lineno) {
+                       char name[64];
+                       FILE *F = env->out;
+
+                       snprintf(name, sizeof(name), ".LM%u", ++num);
+                       last_line = lineno;
+                       be_dbg_line(env->cg->birg->main_env->db_handle, lineno, name);
+                       fprintf(F, "%s:\n", name);
+               }
        }
 }
 
@@ -2039,7 +2071,8 @@ static void ia32_emit_alignment(FILE *F, unsigned align, unsigned skip) {
  * Emits gas alignment directives for Functions depended on cpu architecture.
  */
 static void ia32_emit_align_func(FILE *F, cpu_support cpu) {
-       unsigned align; unsigned maximum_skip;
+       unsigned align;
+       unsigned maximum_skip;
 
        switch (cpu) {
                case arch_i386:
@@ -2081,30 +2114,121 @@ static void ia32_emit_align_label(FILE *F, cpu_support cpu) {
        ia32_emit_alignment(F, align, maximum_skip);
 }
 
+static int is_first_loop_block(ir_node *block, ir_node *prev_block, ia32_emit_env_t *env) {
+       ir_exec_freq *execfreqs = env->cg->birg->execfreqs;
+       double block_freq, prev_freq;
+       static const double DELTA = .0001;
+       cpu_support cpu = env->isa->opt_arch;
+
+       if(execfreqs == NULL)
+               return 0;
+       if(cpu == arch_i386 || cpu == arch_i486)
+               return 0;
+
+       block_freq = get_block_execfreq(execfreqs, block);
+       prev_freq = get_block_execfreq(execfreqs, prev_block);
+
+       if(block_freq < DELTA || prev_freq < DELTA)
+               return 0;
+
+       block_freq /= prev_freq;
+
+       switch (cpu) {
+               case arch_athlon:
+               case arch_athlon_64:
+               case arch_k6:
+                       return block_freq > 3;
+               default:
+                       break;
+       }
+
+       return block_freq > 2;
+}
+
 /**
  * Walks over the nodes in a block connected by scheduling edges
  * and emits code for each node.
  */
-static void ia32_gen_block(ir_node *block, void *env) {
-       ia32_emit_env_t *emit_env = env;
+static void ia32_gen_block(ir_node *block, ir_node *last_block, ia32_emit_env_t *env) {
+       ir_graph      *irg         = get_irn_irg(block);
+       ir_node       *start_block = get_irg_start_block(irg);
+       int           need_label   = 1;
+       FILE          *F           = env->out;
        const ir_node *irn;
-       int need_label = block != get_irg_start_block(get_irn_irg(block));
-       FILE *F = emit_env->out;
+       int           i;
 
-       if (! is_Block(block))
-               return;
+       assert(is_Block(block));
+
+       if (block == start_block)
+               need_label = 0;
 
-       if (need_label && (emit_env->cg->opt & IA32_OPT_EXTBB)) {
-               /* if the extended block scheduler is used, only leader blocks need
-                  labels. */
-               need_label = (block == get_extbb_leader(get_nodes_extbb(block)));
+       if (need_label && get_irn_arity(block) == 1) {
+               ir_node *pred_block = get_Block_cfgpred_block(block, 0);
+
+               if (pred_block == last_block && get_irn_n_edges_kind(pred_block, EDGE_KIND_BLOCK) <= 2)
+                       need_label = 0;
+       }
+
+       /* special case: if one of our cfg preds is a switch-jmp we need a label, */
+       /*               otherwise there might be jump table entries jumping to   */
+       /*               non-existent (omitted) labels                            */
+       for (i = get_Block_n_cfgpreds(block) - 1; i >= 0; --i) {
+               ir_node *pred = get_Block_cfgpred(block, i);
+
+               if (is_Proj(pred)) {
+                       assert(get_irn_mode(pred) == mode_X);
+                       if (is_ia32_SwitchJmp(get_Proj_pred(pred))) {
+                               need_label = 1;
+                               break;
+                       }
+               }
+       }
+
+       /* special case because the start block contains no jump instruction */
+       if (last_block == start_block) {
+               const ir_edge_t *edge;
+               ir_node *startsucc = NULL;
+
+               foreach_block_succ(start_block, edge) {
+                       startsucc = get_edge_src_irn(edge);
+                       if (startsucc != start_block)
+                               break;
+               }
+               assert(startsucc != NULL);
+
+               /* if the last block was the start block and we are not inside the */
+               /* start successor, emit a jump to the start successor             */
+               if (startsucc != block) {
+                       char buf[SNPRINTF_BUF_LEN];
+                       ir_snprintf(buf, sizeof(buf), BLOCK_PREFIX("%d"),
+                                   get_irn_node_nr(startsucc));
+                       ir_fprintf(F, "\tjmp %s\n", buf);
+               }
        }
 
        if (need_label) {
                char cmd_buf[SNPRINTF_BUF_LEN];
                int i, arity;
+               int align = 1;
+               ir_exec_freq *execfreqs = env->cg->birg->execfreqs;
+
+               /* align the loop headers */
+               if (! is_first_loop_block(block, last_block, env)) {
+                       /* align blocks where the previous block has no fallthrough */
+                       arity = get_irn_arity(block);
 
-               ia32_emit_align_label(emit_env->out, emit_env->isa->opt_arch);
+                       for (i = 0; i < arity; ++i) {
+                               ir_node *predblock = get_Block_cfgpred_block(block, i);
+
+                               if (predblock == last_block) {
+                                       align = 0;
+                                       break;
+                               }
+                       }
+               }
+
+               if (align)
+                       ia32_emit_align_label(env->out, env->isa->opt_arch);
 
                ir_snprintf(cmd_buf, sizeof(cmd_buf), BLOCK_PREFIX("%d:"),
                            get_irn_node_nr(block));
@@ -2114,14 +2238,20 @@ static void ia32_gen_block(ir_node *block, void *env) {
                fprintf(F, "/* preds:");
 
                arity = get_irn_arity(block);
-               for(i = 0; i < arity; ++i) {
+               for (i = 0; i < arity; ++i) {
                        ir_node *predblock = get_Block_cfgpred_block(block, i);
                        fprintf(F, " %ld", get_irn_node_nr(predblock));
                }
+
+               if (execfreqs != NULL) {
+                       fprintf(F, " freq: %f", get_block_execfreq(execfreqs, block));
+               }
+
                fprintf(F, " */\n");
        }
 
        /* emit the contents of the block */
+       ia32_emit_dbg(block, env);
        sched_foreach(block, irn) {
                ia32_emit_node(irn, env);
        }
@@ -2134,10 +2264,11 @@ static void ia32_emit_func_prolog(FILE *F, ir_graph *irg, ia32_emit_env_t *emit_
        entity     *irg_ent  = get_irg_entity(irg);
        const char *irg_name = get_entity_ld_name(irg_ent);
        cpu_support cpu      = emit_env->isa->opt_arch;
+       const be_irg_t *birg = emit_env->cg->birg;
 
        fprintf(F, "\n");
        ia32_switch_section(F, SECTION_TEXT);
-       be_dbg_method(emit_env->cg->birg->main_env->db_handle, irg_ent);
+       be_dbg_method_begin(birg->main_env->db_handle, irg_ent, be_abi_get_stack_layout(birg->abi));
        ia32_emit_align_func(F, cpu);
        if (get_entity_visibility(irg_ent) == visibility_external_visible) {
                fprintf(F, ".globl %s\n", irg_name);
@@ -2149,10 +2280,12 @@ static void ia32_emit_func_prolog(FILE *F, ir_graph *irg, ia32_emit_env_t *emit_
 /**
  * Emits code for function end
  */
-static void ia32_emit_func_epilog(FILE *F, ir_graph *irg) {
+static void ia32_emit_func_epilog(FILE *F, ir_graph *irg, ia32_emit_env_t *emit_env) {
        const char *irg_name = get_entity_ld_name(get_irg_entity(irg));
+       const be_irg_t *birg = emit_env->cg->birg;
 
        ia32_dump_function_size(F, irg_name);
+       be_dbg_method_end(birg->main_env->db_handle);
        fprintf(F, "\n");
 }
 
@@ -2177,6 +2310,7 @@ static void ia32_gen_labels(ir_node *block, void *env) {
 void ia32_gen_routine(FILE *F, ir_graph *irg, const ia32_code_gen_t *cg) {
        ia32_emit_env_t emit_env;
        ir_node *block;
+       ir_node *last_block = NULL;
 
        emit_env.out      = F;
        emit_env.arch_env = cg->arch_env;
@@ -2204,15 +2338,18 @@ void ia32_gen_routine(FILE *F, ir_graph *irg, const ia32_code_gen_t *cg) {
 
                        /* set here the link. the emitter expects to find the next block here */
                        set_irn_link(block, next_bl);
-                       ia32_gen_block(block, &emit_env);
+                       ia32_gen_block(block, last_block, &emit_env);
+                       last_block = block;
                }
        }
        else {
                /* "normal" block schedule: Note the get_next_block() returns the NUMBER of the block
                   in the block schedule. As this number should NEVER be equal the next block,
                   we does not need a clear block link here. */
-               irg_walk_blkwise_graph(irg, NULL, ia32_gen_block, &emit_env);
+
+               //irg_walk_blkwise_graph(irg, NULL, ia32_gen_block, &emit_env);
+               // TODO
        }
 
-       ia32_emit_func_epilog(F, irg);
+       ia32_emit_func_epilog(F, irg, &emit_env);
 }