- Improved addressmode optimisation for conv nodes
[libfirm] / ir / be / ia32 / ia32_emitter.c
index 397709e..4cd44f1 100644 (file)
@@ -20,6 +20,7 @@
 #include "irargs_t.h"
 #include "irprog_t.h"
 #include "iredges_t.h"
+#include "execfreq.h"
 
 #include "../besched_t.h"
 #include "../benode_t.h"
@@ -127,20 +128,6 @@ static void ia32_dump_function_size(FILE *F, const char *name)
  * |_|                                       |_|
  *************************************************************/
 
-static INLINE int be_is_unknown_reg(const arch_register_t *reg) {
-       return \
-               REGS_ARE_EQUAL(reg, &ia32_gp_regs[REG_GP_UKNWN])   || \
-               REGS_ARE_EQUAL(reg, &ia32_xmm_regs[REG_XMM_UKNWN]) || \
-               REGS_ARE_EQUAL(reg, &ia32_vfp_regs[REG_VFP_UKNWN]);
-}
-
-/**
- * returns true if a node has x87 registers
- */
-static INLINE int has_x87_register(const ir_node *n) {
-       return is_irn_machine_user(n, 0);
-}
-
 /* We always pass the ir_node which is a pointer. */
 static int ia32_get_arg_type(const lc_arg_occ_t *occ) {
        return lc_arg_type_ptr;
@@ -164,13 +151,29 @@ static const arch_register_t *get_in_reg(const ir_node *irn, int pos) {
 
        assert(reg && "no in register found");
 
-       /* in case of unknown: just return a register */
-       if (REGS_ARE_EQUAL(reg, &ia32_gp_regs[REG_GP_UKNWN]))
-               reg = &ia32_gp_regs[REG_EAX];
-       else if (REGS_ARE_EQUAL(reg, &ia32_xmm_regs[REG_XMM_UKNWN]))
-               reg = &ia32_xmm_regs[REG_XMM0];
-       else if (REGS_ARE_EQUAL(reg, &ia32_vfp_regs[REG_VFP_UKNWN]))
-               reg = &ia32_vfp_regs[REG_VF0];
+       /* in case of a joker register: just return a valid register */
+       if (arch_register_type_is(reg, joker)) {
+               arch_register_req_t       req;
+               const arch_register_req_t *p_req;
+
+               /* ask for the requirements */
+               p_req = arch_get_register_req(arch_env, &req, irn, pos);
+
+               if (arch_register_req_is(p_req, limited)) {
+                       /* in case of limited requirements: get the first allowed register */
+
+                       bitset_t *bs = bitset_alloca(arch_register_class_n_regs(p_req->cls));
+                       int      idx;
+
+                       p_req->limited(p_req->limited_env, bs);
+                       idx = bitset_next_set(bs, 0);
+                       reg = arch_register_for_index(p_req->cls, idx);
+               }
+               else {
+                       /* otherwise get first register in class */
+                       reg = arch_register_for_index(p_req->cls, 0);
+               }
+       }
 
        return reg;
 }
@@ -557,19 +560,25 @@ const char *ia32_emit_unop(const ir_node *n, ia32_emit_env_t *env) {
 
        switch(get_ia32_op_type(n)) {
                case ia32_Normal:
-                       if (is_ia32_ImmConst(n) || is_ia32_ImmSymConst(n)) {
+                       if (is_ia32_ImmConst(n)) {
                                lc_esnprintf(ia32_get_arg_env(), buf, SNPRINTF_BUF_LEN, "%C", n);
                        }
+                       else if (is_ia32_ImmSymConst(n)) {
+                               lc_esnprintf(ia32_get_arg_env(), buf, SNPRINTF_BUF_LEN, "OFFSET FLAT:%C", n);
+                       }
                        else {
                                if (is_ia32_MulS(n) || is_ia32_Mulh(n)) {
                                        /* MulS and Mulh implicitly multiply by EAX */
                                        lc_esnprintf(ia32_get_arg_env(), buf, SNPRINTF_BUF_LEN, "%4S", n);
-                               }
-                               else
+                               } else if(is_ia32_Push(n)) {
+                                       lc_esnprintf(ia32_get_arg_env(), buf, SNPRINTF_BUF_LEN, "%3S", n);
+                               } else {
                                        lc_esnprintf(ia32_get_arg_env(), buf, SNPRINTF_BUF_LEN, "%1D", n);
+                               }
                        }
                        break;
                case ia32_AddrModeD:
+                       assert(!is_ia32_Push(n));
                        snprintf(buf, SNPRINTF_BUF_LEN, "%s", ia32_emit_am(n, env));
                        break;
                case ia32_AddrModeS:
@@ -577,7 +586,7 @@ const char *ia32_emit_unop(const ir_node *n, ia32_emit_env_t *env) {
                                Mulh is emitted via emit_unop
                                imul [MEM]  means EDX:EAX <- EAX * [MEM]
                        */
-                       assert((is_ia32_Mulh(n) || is_ia32_MulS(n)) && "Only MulS and Mulh can have AM source as unop");
+                       assert((is_ia32_Mulh(n) || is_ia32_MulS(n) || is_ia32_Push(n)) && "Only MulS and Mulh can have AM source as unop");
                        lc_esnprintf(ia32_get_arg_env(), buf, SNPRINTF_BUF_LEN, "%s", ia32_emit_am(n, env));
                        break;
                default:
@@ -611,7 +620,7 @@ const char *ia32_emit_am(const ir_node *n, ia32_emit_env_t *env) {
        /* obstack_free with NULL results in an uninitialized obstack */
        obstack_init(obst);
 
-       p = pointer_size(mode, has_x87_register(n) || is_ia32_GetST0(n) || is_ia32_SetST0(n));
+       p = pointer_size(mode, ia32_has_x87_register(n) || is_ia32_GetST0(n) || is_ia32_SetST0(n));
        if (p)
                obstack_printf(obst, "%s ", p);
 
@@ -646,17 +655,16 @@ const char *ia32_emit_am(const ir_node *n, ia32_emit_env_t *env) {
        }
 
        if (am_flav & ia32_O) {
-               s = get_ia32_am_offs(n);
+               int offs = get_ia32_am_offs_int(n);
 
-               if (s) {
+               if (offs != 0) {
                        /* omit explicit + if there was no base or index */
                        if (! had_output) {
-                               obstack_printf(obst, "[");
-                               if (s[0] == '+')
-                                       s++;
+                               obstack_printf(obst, "[%d", offs);
+                       } else {
+                               obstack_printf(obst, "%+d", offs);
                        }
 
-                       obstack_printf(obst, s);
                        had_output = 1;
                }
        }
@@ -678,7 +686,7 @@ const char *ia32_emit_adr(const ir_node *irn, ia32_emit_env_t *env)
        static char buf[SNPRINTF_BUF_LEN];
        ir_mode    *mode = get_ia32_ls_mode(irn);
        const char *adr  = get_ia32_cnst(irn);
-       const char *pref = pointer_size(mode, has_x87_register(irn));
+       const char *pref = pointer_size(mode, ia32_has_x87_register(irn));
 
        snprintf(buf, SNPRINTF_BUF_LEN, "%s %s", pref ? pref : "", adr);
        return buf;
@@ -1585,6 +1593,7 @@ static void emit_ia32_Conv_I2I(const ir_node *irn, ia32_emit_env_t *emit_env) {
        char               *conv_cmd = NULL;
        ir_mode            *src_mode = get_ia32_src_mode(irn);
        ir_mode            *tgt_mode = get_ia32_tgt_mode(irn);
+       int                signed_mode;
        int n, m;
        char cmd_buf[SNPRINTF_BUF_LEN], cmnt_buf[SNPRINTF_BUF_LEN];
        const arch_register_t *in_reg, *out_reg;
@@ -1592,33 +1601,34 @@ static void emit_ia32_Conv_I2I(const ir_node *irn, ia32_emit_env_t *emit_env) {
        n = get_mode_size_bits(src_mode);
        m = get_mode_size_bits(tgt_mode);
 
-       if (mode_is_signed(n < m ? src_mode : tgt_mode)) {
+       assert(n == 8 || n == 16 || n == 32);
+       assert(m == 8 || m == 16 || m == 32);
+       assert(n != m);
+
+       signed_mode = mode_is_signed(n < m ? src_mode : tgt_mode);
+       if(signed_mode) {
                move_cmd = "movsx";
-               if (n == 8 || m == 8)
-                       conv_cmd = "cbw";
-               else if (n == 16 || m == 16)
-                       conv_cmd = "cwde";
-               else {
-                       printf("%d -> %d unsupported\n", n, m);
-                       assert(0 && "unsupported Conv_I2I");
-               }
        }
 
-        switch(get_ia32_op_type(irn)) {
+       switch(get_ia32_op_type(irn)) {
                case ia32_Normal:
                        in_reg  = get_in_reg(irn, 2);
                        out_reg = get_out_reg(irn, 0);
 
                        if (REGS_ARE_EQUAL(in_reg, &ia32_gp_regs[REG_EAX]) &&
                                REGS_ARE_EQUAL(out_reg, in_reg)                &&
-                               mode_is_signed(n < m ? src_mode : tgt_mode))
+                               signed_mode)
                        {
+                               if (n == 8 || m == 8)
+                                       conv_cmd = "cbw";
+                               else if (n == 16 || m == 16)
+                                       conv_cmd = "cwde";
+
                                /* argument and result are both in EAX and */
                                /* signedness is ok: -> use converts       */
                                lc_esnprintf(env, cmd_buf, SNPRINTF_BUF_LEN, "%s", conv_cmd);
                        }
-                       else if (REGS_ARE_EQUAL(out_reg, in_reg) &&
-                               ! mode_is_signed(n < m ? src_mode : tgt_mode))
+                       else if (REGS_ARE_EQUAL(out_reg, in_reg) &&     ! signed_mode)
                        {
                                /* argument and result are in the same register */
                                /* and signedness is ok: -> use and with mask   */
@@ -1728,7 +1738,7 @@ static void Copy_emitter(const ir_node *irn, ir_node *op, ia32_emit_env_t *emit_
        char cmd_buf[SNPRINTF_BUF_LEN], cmnt_buf[SNPRINTF_BUF_LEN];
 
        if (REGS_ARE_EQUAL(arch_get_irn_register(aenv, irn), arch_get_irn_register(aenv, op)) ||
-               be_is_unknown_reg(arch_get_irn_register(aenv, op)))
+               arch_register_type_is(arch_get_irn_register(aenv, op), virtual))
                return;
 
        if (mode_is_float(get_irn_mode(irn)))
@@ -1809,7 +1819,7 @@ static void emit_ia32_Const(const ir_node *n, ia32_emit_env_t *env) {
                lc_esnprintf(arg_env, cmd_buf, 256, "mov %1D, OFFSET FLAT:%C ", n, n);
                lc_esnprintf(arg_env, cmnt_buf, 256, "/* Move address of SymConst into register */");
        } else {
-               assert(mode == get_tarval_mode(tv));
+               assert(mode == get_tarval_mode(tv) || (mode_is_reference(get_tarval_mode(tv)) && mode == mode_Iu));
                /* beware: in some rare cases mode is mode_b which has no tarval_null() */
                if (tv == get_tarval_b_false() || tv == get_tarval_null(mode)) {
                        const char *instr = "xor";
@@ -2100,38 +2110,125 @@ static void ia32_emit_align_label(FILE *F, cpu_support cpu) {
                default:
                        align = 4;
        }
-       if(cpu == arch_athlon) {
-               maximum_skip = 3;
-       } else {
-               maximum_skip = (1 << align) - 1;
-       }
+       maximum_skip = (1 << align) - 1;
        ia32_emit_alignment(F, align, maximum_skip);
 }
 
+static int is_first_loop_block(ir_node *block, ir_node *prev_block, ia32_emit_env_t *env) { /* heuristic: returns 1 if block executes notably more often than prev_block, 0 otherwise */
+       ir_exec_freq *execfreqs = env->cg->birg->execfreqs; /* execution frequency data; checked against NULL below */
+       double block_freq, prev_freq;
+       static const double DELTA = .0001; /* frequencies below this threshold are not compared */
+       cpu_support cpu = env->isa->opt_arch; /* CPU setting read from env->isa->opt_arch */
+       /* without frequency data, or for arch_i386 / arch_i486, the answer is always 0 */
+       if(execfreqs == NULL)
+               return 0;
+       if(cpu == arch_i386 || cpu == arch_i486)
+               return 0;
+       /* NOTE(review): prev_block is not NULL-checked; ia32_gen_routine starts with last_block = NULL -- confirm no caller reaches this with NULL */
+       block_freq = get_block_execfreq(execfreqs, block);
+       prev_freq = get_block_execfreq(execfreqs, prev_block);
+       /* give up on tiny frequencies; this also guarantees prev_freq >= DELTA for the division below */
+       if(block_freq < DELTA || prev_freq < DELTA)
+               return 0;
+       /* from here on block_freq holds the ratio block / prev_block */
+       block_freq /= prev_freq;
+       /* arch_athlon, arch_athlon_64 and arch_k6 require a ratio above 3; all remaining CPUs a ratio above 2 */
+       switch (cpu) {
+               case arch_athlon:
+               case arch_athlon_64:
+               case arch_k6:
+                       return block_freq > 3;
+               default:
+                       break; /* fall out of the switch to the generic threshold */
+       }
+       /* generic threshold for every CPU not handled in the switch */
+       return block_freq > 2;
+}
+
 /**
  * Walks over the nodes in a block connected by scheduling edges
  * and emits code for each node.
  */
-static void ia32_gen_block(ir_node *block, void *env) {
-       ia32_emit_env_t *emit_env = env;
+static void ia32_gen_block(ir_node *block, ir_node *last_block, ia32_emit_env_t *env) {
+       ir_graph      *irg         = get_irn_irg(block);
+       ir_node       *start_block = get_irg_start_block(irg);
+       int           need_label   = 1;
+       FILE          *F           = env->out;
        const ir_node *irn;
-       int need_label = block != get_irg_start_block(get_irn_irg(block));
-       FILE *F = emit_env->out;
+       int           i;
 
-       if (! is_Block(block))
-               return;
+       assert(is_Block(block));
+
+       if (block == start_block)
+               need_label = 0;
+
+       if (need_label && get_irn_arity(block) == 1) {
+               ir_node *pred_block = get_Block_cfgpred_block(block, 0);
+
+               if (pred_block == last_block && get_irn_n_edges_kind(pred_block, EDGE_KIND_BLOCK) <= 2)
+                       need_label = 0;
+       }
+
+       /* special case: if one of our cfg preds is a switch-jmp we need a label, */
+       /*               otherwise there might be jump table entries jumping to   */
+       /*               non-existent (omitted) labels                            */
+       for (i = get_Block_n_cfgpreds(block) - 1; i >= 0; --i) {
+               ir_node *pred = get_Block_cfgpred(block, i);
+
+               if (is_Proj(pred)) {
+                       assert(get_irn_mode(pred) == mode_X);
+                       if (is_ia32_SwitchJmp(get_Proj_pred(pred))) {
+                               need_label = 1;
+                               break;
+                       }
+               }
+       }
 
-       if (need_label && (emit_env->cg->opt & IA32_OPT_EXTBB)) {
-               /* if the extended block scheduler is used, only leader blocks need
-                  labels. */
-               need_label = (block == get_extbb_leader(get_nodes_extbb(block)));
+       /* special case because the start block contains no jump instruction */
+       if (last_block == start_block) {
+               const ir_edge_t *edge;
+               ir_node *startsucc = NULL;
+
+               foreach_block_succ(start_block, edge) {
+                       startsucc = get_edge_src_irn(edge);
+                       if (startsucc != start_block)
+                               break;
+               }
+               assert(startsucc != NULL);
+
+               /* if the last block was the start block and we are not inside the */
+               /* start successor, emit a jump to the start successor             */
+               if (startsucc != block) {
+                       char buf[SNPRINTF_BUF_LEN];
+                       ir_snprintf(buf, sizeof(buf), BLOCK_PREFIX("%d"),
+                                   get_irn_node_nr(startsucc));
+                       ir_fprintf(F, "\tjmp %s\n", buf);
+               }
        }
 
        if (need_label) {
                char cmd_buf[SNPRINTF_BUF_LEN];
                int i, arity;
+               int align = 1;
+               ir_exec_freq *execfreqs = env->cg->birg->execfreqs;
 
-               ia32_emit_align_label(emit_env->out, emit_env->isa->opt_arch);
+               /* align the loop headers */
+               if (! is_first_loop_block(block, last_block, env)) {
+                       /* align blocks where the previous block has no fallthrough */
+                       arity = get_irn_arity(block);
+
+                       for (i = 0; i < arity; ++i) {
+                               ir_node *predblock = get_Block_cfgpred_block(block, i);
+
+                               if (predblock == last_block) {
+                                       align = 0;
+                                       break;
+                               }
+                       }
+               }
+
+               if (align)
+                       ia32_emit_align_label(env->out, env->isa->opt_arch);
 
                ir_snprintf(cmd_buf, sizeof(cmd_buf), BLOCK_PREFIX("%d:"),
                            get_irn_node_nr(block));
@@ -2141,10 +2238,15 @@ static void ia32_gen_block(ir_node *block, void *env) {
                fprintf(F, "/* preds:");
 
                arity = get_irn_arity(block);
-               for(i = 0; i < arity; ++i) {
+               for (i = 0; i < arity; ++i) {
                        ir_node *predblock = get_Block_cfgpred_block(block, i);
                        fprintf(F, " %ld", get_irn_node_nr(predblock));
                }
+
+               if (execfreqs != NULL) {
+                       fprintf(F, " freq: %f", get_block_execfreq(execfreqs, block));
+               }
+
                fprintf(F, " */\n");
        }
 
@@ -2208,6 +2310,7 @@ static void ia32_gen_labels(ir_node *block, void *env) {
 void ia32_gen_routine(FILE *F, ir_graph *irg, const ia32_code_gen_t *cg) {
        ia32_emit_env_t emit_env;
        ir_node *block;
+       ir_node *last_block = NULL;
 
        emit_env.out      = F;
        emit_env.arch_env = cg->arch_env;
@@ -2235,14 +2338,17 @@ void ia32_gen_routine(FILE *F, ir_graph *irg, const ia32_code_gen_t *cg) {
 
                        /* set here the link. the emitter expects to find the next block here */
                        set_irn_link(block, next_bl);
-                       ia32_gen_block(block, &emit_env);
+                       ia32_gen_block(block, last_block, &emit_env);
+                       last_block = block;
                }
        }
        else {
                /* "normal" block schedule: Note the get_next_block() returns the NUMBER of the block
                   in the block schedule. As this number should NEVER be equal the next block,
                   we does not need a clear block link here. */
-               irg_walk_blkwise_graph(irg, NULL, ia32_gen_block, &emit_env);
+
+               //irg_walk_blkwise_graph(irg, NULL, ia32_gen_block, &emit_env);
+               // TODO
        }
 
        ia32_emit_func_epilog(F, irg, &emit_env);