fixed optimize_conv_conv(): when the result is 8bit, we must change the opcode AND...
[libfirm] / ir / be / bespillbelady2.c
index f6da9de..aac8cde 100644 (file)
@@ -53,6 +53,7 @@
 #include "ircons_t.h"
 #include "irprintf.h"
 #include "execfreq.h"
+#include "dfs_t.h"
 #include "xmalloc.h"
 
 #include "beutil.h"
 #include "beirg_t.h"
 #include "bemodule.h"
 
+#include <libcore/lc_opts.h>
+#include <libcore/lc_opts_enum.h>
+#include <libcore/lc_timing.h>
+
 #define DBG_SPILL     1
 #define DBG_WSETS     2
 #define DBG_FIX       4
 #define DBG_WORKSET 128
 #define DBG_GLOBAL  256
 
-#define DEAD     UINT_MAX
-#define LIVE_END (DEAD-1)
+#define ALREADY_SPILLED_FACTOR 2
+
+#define DEAD       UINT_MAX
+#define LIVE_END   (DEAD-1)
+#define REMAT_DIST (DEAD-2)
+
+static int already_spilled_factor = 2;
+static int remat_live_range_ext   = 1;
+static int global_pass_enabled    = 1;
+
+static const lc_opt_table_entry_t options[] = {
+       LC_OPT_ENT_INT           ("asf",    "already spilled factor",                             &already_spilled_factor),
+       LC_OPT_ENT_BOOL          ("remat",  "rematerializable ops get infinite long live ranges", &remat_live_range_ext),
+       LC_OPT_ENT_BOOL          ("global", "enable/disable the global pass",                     &global_pass_enabled),
+       LC_OPT_LAST
+};
 
 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
 
@@ -103,6 +122,7 @@ typedef struct _workset_t {
 typedef struct _belady_env_t {
        struct obstack ob;
        ir_graph *irg;
+       const dfs_t *dfs;
        const arch_env_t *arch;
        const arch_register_class_t *cls;
        be_lv_t *lv;
@@ -113,9 +133,10 @@ typedef struct _belady_env_t {
        int n_regs;                     /**< number of regs in this reg-class */
        workset_t *ws;          /**< the main workset used while processing a block. ob-allocated */
        ir_node *instr;         /**< current instruction */
-       unsigned instr_nr;      /**< current instruction number (relative to block start) */
+       int instr_nr;           /**< current instruction number (relative to block start) */
 
        spill_env_t *senv;      /**< see bespill.h */
+       bitset_t *spilled;  /**< bitset to keep all the irns which have already been spilled. */
 } belady_env_t;
 
 
@@ -306,6 +327,7 @@ typedef struct _next_use_t {
                                                                 transport in values for the global
                                                                 pass. */
        int step;                  /**< The time step of the use. */
+       ir_node *irn;
        struct _next_use_t *next;  /**< The next use int this block
                                                                 or NULL. */
 } next_use_t;
@@ -322,9 +344,11 @@ static void build_next_uses(block_info_t *bi)
 {
        ir_node *irn;
 
+       sched_renumber(bi->bl);
+
        phase_init(&bi->next_uses, "next uses", bi->bel->irg, PHASE_DEFAULT_GROWTH, next_use_init, NULL);
        sched_foreach_reverse(bi->bl, irn) {
-               int i, step = sched_get_time_step(irn);
+               int i;
 
                if (is_Phi(irn))
                        break;
@@ -334,13 +358,15 @@ static void build_next_uses(block_info_t *bi)
                        next_use_t *curr = phase_get_irn_data(&bi->next_uses, op);
                        next_use_t *use  = phase_alloc(&bi->next_uses, sizeof(use[0]));
 
-                       assert(step >= 0);
                        use->is_first_use = 1;
-                       use->step         = step;
+                       use->step         = sched_get_time_step(irn);
                        use->next         = curr;
+                       use->irn          = irn;
 
-                       if (curr)
+                       if (curr) {
                                curr->is_first_use = 0;
+                               assert(curr->step >= use->step);
+                       }
 
                        phase_set_irn_data(&bi->next_uses, op, use);
                }
@@ -360,8 +386,8 @@ static INLINE void advance_current_use(block_info_t *bi, const ir_node *irn)
 static INLINE unsigned get_curr_distance(block_info_t *bi, const ir_node *irn, int is_usage)
 {
        belady_env_t *env = bi->bel;
-       next_use_t *use   = get_current_use(bi, irn);
        int curr_step     = sched_get_time_step(env->instr);
+       next_use_t *use   = get_current_use(bi, irn);
        int flags         = arch_irn_get_flags(env->arch, irn);
 
        assert(!(flags & arch_irn_flags_ignore));
@@ -374,8 +400,18 @@ static INLINE unsigned get_curr_distance(block_info_t *bi, const ir_node *irn, i
                use = use->next;
 
        if (use) {
+               unsigned res  = use->step - curr_step;
+
                assert(use->step >= curr_step);
-               return use->step - curr_step;
+
+               if (res != 0) {
+                       if (remat_live_range_ext && be_is_rematerializable(env->senv, irn, use->irn))
+                               res = REMAT_DIST;
+                       else if (bitset_contains_irn(env->spilled, irn))
+                               res *= already_spilled_factor;
+               }
+
+               return res;
        }
 
        return be_is_live_end(env->lv, bi->bl, irn) ? LIVE_END : DEAD;
@@ -452,6 +488,7 @@ static void displace(block_info_t *bi, workset_t *new_vals, int is_usage) {
                                }
 
                                if (insert_reload) {
+                                       bitset_add_irn(env->spilled, val);
                                        DBG((dbg, DBG_SPILL, "Reload %+F before %+F\n", val, env->instr));
                                        be_add_reload(env->senv, val, env->instr, env->cls, 1);
                                }
@@ -471,8 +508,6 @@ static void displace(block_info_t *bi, workset_t *new_vals, int is_usage) {
 
        /* Only make more free room if we do not have enough */
        if (len > max_allowed) {
-               // int curr_step = sched_get_time_step(env->instr);
-
                DBG((dbg, DBG_DECIDE, "    disposing %d values\n", len - max_allowed));
 
                /* get current next-use distance */
@@ -626,6 +661,25 @@ static int block_freq_gt(const void *a, const void *b)
        return (diff > 0) - (diff < 0);
 }
 
+static int block_order(const void *a, const void *b)
+{
+       const ir_node * const *p = a;
+       const ir_node * const *q = b;
+       block_info_t *pi = get_block_info(*p);
+       block_info_t *qi = get_block_info(*q);
+       double diff;
+
+       if (pi->exec_freq > 1.0 && qi->exec_freq > 1.0) {
+               const dfs_t *dfs = pi->bel->dfs;
+               int pp = dfs_get_post_num(dfs, pi->bl);
+               int pq = dfs_get_post_num(dfs, qi->bl);
+               return pq - pp;
+       }
+
+       diff = qi->exec_freq - pi->exec_freq;
+       return (diff > 0) - (diff < 0);
+}
+
 enum {
        irn_act_none = 0,
        irn_act_reload,
@@ -1064,7 +1118,7 @@ static void global_assign(belady_env_t *env)
         * sort the blocks according to execution frequency.
         * That's not necessary for belady() but for the global pass later on.
         */
-       qsort(env->blocks, env->n_blocks, sizeof(env->blocks[0]), block_freq_gt);
+       qsort(env->blocks, env->n_blocks, sizeof(env->blocks[0]), block_order);
 
        memset(&ges, 0, sizeof(ges));
        obstack_init(&ges.obst);
@@ -1131,13 +1185,15 @@ void be_spill_belady(be_irg_t *birg, const arch_register_class_t *cls)
        /* init belady env */
        obstack_init(&env.ob);
        env.irg       = irg;
-       env.arch      = birg->main_env->arch_env;
+       env.arch      = be_get_birg_arch_env(birg);
        env.cls       = cls;
        env.lv        = be_get_birg_liveness(birg);
+       env.dfs       = env.lv->dfs;
        env.n_regs    = n_regs;
        env.ws        = new_workset(&env, &env.ob);
        env.senv      = be_new_spill_env(birg);
        env.ef        = be_get_birg_exec_freq(birg);
+       env.spilled   = bitset_irg_obstack_alloc(&env.ob, irg);
        env.n_blocks  = 0;
 
        irg_block_walk_graph(irg, NULL, collect_blocks, &env);
@@ -1161,10 +1217,15 @@ void be_spill_belady(be_irg_t *birg, const arch_register_class_t *cls)
 
 void be_init_spillbelady2(void)
 {
+       lc_opt_entry_t *be_grp    = lc_opt_get_grp(firm_opt_get_root(), "be");
+       lc_opt_entry_t *spill_grp = lc_opt_get_grp(be_grp, "spill");
+       lc_opt_entry_t *bel2_grp  = lc_opt_get_grp(spill_grp, "belady2");
+
        static be_spiller_t belady_spiller = {
                be_spill_belady
        };
 
+       lc_opt_add_table(bel2_grp, options);
        be_register_spiller("belady2", &belady_spiller);
        FIRM_DBG_REGISTER(dbg, "firm.be.spill.belady2");
 }