now hopefully computing the number of available registers correctly
diff --git a/ir/be/bespillremat.c b/ir/be/bespillremat.c
index 1fa28c9..4077767 100644
--- a/ir/be/bespillremat.c
+++ b/ir/be/bespillremat.c
 #include <lpp/lpp_net.h>
 #include <lpp/lpp_cplex.h>
 //#include <lc_pset.h>
-#include <libcore/lc_bitset.h>
+//#include <libcore/lc_bitset.h>
 
 #include "be_t.h"
 #include "belive_t.h"
 #include "besched_t.h"
 #include "beirgmod.h"
 #include "bearch.h"
+#include "beabi.h"
 #include "benode_t.h"
 #include "beutil.h"
 #include "bespillremat.h"
@@ -602,8 +603,8 @@ get_irn_n_nonignore_args(const spill_ilp_t * si, const ir_node * irn)
        int n;
        int ret = 0;
 
-//     if(is_Proj(irn))
-//             irn = get_Proj_pred(irn);
+       if(is_Proj(irn))
+               irn = get_Proj_pred(irn);
 
        for(n=get_irn_arity(irn)-1; n>=0; --n) {
                const ir_node  *arg = get_irn_n(irn, n);
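Note: re-enabling the Proj skip means the operand count is taken from the
node a Proj projects from, since the Proj itself has only that single
operand. Hypothetical illustration (names invented):

    /* for  ld = Load(ptr, mem);  res = Proj(ld, pn_Load_res);
     * get_irn_n_nonignore_args(si, res) now counts Load's operands
     * (ptr, mem) instead of the Proj's lone operand ld. */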
@@ -1235,7 +1236,7 @@ walker_remat_insertor(ir_node * bb, void * data)
                be_lv_foreach(si->lv, bb, be_lv_state_end, i) {
                        value = be_lv_get_irn(si->lv, bb, i);
 
-                       if (be_is_live_end(si->lv, bb, value) && has_reg_class(si, value)) {
+                       if (has_reg_class(si, value)) {
                                pset_insert_ptr(live_out, value);
                        }
                }
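Note: the dropped be_is_live_end() test was redundant, since
be_lv_foreach() with be_lv_state_end already yields only the values that
are live at the end of bb; has_reg_class() is the only filter still needed.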
@@ -1269,31 +1270,36 @@ walker_remat_insertor(ir_node * bb, void * data)
                be_lv_foreach(si->lv, bb, be_lv_state_in, i) {
                        value = be_lv_get_irn(si->lv, bb, i);
 
-                       if (has_reg_class(si, value)) {
+                       if(has_reg_class(si, value)) {
+                               pset_insert_ptr(live_in, value);
+                       }
+               }
+               sched_foreach(bb, value) {
+                       if(!is_Phi(value)) break;
+
+                       if(has_reg_class(si, value)) {
                                pset_insert_ptr(live_in, value);
                        }
                }
 
                /* add remat2s at beginning of block */
                pset_foreach(live_in, value) {
-                       if ((be_is_live_in(si->lv, bb, value) || (is_Phi(value) && get_nodes_block(value)==bb)) && has_reg_class(si, value)) {
-                               remat_info_t   *remat_info,
-                                                          query;
-                               remat_t        *remat;
+                       remat_info_t   *remat_info,
+                                                  query;
+                       remat_t        *remat;
 
-                               query.irn = value;
-                               query.remats = NULL;
-                               query.remats_by_operand = NULL;
-                               remat_info = set_find(si->remat_info, &query, sizeof(query), HASH_PTR(value));
+                       query.irn = value;
+                       query.remats = NULL;
+                       query.remats_by_operand = NULL;
+                       remat_info = set_find(si->remat_info, &query, sizeof(query), HASH_PTR(value));
 
-                               if(remat_info && remat_info->remats_by_operand) {
-                                       pset_foreach(remat_info->remats_by_operand, remat) {
-                                               DBG((si->dbg, LEVEL_4, "\t  considering remat2 %+F at beginning of block %+F\n", remat->op, bb));
+                       if(remat_info && remat_info->remats_by_operand) {
+                               pset_foreach(remat_info->remats_by_operand, remat) {
+                                       DBG((si->dbg, LEVEL_4, "\t  considering remat2 %+F at beginning of block %+F\n", remat->op, bb));
 
-                                               /* put the remat here if all its args are available */
-                                               insert_remat_after(si, remat, bb, live_in);
+                                       /* put the remat here if all its args are available */
+                                       insert_remat_after(si, remat, bb, live_in);
 
-                                       }
                                }
                        }
                }
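Note: live_in is now seeded from two sources, the liveness information
(be_lv_state_in) and the block's own Phis. The sched_foreach loop relies
on the invariant that Phis are scheduled first in a block, so it may stop
at the first non-Phi node. This makes the old per-value guard
(be_is_live_in || local Phi) in the remat2 loop redundant, which is why
its body is un-nested by one level in the same hunk.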
@@ -1301,6 +1307,30 @@ walker_remat_insertor(ir_node * bb, void * data)
        }
 }
 
+/** Returns 1 iff @p irn is used by a Phi in the successor block of the
+ *  merge block @p bb, i.e. iff it could be covered by a memory copy. */
+int
+can_be_copied(const ir_node * bb, const ir_node * irn)
+{
+       const ir_edge_t *edge;
+       const ir_node   *next_bb;
+       int              pos;
+       const ir_node   *phi;
+
+       assert(is_merge_edge(bb));
+
+       edge    = get_block_succ_first(bb);
+       next_bb = edge->src;
+       pos     = edge->pos;
+
+       sched_foreach(next_bb, phi) {
+               const ir_node  *phi_arg;
+
+               if(!is_Phi(phi)) break;
+
+               phi_arg = get_irn_n(phi, pos);
+
+               if(phi_arg == irn) {
+                       return 1;
+               }
+       }
+       return 0;
+}
+
 /**
  * Preparation of blocks' ends for Luke Blockwalker(tm)(R)
  */
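Note on can_be_copied() above: it assumes bb ends in a merge edge, i.e.
bb has a single successor block and edge->pos is bb's index among that
successor's predecessors, so get_irn_n(phi, pos) is exactly the Phi
operand flowing in from bb. The function thus answers whether irn is
consumed by some Phi along that edge and could therefore be realised as
a (memory) copy instead of staying in a register.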
@@ -1394,7 +1424,7 @@ luke_endwalker(ir_node * bb, void * data)
                        ilp_cst_t   rel_cst;
 
                        ir_snprintf(buf, sizeof(buf), "reload_%N_%N", bb, irn);
-                       reload = lpp_add_var_default(si->lpp, buf, lpp_binary, opt_cost_reload*execution_frequency(si, bb), 1.0);
+                       reload = lpp_add_var_default(si->lpp, buf, lpp_binary, opt_cost_reload*execution_frequency(si, bb), can_be_copied(bb, irn));
                        set_insert_keyval(spill_bb->reloads, irn, INT_TO_PTR(reload));
 
                        /* reload <= mem_out */
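Note: the last argument of lpp_add_var_default() is presumably the
variable's start value handed to the solver. Instead of unconditionally
seeding every reload with 1.0, the start solution now guesses a reload
only where can_be_copied() says the value feeds a Phi along the merge
edge.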
@@ -1758,7 +1788,41 @@ luke_blockwalker(ir_node * bb, void * data)
                                lpp_set_factor_fast(si->lpp, cst, remat_op->attr.remat.ilp, -1.0);
                        }
                }
-               /* maybe we should also assure that reg_out >= live_range etc. */
+               ir_snprintf(buf, sizeof(buf), "reg_out2_%N_%N", bb, irn);
+               cst = lpp_add_cst_uniq(si->lpp, buf, lpp_greater, 0.0);
+
+               /* value may only die at bb end if it is used for a mem copy */
+               /* reg_out + \sum copy - reload - remat - live_range >= 0 */
+               lpp_set_factor_fast(si->lpp, cst, spill->reg_out, 1.0);
+               if(reload != ILP_UNDEF) lpp_set_factor_fast(si->lpp, cst, reload, -1.0);
+               lpp_set_factor_fast(si->lpp, cst, op->attr.live_range.ilp, -1.0);
+               foreach_pre_remat(si, bb, tmp) {
+                       op_t     *remat_op = get_irn_link(tmp);
+                       if(remat_op->attr.remat.remat->value == irn) {
+                               lpp_set_factor_fast(si->lpp, cst, remat_op->attr.remat.ilp, -1.0);
+                       }
+               }
+               if(is_merge_edge(bb)) {
+                       const ir_edge_t *edge = get_block_succ_first(bb);
+                       const ir_node   *next_bb = edge->src;
+                       int              pos = edge->pos;
+                       const ir_node   *phi;
+
+                       sched_foreach(next_bb, phi) {
+                               const ir_node  *phi_arg;
+
+                               if(!is_Phi(phi)) break;
+
+                               phi_arg = get_irn_n(phi, pos);
+
+                               if(phi_arg == irn) {
+                                       op_t      *phi_op = get_irn_link(phi);
+                                       ilp_var_t  copy = phi_op->attr.live_range.args.copies[pos];
+
+                                       lpp_set_factor_fast(si->lpp, cst, copy, 1.0);
+                               }
+                       }
+               }
        }
 
        if(opt_memcopies)
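Note on the reg_out2 constraint added above: per value irn it reads

    reg_out + \sum copy - reload - \sum remat - live_range >= 0

i.e. a reload, a pre-remat or a live range surviving the block each force
reg_out to 1, and only a copy into a Phi along the merge edge relaxes the
bound so the value may die at the block end. The inline Phi scan mirrors
can_be_copied(), but is kept separate because here the copy ILP variable
of each matching Phi operand is needed rather than a yes/no answer.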
@@ -1922,7 +1986,6 @@ luke_blockwalker(ir_node * bb, void * data)
                                }
                        }
 
-            // value_op->attr.live_range.ilp != ILP_UNDEF
                        if(pset_find_ptr(live, value) && cst != ILP_UNDEF) {
                                lpp_set_factor_fast(si->lpp, cst, value_op->attr.live_range.ilp, -n_remats);
                        }
@@ -2149,8 +2212,6 @@ skip_one_must_die:
 
                                                lpp_set_factor_fast(si->lpp, cst, memoperand, 1.0);
                                                lpp_set_factor_fast(si->lpp, cst, post_use, 1.0);
-//                                             if(arg_op->attr.live_range.ilp != ILP_UNDEF)
-//                                                     lpp_set_factor_fast(si->lpp, cst, arg_op->attr.live_range.ilp, 1.0);
                                        }
                                }
                        }
@@ -2159,12 +2220,7 @@ skip_one_must_die:
                        arg_op->attr.live_range.ilp = prev_lr;
                        arg_op->attr.live_range.op = irn;
 
-                       /*if(!pset_find_ptr(live, arg)) {
-                               pset_insert_ptr(live, arg);
-                               add_to_spill_bb(si, bb, arg);
-                       }*/
                        pset_insert_ptr(live, arg);
-
                }
 
                /* just to be sure */
@@ -2273,7 +2329,6 @@ skip_one_must_die:
 
 
                /* requirements for remats */
-               /* start new live ranges for values used by remats */
                foreach_pre_remat(si, irn, tmp) {
                        op_t        *remat_op = get_irn_link(tmp);
                        int          n;
@@ -2411,12 +2466,24 @@ skip_one_must_die:
 
        foreach_post_remat(bb, tmp) {
                int         n;
+               pset       *remat_args = pset_new_ptr(get_irn_arity(tmp));
+               op_t       *remat_op = get_irn_link(tmp);
+               ir_node    *remat_arg;
 
                for (n=get_irn_arity(tmp)-1; n>=0; --n) {
-                       ir_node    *remat_arg = get_irn_n(tmp, n);
+                       remat_arg = get_irn_n(tmp, n);
+                       if(has_reg_class(si, remat_arg)) {
+                               pset_insert_ptr(remat_args, remat_arg);
+                       }
+               }
+               assert(pset_count(remat_args) > 0 && "post remats should have at least one arg");
 
-                       if(!has_reg_class(si, remat_arg)) continue;
+               /* remat + \sum live_range(remat_arg) <= |args| */
+               ir_snprintf(buf, sizeof(buf), "one_must_die_%N", tmp);
+               cst = lpp_add_cst_uniq(si->lpp, buf, lpp_less, pset_count(remat_args));
+               lpp_set_factor_fast(si->lpp, cst, remat_op->attr.remat.ilp, 1.0);
 
+               pset_foreach(remat_args, remat_arg) {
                        /* if value is becoming live through use by remat2 */
                        if(!pset_find_ptr(live, remat_arg)) {
                                op_t       *remat_arg_op = get_irn_link(remat_arg);
@@ -2448,8 +2515,12 @@ skip_one_must_die:
                                                lpp_set_factor_fast(si->lpp, nomem, reload, 1.0);
                                        }
                                }
+                       } else {
+                               op_t       *remat_arg_op = get_irn_link(remat_arg);
+                               lpp_set_factor_fast(si->lpp, cst, remat_arg_op->attr.live_range.ilp, 1.0);
                        }
                }
+               del_pset(remat_args);
        }
 
        /* L\U is empty at bb start */
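Note on the one_must_die constraint added above: per post-remat it reads

    remat + \sum_{arg} live_range(arg) <= |args|

so taking the remat (remat = 1) forces at least one argument's live range
to end here; a remat2 that keeps all of its operands alive would only
raise register pressure. Hypothetical example with two arguments: the
constraint becomes remat + lr(a) + lr(b) <= 2, so remat = 1 implies
lr(a) + lr(b) <= 1 and at least one of a, b dies. Arguments that only
become live through the remat2 keep the existing reload/nomem handling;
already-live arguments contribute their live_range term to the sum.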
@@ -2567,7 +2638,6 @@ skip_one_must_die:
                        spill = set_find_spill(spill_bb->ilp, remat_arg);
                        assert(spill);
 
-                       /* remat <= reg_in_argument */
                        ir_snprintf(buf, sizeof(buf), "req_remat2_%N_%N_arg_%N", tmp, bb, remat_arg);
                        cst = lpp_add_cst(si->lpp, buf, lpp_less, 0.0);
                        lpp_set_factor_fast(si->lpp, cst, spill->reg_in, -1.0);
@@ -2580,8 +2650,6 @@ skip_one_must_die:
                const ir_node   *remat;
                int              n_remats = 0;
 
-               if(op->attr.live_range.ilp == ILP_UNDEF) continue;
-
                cst = ILP_UNDEF;
 
                foreach_post_remat(bb, remat) {
@@ -2594,9 +2662,9 @@ skip_one_must_die:
                                        const op_t   *remat_op = get_irn_link(remat);
 
                                        if(cst == ILP_UNDEF) {
-                                               /* \sum post_remat <= 1 + #post_remats * next(lr) */
-                                               ir_snprintf(buf, sizeof(buf), "remat2_%N_%N_arg_%N", remat, bb, irn);
-                                               cst = lpp_add_cst(si->lpp, buf, lpp_less, 1.0);
+                                               /* sum remat2s <= 1 + n_remats*live_range */
+                                               ir_snprintf(buf, sizeof(buf), "dying_lr_%N_%N", irn, bb);
+                                               cst = lpp_add_cst_uniq(si->lpp, buf, lpp_less, 1.0);
                                        }
                                        lpp_set_factor_fast(si->lpp, cst, remat_op->attr.remat.ilp, 1.0);
                                        ++n_remats;
@@ -2604,8 +2672,8 @@ skip_one_must_die:
                                }
                        }
                }
-               if(n_remats) {
-                       lpp_set_factor_fast(si->lpp, cst, op->attr.live_range.ilp, n_remats);
+               if(cst != ILP_UNDEF && op->attr.live_range.ilp != ILP_UNDEF) {
+                       lpp_set_factor_fast(si->lpp, cst, op->attr.live_range.ilp, -n_remats);
                }
        }
 
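Note on the dying_lr constraint above: it encodes

    \sum remat2 <= 1 + n_remats * live_range

If the value stays live past this point (live_range = 1) the bound is
slack; if its live range ends (live_range = 0) at most one remat2 may
still consume the dying value. The old code added the live_range term
with factor +n_remats, which tightened the bound instead of relaxing it;
moving the term to the right-hand side (factor -n_remats) is the fix.
The ILP_UNDEF guard moves from the loop head to the final factor so the
remat2 sum is still constrained for values without a live range variable.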
@@ -3547,7 +3615,6 @@ insert_reload(spill_ilp_t * si, const ir_node * value, ir_node * after)
 void perform_memory_operand(spill_ilp_t * si, memoperand_t * memoperand)
 {
        defs_t           *defs;
-       ir_node          *reload;
        ir_node          *value = get_irn_n(memoperand->irn, memoperand->pos);
        ir_node          *spill;
        const arch_env_t *arch_env = si->chordal_env->birg->main_env->arch_env;
@@ -3559,10 +3626,7 @@ void perform_memory_operand(spill_ilp_t * si, memoperand_t * memoperand)
        spill = defs->spills;
        assert(spill && "no spill placed before reload");
 
-       reload = be_reload(arch_env, si->cls, memoperand->irn, get_irn_mode(value), spill);
-
-       arch_perform_memory_operand(arch_env, memoperand->irn, reload, memoperand->pos);
-       sched_remove(reload);
+       arch_perform_memory_operand(arch_env, memoperand->irn, spill, memoperand->pos);
 }
 
 void insert_memoperands(spill_ilp_t * si)
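Note: instead of building a be_reload node, handing it to
arch_perform_memory_operand() and unscheduling it again right away, the
spill node itself is now passed as the memory operand. Presumably the
backend hook only needs the node that defines the spilled value in
memory, so the temporary reload was never more than a carrier for the
spill.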
@@ -3850,13 +3914,13 @@ walker_reload_placer(ir_node * bb, void * data) {
 static void
 walker_collect_used(ir_node * irn, void * data)
 {
-       lc_bitset_t   *used = data;
+       bitset_t   *used = data;
 
-       lc_bitset_set(used, get_irn_idx(irn));
+       bitset_set(used, get_irn_idx(irn));
 }
 
 struct kill_helper {
-       lc_bitset_t  *used;
	bitset_t     *used;
        spill_ilp_t  *si;
 };
 
@@ -3872,7 +3936,7 @@ walker_kill_unused(ir_node * bb, void * data)
                ir_node     *next = sched_next(irn);
                int          n;
 
-               if(!lc_bitset_is_set(kh->used, get_irn_idx(irn))) {
+               if(!bitset_is_set(kh->used, get_irn_idx(irn))) {
                        if(be_is_Spill(irn) || be_is_Reload(irn)) {
                                DBG((kh->si->dbg, LEVEL_1, "\t SUBOPTIMAL! %+F IS UNUSED (cost: %g)\n", irn, get_cost(kh->si, irn)*execution_frequency(kh->si, bb)));
 #if 0
@@ -3896,13 +3960,13 @@ kill_all_unused_values_in_schedule(spill_ilp_t * si)
 {
        struct kill_helper kh;
 
-       kh.used = lc_bitset_malloc(get_irg_last_idx(si->chordal_env->irg));
+       kh.used = bitset_malloc(get_irg_last_idx(si->chordal_env->irg));
        kh.si = si;
 
        irg_walk_graph(si->chordal_env->irg, walker_collect_used, NULL, kh.used);
        irg_block_walk_graph(si->chordal_env->irg, walker_kill_unused, NULL, &kh);
 
-       lc_bitset_free(kh.used);
+       bitset_free(kh.used);
 }
 
 void
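Note: the lc_bitset_* calls are replaced one-for-one by firm's own bitset
ADT (bitset_malloc / bitset_set / bitset_is_set / bitset_free), matching
the now-disabled libcore/lc_bitset.h include at the top of the file; the
collect-then-kill logic itself is unchanged.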
@@ -4055,18 +4119,22 @@ writeback_results(spill_ilp_t * si)
 static int
 get_n_regs(spill_ilp_t * si)
 {
-       int     arch_n_regs = arch_register_class_n_regs(si->cls);
-       int     free = 0;
-       int     i;
+       int       arch_n_regs = arch_register_class_n_regs(si->cls);
 
-       for(i=0; i<arch_n_regs; i++) {
-               if(!arch_register_type_is(&si->cls->regs[i], ignore)) {
-                       free++;
-               }
-       }
+       bitset_t *arch_regs = bitset_malloc(arch_n_regs);
+       bitset_t *abi_regs = bitset_malloc(arch_n_regs);
+
+       arch_put_non_ignore_regs(si->chordal_env->birg->main_env->arch_env, si->cls, arch_regs);
+       be_abi_put_ignore_regs(si->chordal_env->birg->abi, si->cls, abi_regs);
+
+       bitset_andnot(arch_regs, abi_regs);
+       arch_n_regs = bitset_popcnt(arch_regs);
+
+       bitset_free(arch_regs);
+       bitset_free(abi_regs);
 
-       DBG((si->dbg, LEVEL_1, "\tArchitecture has %d free registers in class %s\n", free, si->cls->name));
-       return free;
+       DBG((si->dbg, LEVEL_1, "\tArchitecture has %d free registers in class %s\n", arch_n_regs, si->cls->name));
+       return arch_n_regs;
 }
 
 static void
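This is the change the commit message refers to. The old loop honoured
only the per-register ignore flag of the class, so registers reserved by
the ABI (managed by beabi, hence the new include at the top) were still
counted as allocatable and the ILP was solved against an inflated
register budget. The new code computes

    free = popcnt(non_ignore_regs(cls) \ abi_ignore_regs(cls))

Hypothetical example: in an 8-register general-purpose class where the
stack pointer is class-level ignore (7 remain) and the ABI additionally
reserves a frame pointer, the old code reported 7 free registers, the
new code 6.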
@@ -4160,33 +4228,6 @@ verify_phiclasses(spill_ilp_t * si)
        irg_block_walk_graph(si->chordal_env->irg, luke_meminterferencechecker, NULL, si);
 }
 
-static void
-walker_spillslotassigner(ir_node * irn, void * data)
-{
-       void                   *cls;
-
-       if(!be_is_Spill(irn)) return;
-
-       /* set spill context to phi class if it has one ;) */
-       (void) cls;
-#if 0
-       // Matze: not needed anymore
-       cls = get_phi_class(irn);
-       if(cls)
-               be_set_Spill_context(irn, cls);
-       else
-               be_set_Spill_context(irn, irn);
-#endif
-}
-
-
-static void
-assign_spillslots(spill_ilp_t * si)
-{
-       DBG((si->dbg, LEVEL_2, "\t calling spill slot assigner\n"));
-       irg_walk_graph(si->chordal_env->irg, walker_spillslotassigner, NULL, si);
-}
-
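Note: walker_spillslotassigner had been a no-op for a while (its body was
already behind #if 0, see the "Matze: not needed anymore" remark), so the
walker, its driver assign_spillslots() and the lone call site in
be_spill_remat() below are removed together.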
 void
 be_spill_remat(const be_chordal_env_t * chordal_env)
 {
@@ -4344,7 +4385,6 @@ be_spill_remat(const be_chordal_env_t * chordal_env)
 
        if(opt_memcopies) {
                verify_phiclasses(&si);
-               assign_spillslots(&si);
        }
 
        irg_block_walk_graph(chordal_env->irg, walker_pressure_annotator, NULL, &si);