spill slots are handled by matze
[libfirm] / ir / be / bespillremat.c
index 9ecacf5..69c216d 100644 (file)
@@ -120,7 +120,7 @@ static const lc_opt_enum_mask_items_t remats_items[] = {
        { "none",      REMATS_NONE      },
        { "briggs",    REMATS_BRIGGS    },
        { "noinverse", REMATS_NOINVERSE },
-       { "ALL",       REMATS_ALL       },
+       { "all",       REMATS_ALL       },
        { NULL,        0 }
 };
 
@@ -139,7 +139,7 @@ static const lc_opt_table_entry_t options[] = {
        LC_OPT_ENT_BOOL     ("no_enlage_liveness",  "do not enlarge liveness of operands of remats",&opt_no_enlarge_liveness),
        LC_OPT_ENT_BOOL     ("remat_while_live",  "remat only values that can be used by real ops", &opt_remat_while_live),
 
-       LC_OPT_ENT_ENUM_MASK("dump", "dump ifg before, after or after each cloud",                  &dump_var),
+       LC_OPT_ENT_ENUM_MASK("dump", "dump problem, mps or solution",                               &dump_var),
        LC_OPT_ENT_BOOL     ("log",  "activate the lpp log",                                        &opt_log),
        LC_OPT_ENT_INT      ("timeout",  "ILP solver timeout",                                      &opt_timeout),
 
@@ -158,7 +158,8 @@ void be_spill_remat_register_options(lc_opt_entry_t *grp)
 #endif
 
 
-//#define EXECFREQ_LOOPDEPH /* compute execution frequency from loop depth only */
+//#define EXECFREQ_LOOPDEPH   /* compute execution frequency from loop depth only */
+//#define SCHEDULE_PHIM   /* insert phim nodes into schedule */
 
 #define  SOLVE
 //#define  SOLVE_LOCAL
@@ -166,7 +167,7 @@ void be_spill_remat_register_options(lc_opt_entry_t *grp)
 #define LPP_SOLVER "cplex"
 
 
-#define MAX_PATHS      16
+#define MAX_PATHS      INT_MAX
 #define ILP_UNDEF              -1
 
 typedef struct _spill_ilp_t {
@@ -478,7 +479,7 @@ get_remat_from_op(spill_ilp_t * si, const ir_node * dest_value, const ir_node *
                const ir_node *proj = NULL;
 
                if(is_Proj(dest_value)) {
-                       op = get_irn_n(op, 0);
+                       op = get_Proj_pred(op);
                        proj = dest_value;
                }
 
@@ -599,10 +600,15 @@ static int
 get_irn_n_nonignore_args(const spill_ilp_t * si, const ir_node * irn)
 {
        int n;
-       unsigned int ret = 0;
+       int ret = 0;
+
+//     if(is_Proj(irn))
+//             irn = get_Proj_pred(irn);
 
        for(n=get_irn_arity(irn)-1; n>=0; --n) {
-               if(has_reg_class(si, irn)) ++ret;
+               const ir_node  *arg = get_irn_n(irn, n);
+
+               if(has_reg_class(si, arg)) ++ret;
        }
 
        return ret;
@@ -884,7 +890,7 @@ insert_remat_after(spill_ilp_t * si, const remat_t * remat, ir_node * pos, const
                                                *proj_copy;
                op_t            *op;
 
-               DBG((si->dbg, LEVEL_3, "\t  >inserting remat %+F\n", remat->op));
+               DBG((si->dbg, LEVEL_3, "\t  >inserting remat2 %+F\n", remat->op));
 
                copy = insert_copy_after(si, remat->op, pos);
 
@@ -964,9 +970,29 @@ get_block_n_succs(const ir_node *block) {
        return edge ? 2 : 1;
 }
 
+static int
+is_start_block(const ir_node * bb)
+{
+       return get_irg_start_block(get_irn_irg(bb)) == bb;
+}
+
+static int
+is_before_frame(const ir_node * bb, const ir_node * irn)
+{
+       const ir_node  *frame  = get_irg_frame(get_irn_irg(bb));
+
+       if(is_start_block(bb) && sched_get_time_step(frame) >= sched_get_time_step(irn))
+               return 1;
+       else
+               return 0;
+}
+
 static int
 is_merge_edge(const ir_node * bb)
 {
+       if(is_start_block(bb))
+               return 0;
+
        if(opt_goodwin)
                return get_block_n_succs(bb) == 1;
        else
@@ -976,6 +1002,9 @@ is_merge_edge(const ir_node * bb)
 static int
 is_diverge_edge(const ir_node * bb)
 {
+       if(is_start_block(bb))
+               return 0;
+
        if(opt_goodwin)
                return get_Block_n_cfgpreds(bb) == 1;
        else
@@ -1198,33 +1227,55 @@ walker_remat_insertor(ir_node * bb, void * data)
                irn = next;
        }
 
-       be_lv_foreach(si->lv, bb, be_lv_state_end | be_lv_state_in, i) {
-               ir_node        *value = be_lv_get_irn(si->lv, bb, i);
+       /* add remats at end if successor has multiple predecessors */
+       if(is_merge_edge(bb)) {
+               pset     *live_out = pset_new_ptr_default();
+               ir_node  *value;
+
+               be_lv_foreach(si->lv, bb, be_lv_state_end, i) {
+                       value = be_lv_get_irn(si->lv, bb, i);
 
-               /* add remats at end if successor has multiple predecessors */
-               if(is_merge_edge(bb)) {
-                       /* add remats at end of block */
                        if (be_is_live_end(si->lv, bb, value) && has_reg_class(si, value)) {
-                               remat_info_t   *remat_info,
-                                                          query;
-                               remat_t        *remat;
+                               pset_insert_ptr(live_out, value);
+                       }
+               }
 
-                               query.irn = value;
-                               query.remats = NULL;
-                               query.remats_by_operand = NULL;
-                               remat_info = set_find(si->remat_info, &query, sizeof(query), HASH_PTR(value));
+               /* add remats at end of block */
+               pset_foreach(live_out, value) {
+                       remat_info_t   *remat_info,
+                                                  query;
+                       remat_t        *remat;
 
-                               if(remat_info && remat_info->remats) {
-                                       pset_foreach(remat_info->remats, remat) {
-                                               DBG((si->dbg, LEVEL_4, "\t  considering remat %+F at end of block %+F\n", remat->op, bb));
+                       query.irn = value;
+                       query.remats = NULL;
+                       query.remats_by_operand = NULL;
+                       remat_info = set_find(si->remat_info, &query, sizeof(query), HASH_PTR(value));
 
-                                               insert_remat_before(si, remat, bb, NULL);
-                                       }
+                       if(remat_info && remat_info->remats) {
+                               pset_foreach(remat_info->remats, remat) {
+                                       DBG((si->dbg, LEVEL_4, "\t  considering remat %+F at end of block %+F\n", remat->op, bb));
+
+                                       insert_remat_before(si, remat, bb, live_out);
                                }
                        }
                }
-               if(is_diverge_edge(bb)) {
-                       /* add remat2s at beginning of block */
+               del_pset(live_out);
+       }
+
+       if(is_diverge_edge(bb)) {
+               pset     *live_in = pset_new_ptr_default();
+               ir_node  *value;
+
+               be_lv_foreach(si->lv, bb, be_lv_state_in, i) {
+                       value = be_lv_get_irn(si->lv, bb, i);
+
+                       if (has_reg_class(si, value)) {
+                               pset_insert_ptr(live_in, value);
+                       }
+               }
+
+               /* add remat2s at beginning of block */
+               pset_foreach(live_in, value) {
                        if ((be_is_live_in(si->lv, bb, value) || (is_Phi(value) && get_nodes_block(value)==bb)) && has_reg_class(si, value)) {
                                remat_info_t   *remat_info,
                                                           query;
@@ -1235,17 +1286,18 @@ walker_remat_insertor(ir_node * bb, void * data)
                                query.remats_by_operand = NULL;
                                remat_info = set_find(si->remat_info, &query, sizeof(query), HASH_PTR(value));
 
-                               if(remat_info && remat_info->remats) {
-                                       pset_foreach(remat_info->remats, remat) {
-                                               DBG((si->dbg, LEVEL_4, "\t  considering remat %+F at beginning of block %+F\n", remat->op, bb));
+                               if(remat_info && remat_info->remats_by_operand) {
+                                       pset_foreach(remat_info->remats_by_operand, remat) {
+                                               DBG((si->dbg, LEVEL_4, "\t  considering remat2 %+F at beginning of block %+F\n", remat->op, bb));
 
                                                /* put the remat here if all its args are available */
-                                               insert_remat_after(si, remat, bb, NULL);
+                                               insert_remat_after(si, remat, bb, live_in);
 
                                        }
                                }
                        }
                }
+               del_pset(live_in);
        }
 }
 
@@ -2024,7 +2076,7 @@ skip_one_must_die:
 
                        /* new live range for each used value */
                        ir_snprintf(buf, sizeof(buf), "lr_%N_%N", arg, irn);
-                       prev_lr = lpp_add_var_default(si->lpp, buf, lpp_binary, 0.0, 0.0);
+                       prev_lr = lpp_add_var_default(si->lpp, buf, lpp_binary, 0.0, is_before_frame(bb, irn)?1.0:0.0);
 
                        /* the epilog stuff - including post_use, check_post, check_post_remat */
                        ir_snprintf(buf, sizeof(buf), "post_use_%N_%N", arg, irn);
@@ -2157,7 +2209,7 @@ skip_one_must_die:
                        assert(spill);
 
                        ir_snprintf(buf, sizeof(buf), "reload_%N_%N", arg, irn);
-                       op->attr.live_range.args.reloads[i] = lpp_add_var_default(si->lpp, buf, lpp_binary, opt_cost_reload*execution_frequency(si, bb), 1.0);
+                       op->attr.live_range.args.reloads[i] = lpp_add_var_default(si->lpp, buf, lpp_binary, opt_cost_reload*execution_frequency(si, bb), is_before_frame(bb, irn)?0.0:1.0);
 
                        /* reload <= mem_out */
                        ir_snprintf(buf, sizeof(buf), "req_reload_%N_%N", arg, irn);
@@ -2306,7 +2358,7 @@ skip_one_must_die:
                        spill->mem_in   = lpp_add_var_default(si->lpp, buf, lpp_binary, 0.0, default_spilled);
                        lpp_set_factor_fast(si->lpp, cst, spill->mem_in, -1.0);
 
-                       if(is_Phi(spill->irn) && get_nodes_block(spill->irn) == bb) {
+                       if(opt_memcopies && is_Phi(spill->irn) && get_nodes_block(spill->irn) == bb) {
                                int   n;
                                op_t *op = get_irn_link(spill->irn);
 
@@ -2357,6 +2409,48 @@ skip_one_must_die:
                }
        }
 
+       foreach_post_remat(bb, tmp) {
+               int         n;
+
+               for (n=get_irn_arity(tmp)-1; n>=0; --n) {
+                       ir_node    *remat_arg = get_irn_n(tmp, n);
+
+                       if(!has_reg_class(si, remat_arg)) continue;
+
+                       /* if value is becoming live through use by remat2 */
+                       if(!pset_find_ptr(live, remat_arg)) {
+                               op_t       *remat_arg_op = get_irn_link(remat_arg);
+                               ilp_cst_t   nomem;
+
+                               DBG((si->dbg, LEVEL_3, "  value %+F becoming live through use by remat2 at bb start %+F\n", remat_arg, tmp));
+
+                               pset_insert_ptr(live, remat_arg);
+                               spill = add_to_spill_bb(si, bb, remat_arg);
+                               remat_arg_op->attr.live_range.ilp = ILP_UNDEF;
+
+                               /* we need reg_in and mem_in for this value; they will be referenced later */
+                               ir_snprintf(buf, sizeof(buf), "reg_in_%N_%N", remat_arg, bb);
+                               spill->reg_in = lpp_add_var_default(si->lpp, buf, lpp_binary, 0.0, 0.0);
+                               ir_snprintf(buf, sizeof(buf), "mem_in_%N_%N", remat_arg, bb);
+                               spill->mem_in = lpp_add_var_default(si->lpp, buf, lpp_binary, 0.0, 1.0);
+
+
+                               /* optimization: all memory stuff should be 0, for we do not want to insert reloads for remats */
+                               ir_snprintf(buf, sizeof(buf), "nomem_%N_%N", remat_arg, bb);
+                               nomem = lpp_add_cst_uniq(si->lpp, buf, lpp_equal, 0.0);
+
+                               lpp_set_factor_fast(si->lpp, nomem, spill->spill, 1.0);
+                               if(spill_bb->reloads) {
+                                       keyval_t *keyval = set_find_keyval(spill_bb->reloads, remat_arg);
+
+                                       if(keyval) {
+                                               ilp_var_t reload = PTR_TO_INT(keyval->val);
+                                               lpp_set_factor_fast(si->lpp, nomem, reload, 1.0);
+                                       }
+                               }
+                       }
+               }
+       }
 
        /* L\U is empty at bb start */
        /* arg is live throughout epilog if it is reg_in into this block */
@@ -2364,6 +2458,8 @@ skip_one_must_die:
        /* check the register pressure at the beginning of the block
         * including remats
         */
+       /* reg_in entspricht post_use */
+
        ir_snprintf(buf, sizeof(buf), "check_start_%N", bb);
        cst = lpp_add_cst_uniq(si->lpp, buf, lpp_less, si->n_regs);
 
@@ -2385,36 +2481,8 @@ skip_one_must_die:
                lpp_set_factor_fast(si->lpp, nospill, spill->mem_in, 1.0);
                lpp_set_factor_fast(si->lpp, nospill, spill->spill, 1.0);
 
-       }
-       foreach_post_remat(bb, irn) {
-               op_t     *remat_op = get_irn_link(irn);
-
-               DBG((si->dbg, LEVEL_4, "\t  next post remat: %+F\n", irn));
-               assert(remat_op->is_remat && !remat_op->attr.remat.pre);
-
-               lpp_set_factor_fast(si->lpp, cst, remat_op->attr.remat.ilp, 1.0);
-       }
-
-       /* forall post remats add requirements */
-       foreach_post_remat(bb, tmp) {
-               int         n;
-
-               for (n=get_irn_arity(tmp)-1; n>=0; --n) {
-                       ir_node    *remat_arg = get_irn_n(tmp, n);
-                       op_t       *remat_op = get_irn_link(tmp);
-
-                       if(!has_reg_class(si, remat_arg)) continue;
-
-                       spill = set_find_spill(spill_bb->ilp, remat_arg);
-                       assert(spill);
-
-                       /* remat <= reg_in_argument */
-                       ir_snprintf(buf, sizeof(buf), "req_remat2_%N_%N_arg_%N", tmp, bb, remat_arg);
-                       cst = lpp_add_cst(si->lpp, buf, lpp_less, 0.0);
-                       lpp_set_factor_fast(si->lpp, cst, spill->reg_in, -1.0);
-                       lpp_set_factor_fast(si->lpp, cst, remat_op->attr.remat.ilp, 1.0);
-               }
-       }
+       } /* post_remats are NOT included in register pressure check because
+          they do not increase regpressure */
 
        /* mem_in/reg_in for live_in values, especially phis and their arguments */
        pset_foreach(live, irn) {
@@ -2454,7 +2522,9 @@ skip_one_must_die:
                                        assert(spill_p);
 
                                        lpp_set_factor_fast(si->lpp, mem_in, spill_p->mem_out, -1.0);
-                                       lpp_set_factor_fast(si->lpp, mem_in, op->attr.live_range.args.copies[n], -1.0);
+                                       if(opt_memcopies)
+                                               lpp_set_factor_fast(si->lpp, mem_in, op->attr.live_range.args.copies[n], -1.0);
+
                                        lpp_set_factor_fast(si->lpp, reg_in, spill_p->reg_out, -1.0);
                                }
                        }
@@ -2485,23 +2555,80 @@ skip_one_must_die:
                }
        }
 
+       foreach_post_remat(bb, tmp) {
+               int         n;
+
+               for (n=get_irn_arity(tmp)-1; n>=0; --n) {
+                       ir_node    *remat_arg = get_irn_n(tmp, n);
+                       op_t       *remat_op = get_irn_link(tmp);
+
+                       if(!has_reg_class(si, remat_arg)) continue;
+
+                       spill = set_find_spill(spill_bb->ilp, remat_arg);
+                       assert(spill);
+
+                       /* remat <= reg_in_argument */
+                       ir_snprintf(buf, sizeof(buf), "req_remat2_%N_%N_arg_%N", tmp, bb, remat_arg);
+                       cst = lpp_add_cst(si->lpp, buf, lpp_less, 0.0);
+                       lpp_set_factor_fast(si->lpp, cst, spill->reg_in, -1.0);
+                       lpp_set_factor_fast(si->lpp, cst, remat_op->attr.remat.ilp, 1.0);
+               }
+       }
+
+       pset_foreach(live, irn) {
+               const op_t      *op = get_irn_link(irn);
+               const ir_node   *remat;
+               int              n_remats = 0;
+
+               if(op->attr.live_range.ilp == ILP_UNDEF) continue;
+
+               cst = ILP_UNDEF;
+
+               foreach_post_remat(bb, remat) {
+                       int   n;
+
+                       for (n=get_irn_arity(remat)-1; n>=0; --n) {
+                               const ir_node  *arg = get_irn_n(remat, n);
+
+                               if(arg == irn) {
+                                       const op_t   *remat_op = get_irn_link(remat);
+
+                                       if(cst == ILP_UNDEF) {
+                                               /* \sum post_remat <= 1 + #post_remats * next(lr) */
+                                               ir_snprintf(buf, sizeof(buf), "remat2_%N_%N_arg_%N", remat, bb, irn);
+                                               cst = lpp_add_cst(si->lpp, buf, lpp_less, 1.0);
+                                       }
+                                       lpp_set_factor_fast(si->lpp, cst, remat_op->attr.remat.ilp, 1.0);
+                                       ++n_remats;
+                                       break;
+                               }
+                       }
+               }
+               if(n_remats) {
+                       lpp_set_factor_fast(si->lpp, cst, op->attr.live_range.ilp, n_remats);
+               }
+       }
+
        /* first live ranges from reg_ins */
        pset_foreach(live, irn) {
                op_t      *op = get_irn_link(irn);
 
-               spill = set_find_spill(spill_bb->ilp, irn);
-               assert(spill && spill->irn == irn);
+               if(op->attr.live_range.ilp != ILP_UNDEF) {
 
-               ir_snprintf(buf, sizeof(buf), "first_lr_%N_%N", irn, bb);
-               cst = lpp_add_cst_uniq(si->lpp, buf, lpp_less, 0.0);
-               lpp_set_factor_fast(si->lpp, cst, op->attr.live_range.ilp, 1.0);
-               lpp_set_factor_fast(si->lpp, cst, spill->reg_in, -1.0);
+                       spill = set_find_spill(spill_bb->ilp, irn);
+                       assert(spill && spill->irn == irn);
 
-               foreach_post_remat(bb, tmp) {
-                       op_t     *remat_op = get_irn_link(tmp);
+                       ir_snprintf(buf, sizeof(buf), "first_lr_%N_%N", irn, bb);
+                       cst = lpp_add_cst_uniq(si->lpp, buf, lpp_less, 0.0);
+                       lpp_set_factor_fast(si->lpp, cst, op->attr.live_range.ilp, 1.0);
+                       lpp_set_factor_fast(si->lpp, cst, spill->reg_in, -1.0);
 
-                       if(remat_op->attr.remat.remat->value == irn) {
-                               lpp_set_factor_fast(si->lpp, cst, remat_op->attr.remat.ilp, -1.0);
+                       foreach_post_remat(bb, tmp) {
+                               op_t     *remat_op = get_irn_link(tmp);
+
+                               if(remat_op->attr.remat.remat->value == irn) {
+                                       lpp_set_factor_fast(si->lpp, cst, remat_op->attr.remat.ilp, -1.0);
+                               }
                        }
                }
        }
@@ -2759,7 +2886,7 @@ find_copy_path(spill_ilp_t * si, const ir_node * irn, const ir_node * target, il
                                paths += find_copy_path(si, arg, target, any_interfere, copies, visited);
                                pset_remove(copies, INT_TO_PTR(copy), copy);
 
-                /*if(paths > MAX_PATHS) {
+                if(paths > MAX_PATHS) {
                     if(pset_count(copies) == 0) {
                         ilp_cst_t  cst;
                         char       buf[256];
@@ -2779,7 +2906,7 @@ find_copy_path(spill_ilp_t * si, const ir_node * irn, const ir_node * target, il
                     }
                 } else if(pset_count(copies) == 0) {
                                        paths = 0;
-                               }*/
+                               }
                        }
                }
 
@@ -2814,7 +2941,7 @@ find_copy_path(spill_ilp_t * si, const ir_node * irn, const ir_node * target, il
                        paths += find_copy_path(si, user, target, any_interfere, copies, visited);
                        pset_remove(copies, INT_TO_PTR(copy), copy);
 
-            /*if(paths > MAX_PATHS) {
+            if(paths > MAX_PATHS) {
                 if(pset_count(copies) == 0) {
                     ilp_cst_t  cst;
                     char       buf[256];
@@ -2833,7 +2960,7 @@ find_copy_path(spill_ilp_t * si, const ir_node * irn, const ir_node * target, il
                 }
             } else if(pset_count(copies) == 0) {
                                paths = 0;
-                       }*/
+                       }
                }
        }
 
@@ -3309,7 +3436,6 @@ get_spills_for_value(spill_ilp_t * si, const ir_node * value)
 /**
  * @param before   The node after which the spill will be placed in the schedule
  */
-/* TODO set context properly */
 static ir_node *
 insert_spill(spill_ilp_t * si, ir_node * irn, const ir_node * value, ir_node * before)
 {
@@ -3360,7 +3486,9 @@ insert_mem_phi(spill_ilp_t * si, ir_node * phi)
        set_irn_link(mem_phi, defs->spills);
        defs->spills = mem_phi;
 
+#ifdef SCHEDULE_PHIM
        sched_add_after(phi, mem_phi);
+#endif
 
        if(opt_keep_alive & KEEPALIVE_SPILLS)
                pset_insert_ptr(si->spills, mem_phi);
@@ -4032,33 +4160,6 @@ verify_phiclasses(spill_ilp_t * si)
        irg_block_walk_graph(si->chordal_env->irg, luke_meminterferencechecker, NULL, si);
 }
 
-static void
-walker_spillslotassigner(ir_node * irn, void * data)
-{
-       void                   *cls;
-
-       if(!be_is_Spill(irn)) return;
-
-       /* set spill context to phi class if it has one ;) */
-       (void) cls;
-#if 0
-       // Matze: not needed anymore
-       cls = get_phi_class(irn);
-       if(cls)
-               be_set_Spill_context(irn, cls);
-       else
-               be_set_Spill_context(irn, irn);
-#endif
-}
-
-
-static void
-assign_spillslots(spill_ilp_t * si)
-{
-       DBG((si->dbg, LEVEL_2, "\t calling spill slot assigner\n"));
-       irg_walk_graph(si->chordal_env->irg, walker_spillslotassigner, NULL, si);
-}
-
 void
 be_spill_remat(const be_chordal_env_t * chordal_env)
 {
@@ -4216,7 +4317,6 @@ be_spill_remat(const be_chordal_env_t * chordal_env)
 
        if(opt_memcopies) {
                verify_phiclasses(&si);
-               assign_spillslots(&si);
        }
 
        irg_block_walk_graph(chordal_env->irg, walker_pressure_annotator, NULL, &si);