sparc: implement float->unsigned conversions
[libfirm] / ir / be / beprefalloc.c
index 164262d..338b7a4 100644 (file)
@@ -22,7 +22,6 @@
  * @brief       Preference Guided Register Assignment
  * @author      Matthias Braun
  * @date        14.2.2009
- * @version     $Id$
  *
  * The idea is to allocate registers in 2 passes:
  * 1. A first pass to determine "preferred" registers for live-ranges. This
@@ -42,6 +41,7 @@
 #include <float.h>
 #include <stdbool.h>
 #include <math.h>
+#include "lpp.h"
 
 #include "error.h"
 #include "execfreq.h"
@@ -53,6 +53,8 @@
 #include "irnode_t.h"
 #include "irprintf.h"
 #include "irdump.h"
+#include "irtools.h"
+#include "util.h"
 #include "obst.h"
 #include "raw_bitset.h"
 #include "unionfind.h"
@@ -270,7 +272,7 @@ static void give_penalties_for_limits(const ir_nodeset_t *live_nodes,
 static void check_defs(const ir_nodeset_t *live_nodes, float weight,
                        ir_node *node)
 {
-       const arch_register_req_t *req = arch_get_register_req_out(node);
+       const arch_register_req_t *req = arch_get_irn_register_req(node);
        if (req->type & arch_register_req_type_limited) {
                const unsigned *limited = req->limited;
                float           penalty = weight * DEF_FACTOR;
@@ -351,7 +353,7 @@ static void analyze_block(ir_node *block, void *data)
                info = get_allocation_info(node);
                for (i = 0; i < arity; ++i) {
                        ir_node                   *op  = get_irn_n(node, i);
-                       const arch_register_req_t *req = arch_get_register_req_out(op);
+                       const arch_register_req_t *req = arch_get_irn_register_req(op);
                        if (req->cls != cls)
                                continue;
 
@@ -373,7 +375,7 @@ static void analyze_block(ir_node *block, void *data)
                                if (!arch_irn_consider_in_reg_alloc(cls, op))
                                        continue;
 
-                               req = arch_get_register_req(node, i);
+                               req = arch_get_irn_register_req_in(node, i);
                                if (!(req->type & arch_register_req_type_limited))
                                        continue;
 
@@ -389,7 +391,7 @@ static void analyze_block(ir_node *block, void *data)
 
 static void congruence_def(ir_nodeset_t *live_nodes, const ir_node *node)
 {
-       const arch_register_req_t *req = arch_get_register_req_out(node);
+       const arch_register_req_t *req = arch_get_irn_register_req(node);
 
        /* should be same constraint? */
        if (req->type & arch_register_req_type_should_be_same) {
@@ -649,7 +651,7 @@ static bool try_optimistic_split(ir_node *to_split, ir_node *before,
         * (so we don't split away the values produced because of
         *  must_be_different constraints) */
        original_insn = skip_Proj(info->original_value);
-       if (arch_irn_get_flags(original_insn) & arch_irn_flags_dont_spill)
+       if (arch_get_irn_flags(original_insn) & arch_irn_flags_dont_spill)
                return false;
 
        from_reg        = arch_get_irn_register(to_split);
@@ -720,7 +722,7 @@ static bool try_optimistic_split(ir_node *to_split, ir_node *before,
                return false;
 
        reg  = arch_register_for_index(cls, r);
-       copy = be_new_Copy(cls, block, to_split);
+       copy = be_new_Copy(block, to_split);
        mark_as_copy_of(copy, to_split);
        /* hacky, but correct here */
        if (assignments[arch_register_get_index(from_reg)] == to_split)
@@ -758,7 +760,7 @@ static void assign_reg(const ir_node *block, ir_node *node,
                return;
        }
 
-       req = arch_get_register_req_out(node);
+       req = arch_get_irn_register_req(node);
        /* ignore reqs must be preassigned */
        assert (! (req->type & arch_register_req_type_ignore));
 
@@ -881,7 +883,7 @@ static void assign_reg(const ir_node *block, ir_node *node,
  *                     registers.
  */
 static void permute_values(ir_nodeset_t *live_nodes, ir_node *before,
-                             unsigned *permutation)
+                           unsigned *permutation)
 {
        unsigned  *n_used = ALLOCANZ(unsigned, n_regs);
        ir_node   *block;
@@ -921,7 +923,7 @@ static void permute_values(ir_nodeset_t *live_nodes, ir_node *before,
 
                /* create a copy */
                src  = assignments[old_r];
-               copy = be_new_Copy(cls, block, src);
+               copy = be_new_Copy(block, src);
                sched_add_before(before, copy);
                reg = arch_register_for_index(cls, r);
                DB((dbg, LEVEL_2, "Copy %+F (from %+F, before %+F) -> %s\n",
@@ -1105,6 +1107,151 @@ static void determine_live_through_regs(unsigned *bitset, ir_node *node)
        }
 }
 
+/**
+ * Find a register permutation that fulfills the input constraints of @p node
+ * by solving an ILP, then apply it with permute_values().
+ *
+ * For every allowed move of the value in register l to register r a binary
+ * variable "l_to_r" is created. Each source register must get exactly one
+ * destination, each destination register may be used by at most one source.
+ *
+ * @param live_nodes         passed through to permute_values()
+ * @param node               node whose operand constraints are enforced; the
+ *                           permutation is placed before it
+ * @param forbidden_regs     bitset of registers that values in
+ *                           live_through_regs must not be moved to
+ * @param live_through_regs  bitset of registers treated as live-through
+ */
+static void solve_lpp(ir_nodeset_t *live_nodes, ir_node *node,
+                      unsigned *forbidden_regs, unsigned *live_through_regs)
+{
+       unsigned *forbidden_edges = rbitset_malloc(n_regs * n_regs);
+       int      *lpp_vars        = XMALLOCNZ(int, n_regs*n_regs);
+       int       arity           = get_irn_arity(node);
+       int       i;
+       unsigned  l;
+       unsigned  r;
+
+       lpp_t *lpp = lpp_new("prefalloc", lpp_minimize);
+       lpp_set_log(lpp, stdout);
+
+       /* mark some edges as forbidden: an operand with a limited requirement
+        * must not be moved from its current register to a register outside
+        * of the limited set */
+       for (i = 0; i < arity; ++i) {
+               ir_node                   *op = get_irn_n(node, i);
+               const arch_register_t     *reg;
+               const arch_register_req_t *req;
+               const unsigned            *limited;
+               unsigned                   current_reg;
+
+               if (!arch_irn_consider_in_reg_alloc(cls, op))
+                       continue;
+
+               req = arch_get_irn_register_req_in(node, i);
+               if (!(req->type & arch_register_req_type_limited))
+                       continue;
+
+               limited     = req->limited;
+               reg         = arch_get_irn_register(op);
+               current_reg = arch_register_get_index(reg);
+               for (r = 0; r < n_regs; ++r) {
+                       if (rbitset_is_set(limited, r))
+                               continue;
+
+                       rbitset_set(forbidden_edges, current_reg*n_regs + r);
+               }
+       }
+
+       /* add all combinations, except for not allowed ones */
+       for (l = 0; l < n_regs; ++l) {
+               if (!rbitset_is_set(normal_regs, l)) {
+                       /* registers outside of normal_regs only get the "stay in
+                        * place" variable; the buffer is big enough for two
+                        * 32-bit numbers so the name is never truncated */
+                       char name[32];
+                       snprintf(name, sizeof(name), "%u_to_%u", l, l);
+                       lpp_vars[l*n_regs+l] = lpp_add_var(lpp, name, lpp_binary, 1);
+                       continue;
+               }
+
+               for (r = 0; r < n_regs; ++r) {
+                       if (!rbitset_is_set(normal_regs, r))
+                               continue;
+                       if (rbitset_is_set(forbidden_edges, l*n_regs + r))
+                               continue;
+                       /* livethrough values may not use constrained output registers */
+                       if (rbitset_is_set(live_through_regs, l)
+                           && rbitset_is_set(forbidden_regs, r))
+                               continue;
+
+                       char name[32];
+                       snprintf(name, sizeof(name), "%u_to_%u", l, r);
+
+                       /* NOTE(review): keeping a value in place (l == r) is more
+                        * expensive than moving it although the objective is
+                        * minimized - confirm that this is intended */
+                       double costs = l==r ? 9 : 8;
+                       lpp_vars[l*n_regs+r]
+                               = lpp_add_var(lpp, name, lpp_binary, costs);
+                       /* 0 is used as the "no variable" marker in lpp_vars */
+                       assert(lpp_vars[l*n_regs+r] > 0);
+               }
+       }
+       /* add constraints; a constraint is only created once the first variable
+        * taking part in it has been found */
+       for (l = 0; l < n_regs; ++l) {
+               int constraint;
+               /* only 1 destination per register */
+               constraint = -1;
+               for (r = 0; r < n_regs; ++r) {
+                       int var = lpp_vars[l*n_regs+r];
+                       if (var == 0)
+                               continue;
+                       if (constraint < 0) {
+                               char name[64];
+                               snprintf(name, sizeof(name), "%u_to_dest", l);
+                               constraint = lpp_add_cst(lpp, name, lpp_equal, 1);
+                       }
+                       lpp_set_factor_fast(lpp, constraint, var, 1);
+               }
+               /* each destination used by at most 1 value */
+               constraint = -1;
+               for (r = 0; r < n_regs; ++r) {
+                       int var = lpp_vars[r*n_regs+l];
+                       if (var == 0)
+                               continue;
+                       if (constraint < 0) {
+                               char name[64];
+                               snprintf(name, sizeof(name), "one_to_%u", l);
+                               constraint = lpp_add_cst(lpp, name, lpp_less_equal, 1);
+                       }
+                       lpp_set_factor_fast(lpp, constraint, var, 1);
+               }
+       }
+
+       /* debug dump of the problem; skipped if the file cannot be opened.
+        * The stream is opened here, so it is closed here as well. */
+       {
+               FILE *dump = fopen("lppdump.txt", "w");
+               if (dump != NULL) {
+                       lpp_dump_plain(lpp, dump);
+                       fclose(dump);
+               }
+       }
+
+       /* solve lpp */
+       {
+               ir_graph     *irg     = get_irn_irg(node);
+               be_options_t *options = be_get_irg_options(irg);
+               unsigned     *assignment;
+               lpp_solve(lpp, options->ilp_server, options->ilp_solver);
+               if (!lpp_is_sol_valid(lpp))
+                       panic("ilp solution not valid!");
+
+               /* assignment[dest] = source register, as read from the solution */
+               assignment = ALLOCAN(unsigned, n_regs);
+               for (l = 0; l < n_regs; ++l) {
+                       unsigned dest_reg = (unsigned)-1;
+                       for (r = 0; r < n_regs; ++r) {
+                               int var = lpp_vars[l*n_regs+r];
+                               if (var == 0)
+                                       continue;
+                               /* the solution value is a double and may be
+                                * slightly off 0/1, so round instead of testing
+                                * for exact equality */
+                               double val = lpp_get_var_sol(lpp, var);
+                               if (val > 0.5) {
+                                       assert(dest_reg == (unsigned)-1);
+                                       dest_reg = r;
+                               }
+                       }
+                       /* a register without any allowed destination has no
+                        * variables at all; fail loudly instead of writing
+                        * outside of assignment[] when asserts are disabled */
+                       if (dest_reg == (unsigned)-1)
+                               panic("ilp solution lacks a destination register");
+                       assignment[dest_reg] = l;
+               }
+
+               fprintf(stderr, "Assignment: ");
+               for (l = 0; l < n_regs; ++l) {
+                       fprintf(stderr, "%u ", assignment[l]);
+               }
+               fprintf(stderr, "\n");
+               fflush(stdout);
+               permute_values(live_nodes, node, assignment);
+       }
+       lpp_free(lpp);
+       /* both tables were heap-allocated above and are not needed anymore */
+       free(lpp_vars);
+       free(forbidden_edges);
+}
+
+/**
+ * Check whether @p num is a multiple of @p alignment.
+ * @p alignment has to be a power of two (checked by an assertion).
+ */
+static bool is_aligned(unsigned num, unsigned alignment)
+{
+       assert(is_po2(alignment));
+       /* for a power of two, the bits below it are exactly the remainder */
+       return (num & (alignment - 1)) == 0;
+}
+
 /**
  * Enforce constraints at a node by live range splits.
  *
@@ -1124,7 +1271,9 @@ static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node,
        /* construct a list of register occupied by live-through values */
        unsigned *live_through_regs = NULL;
 
-       /* see if any use constraints are not met */
+       /* see if any use constraints are not met and whether double-width
+        * values are involved */
+       bool double_width = false;
        bool good = true;
        for (i = 0; i < arity; ++i) {
                ir_node                   *op = get_irn_n(node, i);
@@ -1137,22 +1286,32 @@ static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node,
                        continue;
 
                /* are there any limitations for the i'th operand? */
-               req = arch_get_register_req(node, i);
+               req = arch_get_irn_register_req_in(node, i);
+               if (req->width > 1)
+                       double_width = true;
+               reg       = arch_get_irn_register(op);
+               reg_index = arch_register_get_index(reg);
+               if (req->type & arch_register_req_type_aligned) {
+                       if (!is_aligned(reg_index, req->width)) {
+                               good = false;
+                               continue;
+                       }
+               }
                if (!(req->type & arch_register_req_type_limited))
                        continue;
 
                limited   = req->limited;
-               reg       = arch_get_irn_register(op);
-               reg_index = arch_register_get_index(reg);
                if (!rbitset_is_set(limited, reg_index)) {
                        /* found an assignment outside the limited set */
                        good = false;
-                       break;
+                       continue;
                }
        }
 
        /* is any of the live-throughs using a constrained output register? */
        be_foreach_definition(node, cls, value,
+               if (req_->width > 1)
+                       double_width = true;
                if (! (req_->type & arch_register_req_type_limited))
                        continue;
                if (live_through_regs == NULL) {
@@ -1172,6 +1331,12 @@ static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node,
                rbitset_alloca(live_through_regs, n_regs);
        }
 
+       if (double_width) {
+               /* only the ILP variant can solve this yet */
+               solve_lpp(live_nodes, node, forbidden_regs, live_through_regs);
+               return;
+       }
+
        /* at this point we have to construct a bipartite matching problem to see
         * which values should go to which registers
         * Note: We're building the matrix in "reverse" - source registers are
@@ -1209,7 +1374,7 @@ static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node,
                if (!arch_irn_consider_in_reg_alloc(cls, op))
                        continue;
 
-               req = arch_get_register_req(node, i);
+               req = arch_get_irn_register_req_in(node, i);
                if (!(req->type & arch_register_req_type_limited))
                        continue;
 
@@ -1565,7 +1730,7 @@ static void allocate_coalesce_block(ir_node *block, void *data)
                int                        p;
 
                node = be_lv_get_irn(lv, block, i);
-               req  = arch_get_register_req_out(node);
+               req  = arch_get_irn_register_req(node);
                if (req->cls != cls)
                        continue;
 
@@ -1845,8 +2010,8 @@ static void be_pref_alloc_cls(void)
 {
        size_t i;
 
-       lv = be_assure_liveness(irg);
-       be_liveness_assure_sets(lv);
+       be_assure_live_sets(irg);
+       lv = be_get_irg_liveness(irg);
 
        ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
 
@@ -1882,7 +2047,7 @@ static void spill(void)
        be_pre_spill_prepare_constr(irg, cls);
        be_timer_pop(T_RA_CONSTR);
 
-       dump(DUMP_RA, irg, "-spillprepare");
+       dump(DUMP_RA, irg, "spillprepare");
 
        /* spill */
        be_timer_push(T_RA_SPILL);
@@ -1893,7 +2058,7 @@ static void spill(void)
        check_for_memory_operands(irg);
        be_timer_pop(T_RA_SPILL_APPLY);
 
-       dump(DUMP_RA, irg, "-spill");
+       dump(DUMP_RA, irg, "spill");
 }
 
 /**
@@ -1944,8 +2109,7 @@ static void be_pref_alloc(ir_graph *new_irg)
 
                /* we most probably constructed new Phis so liveness info is invalid
                 * now */
-               /* TODO: test liveness_introduce */
-               be_liveness_invalidate(lv);
+               be_invalidate_live_sets(irg);
                free(normal_regs);
 
                stat_ev_ctx_pop("regcls");