Let dfs() discover only memory nodes
[libfirm] / ir / be / becopyopt.c
index 9a7540f..b5d3ffc 100644 (file)
 #include "beirg_t.h"
 #include "error.h"
 
+#include <libcore/lc_timing.h>
+#include <libcore/lc_opts.h>
+#include <libcore/lc_opts_enum.h>
+
 #define DUMP_BEFORE 1
 #define DUMP_AFTER  2
 #define DUMP_APPEL  4
@@ -82,11 +86,6 @@ static cost_fct_t cost_func   = co_get_costs_exec_freq;
 static unsigned   algo        = CO_ALGO_HEUR4;
 static int        improve     = 1;
 
-#ifdef WITH_LIBCORE
-#include <libcore/lc_timing.h>
-#include <libcore/lc_opts.h>
-#include <libcore/lc_opts_enum.h>
-
 static const lc_opt_enum_mask_items_t dump_items[] = {
        { "before",  DUMP_BEFORE },
        { "after",   DUMP_AFTER  },
@@ -152,7 +151,6 @@ static const lc_opt_table_entry_t options[] = {
        LC_OPT_ENT_BOOL          ("improve", "run heur3 before if algo can exploit start solutions",    &improve),
        LC_OPT_LAST
 };
-#endif /* WITH_LIBCORE */
 
 /* Insert additional options registration functions here. */
 extern void be_co_ilp_register_options(lc_opt_entry_t *grp);
@@ -161,14 +159,12 @@ extern void be_co3_register_options(lc_opt_entry_t *grp);
 
 void be_init_copycoal(void)
 {
-#ifdef WITH_LIBCORE
        lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
        lc_opt_entry_t *ra_grp = lc_opt_get_grp(be_grp, "ra");
        lc_opt_entry_t *chordal_grp = lc_opt_get_grp(ra_grp, "chordal");
        lc_opt_entry_t *co_grp = lc_opt_get_grp(chordal_grp, "co");
 
        lc_opt_add_table(co_grp, options);
-#endif
 }
 
 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copycoal);
@@ -449,10 +445,8 @@ static void co_collect_units(ir_node *irn, void *env) {
                }
                unit->nodes = xrealloc(unit->nodes, unit->node_count * sizeof(*unit->nodes));
                unit->costs = xrealloc(unit->costs, unit->node_count * sizeof(*unit->costs));
-       } else
-
-       /* Proj of a perm with corresponding arg */
-       if (is_Perm_Proj(co->aenv, irn)) {
+       } else if (is_Perm_Proj(co->aenv, irn)) {
+               /* Proj of a perm with corresponding arg */
                assert(!nodes_interfere(co->cenv, irn, get_Perm_src(irn)));
                unit->nodes = xmalloc(2 * sizeof(*unit->nodes));
                unit->costs = xmalloc(2 * sizeof(*unit->costs));
@@ -466,15 +460,44 @@ static void co_collect_units(ir_node *irn, void *env) {
 
                /* Src == Tgt of a 2-addr-code instruction */
                if (is_2addr_code(req)) {
-                       ir_node *other = get_irn_n(skip_Proj(irn), req->other_same);
-                       if (!arch_irn_is(co->aenv, other, ignore) &&
-                                       !nodes_interfere(co->cenv, irn, other)) {
-                               unit->nodes = xmalloc(2 * sizeof(*unit->nodes));
-                               unit->costs = xmalloc(2 * sizeof(*unit->costs));
-                               unit->node_count = 2;
-                               unit->nodes[0] = irn;
-                               unit->nodes[1] = other;
-                               unit->costs[1] = co->get_costs(co, irn, other, -1);
+                       ir_node *other  = get_irn_n(skip_Proj(irn), req->other_same[0]);
+                       ir_node *other2 = NULL;
+                       int      count;
+
+                       if (arch_irn_is(co->aenv, other, ignore) ||
+                                       nodes_interfere(co->cenv, irn, other)) {
+                               other = NULL;
+                       }
+                       if (req->other_same[1] != -1) {
+                               other2 = get_irn_n(skip_Proj(irn), req->other_same[1]);
+                               if (arch_irn_is(co->aenv, other2, ignore) ||
+                                               nodes_interfere(co->cenv, irn, other2)) {
+                                       other2 = NULL;
+                               }
+                       }
+                       count = 1 + (other != NULL) + (other2 != NULL && other != other2);
+
+                       if (count > 1) {
+                               int i = 0;
+
+                               unit->nodes = xmalloc(count * sizeof(*unit->nodes));
+                               unit->costs = xmalloc(count * sizeof(*unit->costs));
+                               unit->node_count = count;
+                               unit->nodes[i] = irn;
+                               if (other != NULL) {
+                                       ++i;
+                                       unit->nodes[i] = other;
+                                       unit->costs[i] = co->get_costs(co, irn, other, -1);
+                               }
+                               if (other2 != NULL) {
+                                       if (other == other2) {
+                                               unit->costs[i] += co->get_costs(co, irn, other2, -1);
+                                       } else {
+                                               ++i;
+                                               unit->nodes[i] = other2;
+                                               unit->costs[i] = co->get_costs(co, irn, other2, -1);
+                                       }
+                               }
                        }
                } else {
                        assert(0 && "This is not an optimizable node!");
@@ -769,27 +792,29 @@ static void build_graph_walker(ir_node *irn, void *env) {
        if (arch_register_type_is(reg, ignore))
                return;
 
-       /* Phis */
-       if (is_Reg_Phi(irn))
+       if (is_Reg_Phi(irn)) { /* Phis */
                for (pos=0, max=get_irn_arity(irn); pos<max; ++pos) {
                        ir_node *arg = get_irn_n(irn, pos);
                        add_edges(co, irn, arg, co->get_costs(co, irn, arg, pos));
                }
-
-       /* Perms */
-       else if (is_Perm_Proj(co->aenv, irn)) {
+       }
+       else if (is_Perm_Proj(co->aenv, irn)) { /* Perms */
                ir_node *arg = get_Perm_src(irn);
                add_edges(co, irn, arg, co->get_costs(co, irn, arg, 0));
        }
-
-       /* 2-address code */
-       else {
-               const arch_register_req_t *req =
-                       arch_get_register_req(co->aenv, irn, -1);
+       else { /* 2-address code */
+               const arch_register_req_t *req = arch_get_register_req(co->aenv, irn, -1);
                if (is_2addr_code(req)) {
-                       ir_node *other = get_irn_n(skip_Proj(irn), req->other_same);
-                       if (! arch_irn_is(co->aenv, other, ignore))
-                               add_edges(co, irn, other, co->get_costs(co, irn, other, 0));
+                       const int *i;
+                       for (i = req->other_same; i != ENDOF(req->other_same); ++i) {
+                               ir_node *other;
+
+                               if (*i == -1) break;
+
+                               other = get_irn_n(skip_Proj(irn), *i);
+                               if (! arch_irn_is(co->aenv, other, ignore))
+                                       add_edges(co, irn, other, co->get_costs(co, irn, other, 0));
+                       }
                }
        }
 }
@@ -831,7 +856,8 @@ void co_dump_appel_graph(const copy_opt_t *co, FILE *f)
 
        ir_node *irn;
        void *it, *nit;
-       int i, n, n_regs;
+       int n, n_regs;
+       unsigned i;
 
        n_regs = 0;
        for(i = 0; i < co->cls->n_regs; ++i) {
@@ -1080,7 +1106,7 @@ static void appel_walker(ir_node *bl, void *data)
                        for(j = 0; j < insn->use_start; ++j) {
                                ir_node *op   = insn->ops[j].carrier;
                                bitset_t *adm = insn->ops[j].regs;
-                               int k;
+                               unsigned k;
                                size_t nr;
 
                                if(!insn->ops[j].has_constraints)
@@ -1176,8 +1202,8 @@ static void appel_inter_block_aff(ir_node *bl, void *data)
 
 void co_dump_appel_graph_cliques(const copy_opt_t *co, FILE *f)
 {
-       int i;
-       int n_colors;
+       unsigned i;
+       unsigned n_colors;
        appel_clique_walker_t env;
        bitset_t *adm = bitset_alloca(co->cls->n_regs);
        be_lv_t *lv = co->cenv->birg->lv;
@@ -1196,7 +1222,7 @@ void co_dump_appel_graph_cliques(const copy_opt_t *co, FILE *f)
        env.color_map = alloca(co->cls->n_regs * sizeof(env.color_map[0]));
        for(i = 0, n_colors = 0; i < co->cls->n_regs; ++i) {
                const arch_register_t *reg = &co->cls->regs[i];
-               env.color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_colors++;
+               env.color_map[i] = arch_register_type_is(reg, ignore) ? -1 : (int) n_colors++;
        }
 
        env.dumb = 1;
@@ -1208,7 +1234,7 @@ void co_dump_appel_graph_cliques(const copy_opt_t *co, FILE *f)
 
        /* make the first k nodes interfere */
        for(i = 0; i < n_colors; ++i) {
-               int j;
+               unsigned j;
                for(j = i + 1; j < n_colors; ++j)
-                       fprintf(f, "%d %d -1 ", i, j);
+                       fprintf(f, "%u %u -1 ", i, j); /* i and j are unsigned now; %d would mismatch the argument type */
                fprintf(f, "\n");