fixed the fix of the fix (that was fix)
[libfirm] / ir / be / becopyopt.c
index 57a381f..584da53 100644 (file)
@@ -1,25 +1,44 @@
+/*
+ * Copyright (C) 1995-2007 University of Karlsruhe.  All right reserved.
+ *
+ * This file is part of libFirm.
+ *
+ * This file may be distributed and/or modified under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation and appearing in the file LICENSE.GPL included in the
+ * packaging of this file.
+ *
+ * Licensees holding valid libFirm Professional Edition licenses may use
+ * this file in accordance with the libFirm Commercial License.
+ * Agreement provided with the Software.
+ *
+ * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
+ * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE.
+ */
+
 /**
- * Author:      Daniel Grund
- * Date:               12.04.2005
- * Copyright:   (c) Universitaet Karlsruhe
- * Licence:     This file protected by GPL -  GNU GENERAL PUBLIC LICENSE.
+ * @file
+ * @brief       Copy minimization driver.
+ * @author      Daniel Grund
+ * @date        12.04.2005
+ * @version     $Id$
+ *
+ * Main file for the optimization reducing the copies needed for:
+ * - Phi coalescing
+ * - Register-constrained nodes
+ * - Two-address code instructions
  */
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
 
-#ifdef HAVE_ALLOCA_H
-#include <alloca.h>
-#endif
-#ifdef HAVE_MALLOC_H
-#include <malloc.h>
-#endif
-
 #include "execfreq.h"
 #include "xmalloc.h"
 #include "debug.h"
 #include "pmap.h"
 #include "raw_bitset.h"
+#include "irnode.h"
 #include "irgraph.h"
 #include "irgwalk.h"
 #include "irprog.h"
 #include "irprintf_t.h"
 
 #include "bemodule.h"
-#include "bearch.h"
+#include "bearch_t.h"
 #include "benode_t.h"
 #include "beutil.h"
 #include "beifg_t.h"
+#include "beintlive_t.h"
 #include "becopyopt_t.h"
 #include "becopystat.h"
 #include "belive_t.h"
 #define COST_FUNC_LOOP     2
 #define COST_FUNC_ALL_ONE  3
 
-static unsigned dump_flags    = 0;
-static unsigned style_flags   = 0;
-static unsigned  do_stats     = 0;
+static unsigned   dump_flags  = 0;
+static unsigned   style_flags = 0;
+static unsigned   do_stats    = 0;
 static cost_fct_t cost_func   = co_get_costs_exec_freq;
-static unsigned algo          = CO_ALGO_HEUR2;
-static int improve            = 1;
+static unsigned   algo        = CO_ALGO_HEUR4;
+static int        improve     = 1;
 
 static const lc_opt_enum_mask_items_t dump_items[] = {
        { "before",  DUMP_BEFORE },
@@ -90,6 +110,7 @@ static const lc_opt_enum_mask_items_t algo_items[] = {
 #ifdef WITH_JVM
        { "heur3",  CO_ALGO_HEUR3 },
 #endif /* WITH_JVM */
+       { "heur4",  CO_ALGO_HEUR4 },
 #ifdef WITH_ILP
        { "ilp",    CO_ALGO_ILP   },
 #endif /* WITH_ILP */
@@ -152,10 +173,10 @@ BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copycoal);
 
 static int nodes_interfere(const be_chordal_env_t *env, const ir_node *a, const ir_node *b)
 {
-       if(env->ifg)
+       if (env->ifg)
                return be_ifg_connected(env->ifg, a, b);
        else
-               return values_interfere(env->birg->lv, a, b);
+               return values_interfere(env->birg, a, b);
 }
 
 
@@ -220,39 +241,6 @@ int co_is_optimizable_root(const copy_opt_t *co, ir_node *irn) {
        return 0;
 }
 
-int co_is_optimizable_arg(const copy_opt_t *co, ir_node *irn) {
-       const ir_edge_t *edge;
-       const arch_register_t *reg;
-
-       assert(0 && "Is buggy and obsolete. Do not use");
-
-       if (arch_irn_is(co->aenv, irn, ignore))
-               return 0;
-
-       reg = arch_get_irn_register(co->aenv, irn);
-       if (arch_register_type_is(reg, ignore))
-               return 0;
-
-       foreach_out_edge(irn, edge) {
-               ir_node *n = edge->src;
-
-               if (!nodes_interfere(co->cenv, irn, n) || irn == n) {
-                       const arch_register_req_t *req;
-                       req = arch_get_register_req(co->aenv, n, -1);
-
-                       if(is_Reg_Phi(n) ||
-                          is_Perm(co->aenv, n) ||
-                          (arch_register_req_is(req, should_be_same))) {
-                               ir_node *other = get_irn_n(irn, req->other_same);
-                               if(other == irn)
-                                       return 1;
-                       }
-               }
-       }
-
-       return 0;
-}
-
 int co_get_costs_loop_depth(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
        int cost = 0;
        ir_loop *loop;
@@ -426,22 +414,25 @@ static void co_collect_units(ir_node *irn, void *env) {
                        /* Else insert the argument of the phi to the members of this ou */
                        DBG((dbg, LEVEL_1, "\t   Member: %+F\n", arg));
 
-                       /* Check if arg has occurred at a prior position in the arg/list */
-                       arg_pos = 0;
-                       for (o=0; o<unit->node_count; ++o)
-                               if (unit->nodes[o] == arg) {
-                                       arg_pos = o;
-                                       break;
+                       if (! arch_irn_is(co->aenv, arg, ignore)) {
+                               /* Check if arg has occurred at a prior position in the arg/list */
+                               arg_pos = 0;
+                               for (o=1; o<unit->node_count; ++o) {
+                                       if (unit->nodes[o] == arg) {
+                                               arg_pos = o;
+                                               break;
+                                       }
                                }
 
-                       if (!arg_pos) { /* a new argument */
-                               /* insert node, set costs */
-                               unit->nodes[unit->node_count] = arg;
-                               unit->costs[unit->node_count] = co->get_costs(co, irn, arg, i);
-                               unit->node_count++;
-                       } else { /* arg has occured before in same phi */
-                               /* increase costs for existing arg */
-                               unit->costs[arg_pos] += co->get_costs(co, irn, arg, i);
+                               if (!arg_pos) { /* a new argument */
+                                       /* insert node, set costs */
+                                       unit->nodes[unit->node_count] = arg;
+                                       unit->costs[unit->node_count] = co->get_costs(co, irn, arg, i);
+                                       unit->node_count++;
+                               } else { /* arg has occured before in same phi */
+                                       /* increase costs for existing arg */
+                                       unit->costs[arg_pos] += co->get_costs(co, irn, arg, i);
+                               }
                        }
                }
                unit->nodes = xrealloc(unit->nodes, unit->node_count * sizeof(*unit->nodes));
@@ -463,7 +454,7 @@ static void co_collect_units(ir_node *irn, void *env) {
 
                /* Src == Tgt of a 2-addr-code instruction */
                if (is_2addr_code(req)) {
-                       ir_node *other = get_irn_n(irn, req->other_same);
+                       ir_node *other = get_irn_n(skip_Proj(irn), req->other_same);
                        if (!arch_irn_is(co->aenv, other, ignore) &&
                                        !nodes_interfere(co->cenv, irn, other)) {
                                unit->nodes = xmalloc(2 * sizeof(*unit->nodes));
@@ -717,15 +708,14 @@ static int compare_affinity_node_t(const void *k1, const void *k2, size_t size)
 
 static void add_edge(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
        affinity_node_t new_node, *node;
-       neighb_t new_nbr, *nbr;
-       int allocnew;
+       neighb_t        *nbr;
+       int             allocnew = 1;
 
        new_node.irn        = n1;
        new_node.degree     = 0;
        new_node.neighbours = NULL;
        node = set_insert(co->nodes, &new_node, sizeof(new_node), nodeset_hash(new_node.irn));
 
-       allocnew = 1;
        for (nbr = node->neighbours; nbr; nbr = nbr->next)
                if (nbr->irn == n2) {
                        allocnew = 0;
@@ -734,11 +724,11 @@ static void add_edge(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
 
        /* if we did not find n2 in n1's neighbourhood insert it */
        if (allocnew) {
-               obstack_grow(&co->obst, &new_nbr, sizeof(new_nbr));
-               nbr = obstack_finish(&co->obst);
+               nbr        = obstack_alloc(&co->obst, sizeof(*nbr));
                nbr->irn   = n2;
                nbr->costs = 0;
                nbr->next  = node->neighbours;
+
                node->neighbours = nbr;
                node->degree++;
        }
@@ -784,8 +774,8 @@ static void build_graph_walker(ir_node *irn, void *env) {
                const arch_register_req_t *req =
                        arch_get_register_req(co->aenv, irn, -1);
                if (is_2addr_code(req)) {
-                       ir_node *other = get_irn_n(irn, req->other_same);
-                       if(!arch_irn_is(co->aenv, other, ignore))
+                       ir_node *other = get_irn_n(skip_Proj(irn), req->other_same);
+                       if (! arch_irn_is(co->aenv, other, ignore))
                                add_edges(co, irn, other, co->get_costs(co, irn, other, 0));
                }
        }
@@ -893,7 +883,7 @@ void co_dump_appel_graph(const copy_opt_t *co, FILE *f)
 }
 
 typedef struct _appel_clique_walker_t {
-       phase_t ph;
+       ir_phase ph;
        const copy_opt_t *co;
        int curr_nr;
        int node_count;
@@ -931,7 +921,7 @@ static int appel_aff_weight(const appel_clique_walker_t *env, ir_node *bl)
 #endif
 }
 
-static void *appel_clique_walker_irn_init(phase_t *phase, ir_node *irn, void *old)
+static void *appel_clique_walker_irn_init(ir_phase *phase, ir_node *irn, void *old)
 {
        appel_block_info_t *res = NULL;
 
@@ -1050,7 +1040,7 @@ static void appel_walker(ir_node *bl, void *data)
                irn = insn->next_insn;
        }
 
-       DBG((env->co->cenv->dbg, LEVEL_2, "%+F\n", bl));
+       DBG((dbg, LEVEL_2, "%+F\n", bl));
        be_liveness_end_of_block(lv, env->co->aenv, env->co->cls, bl, live);
 
        /* Generate the bad and ugly. */
@@ -1177,7 +1167,7 @@ void co_dump_appel_graph_cliques(const copy_opt_t *co, FILE *f)
 
        be_liveness_recompute(lv);
        obstack_init(&env.obst);
-       phase_init(&env.ph, "appel_clique_dumper", co->irg, PHASE_DEFAULT_GROWTH, appel_clique_walker_irn_init);
+       phase_init(&env.ph, "appel_clique_dumper", co->irg, PHASE_DEFAULT_GROWTH, appel_clique_walker_irn_init, NULL);
        env.curr_nr = co->cls->n_regs;
        env.co = co;
        env.f = f;
@@ -1383,20 +1373,21 @@ static int void_algo(copy_opt_t *co)
 */
 
 typedef struct {
-       co_algo_t *algo;
+       co_algo_t  *algo;
        const char *name;
-       int can_improve_existing;
+       int        can_improve_existing;
 } co_algo_info_t;
 
 static co_algo_info_t algos[] = {
-       { void_algo,                "none",  0 },
-       { co_solve_heuristic,       "heur1", 0 },
-       { co_solve_heuristic_new,   "heur2", 0 },
-       { co_solve_heuristic_java,  "heur3", 0 },
+       { void_algo,               "none",  0 },
+       { co_solve_heuristic,      "heur1", 0 },
+       { co_solve_heuristic_new,  "heur2", 0 },
+       { co_solve_heuristic_java, "heur3", 0 },
+       { co_solve_heuristic_mst,  "heur4", 0 },
 #ifdef WITH_ILP
-       { co_solve_ilp2,            "ilp",   1 },
+       { co_solve_ilp2,           "ilp",   1 },
 #endif
-       { NULL,                     "",      0 }
+       { NULL,                    "",      0 }
 };
 
 /*
@@ -1424,13 +1415,13 @@ static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char
 
 void co_driver(be_chordal_env_t *cenv)
 {
-       lc_timer_t *timer = lc_timer_register("firm.be.copyopt", "runtime");
+       lc_timer_t          *timer = lc_timer_register("firm.be.copyopt", "runtime");
        co_complete_stats_t before, after;
-       copy_opt_t *co;
-       co_algo_t  *algo_func;
-       int was_optimal = 0;
+       copy_opt_t          *co;
+       co_algo_t           *algo_func;
+       int                 was_optimal = 0;
 
-       if(algo < 0 || algo >= CO_ALGO_LAST)
+       if (algo < 0 || algo >= CO_ALGO_LAST)
                return;
 
        co = new_copy_opt(cenv, cost_func);
@@ -1449,24 +1440,28 @@ void co_driver(be_chordal_env_t *cenv)
        be_stat_ev_ull("co_init_unsat",   before.unsatisfied_edges);
 
        /* Dump the interference graph in Appel's format. */
-       if(dump_flags & DUMP_APPEL) {
+       if (dump_flags & DUMP_APPEL) {
                FILE *f = my_open(cenv, "", ".apl");
                co_dump_appel_graph(co, f);
                fclose(f);
        }
 
-       if(dump_flags & DUMP_BEFORE) {
+       if (dump_flags & DUMP_BEFORE) {
                FILE *f = my_open(cenv, "", "-before.dot");
                co_dump_ifg_dot(co, f, style_flags);
                fclose(f);
        }
 
        /* if the algo can improve results, provide an initial solution with heur3 */
-       if(improve && algos[algo].can_improve_existing) {
+       if (improve && algos[algo].can_improve_existing) {
                co_complete_stats_t stats;
 
-               /* produce a heuristical solution */
+               /* produce a heuristic solution */
+#ifdef WITH_JVM
                co_solve_heuristic_java(co);
+#else
+               co_solve_heuristic(co);
+#endif /* WITH_JVM */
 
                /* do the stats and provide the current costs */
                co_complete_stats(co, &stats);
@@ -1475,22 +1470,21 @@ void co_driver(be_chordal_env_t *cenv)
 
 #ifdef WITH_JVM
        /* start the JVM here so that it does not tamper the timing. */
-       if(algo == CO_ALGO_HEUR3)
+       if (algo == CO_ALGO_HEUR3)
                be_java_coal_start_jvm();
-#endif
+#endif /* WITH_JVM */
 
        algo_func = algos[algo].algo;
 
+       /* perform actual copy minimization */
        lc_timer_reset_and_start(timer);
-
        was_optimal = algo_func(co);
-
        lc_timer_stop(timer);
-       be_stat_ev("co_time", lc_timer_elapsed_msec(timer));
 
+       be_stat_ev("co_time", lc_timer_elapsed_msec(timer));
        be_stat_ev_ull("co_optimal", was_optimal);
 
-       if(dump_flags & DUMP_AFTER) {
+       if (dump_flags & DUMP_AFTER) {
                FILE *f = my_open(cenv, "", "-after.dot");
                co_dump_ifg_dot(co, f, style_flags);
                fclose(f);
@@ -1498,7 +1492,7 @@ void co_driver(be_chordal_env_t *cenv)
 
        co_complete_stats(co, &after);
 
-       if(do_stats) {
+       if (do_stats) {
                ulong64 optimizable_costs = after.max_costs - after.inevit_costs;
                ulong64 evitable          = after.costs     - after.inevit_costs;