simplify and cleanup execfreq API
authorMatthias Braun <matthias.braun@kit.edu>
Mon, 30 Jul 2012 16:30:51 +0000 (18:30 +0200)
committerMatthias Braun <matthias.braun@kit.edu>
Mon, 30 Jul 2012 16:30:51 +0000 (18:30 +0200)
21 files changed:
include/libfirm/execfreq.h
include/libfirm/firm_types.h
ir/ana/execfreq.c
ir/ana/execfreq_t.h [new file with mode: 0644]
ir/be/beblocksched.c
ir/be/bechordal_main.c
ir/be/becopyopt.c
ir/be/begnuas.c
ir/be/beirg.c
ir/be/beirg.h
ir/be/bemain.c
ir/be/bepbqpcoloring.c
ir/be/beprefalloc.c
ir/be/bespillslots.c
ir/be/bespillutil.c
ir/be/bestat.c
ir/be/bestat.h
ir/be/ia32/ia32_emitter.c
ir/common/firm.c
ir/ir/irprofile.c
ir/ir/irprofile.h

index a301e91..95846f3 100644 (file)
 /**
  * @ingroup irana
  * @defgroup execfreq Basic Block Execution Frequency
+ *
+ * Execution frequencies specify how often a basic block is expected to get
+ * executed during execution of a function.
+ * For example, the start block has a natural execution frequency of 1.0, each
+ * of the two branches of a simple if 0.5, and nodes in a simple loop 10.0.
+ * Execution frequencies can either get estimated based on the structure of the
+ * control flow graph or can be calculated based on profile information.
  * @{
  */
 
-/** Creates execfreq structure (to be used with set_execfreq) */
-FIRM_API ir_exec_freq *create_execfreq(ir_graph *irg);
-
-/**
- * Sets execution frequency of a basic block
+/** Estimates execution frequency of a graph.
+ * You can query the frequencies with get_block_execfreq().
  */
-FIRM_API void set_execfreq(ir_exec_freq *ef, const ir_node *block, double freq);
-
-/** Creates execfreq structure and initialize with estimated frequencies. */
-FIRM_API ir_exec_freq *compute_execfreq(ir_graph *irg, double loop_weight);
-
-/** Frees memory occupied by execution frequency structure @p ef. */
-FIRM_API void free_execfreq(ir_exec_freq *ef);
+FIRM_API void ir_estimate_execfreq(ir_graph *irg);
 
 /** Returns execution frequency of block @p block. */
-FIRM_API double get_block_execfreq(const ir_exec_freq *ef,
-                                   const ir_node *block);
-
-/** Returns execution frequency of block @p block, scaled into the range
- * of an unsigned long type. */
-FIRM_API unsigned long get_block_execfreq_ulong(const ir_exec_freq *ef,
-                                                const ir_node *block);
+FIRM_API double get_block_execfreq(const ir_node *block);
 
 /** @} */
 
index 3c5a1d2..de4dffd 100644 (file)
@@ -94,9 +94,6 @@ typedef struct ir_loop              ir_loop;
 /** @ingroup ir_entity
  * Entity */
 typedef struct ir_entity            ir_entity;
-/** @ingroup execfreq
- * Execution Frequency Analysis Results */
-typedef struct ir_exec_freq         ir_exec_freq;
 /** @ingroup ir_cdep
  * Control Dependence Analysis Results */
 typedef struct ir_cdep              ir_cdep;
index 8ab3dd8..be752dd 100644 (file)
 #include "irprintf.h"
 #include "util.h"
 #include "irhooks.h"
+#include "irnodehashmap.h"
 
-#include "execfreq.h"
-
-/* enable to also solve the equations with Gauss-Jordan */
-#undef COMPARE_AGAINST_GAUSSJORDAN
-
-#ifdef COMPARE_AGAINST_GAUSSJORDAN
-#include "gaussjordan.h"
-#endif
-
+#include "execfreq_t.h"
 
 #define EPSILON          1e-5
 #define UNDEF(x)         (fabs(x) < EPSILON)
 #define MAX_INT_FREQ 1000000
 
 typedef struct freq_t {
-       const ir_node    *irn;
-       int               idx;
-       double            freq;
+       double freq;
 } freq_t;
 
-struct ir_exec_freq {
-       set *freqs;
-       hook_entry_t hook;
-       double max;
-       double min_non_zero;
-       double m, b;
-       unsigned infeasible : 1;
-};
+static ir_nodehashmap_t freq_map;
+static struct obstack   obst;
+static hook_entry_t     hook;
 
-static int cmp_freq(const void *a, const void *b, size_t size)
+double get_block_execfreq(const ir_node *block)
 {
-       const freq_t *p = (const freq_t*) a;
-       const freq_t *q = (const freq_t*) b;
-       (void) size;
-
-       return !(p->irn == q->irn);
+       const freq_t *freq = ir_nodehashmap_get(freq_t, &freq_map, block);
+       if (freq == NULL)
+               return 0.0;
+       return freq->freq;
 }
 
-static freq_t *set_find_freq(set *freqs, const ir_node *irn)
+void set_block_execfreq(ir_node *block, double newfreq)
 {
-       freq_t query;
-       query.irn = irn;
-       return set_find(freq_t, freqs, &query, sizeof(query), hash_ptr(irn));
+       freq_t *freq = ir_nodehashmap_get(freq_t, &freq_map, block);
+       if (freq == NULL) {
+               freq = OALLOC(&obst, freq_t);
+               ir_nodehashmap_insert(&freq_map, block, freq);
+       }
+       freq->freq = newfreq;
 }
 
-static freq_t *set_insert_freq(set *freqs, const ir_node *irn)
+static void exec_freq_node_info(void *ctx, FILE *f, const ir_node *irn)
 {
-       freq_t query;
-
-       query.irn = irn;
-       query.freq = 0.0;
-       query.idx  = -1;
-       return set_insert(freq_t, freqs, &query, sizeof(query), hash_ptr(irn));
+       (void)ctx;
+       if (!is_Block(irn))
+               return;
+       fprintf(f, "execution frequency: %g\n", get_block_execfreq(irn));
 }
 
-double get_block_execfreq(const ir_exec_freq *ef, const ir_node *irn)
+void init_execfreq(void)
 {
-       if (!ef->infeasible) {
-               set *freqs = ef->freqs;
-               freq_t *freq;
-               assert(is_Block(irn));
-               freq = set_find_freq(freqs, irn);
-               assert(freq);
-
-               assert(freq->freq >= 0);
-               return freq->freq;
-       }
+       ir_nodehashmap_init(&freq_map);
+       obstack_init(&obst);
 
-       return 1.0;
+       memset(&hook, 0, sizeof(hook));
+       hook.hook._hook_node_info = exec_freq_node_info;
+       register_hook(hook_node_info, &hook);
 }
 
-unsigned long
-get_block_execfreq_ulong(const ir_exec_freq *ef, const ir_node *bb)
+void exit_execfreq(void)
 {
-       double f       = get_block_execfreq(ef, bb);
-       int res        = (int) (f > ef->min_non_zero ? ef->m * f + ef->b : 1.0);
-       return res;
+       unregister_hook(hook_node_info, &hook);
+
+       obstack_free(&obst, NULL);
+       ir_nodehashmap_destroy(&freq_map);
 }
 
+
 static double *solve_lgs(gs_matrix_t *mat, double *x, int size)
 {
        double init = 1.0 / size;
@@ -150,29 +131,13 @@ static double *solve_lgs(gs_matrix_t *mat, double *x, int size)
        stat_ev_tim_pop("execfreq_seidel_time");
        stat_ev_dbl("execfreq_seidel_iter", iter);
 
-#ifdef COMPARE_AGAINST_GAUSSJORDAN
-       {
-               double *nw = XMALLOCN(double, size * size);
-               double *nx = XMALLOCNZ(double, size);
-
-               gs_matrix_export(mat, nw, size);
-
-               stat_ev_tim_push();
-               firm_gaussjordansolve(nw, nx, size);
-               stat_ev_tim_pop("execfreq_jordan_time");
-
-               xfree(nw);
-               xfree(nx);
-       }
-#endif
-
        return x;
 }
 
 /*
  * Determine probability that predecessor pos takes this cf edge.
  */
-static double get_cf_probability(ir_node *bb, int pos, double loop_weight)
+static double get_cf_probability(const ir_node *bb, int pos, double loop_weight)
 {
        double         sum = 0.0;
        double         cur = 1.0;
@@ -211,114 +176,125 @@ static double get_cf_probability(ir_node *bb, int pos, double loop_weight)
        return cur/sum;
 }
 
-static void exec_freq_node_info(void *ctx, FILE *f, const ir_node *irn)
-{
-       ir_exec_freq *ef = (ir_exec_freq*) ctx;
-       if (!is_Block(irn))
-               return;
+static double *freqs;
+static double  min_non_zero;
+static double  max_freq;
 
-       fprintf(f, "execution frequency: %g/%lu\n", get_block_execfreq(ef, irn), get_block_execfreq_ulong(ef, irn));
+static void collect_freqs(ir_node *node, void *data)
+{
+       (void) data;
+       double freq = get_block_execfreq(node);
+       if (freq > max_freq)
+               max_freq = freq;
+       if (freq > 0.0 && freq < min_non_zero)
+               min_non_zero = freq;
+       ARR_APP1(double, freqs, freq);
 }
 
-ir_exec_freq *create_execfreq(ir_graph *irg)
+void ir_calculate_execfreq_int_factors(ir_execfreq_int_factors *factors,
+                                       ir_graph *irg)
 {
-       ir_exec_freq *execfreq = XMALLOCZ(ir_exec_freq);
-       execfreq->freqs = new_set(cmp_freq, 32);
+       /* compute m and b of the transformation used to convert the doubles into
+        * scaled ints */
+       freqs = NEW_ARR_F(double, 0);
+       min_non_zero = HUGE_VAL;
+       max_freq     = 0.0;
+       irg_block_walk_graph(irg, collect_freqs, NULL, NULL);
+
+       /*
+        * find the smallest difference of the execution frequencies
+        * we try to resolve it with 1 integer.
+        */
+       size_t n_freqs       = ARR_LEN(freqs);
+       double smallest_diff = 1.0;
+       for (size_t i = 0; i < n_freqs; ++i) {
+               if (freqs[i] <= 0.0)
+                       continue;
 
-       memset(&execfreq->hook, 0, sizeof(execfreq->hook));
+               for (size_t j = i + 1; j < n_freqs; ++j) {
+                       double diff = fabs(freqs[i] - freqs[j]);
 
-       // set reasonable values to convert double execfreq to ulong execfreq
-       execfreq->m = 1.0;
+                       if (!UNDEF(diff))
+                               smallest_diff = MIN(diff, smallest_diff);
+               }
+       }
 
-       execfreq->hook.context = execfreq;
-       execfreq->hook.hook._hook_node_info = exec_freq_node_info;
-       register_hook(hook_node_info, &execfreq->hook);
-       (void) irg;
+       double l2 = min_non_zero;
+       double h2 = max_freq;
+       double l1 = 1.0;
+       double h1 = MAX_INT_FREQ;
 
-       return execfreq;
-}
+       /* according to that the slope of the translation function is
+        * 1.0 / smallest_diff */
+       factors->m = 1.0 / smallest_diff;
 
-void set_execfreq(ir_exec_freq *execfreq, const ir_node *block, double freq)
-{
-       freq_t *f = set_insert_freq(execfreq->freqs, block);
-       f->freq = freq;
+       /* the y-intercept is then given by */
+       factors->b = l1 - factors->m * l2;
+
+       /*
+        * if the slope is so high that the largest integer would be larger than
+        * MAX_INT_FREQ set the largest int freq to that upper limit and recompute
+        * the translation function
+        */
+       if (factors->m * h2 + factors->b > MAX_INT_FREQ) {
+               factors->m = (h1 - l1) / (h2 - l2);
+               factors->b = l1 - factors->m * l2;
+       }
+
+       DEL_ARR_F(freqs);
 }
 
-static void collect_blocks(ir_node *bl, void *data)
+int get_block_execfreq_int(const ir_execfreq_int_factors *factors,
+                           const ir_node *block)
 {
-       set *freqs = (set*) data;
-       set_insert_freq(freqs, bl);
+       double f   = get_block_execfreq(block);
+       int    res = (int) (f > factors->min_non_zero ? factors->m * f + factors->b : 1.0);
+       return res;
 }
 
-ir_exec_freq *compute_execfreq(ir_graph *irg, double loop_weight)
+void ir_estimate_execfreq(ir_graph *irg)
 {
-       gs_matrix_t  *mat;
-       int           size;
-       int           n_keepalives;
-       int           idx;
-       freq_t       *freq, *s, *e;
-       ir_exec_freq *ef;
-       ir_node      *end = get_irg_end(irg);
-       set          *freqs;
-       dfs_t        *dfs;
-       double       *x;
-       double        norm;
+       double loop_weight = 10.0;
 
-       /*
-        * compute a DFS.
+       assure_irg_properties(irg,
+               IR_GRAPH_PROPERTY_CONSISTENT_OUT_EDGES
+               | IR_GRAPH_PROPERTY_CONSISTENT_LOOPINFO);
+
+       /* compute a DFS.
         * using a toposort on the CFG (without back edges) will propagate
         * the values better for the gauss/seidel iteration.
         * => they can "flow" from start to end.
         */
-       dfs = dfs_new(&absgraph_irg_cfg_succ, irg);
-       ef = XMALLOCZ(ir_exec_freq);
-       ef->min_non_zero = HUGE_VAL; /* initialize with a reasonable large number. */
-       freqs = ef->freqs = new_set(cmp_freq, dfs_get_n_nodes(dfs));
+       dfs_t *dfs = dfs_new(&absgraph_irg_cfg_succ, irg);
 
-       /*
-        * Populate the exec freq set.
-        * The DFS cannot be used alone, since the CFG might not be connected
-        * due to unreachable code.
-        */
-       irg_block_walk_graph(irg, collect_blocks, NULL, freqs);
+       int          size = dfs_get_n_nodes(dfs);
+       gs_matrix_t *mat  = gs_new_matrix(size, size);
 
-       construct_cf_backedges(irg);
-       assure_edges(irg);
+       ir_node *end_block = get_irg_end_block(irg);
 
-       size = dfs_get_n_nodes(dfs);
-       mat  = gs_new_matrix(size, size);
-       x    = XMALLOCN(double, size);
-
-       for (idx = dfs_get_n_nodes(dfs) - 1; idx >= 0; --idx) {
-               ir_node *bb = (ir_node *) dfs_get_post_num_node(dfs, size - idx - 1);
-               int i;
-
-               freq = set_insert_freq(freqs, bb);
-               freq->idx = idx;
+       for (int idx = dfs_get_n_nodes(dfs) - 1; idx >= 0; --idx) {
+               const ir_node *bb = (ir_node*)dfs_get_post_num_node(dfs, size-idx-1);
 
                /* Sum of (execution frequency of predecessor * probability of cf edge) ... */
-               for (i = get_Block_n_cfgpreds(bb) - 1; i >= 0; --i) {
-                       ir_node *pred = get_Block_cfgpred_block(bb, i);
-                       int pred_idx  = size - dfs_get_post_num(dfs, pred) - 1;
-
-                       gs_matrix_set(mat, idx, pred_idx, get_cf_probability(bb, i, loop_weight));
+               for (int i = get_Block_n_cfgpreds(bb) - 1; i >= 0; --i) {
+                       const ir_node *pred           = get_Block_cfgpred_block(bb, i);
+                       int            pred_idx       = size - dfs_get_post_num(dfs, pred)-1;
+                       double         cf_probability = get_cf_probability(bb, i, loop_weight);
+                       gs_matrix_set(mat, idx, pred_idx, cf_probability);
                }
                /* ... equals my execution frequency */
                gs_matrix_set(mat, idx, idx, -1.0);
-       }
-
-       dfs_free(dfs);
-
-       /*
-        * Add an edge from end to start.
-        * The problem is then an eigenvalue problem:
-        * Solve A*x = 1*x => (A-I)x = 0
-        */
-       s = set_find_freq(freqs, get_irg_start_block(irg));
 
-       e = set_find_freq(freqs, get_irg_end_block(irg));
-       if (e->idx >= 0)
-               gs_matrix_set(mat, s->idx, e->idx, 1.0);
+               /* Add an edge from end to start.
+                * The problem is then an eigenvalue problem:
+                * Solve A*x = 1*x => (A-I)x = 0
+                */
+               if (bb == end_block) {
+                       const ir_node *start_block = get_irg_start_block(irg);
+                       int            s_idx = size - dfs_get_post_num(dfs, start_block)-1;
+                       gs_matrix_set(mat, s_idx, idx, 1.0);
+               }
+       }
 
        /*
         * Also add an edge for each kept block to start.
@@ -326,104 +302,43 @@ ir_exec_freq *compute_execfreq(ir_graph *irg, double loop_weight)
         * This avoid strange results for e.g. an irg containing a exit()-call
         * which block has no cfg successor.
         */
-       n_keepalives = get_End_n_keepalives(end);
-       for (idx = n_keepalives - 1; idx >= 0; --idx) {
+       ir_node       *start_block  = get_irg_start_block(irg);
+       int            s_idx        = size - dfs_get_post_num(dfs, start_block)-1;
+       const ir_node *end          = get_irg_end(irg);
+       int            n_keepalives = get_End_n_keepalives(end);
+       for (int idx = n_keepalives - 1; idx >= 0; --idx) {
                ir_node *keep = get_End_keepalive(end, idx);
+               if (!is_Block(keep) || get_irn_n_edges_kind(keep, EDGE_KIND_BLOCK) > 0)
+                       continue;
 
-               if (is_Block(keep) && get_Block_n_cfg_outs(keep) == 0) {
-                       freq_t *k = set_find_freq(freqs, keep);
-                       if (k->idx >= 0)
-                               gs_matrix_set(mat, s->idx, k->idx, 1.0);
-               }
+               int k_idx = size-dfs_get_post_num(dfs, keep)-1;
+               if (k_idx > 0)
+                       gs_matrix_set(mat, s_idx, k_idx, 1.0);
        }
 
        /* solve the system and delete the matrix */
+       double *x = XMALLOCN(double, size);
        solve_lgs(mat, x, size);
        gs_delete_matrix(mat);
 
-       /*
-        * compute the normalization factor.
+       /* compute the normalization factor.
         * 1.0 / exec freq of start block.
+        * (note: start_idx is != 0 in strange cases involving endless loops,
+        *  probably a misfeature/bug)
         */
-       norm = x[s->idx] != 0.0 ? 1.0 / x[s->idx] : 1.0;
+       int    start_idx  = size-dfs_get_post_num(dfs, get_irg_start_block(irg))-1;
+       double start_freq = x[start_idx];
+       double norm       = start_freq != 0.0 ? 1.0 / start_freq : 1.0;
 
-       ef->max = 0.0;
-       foreach_set(freqs, freq_t, freq) {
-               idx = freq->idx;
+       for (int idx = dfs_get_n_nodes(dfs) - 1; idx >= 0; --idx) {
+               ir_node *bb = (ir_node *) dfs_get_post_num_node(dfs, size - idx - 1);
 
                /* take abs because it sometimes can be -0 in case of endless loops */
-               freq->freq = fabs(x[idx]) * norm;
-
-               /* get the maximum exec freq */
-               ef->max = MAX(ef->max, freq->freq);
-
-               /* Get the minimum non-zero execution frequency. */
-               if (freq->freq > 0.0)
-                       ef->min_non_zero = MIN(ef->min_non_zero, freq->freq);
+               double freq = fabs(x[idx]) * norm;
+               set_block_execfreq(bb, freq);
        }
 
-       /* compute m and b of the transformation used to convert the doubles into scaled ints */
-       {
-               double smallest_diff = 1.0;
-
-               double l2 = ef->min_non_zero;
-               double h2 = ef->max;
-               double l1 = 1.0;
-               double h1 = MAX_INT_FREQ;
-
-               double *fs = (double*) malloc(set_count(freqs) * sizeof(fs[0]));
-               int i, j, n = 0;
-
-               foreach_set(freqs, freq_t, freq)
-                       fs[n++] = freq->freq;
-
-               /*
-                * find the smallest difference of the execution frequencies
-                * we try to ressolve it with 1 integer.
-                */
-               for (i = 0; i < n; ++i) {
-                       if (fs[i] <= 0.0)
-                               continue;
-
-                       for (j = i + 1; j < n; ++j) {
-                               double diff = fabs(fs[i] - fs[j]);
-
-                               if (!UNDEF(diff))
-                                       smallest_diff = MIN(diff, smallest_diff);
-                       }
-               }
-
-               /* according to that the slope of the translation function is 1.0 / smallest diff */
-               ef->m = 1.0 / smallest_diff;
-
-               /* the abscissa is then given by */
-               ef->b = l1 - ef->m * l2;
-
-               /*
-                * if the slope is so high that the largest integer would be larger than MAX_INT_FREQ
-                * set the largest int freq to that upper limit and recompute the translation function
-                */
-               if (ef->m * h2 + ef->b > MAX_INT_FREQ) {
-                       ef->m = (h1 - l1) / (h2 - l2);
-                       ef->b = l1 - ef->m * l2;
-               }
-
-               free(fs);
-       }
-
-       memset(&ef->hook, 0, sizeof(ef->hook));
-       ef->hook.context = ef;
-       ef->hook.hook._hook_node_info = exec_freq_node_info;
-       register_hook(hook_node_info, &ef->hook);
+       dfs_free(dfs);
 
        xfree(x);
-
-       return ef;
-}
-
-void free_execfreq(ir_exec_freq *ef)
-{
-       del_set(ef->freqs);
-       unregister_hook(hook_node_info, &ef->hook);
-       free(ef);
 }
diff --git a/ir/ana/execfreq_t.h b/ir/ana/execfreq_t.h
new file mode 100644 (file)
index 0000000..ec20160
--- /dev/null
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
+ *
+ * This file is part of libFirm.
+ *
+ * This file may be distributed and/or modified under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation and appearing in the file LICENSE.GPL included in the
+ * packaging of this file.
+ *
+ * Licensees holding valid libFirm Professional Edition licenses may use
+ * this file in accordance with the libFirm Commercial License.
+ * Agreement provided with the Software.
+ *
+ * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
+ * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE.
+ */
+
+/**
+ * @file
+ * @brief       Compute an estimate of basic block executions.
+ * @author      Adam M. Szalkowski
+ * @date        28.05.2006
+ */
+#ifndef FIRM_ANA_EXECFREQ_T_H
+#define FIRM_ANA_EXECFREQ_T_H
+
+#include "execfreq.h"
+
+void init_execfreq(void);
+
+void exit_execfreq(void);
+
+void set_block_execfreq(ir_node *block, double freq);
+
+typedef struct ir_execfreq_int_factors {
+       double max;
+       double min_non_zero;
+       double m, b;
+} ir_execfreq_int_factors;
+
+void ir_calculate_execfreq_int_factors(ir_execfreq_int_factors *factors,
+                                       ir_graph *irg);
+
+int get_block_execfreq_int(const ir_execfreq_int_factors *factors,
+                           const ir_node *block);
+
+#endif
index e920d2c..f3b46cd 100644 (file)
@@ -121,7 +121,6 @@ typedef struct blocksched_env_t blocksched_env_t;
 struct blocksched_env_t {
        ir_graph       *irg;
        struct obstack *obst;
-       ir_exec_freq   *execfreqs;
        edge_t         *edges;
        pdeq           *worklist;
        int            blockcount;
@@ -164,11 +163,11 @@ static void collect_egde_frequency(ir_node *block, void *data)
        } else if (arity == 1) {
                ir_node *pred_block = get_Block_cfgpred_block(block, 0);
                ir_loop *pred_loop  = get_irn_loop(pred_block);
-               float    freq       = (float)get_block_execfreq(env->execfreqs, block);
+               float    freq       = (float)get_block_execfreq(block);
 
                /* is it an edge leaving a loop */
                if (get_loop_depth(pred_loop) > get_loop_depth(loop)) {
-                       float pred_freq = (float)get_block_execfreq(env->execfreqs, pred_block);
+                       float pred_freq = (float)get_block_execfreq(pred_block);
                        edge.outedge_penalty_freq = -(pred_freq - freq);
                }
 
@@ -187,7 +186,7 @@ static void collect_egde_frequency(ir_node *block, void *data)
                        double  execfreq;
                        ir_node *pred_block = get_Block_cfgpred_block(block, i);
 
-                       execfreq = get_block_execfreq(env->execfreqs, pred_block);
+                       execfreq = get_block_execfreq(pred_block);
 
                        edge.pos              = i;
                        edge.execfreq         = execfreq;
@@ -444,7 +443,6 @@ static void pick_block_successor(blocksched_entry_t *entry, blocksched_env_t *en
 
        foreach_block_succ(block, edge) {
                ir_node *succ_block = get_edge_src_irn(edge);
-               double  execfreq;
 
                if (irn_visited(succ_block))
                        continue;
@@ -453,7 +451,7 @@ static void pick_block_successor(blocksched_entry_t *entry, blocksched_env_t *en
                if (succ_entry->prev != NULL)
                        continue;
 
-               execfreq = get_block_execfreq(env->execfreqs, succ_block);
+               double execfreq = get_block_execfreq(succ_block);
                if (execfreq > best_succ_execfreq) {
                        best_succ_execfreq = execfreq;
                        succ = succ_block;
@@ -519,7 +517,7 @@ static ir_node **create_blocksched_array(blocksched_env_t *env, blocksched_entry
        return block_list;
 }
 
-static ir_node **create_block_schedule_greedy(ir_graph *irg, ir_exec_freq *execfreqs)
+static ir_node **create_block_schedule_greedy(ir_graph *irg)
 {
        blocksched_env_t   env;
        struct obstack     obst;
@@ -530,7 +528,6 @@ static ir_node **create_block_schedule_greedy(ir_graph *irg, ir_exec_freq *execf
 
        env.irg        = irg;
        env.obst       = &obst;
-       env.execfreqs  = execfreqs;
        env.edges      = NEW_ARR_F(edge_t, 0);
        env.worklist   = NULL;
        env.blockcount = 0;
@@ -625,7 +622,7 @@ static void collect_egde_frequency_ilp(ir_node *block, void *data)
 
        arity = get_irn_arity(block);
        if (arity == 1) {
-               double execfreq = get_block_execfreq(env->env.execfreqs, block);
+               double execfreq = get_block_execfreq(block);
                add_ilp_edge(block, 0, execfreq, env);
        }
        else {
@@ -641,7 +638,7 @@ static void collect_egde_frequency_ilp(ir_node *block, void *data)
                        ilp_edge_t *edge;
                        ir_node    *pred_block = get_Block_cfgpred_block(block, i);
 
-                       execfreq = get_block_execfreq(env->env.execfreqs, pred_block);
+                       execfreq = get_block_execfreq(pred_block);
                        edgenum  = add_ilp_edge(block, i, execfreq, env);
                        edge     = &env->ilpedges[edgenum];
                        lpp_set_factor_fast(env->lpp, cst_idx, edge->ilpvar, 1.0);
@@ -708,7 +705,7 @@ static void coalesce_blocks_ilp(blocksched_ilp_env_t *env)
        }
 }
 
-static ir_node **create_block_schedule_ilp(ir_graph *irg, ir_exec_freq *execfreqs)
+static ir_node **create_block_schedule_ilp(ir_graph *irg)
 {
        blocksched_ilp_env_t env;
        struct obstack       obst;
@@ -719,7 +716,6 @@ static ir_node **create_block_schedule_ilp(ir_graph *irg, ir_exec_freq *execfreq
 
        env.env.irg        = irg;
        env.env.obst       = &obst;
-       env.env.execfreqs  = execfreqs;
        env.env.worklist   = NULL;
        env.env.blockcount = 0;
        env.ilpedges       = NEW_ARR_F(ilp_edge_t, 0);
@@ -765,14 +761,12 @@ void be_init_blocksched(void)
 
 ir_node **be_create_block_schedule(ir_graph *irg)
 {
-       ir_exec_freq *execfreqs = be_get_irg_exec_freq(irg);
-
        switch (algo) {
        case BLOCKSCHED_GREEDY:
        case BLOCKSCHED_NAIV:
-               return create_block_schedule_greedy(irg, execfreqs);
+               return create_block_schedule_greedy(irg);
        case BLOCKSCHED_ILP:
-               return create_block_schedule_ilp(irg, execfreqs);
+               return create_block_schedule_ilp(irg);
        }
 
        panic("unknown blocksched algo");
index cacb161..8e6536d 100644 (file)
@@ -238,7 +238,6 @@ static void pre_spill(post_spill_env_t *pse, const arch_register_class_t *cls)
 {
        be_chordal_env_t *chordal_env = &pse->cenv;
        ir_graph         *irg         = pse->irg;
-       ir_exec_freq     *exec_freq   = be_get_irg_exec_freq(irg);
 
        pse->cls                      = cls;
        chordal_env->cls              = cls;
@@ -248,7 +247,7 @@ static void pre_spill(post_spill_env_t *pse, const arch_register_class_t *cls)
        be_assure_live_chk(irg);
 
        if (stat_ev_enabled) {
-               pse->pre_spill_cost = be_estimate_irg_costs(irg, exec_freq);
+               pse->pre_spill_cost = be_estimate_irg_costs(irg);
        }
 
        /* put all ignore registers into the ignore register set. */
@@ -268,12 +267,11 @@ static void post_spill(post_spill_env_t *pse, int iteration)
 {
        be_chordal_env_t *chordal_env = &pse->cenv;
        ir_graph         *irg         = pse->irg;
-       ir_exec_freq     *exec_freq   = be_get_irg_exec_freq(irg);
        int               allocatable_regs = be_get_n_allocatable_regs(irg, chordal_env->cls);
 
        /* some special classes contain only ignore regs, no work to be done */
        if (allocatable_regs > 0) {
-               stat_ev_dbl("bechordal_spillcosts", be_estimate_irg_costs(irg, exec_freq) - pse->pre_spill_cost);
+               stat_ev_dbl("bechordal_spillcosts", be_estimate_irg_costs(irg) - pse->pre_spill_cost);
 
                /*
                        If we have a backend provided spiller, post spill is
index ea5f074..762a8a1 100644 (file)
@@ -32,7 +32,7 @@
 
 #include "debug.h"
 #include "error.h"
-#include "execfreq.h"
+#include "execfreq_t.h"
 #include "irdump_t.h"
 #include "iredges_t.h"
 #include "irgraph.h"
@@ -280,6 +280,8 @@ static int co_get_costs_loop_depth(const ir_node *root, int pos)
        return 1+cost;
 }
 
+static ir_execfreq_int_factors factors;
+
 /**
  * Computes the costs of a copy according to execution frequency
  * @param pos  the argument position of arg in the root arguments
@@ -287,12 +289,10 @@ static int co_get_costs_loop_depth(const ir_node *root, int pos)
  */
 static int co_get_costs_exec_freq(const ir_node *root, int pos)
 {
-       ir_graph     *irg       = get_irn_irg(root);
-       ir_node      *root_bl   = get_nodes_block(root);
-       ir_node      *copy_bl
+       ir_node *root_bl = get_nodes_block(root);
+       ir_node *copy_bl
                = is_Phi(root) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
-       ir_exec_freq *exec_freq = be_get_irg_exec_freq(irg);
-       int           res       = get_block_execfreq_ulong(exec_freq, copy_bl);
+       int      res     = get_block_execfreq_int(&factors, copy_bl);
 
        /* don't allow values smaller than one. */
        return res < 1 ? 1 : res;
index f03355f..5a90acf 100644 (file)
@@ -1358,14 +1358,10 @@ void be_gas_begin_block(const ir_node *block, bool needs_label)
        }
 
        if (be_options.verbose_asm) {
-               int           arity;
-               ir_graph     *irg       = get_irn_irg(block);
-               ir_exec_freq *exec_freq = be_get_irg_exec_freq(irg);
-
                be_emit_pad_comment();
                be_emit_irprintf("/* %+F preds:", block);
 
-               arity = get_irn_arity(block);
+               int arity = get_irn_arity(block);
                if (arity == 0) {
                        be_emit_cstring(" none");
                } else {
@@ -1376,11 +1372,7 @@ void be_gas_begin_block(const ir_node *block, bool needs_label)
                                be_gas_emit_block_name(predblock);
                        }
                }
-               if (exec_freq != NULL) {
-                       be_emit_irprintf(", freq: %.3f",
-                                        get_block_execfreq(exec_freq, block));
-               }
-               be_emit_cstring(" */");
+               be_emit_irprintf(", freq: %.3f */", get_block_execfreq(block));
        }
        be_emit_char('\n');
        be_emit_write_line();
index da0add1..5bdd854 100644 (file)
@@ -57,8 +57,6 @@ void be_assure_live_chk(ir_graph *irg)
 void be_free_birg(ir_graph *irg)
 {
        be_irg_t *birg = be_birg_from_irg(irg);
-       free_execfreq(birg->exec_freq);
-       birg->exec_freq = NULL;
 
        if (birg->lv != NULL) {
                be_liveness_free(birg->lv);
index 9abac30..e87fae5 100644 (file)
@@ -83,7 +83,6 @@ typedef struct be_irg_t {
        ir_graph              *irg;
        be_main_env_t         *main_env;
        be_abi_irg_t          *abi;
-       ir_exec_freq          *exec_freq;
        be_lv_t               *lv;
        be_stack_layout_t      stack_layout;
        unsigned              *allocatable_regs; /**< registers available for the
@@ -112,11 +111,6 @@ static inline be_lv_t *be_get_irg_liveness(const ir_graph *irg)
        return be_birg_from_irg(irg)->lv;
 }
 
-static inline ir_exec_freq *be_get_irg_exec_freq(const ir_graph *irg)
-{
-       return be_birg_from_irg(irg)->exec_freq;
-}
-
 static inline be_abi_irg_t *be_get_irg_abi(const ir_graph *irg)
 {
        return be_birg_from_irg(irg)->abi;
index d5cb7aa..2522cdf 100644 (file)
@@ -46,7 +46,7 @@
 #include "irprintf.h"
 #include "iroptimize.h"
 #include "firmstat.h"
-#include "execfreq.h"
+#include "execfreq_t.h"
 #include "irprofile.h"
 #include "irpass_t.h"
 #include "ircons.h"
@@ -569,18 +569,36 @@ static void be_main_loop(FILE *file_handle, const char *cup_name)
        sprintf(prof_filename, "%.*s%s",
                (int)(sizeof(prof_filename) - sizeof(suffix)), cup_name, suffix);
 
+       bool have_profile = false;
        if (be_options.opt_profile_use) {
                bool res = ir_profile_read(prof_filename);
                if (!res) {
                        fprintf(stderr, "Warning: Couldn't read profile data '%s'\n",
                                prof_filename);
+               } else {
+                       ir_create_execfreqs_from_profile();
+                       ir_profile_free();
+                       have_profile = true;
                }
        }
+
        if (num_birgs > 0 && be_options.opt_profile_generate) {
-               ir_graph *prof_init_irg
-                       = ir_profile_instrument(prof_filename);
+               ir_profile_instrument(prof_filename);
+               ir_graph *prof_init_irg = get_irp_irg(get_irp_n_irgs()-1);
+               assert(prof_init_irg->be_data == NULL);
                initialize_birg(&birgs[num_birgs], prof_init_irg, &env);
                num_birgs++;
+               num_irgs++;
+               assert(num_irgs == get_irp_n_irgs());
+       }
+
+       if (!have_profile) {
+               be_timer_push(T_EXECFREQ);
+               for (i = 0; i < num_irgs; ++i) {
+                       ir_graph *irg = get_irp_irg(i);
+                       ir_estimate_execfreq(irg);
+               }
+               be_timer_pop(T_EXECFREQ);
        }
 
        /* For all graphs */
@@ -661,21 +679,6 @@ static void be_main_loop(FILE *file_handle, const char *cup_name)
 
                dump(DUMP_PREPARED, irg, "code-selection");
 
-               be_timer_push(T_EXECFREQ);
-               /**
-                * Create execution frequencies from profile data or estimate some
-                */
-               if (ir_profile_has_data())
-                       birg->exec_freq = ir_create_execfreqs_from_profile(irg);
-               else {
-                       /* TODO: edges are corrupt for EDGE_KIND_BLOCK after the local
-                        * optimize graph phase merges blocks in the x86 backend */
-                       edges_deactivate(irg);
-                       birg->exec_freq = compute_execfreq(irg, 10);
-               }
-               be_timer_pop(T_EXECFREQ);
-
-
                /* disabled for now, fails for EmptyFor.c and XXEndless.c */
                /* be_live_chk_compare(irg); */
 
@@ -724,8 +727,7 @@ static void be_main_loop(FILE *file_handle, const char *cup_name)
                be_timer_pop(T_VERIFY);
 
                if (stat_ev_enabled) {
-                       stat_ev_dbl("bemain_costs_before_ra",
-                                       be_estimate_irg_costs(irg, birg->exec_freq));
+                       stat_ev_dbl("bemain_costs_before_ra", be_estimate_irg_costs(irg));
                        be_stat_ev("bemain_insns_before_ra", be_count_insns(irg));
                        be_stat_ev("bemain_blocks_before_ra", be_count_blocks(irg));
                }
@@ -733,7 +735,7 @@ static void be_main_loop(FILE *file_handle, const char *cup_name)
                /* Do register allocation */
                be_allocate_registers(irg);
 
-               stat_ev_dbl("bemain_costs_before_ra", be_estimate_irg_costs(irg, birg->exec_freq));
+               stat_ev_dbl("bemain_costs_before_ra", be_estimate_irg_costs(irg));
 
                dump(DUMP_RA, irg, "ra");
 
@@ -813,7 +815,6 @@ static void be_main_loop(FILE *file_handle, const char *cup_name)
 
        arch_env_end_codegeneration(arch_env);
 
-       ir_profile_free();
        be_done_env(&env);
 
        be_info_free();
index 542dfd4..36dad0a 100644 (file)
@@ -37,7 +37,7 @@
 #include "irgwalk.h"
 #include "irtools.h"
 #include "time.h"
-#include "execfreq.h"
+#include "execfreq_t.h"
 #include "bipartite.h"
 
 /* libfirm/ir/be includes */
@@ -90,6 +90,7 @@ typedef struct be_pbqp_alloc_env_t {
        plist_t                     *rpeo;
        unsigned                    *restr_nodes;
        unsigned                    *ife_edge_num;
+       ir_execfreq_int_factors      execfreq_factors;
        be_chordal_env_t            *env;
 } be_pbqp_alloc_env_t;
 
@@ -218,10 +219,9 @@ static void insert_afe_edge(be_pbqp_alloc_env_t *pbqp_alloc_env, ir_node *src_no
        if (get_edge(pbqp, get_irn_idx(src_node), get_irn_idx(trg_node)) == NULL) {
                if (use_exec_freq) {
                        /* get exec_freq for copy_block */
-                       ir_node       *root_bl   = get_nodes_block(src_node);
-                       ir_node       *copy_bl   = is_Phi(src_node) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
-                       ir_exec_freq  *exec_freq = be_get_irg_exec_freq(pbqp_alloc_env->irg);
-                       unsigned long  res       = get_block_execfreq_ulong(exec_freq, copy_bl);
+                       ir_node *root_bl = get_nodes_block(src_node);
+                       ir_node *copy_bl = is_Phi(src_node) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
+                       int      res     = get_block_execfreq_int(&pbqp_alloc_env->execfreq_factors, copy_bl);
 
                        /* create afe-matrix */
                        unsigned row, col;
@@ -641,6 +641,7 @@ static void be_pbqp_coloring(be_chordal_env_t *env)
                dump_ir_graph(irg, buf);
        }
 
+       ir_calculate_execfreq_int_factors(&pbqp_alloc_env.execfreq_factors, irg);
 
        /* initialize pbqp allocation data structure */
        pbqp_alloc_env.pbqp_inst        = alloc_pbqp(get_irg_last_idx(irg));  /* initialize pbqp instance */
index 27c45db..96640cd 100644 (file)
@@ -90,7 +90,6 @@ static struct obstack               obst;
 static ir_graph                    *irg;
 static const arch_register_class_t *cls;
 static be_lv_t                     *lv;
-static const ir_exec_freq          *execfreqs;
 static unsigned                     n_regs;
 static unsigned                    *normal_regs;
 static int                         *congruence_classes;
@@ -314,7 +313,7 @@ static void check_defs(const ir_nodeset_t *live_nodes, float weight,
  */
 static void analyze_block(ir_node *block, void *data)
 {
-       float        weight = (float)get_block_execfreq(execfreqs, block);
+       float        weight = (float)get_block_execfreq(block);
        ir_nodeset_t live_nodes;
        (void) data;
 
@@ -653,7 +652,7 @@ static bool try_optimistic_split(ir_node *to_split, ir_node *before,
        from_reg        = arch_get_irn_register(to_split);
        from_r          = arch_register_get_index(from_reg);
        block           = get_nodes_block(before);
-       split_threshold = (float)get_block_execfreq(execfreqs, block) * SPLIT_DELTA;
+       split_threshold = (float)get_block_execfreq(block) * SPLIT_DELTA;
 
        if (pref_delta < split_threshold*0.5)
                return false;
@@ -764,7 +763,7 @@ static void assign_reg(const ir_node *block, ir_node *node,
        info    = get_allocation_info(node);
        in_node = skip_Proj(node);
        if (req->type & arch_register_req_type_should_be_same) {
-               float weight = (float)get_block_execfreq(execfreqs, block);
+               float weight = (float)get_block_execfreq(block);
                int   arity  = get_irn_arity(in_node);
                int   i;
 
@@ -1554,7 +1553,7 @@ static void adapt_phi_prefs(ir_node *phi)
                        continue;
 
                /* give bonus for already assigned register */
-               weight = (float)get_block_execfreq(execfreqs, pred_block);
+               weight = (float)get_block_execfreq(pred_block);
                r      = arch_register_get_index(reg);
                info->prefs[r] += weight * AFF_PHI;
        }
@@ -1576,7 +1575,7 @@ static void propagate_phi_register(ir_node *phi, unsigned assigned_r)
                ir_node           *pred_block = get_Block_cfgpred_block(block, i);
                unsigned           r;
                float              weight
-                       = (float)get_block_execfreq(execfreqs, pred_block) * AFF_PHI;
+                       = (float)get_block_execfreq(pred_block) * AFF_PHI;
 
                if (info->prefs[assigned_r] >= weight)
                        continue;
@@ -1913,7 +1912,7 @@ static void determine_block_order(void)
                block_costs_t *cost_info;
                ir_node *block = blocklist[--p];
 
-               float execfreq   = (float)get_block_execfreq(execfreqs, block);
+               float execfreq   = (float)get_block_execfreq(block);
                float costs      = execfreq;
                int   n_cfgpreds = get_Block_n_cfgpreds(block);
                int   p2;
@@ -2060,8 +2059,7 @@ static void be_pref_alloc(ir_graph *new_irg)
 
        obstack_init(&obst);
 
-       irg       = new_irg;
-       execfreqs = be_get_irg_exec_freq(irg);
+       irg = new_irg;
 
        /* determine a good coloring order */
        determine_block_order();
index b1b2638..2620ff5 100644 (file)
@@ -152,7 +152,6 @@ static spill_t *collect_spill(be_fec_env_t *env, ir_node *node,
        DB((dbg, DBG_COALESCING, "Slot %d: %+F\n", spill->spillslot, node));
 
        if (is_Phi(node)) {
-               const ir_exec_freq *exec_freq = be_get_irg_exec_freq(env->irg);
                int                 arity     = get_irn_arity(node);
                int                 i;
                for (i = 0; i < arity; ++i) {
@@ -163,7 +162,7 @@ static spill_t *collect_spill(be_fec_env_t *env, ir_node *node,
 
                        /* add an affinity edge */
                        affinty_edge           = OALLOC(&env->obst, affinity_edge_t);
-                       affinty_edge->affinity = get_block_execfreq(exec_freq, block);
+                       affinty_edge->affinity = get_block_execfreq(block);
                        affinty_edge->slot1    = spill->spillslot;
                        affinty_edge->slot2    = arg_spill->spillslot;
                        ARR_APP1(affinity_edge_t*, env->affinity_edges, affinty_edge);
index 2fd24a9..3e0a933 100644 (file)
@@ -106,7 +106,6 @@ struct spill_env_t {
        set              *spills;         /**< all spill_info_t's, which must be
                                               placed */
        spill_info_t    **mem_phis;       /**< set of all spilled phis. */
-       ir_exec_freq     *exec_freq;
 
        unsigned          spill_count;
        unsigned          reload_count;
@@ -160,7 +159,6 @@ spill_env_t *be_new_spill_env(ir_graph *irg)
        env->mem_phis       = NEW_ARR_F(spill_info_t*, 0);
        env->spill_cost     = arch_env->spill_cost;
        env->reload_cost    = arch_env->reload_cost;
-       env->exec_freq      = be_get_irg_exec_freq(irg);
        obstack_init(&env->obst);
 
        env->spill_count       = 0;
@@ -665,7 +663,7 @@ static ir_node *do_remat(spill_env_t *env, ir_node *spilled, ir_node *reloader)
 double be_get_spill_costs(spill_env_t *env, ir_node *to_spill, ir_node *before)
 {
        ir_node *block = get_nodes_block(before);
-       double   freq  = get_block_execfreq(env->exec_freq, block);
+       double   freq  = get_block_execfreq(block);
        (void) to_spill;
 
        return env->spill_cost * freq;
@@ -686,8 +684,8 @@ unsigned be_get_reload_costs_no_weight(spill_env_t *env, const ir_node *to_spill
 
 double be_get_reload_costs(spill_env_t *env, ir_node *to_spill, ir_node *before)
 {
-       ir_node      *block = get_nodes_block(before);
-       double        freq  = get_block_execfreq(env->exec_freq, block);
+       ir_node *block = get_nodes_block(before);
+       double   freq  = get_block_execfreq(block);
 
        if (be_do_remats) {
                /* is the node rematerializable? */
@@ -793,7 +791,7 @@ static void determine_spill_costs(spill_env_t *env, spill_info_t *spillinfo)
        }
 
        spill_block    = get_nodes_block(insn);
-       spill_execfreq = get_block_execfreq(env->exec_freq, spill_block);
+       spill_execfreq = get_block_execfreq(spill_block);
 
        if (spillinfo->spilled_phi) {
                /* TODO calculate correct costs...
@@ -811,7 +809,7 @@ static void determine_spill_costs(spill_env_t *env, spill_info_t *spillinfo)
                s               = spillinfo->spills;
                for ( ; s != NULL; s = s->next) {
                        ir_node *spill_block = get_block(s->after);
-                       double   freq = get_block_execfreq(env->exec_freq, spill_block);
+                       double   freq = get_block_execfreq(spill_block);
 
                        spills_execfreq += freq;
                }
@@ -875,9 +873,8 @@ void make_spill_locations_dominate_irn(spill_env_t *env, ir_node *irn)
 
 void be_insert_spills_reloads(spill_env_t *env)
 {
-       const ir_exec_freq *exec_freq  = env->exec_freq;
-       size_t              n_mem_phis = ARR_LEN(env->mem_phis);
-       size_t              i;
+       size_t n_mem_phis = ARR_LEN(env->mem_phis);
+       size_t i;
 
        be_timer_push(T_RA_SPILL_APPLY);
 
@@ -936,7 +933,7 @@ void be_insert_spills_reloads(spill_env_t *env)
                                remat_cost_delta      = remat_cost - env->reload_cost;
                                rld->remat_cost_delta = remat_cost_delta;
                                block                 = is_Block(reloader) ? reloader : get_nodes_block(reloader);
-                               freq                  = get_block_execfreq(exec_freq, block);
+                               freq                  = get_block_execfreq(block);
                                all_remat_costs      += remat_cost_delta * freq;
                                DBG((dbg, LEVEL_2, "\tremat costs delta before %+F: "
                                     "%d (rel %f)\n", reloader, remat_cost_delta,
index 3d25381..189c1ab 100644 (file)
@@ -120,8 +120,7 @@ void be_do_stat_reg_pressure(ir_graph *irg, const arch_register_class_t *cls)
 
 
 typedef struct estimate_irg_costs_env_t {
-       ir_exec_freq     *execfreqs;
-       double           costs;
+       double costs;
 } estimate_irg_costs_env_t;
 
 static void estimate_block_costs(ir_node *block, void *data)
@@ -133,15 +132,13 @@ static void estimate_block_costs(ir_node *block, void *data)
                costs += arch_get_op_estimated_cost(node);
        }
 
-       env->costs += costs * get_block_execfreq(env->execfreqs, block);
+       env->costs += costs * get_block_execfreq(block);
 }
 
-double be_estimate_irg_costs(ir_graph *irg, ir_exec_freq *execfreqs)
+double be_estimate_irg_costs(ir_graph *irg)
 {
        estimate_irg_costs_env_t env;
-
-       env.execfreqs = execfreqs;
-       env.costs     = 0.0;
+       env.costs = 0.0;
 
        irg_block_walk_graph(irg, estimate_block_costs, NULL, &env);
 
index 47f984b..dcfcf34 100644 (file)
@@ -64,7 +64,7 @@ void be_do_stat_reg_pressure(ir_graph *irg, const arch_register_class_t *cls);
  * Gives a cost estimate for the program (based on execution frequencies)
  * and backend op_estimated_cost
  */
-double be_estimate_irg_costs(ir_graph *irg, ir_exec_freq *execfreqs);
+double be_estimate_irg_costs(ir_graph *irg);
 
 /**
  * return number of "instructions" (=nodes without some virtual nodes like Proj,
index 221b055..e391b27 100644 (file)
@@ -1560,27 +1560,23 @@ static void ia32_emit_align_label(void)
 static int should_align_block(const ir_node *block)
 {
        static const double DELTA = .0001;
-       ir_graph     *irg         = get_irn_irg(block);
-       ir_exec_freq *exec_freq   = be_get_irg_exec_freq(irg);
-       ir_node      *prev        = get_prev_block_sched(block);
-       double        block_freq;
-       double        prev_freq = 0;  /**< execfreq of the fallthrough block */
-       double        jmp_freq  = 0;  /**< execfreq of all non-fallthrough blocks */
-       int           i, n_cfgpreds;
-
-       if (exec_freq == NULL)
-               return 0;
+       ir_node *prev      = get_prev_block_sched(block);
+       double   prev_freq = 0;  /**< execfreq of the fallthrough block */
+       double   jmp_freq  = 0;  /**< execfreq of all non-fallthrough blocks */
+       double   block_freq;
+       int      i, n_cfgpreds;
+
        if (ia32_cg_config.label_alignment_factor <= 0)
                return 0;
 
-       block_freq = get_block_execfreq(exec_freq, block);
+       block_freq = get_block_execfreq(block);
        if (block_freq < DELTA)
                return 0;
 
        n_cfgpreds = get_Block_n_cfgpreds(block);
        for (i = 0; i < n_cfgpreds; ++i) {
                const ir_node *pred      = get_Block_cfgpred_block(block, i);
-               double         pred_freq = get_block_execfreq(exec_freq, pred);
+               double         pred_freq = get_block_execfreq(pred);
 
                if (pred == prev) {
                        prev_freq += pred_freq;
index 1095594..1575e3d 100644 (file)
@@ -54,6 +54,7 @@
 #include "debugger.h"
 #include "be_t.h"
 #include "irtools.h"
+#include "execfreq_t.h"
 
 /* returns the firm root */
 lc_opt_entry_t *firm_opt_get_root(void)
@@ -113,6 +114,8 @@ void ir_init(void)
 
        init_irnode();
 
+       init_execfreq();
+
 #ifdef DEBUG_libfirm
        /* integrated debugger extension */
        firm_init_debugger();
@@ -124,6 +127,7 @@ void ir_finish(void)
 #ifdef DEBUG_libfirm
        firm_finish_debugger();
 #endif
+       exit_execfreq();
        firm_be_finish();
 
        free_ir_prog();
index 007f223..20bd61e 100644 (file)
 #include "irdump_t.h"
 #include "irnode_t.h"
 #include "ircons_t.h"
-#include "execfreq.h"
-#include "typerep.h"
-
+#include "execfreq_t.h"
 #include "irprofile.h"
+#include "typerep.h"
 
 /* Instrument blocks walker. */
 typedef struct block_id_walker_data_t {
@@ -56,12 +55,6 @@ typedef struct block_assoc_t {
        unsigned int *counters;  /**< block execution counts */
 } block_assoc_t;
 
-typedef struct intialize_execfreq_env_t {
-       ir_graph *irg;
-       ir_exec_freq *execfreqs;
-       double freq_factor;
-} initialize_execfreq_env_t;
-
 /* minimal execution frequency (an execfreq of 0 confuses algos) */
 #define MIN_EXECFREQ 0.00001
 
@@ -80,7 +73,7 @@ DEBUG_ONLY(static firm_dbg_module_t *dbg;)
  */
 typedef struct execcount_t {
        unsigned long block; /**< block id */
-       unsigned int  count; /**< execution count */
+       uint32_t      count; /**< execution count */
 } execcount_t;
 
 /**
@@ -94,6 +87,22 @@ static int cmp_execcount(const void *a, const void *b, size_t size)
        return ea->block != eb->block;
 }
 
+uint32_t ir_profile_get_block_execcount(const ir_node *block)
+{
+       execcount_t *ec, query;
+
+       query.block = get_irn_node_nr(block);
+       ec = set_find(execcount_t, profile, &query, sizeof(query), query.block);
+
+       if (ec != NULL) {
+               return ec->count;
+       } else {
+               DBG((dbg, LEVEL_3,
+                       "Warning: Profile contains no data for %+F\n", block));
+               return 0;
+       }
+}
+
 /**
  * Block walker, count number of blocks.
  */
@@ -463,7 +472,7 @@ static ir_entity *new_static_string_entity(ident *name, const char *string)
        return result;
 }
 
-ir_graph *ir_profile_instrument(const char *filename)
+void ir_profile_instrument(const char *filename)
 {
        int n, n_blocks = 0;
        ident *counter_id, *filename_id;
@@ -474,7 +483,7 @@ ir_graph *ir_profile_instrument(const char *filename)
        /* Don't do anything for modules without code. Else the linker will
         * complain. */
        if (get_irp_n_irgs() == 0)
-               return NULL;
+               return;
 
        /* count the number of blocks first */
        n_blocks = get_irp_n_blocks();
@@ -495,17 +504,11 @@ ir_graph *ir_profile_instrument(const char *filename)
                instrument_irg(irg, bblock_counts, &wd);
        }
 
-       return gen_initializer_irg(ent_filename, bblock_counts, n_blocks);
+       gen_initializer_irg(ent_filename, bblock_counts, n_blocks);
 }
 
-static unsigned int *
-parse_profile(const char *filename, unsigned int num_blocks)
+static unsigned int *parse_profile(const char *filename, unsigned int num_blocks)
 {
-       unsigned int *result = NULL;
-       char          buf[8];
-       size_t        ret;
-       unsigned int  i;
-
        FILE *f = fopen(filename, "rb");
        if (!f) {
                DBG((dbg, LEVEL_2, "Failed to open profile file (%s)\n", filename));
@@ -513,7 +516,9 @@ parse_profile(const char *filename, unsigned int num_blocks)
        }
 
        /* check header */
-       ret = fread(buf, 8, 1, f);
+       uint32_t *result = NULL;
+       char      buf[8];
+       size_t    ret = fread(buf, 8, 1, f);
        if (ret == 0 || strncmp(buf, "firmprof", 8) != 0) {
                DBG((dbg, LEVEL_2, "Broken fileheader in profile\n"));
                goto end;
@@ -523,7 +528,7 @@ parse_profile(const char *filename, unsigned int num_blocks)
 
        /* The profiling output format is defined to be a sequence of integer
         * values stored little endian format. */
-       for (i = 0; i < num_blocks; ++i) {
+       for (unsigned i = 0; i < num_blocks; ++i) {
                unsigned char bytes[4];
 
                if ((ret = fread(bytes, 1, 4, f)) < 1)
@@ -562,35 +567,12 @@ static void block_associate_walker(ir_node *bb, void *env)
 
 static void irp_associate_blocks(block_assoc_t *env)
 {
-       int n;
-       for (n = get_irp_n_irgs() - 1; n >= 0; --n) {
+       for (int n = get_irp_n_irgs() - 1; n >= 0; --n) {
                ir_graph *irg = get_irp_irg(n);
                irg_block_walk_graph(irg, block_associate_walker, NULL, env);
        }
 }
 
-bool ir_profile_read(const char *filename)
-{
-       block_assoc_t env;
-       FIRM_DBG_REGISTER(dbg, "firm.ir.profile");
-
-       env.i = 0;
-       env.counters = parse_profile(filename, get_irp_n_blocks());
-       if (!env.counters)
-               return false;
-
-       if (profile)
-               ir_profile_free();
-       profile = new_set(cmp_execcount, 16);
-
-       irp_associate_blocks(&env);
-       xfree(env.counters);
-
-       /* register the vcg hook */
-       hook = dump_add_node_info_callback(dump_profile_node_info, NULL);
-       return true;
-}
-
 void ir_profile_free(void)
 {
        if (profile) {
@@ -604,37 +586,40 @@ void ir_profile_free(void)
        }
 }
 
-bool ir_profile_has_data(void)
+bool ir_profile_read(const char *filename)
 {
-       return profile != NULL;
-}
+       block_assoc_t env;
+       FIRM_DBG_REGISTER(dbg, "firm.ir.profile");
 
-unsigned int ir_profile_get_block_execcount(const ir_node *block)
-{
-       execcount_t *ec, query;
+       unsigned n_blocks = get_irp_n_blocks();
+       env.i        = 0;
+       env.counters = parse_profile(filename, n_blocks);
+       if (!env.counters)
+               return false;
 
-       if (!ir_profile_has_data())
-               return 1;
+       ir_profile_free();
+       profile = new_set(cmp_execcount, 16);
 
-       query.block = get_irn_node_nr(block);
-       ec = set_find(execcount_t, profile, &query, sizeof(query), query.block);
+       irp_associate_blocks(&env);
+       xfree(env.counters);
 
-       if (ec != NULL) {
-               return ec->count;
-       } else {
-               DBG((dbg, LEVEL_3,
-                       "Warning: Profile contains no data for %+F\n", block));
-               return 1;
-       }
+       /* register the vcg hook */
+       hook = dump_add_node_info_callback(dump_profile_node_info, NULL);
+       return 1;
 }
 
+typedef struct initialize_execfreq_env_t {
+       double freq_factor;
+} initialize_execfreq_env_t;
+
 static void initialize_execfreq(ir_node *block, void *data)
 {
-       initialize_execfreq_env_t *env = (initialize_execfreq_env_t*)data;
+       const initialize_execfreq_env_t *env
+               = (const initialize_execfreq_env_t*) data;
+       ir_graph *irg = get_irn_irg(block);
        double freq;
 
-       if (block == get_irg_start_block(env->irg)
-          || block == get_irg_end_block(env->irg)) {
+       if (block == get_irg_start_block(irg) || block == get_irg_end_block(irg)) {
                freq = 1.0;
        } else {
                freq = ir_profile_get_block_execcount(block);
@@ -643,29 +628,29 @@ static void initialize_execfreq(ir_node *block, void *data)
                        freq = MIN_EXECFREQ;
        }
 
-       set_execfreq(env->execfreqs, block, freq);
+       set_block_execfreq(block, freq);
 }
 
-ir_exec_freq *ir_create_execfreqs_from_profile(ir_graph *irg)
+static void ir_set_execfreqs_from_profile(ir_graph *irg)
 {
-       ir_node *start_block;
-       initialize_execfreq_env_t env;
-       unsigned count;
-
-       env.irg = irg;
-       env.execfreqs = create_execfreq(irg);
-
        /* Find the first block containing instructions */
-       start_block = get_irg_start_block(irg);
-       count = ir_profile_get_block_execcount(start_block);
+       ir_node *start_block = get_irg_start_block(irg);
+       unsigned count       = ir_profile_get_block_execcount(start_block);
        if (count == 0) {
                /* the function was never executed, so fallback to estimated freqs */
-               free_execfreq(env.execfreqs);
-               return compute_execfreq(irg, 10);
+               ir_estimate_execfreq(irg);
+               return;
        }
 
+       initialize_execfreq_env_t env;
        env.freq_factor = 1.0 / count;
        irg_block_walk_graph(irg, initialize_execfreq, NULL, &env);
+}
 
-       return env.execfreqs;
+void ir_create_execfreqs_from_profile(void)
+{
+       for (int n = get_irp_n_irgs() - 1; n >= 0; --n) {
+               ir_graph *irg = get_irp_irg(n);
+               ir_set_execfreqs_from_profile(irg);
+       }
 }
index 139b49b..507179c 100644 (file)
 #define FIRM_BE_BEPROFILE_H
 
 #include <stdbool.h>
+#include <stdint.h>
 #include "irgraph.h"
 #include "irnode.h"
 
 /**
- * Instruments irgs with profile code
- *
- * @param filename  The name of the output file for the profile information
- * @param flags     Additional flags
- *
- * @return The irg doing the profile initialization.
+ * Instruments all irgs in the program with profile code.
+ * The final code will have a counter for each basic block which is
+ * incremented in that block. After the program has run the info is written
+ * to @p filename.
  */
-ir_graph *ir_profile_instrument(const char *filename);
+void ir_profile_instrument(const char *filename);
 
 /**
  * Reads the corresponding profile info file if it exists and returns a
@@ -55,16 +54,11 @@ void ir_profile_free(void);
 /**
  * Get block execution count as determined by profiling
  */
-unsigned int ir_profile_get_block_execcount(const ir_node *block);
+uint32_t ir_profile_get_block_execcount(const ir_node *block);
 
 /**
  * Initializes execution frequencies of all irgs based on profile data
  */
-ir_exec_freq *ir_create_execfreqs_from_profile(ir_graph *irg);
-
-/**
- * Tells whether profile module has acquired data
- */
-bool ir_profile_has_data(void);
+void ir_create_execfreqs_from_profile(void);
 
 #endif