+ir_exec_freq *
+compute_execfreq(ir_graph * irg, double loop_weight)
+{
+ gs_matrix_t *mat;
+ int size;
+ int idx;
+ freq_t *freq, *s, *e;
+ ir_exec_freq *ef;
+ set *freqs;
+ dfs_t *dfs;
+ double *x;
+ double norm;
+
+ /*
+ * compute a DFS.
+ * using a toposort on the CFG (without back edges) will propagate
+ * the values better for the gauss/seidel iteration.
+ * => they can "flow" from start to end.
+ */
+ dfs = dfs_new(&absgraph_irg_cfg_succ, irg);
+ ef = xmalloc(sizeof(ef[0]));
+ memset(ef, 0, sizeof(ef[0]));
+ ef->min_non_zero = HUGE_VAL; /* initialize with a reasonable large number. */
+ freqs = ef->set = new_set(cmp_freq, dfs_get_n_nodes(dfs));
+
+ /*
+ * Populate the exec freq set.
+ * The DFS cannot be used alone, since the CFG might not be connected
+ * due to unreachable code.
+ */
+ irg_block_walk_graph(irg, collect_blocks, NULL, freqs);
+
+ construct_cf_backedges(irg);
+ /* TODO: edges are corrupt for EDGE_KIND_BLOCK after the local optimize
+ graph phase merges blocks in the x86 backend */
+ edges_deactivate(irg);
+ edges_activate(irg);
+ /* edges_assure(irg); */
+
+ size = dfs_get_n_nodes(dfs);
+ mat = gs_new_matrix(size, size);
+ x = xmalloc(size*sizeof(*x));
+
+ for (idx = dfs_get_n_nodes(dfs) - 1; idx >= 0; --idx) {
+ ir_node *bb = (ir_node *) dfs_get_post_num_node(dfs, size - idx - 1);
+ freq_t *freq;
+ int i;
+
+ freq = set_insert_freq(freqs, bb);
+ freq->idx = idx;
+
+ gs_matrix_set(mat, idx, idx, -1.0);
+ for(i = get_Block_n_cfgpreds(bb) - 1; i >= 0; --i) {
+ ir_node *pred = get_Block_cfgpred_block(bb, i);
+ int pred_idx = size - dfs_get_post_num(dfs, pred) - 1;
+
+ gs_matrix_set(mat, idx, pred_idx, get_cf_probability(bb, i, loop_weight));
+ }
+ }
+
+ /*
+ * Add a loop from end to start.
+ * The problem is then an eigenvalue problem:
+ * Solve A*x = 1*x => (A-I)x = 0
+ */
+ s = set_find_freq(freqs, get_irg_start_block(irg));
+ e = set_find_freq(freqs, get_irg_end_block(irg));
+ if (e->idx >= 0)
+ gs_matrix_set(mat, s->idx, e->idx, 1.0);
+
+ /* solve the system and delete the matrix */
+ solve_lgs(mat, x, size);
+ gs_delete_matrix(mat);
+
+ /*
+ * compute the normalization factor.
+ * 1.0 / exec freq of start block.
+ */
+ norm = x[s->idx] != 0.0 ? 1.0 / x[s->idx] : 1.0;
+
+ ef->max = 0.0;
+ set_foreach(freqs, freq) {
+ int idx = freq->idx;
+
+ /* take abs because it sometimes can be -0 in case of endless loops */
+ freq->freq = fabs(x[idx]) * norm;
+
+ /* get the maximum exec freq */
+ ef->max = MAX(ef->max, freq->freq);
+
+ /* Get the minimum non-zero execution frequency. */
+ if(freq->freq > 0.0)
+ ef->min_non_zero = MIN(ef->min_non_zero, freq->freq);
+ }
+
+ /* compute m and b of the transformation used to convert the doubles into scaled ints */
+ {
+ double smallest_diff = 1.0;
+
+ double l2 = ef->min_non_zero;
+ double h2 = ef->max;
+ double l1 = 1.0;
+ double h1 = MAX_INT_FREQ;
+
+ double *fs = malloc(set_count(freqs) * sizeof(fs[0]));
+ int i, j, n = 0;
+
+ set_foreach(freqs, freq)
+ fs[n++] = freq->freq;
+
+ /*
+ * find the smallest difference of the execution frequencies
+ * we try to ressolve it with 1 integer.
+ */
+ for(i = 0; i < n; ++i) {
+ if(fs[i] <= 0.0)
+ continue;
+
+ for(j = i + 1; j < n; ++j) {
+ double diff = fabs(fs[i] - fs[j]);
+
+ if(!UNDEF(diff))
+ smallest_diff = MIN(diff, smallest_diff);
+ }
+ }
+
+ /* according to that the slope of the translation function is 1.0 / smallest diff */
+ ef->m = 1.0 / smallest_diff;
+
+ /* the abscissa is then given by */
+ ef->b = l1 - ef->m * l2;
+
+ /*
+ * if the slope is so high that the largest integer would be larger than MAX_INT_FREQ
+ * set the largest int freq to that upper limit and recompute the translation function
+ */
+ if(ef->m * h2 + ef->b > MAX_INT_FREQ) {
+ ef->m = (h1 - l1) / (h2 - l2);
+ ef->b = l1 - ef->m * l2;
+ }
+
+ free(fs);
+ }
+
+ memset(&ef->hook, 0, sizeof(ef->hook));
+ ef->hook.context = ef;
+ ef->hook.hook._hook_node_info = exec_freq_node_info;
+ register_hook(hook_node_info, &ef->hook);
+
+ xfree(x);
+
+ return ef;