Fixed the fix for the memory leak
[libfirm] / ir / ana / execfreq.c
1 /*
2  * Copyright (C) 1995-2007 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Compute an estimate of basic block executions.
23  * @author      Adam M. Szalkowski
24  * @date        28.05.2006
25  * @version     $Id$
26  */
27 #ifdef HAVE_CONFIG_H
28 #include "config.h"
29 #endif
30
31 #undef USE_GSL
32
33 #include <stdio.h>
34 #include <string.h>
35 #include <limits.h>
36 #include <math.h>
37
38 #ifdef USE_GSL
39 #include <gsl/gsl_linalg.h>
40 #include <gsl/gsl_vector.h>
41 #else
42 #include "gaussjordan.h"
43 #endif
44
45 #include "firm_common_t.h"
46 #include "set.h"
47 #include "hashptr.h"
48
49 #include "irprog_t.h"
50 #include "irgraph_t.h"
51 #include "irnode_t.h"
52 #include "irloop.h"
53 #include "irgwalk.h"
54 #include "iredges.h"
55 #include "irprintf.h"
56 #include "irhooks.h"
57
58 #include "execfreq.h"
59
60 #define set_foreach(s,i) for((i)=set_first((s)); (i); (i)=set_next((s)))
61
62 #define MAX_INT_FREQ 1000000
63
64 typedef struct _freq_t {
65         const ir_node    *irn;
66         double            freq;
67 } freq_t;
68
69
70 typedef struct _walkerdata_t {
71   set    *set;
72   size_t  idx;
73 } walkerdata_t;
74
75 struct ir_exec_freq {
76         set *set;
77         hook_entry_t hook;
78         double max;
79         double min_non_zero;
80         double m, b;
81         unsigned infeasible : 1;
82 };
83
84 static int
85 cmp_freq(const void *a, const void *b, size_t size)
86 {
87         const freq_t *p = a;
88         const freq_t *q = b;
89
90         return !(p->irn == q->irn);
91 }
92
93 static freq_t *
94 set_find_freq(set * set, const ir_node * irn)
95 {
96         freq_t     query;
97
98         query.irn = irn;
99         return set_find(set, &query, sizeof(query), HASH_PTR(irn));
100 }
101
102 static freq_t *
103 set_insert_freq(set * set, const ir_node * irn)
104 {
105         freq_t query;
106
107         query.irn = irn;
108         query.freq = 0.0;
109         return set_insert(set, &query, sizeof(query), HASH_PTR(irn));
110 }
111
112 double
113 get_block_execfreq(const ir_exec_freq *ef, const ir_node * irn)
114 {
115         if(!ef->infeasible) {
116                 set *freqs = ef->set;
117                 freq_t *freq;
118                 assert(is_Block(irn));
119                 freq = set_find_freq(freqs, irn);
120                 assert(freq);
121
122                 assert(freq->freq >= 0);
123                 return freq->freq;
124         }
125
126         return 1.0;
127 }
128
129 unsigned long
130 get_block_execfreq_ulong(const ir_exec_freq *ef, const ir_node *bb)
131 {
132         double f       = get_block_execfreq(ef, bb);
133         int res        = (int) (f > ef->min_non_zero ? ef->m * f + ef->b : 1.0);
134
135         // printf("%20.6f %10d\n", f, res);
136         return res;
137 }
138
139 #define EPSILON         0.0001
140 #define UNDEF(x)    !(x > EPSILON)
141
142 static void
143 block_walker(ir_node * bb, void * data)
144 {
145   walkerdata_t  *wd = data;
146
147   set_insert_freq(wd->set, bb);
148   set_irn_link(bb, (void*)wd->idx++);
149 }
150
151 #ifdef USE_GSL
152 static gsl_vector *
153 solve_lgs(double * a_data, double * b_data, size_t size)
154 {
155   gsl_matrix_view m
156     = gsl_matrix_view_array (a_data, size, size);
157
158   gsl_vector_view b
159     = gsl_vector_view_array (b_data, size);
160
161   gsl_vector *x = gsl_vector_alloc (size);
162
163   int s;
164
165   gsl_permutation * p = gsl_permutation_alloc (size);
166
167   gsl_linalg_LU_decomp (&m.matrix, p, &s);
168
169   gsl_linalg_LU_solve (&m.matrix, p, &b.vector, x);
170
171   gsl_permutation_free (p);
172
173   return x;
174 }
175 #else
176 static double *
177 solve_lgs(double * A, double * b, size_t size)
178 {
179   if(firm_gaussjordansolve(A,b,size) == 0) {
180     return b;
181   } else {
182     return NULL;
183   }
184 }
185 #endif /* USE_GSL */
186
187 static double
188 get_cf_probability(ir_node *bb, int pos, double loop_weight)
189 {
190         double  sum = 0.0;
191         double  cur = 0.0;
192         const ir_node *pred = get_Block_cfgpred_block(bb, pos);
193         const ir_loop *pred_loop = get_irn_loop(pred);
194         int pred_depth = get_loop_depth(pred_loop);
195         const ir_edge_t *edge;
196
197         cur = get_loop_depth(get_irn_loop(bb)) < get_loop_depth(get_irn_loop(pred)) ? 1.0 : loop_weight;
198
199         foreach_block_succ(pred, edge) {
200                 const ir_node *block = get_edge_src_irn(edge);
201                 const ir_loop *loop = get_irn_loop(block);
202                 int depth = get_loop_depth(loop);
203                 sum += depth < pred_depth ? 1.0 : loop_weight;
204         }
205
206         return cur/sum;
207 }
208
209 static void exec_freq_node_info(void *ctx, FILE *f, const ir_node *irn)
210 {
211         if(is_Block(irn)) {
212                 ir_exec_freq *ef = ctx;
213                 fprintf(f, "execution frequency: %g/%lu\n", get_block_execfreq(ef, irn), get_block_execfreq_ulong(ef, irn));
214         }
215 }
216
217 ir_exec_freq *create_execfreq(ir_graph *irg)
218 {
219         ir_exec_freq *execfreq = xmalloc(sizeof(execfreq[0]));
220         memset(execfreq, 0, sizeof(execfreq[0]));
221         execfreq->set = new_set(cmp_freq, 32);
222
223         memset(&execfreq->hook, 0, sizeof(execfreq->hook));
224         execfreq->hook.context = execfreq;
225         execfreq->hook.hook._hook_node_info = exec_freq_node_info;
226         register_hook(hook_node_info, &execfreq->hook);
227
228         return execfreq;
229 }
230
231 void set_execfreq(ir_exec_freq *execfreq, const ir_node *block, double freq)
232 {
233         freq_t *f = set_insert_freq(execfreq->set, block);
234         f->freq = freq;
235 }
236
237 ir_exec_freq *
238 compute_execfreq(ir_graph * irg, double loop_weight)
239 {
240         size_t        size;
241         double       *matrix;
242         double       *rhs;
243         int           i;
244         freq_t       *freq;
245         walkerdata_t  wd;
246         ir_exec_freq  *ef;
247         set          *freqs;
248 #ifdef USE_GSL
249         gsl_vector   *x;
250 #else
251         double       *x;
252 #endif
253
254         ef = xmalloc(sizeof(ef[0]));
255         memset(ef, 0, sizeof(ef[0]));
256         ef->min_non_zero = 1e50; /* initialize with a reasonable large number. */
257         freqs = ef->set = new_set(cmp_freq, 32);
258
259         construct_cf_backedges(irg);
260         edges_assure(irg);
261
262         wd.idx = 0;
263         wd.set = freqs;
264
265         irg_block_walk_graph(irg, block_walker, NULL, &wd);
266
267         size = set_count(freqs);
268         matrix = xmalloc(size*size*sizeof(*matrix));
269         memset(matrix, 0, size*size*sizeof(*matrix));
270         rhs = xmalloc(size*sizeof(*rhs));
271         memset(rhs, 0, size*sizeof(*rhs));
272
273         set_foreach(freqs, freq) {
274                 ir_node *bb = (ir_node *)freq->irn;
275                 size_t  idx = (int)get_irn_link(bb);
276
277                 matrix[idx * (size + 1)] = -1.0;
278
279                 if (bb == get_irg_start_block(irg)) {
280                         rhs[(int)get_irn_link(bb)] = -1.0;
281                         continue;
282                 }
283
284                 for(i = get_Block_n_cfgpreds(bb) - 1; i >= 0; --i) {
285                         ir_node *pred    = get_Block_cfgpred_block(bb, i);
286                         size_t  pred_idx = (int)get_irn_link(pred);
287
288                         //      matrix[pred_idx + idx*size] += 1.0/(double)get_Block_n_cfg_outs(pred);
289                         matrix[pred_idx + idx * size] += get_cf_probability(bb, i, loop_weight);
290                 }
291         }
292
293         x = solve_lgs(matrix, rhs, size);
294         if (x == NULL) {
295                 ef->infeasible = 1;
296         } else {
297                 ef->max = 0.0;
298
299                 set_foreach(freqs, freq) {
300                         const ir_node *bb = freq->irn;
301                         size_t        idx = PTR_TO_INT(get_irn_link(bb));
302
303 #ifdef USE_GSL
304                         freq->freq = UNDEF(gsl_vector_get(x, idx)) ? EPSILON : gsl_vector_get(x, idx);
305 #else
306                         freq->freq = UNDEF(x[idx]) ? EPSILON : x[idx];
307 #endif
308
309                         /* get the maximum exec freq */
310                         ef->max = MAX(ef->max, freq->freq);
311
312                         /* Get the minimum non-zero execution frequency. */
313                         if(freq->freq > 0.0)
314                                 ef->min_non_zero = MIN(ef->min_non_zero, freq->freq);
315                 }
316
317                 /* compute m and b of the transformation used to convert the doubles into scaled ints */
318                 {
319                         double smallest_diff = 1.0;
320
321                         double l2 = ef->min_non_zero;
322                         double h2 = ef->max;
323                         double l1 = 1.0;
324                         double h1 = MAX_INT_FREQ;
325
326                         double *fs = malloc(set_count(freqs) * sizeof(fs[0]));
327                         int i, j, n = 0;
328
329                         set_foreach(freqs, freq)
330                                 fs[n++] = freq->freq;
331
332                         /*
333                          * find the smallest difference of the execution frequencies
334                          * we try to ressolve it with 1 integer.
335                          */
336                         for(i = 0; i < n; ++i) {
337                                 if(fs[i] <= 0.0)
338                                         continue;
339
340                                 for(j = i + 1; j < n; ++j) {
341                                         double diff = fabs(fs[i] - fs[j]);
342
343                                         if(!UNDEF(diff))
344                                                 smallest_diff = MIN(diff, smallest_diff);
345                                 }
346                         }
347
348                         /* according to that the slope of the translation function is 1.0 / smallest diff */
349                         ef->m = 1.0 / smallest_diff;
350
351                         /* the abscissa is then given by */
352                         ef->b = l1 - ef->m * l2;
353
354                         /*
355                          * if the slope is so high that the largest integer would be larger than MAX_INT_FREQ
356                          * set the largest int freq to that upper limit and recompute the translation function
357                          */
358                         if(ef->m * h2 + ef->b > MAX_INT_FREQ) {
359                                 ef->m = (h1 - l1) / (h2 - l2);
360                                 ef->b = l1 - ef->m * l2;
361                         }
362
363                         // printf("smallest_diff: %g, l1: %f, h1: %f, l2: %f, h2: %f, m: %f, b: %f\n", smallest_diff, l1, h1, l2, h2, ef->m, ef->b);
364                         free(fs);
365                 }
366
367 #ifdef USE_GSL
368                 gsl_vector_free(x);
369 #endif
370                 memset(&ef->hook, 0, sizeof(ef->hook));
371                 ef->hook.context = ef;
372                 ef->hook.hook._hook_node_info = exec_freq_node_info;
373                 register_hook(hook_node_info, &ef->hook);
374         }
375
376         free(matrix);
377         free(rhs);
378
379         return ef;
380 }
381
382 void
383 free_execfreq(ir_exec_freq *ef)
384 {
385         del_set(ef->set);
386         unregister_hook(hook_node_info, &ef->hook);
387         free(ef);
388 }