copy register on benode lowering to Load
[libfirm] / ir / be / becopyopt.c
1 /**
2  * Author:      Daniel Grund
3  * Date:                12.04.2005
4  * Copyright:   (c) Universitaet Karlsruhe
5  * Licence:     This file protected by GPL -  GNU GENERAL PUBLIC LICENSE.
6  */
7 #ifdef HAVE_CONFIG_H
8 #include "config.h"
9 #endif
10 #ifdef HAVE_ALLOCA_H
11 #include <alloca.h>
12 #endif
13 #ifdef HAVE_MALLOC_H
14 #include <malloc.h>
15 #endif
16
17 #include "xmalloc.h"
18 #include "debug.h"
19 #include "pmap.h"
20 #include "irgraph.h"
21 #include "irgwalk.h"
22 #include "irprog.h"
23 #include "irloop_t.h"
24 #include "iredges_t.h"
25 #include "phiclass.h"
26
27 #include "bearch.h"
28 #include "beutil.h"
29 #include "becopyopt_t.h"
30 #include "becopystat.h"
31
32
33 static firm_dbg_module_t *dbg = NULL;
34
35 /**
36  * Determines a maximum weighted independent set with respect to
37  * the interference and conflict edges of all nodes in a qnode.
38  */
39 static int ou_max_ind_set_costs(unit_t *ou) {
40         be_chordal_env_t *chordal_env = ou->co->cenv;
41         ir_node **safe, **unsafe;
42         int i, o, safe_count, safe_costs, unsafe_count, *unsafe_costs;
43         bitset_t *curr;
44         int max, pos, curr_weight, best_weight = 0;
45
46         /* assign the nodes into two groups.
47          * safe: node has no interference, hence it is in every max stable set.
48          * unsafe: node has an interference
49          */
50         safe = alloca((ou->node_count-1) * sizeof(*safe));
51         safe_costs = 0;
52         safe_count = 0;
53         unsafe = alloca((ou->node_count-1) * sizeof(*unsafe));
54         unsafe_costs = alloca((ou->node_count-1) * sizeof(*unsafe_costs));
55         unsafe_count = 0;
56         for(i=1; i<ou->node_count; ++i) {
57                 int is_safe = 1;
58                 for(o=1; o<ou->node_count; ++o) {
59                         if (i==o)
60                                 continue;
61                         if (nodes_interfere(chordal_env, ou->nodes[i], ou->nodes[o])) {
62                                 unsafe_costs[unsafe_count] = ou->costs[i];
63                                 unsafe[unsafe_count] = ou->nodes[i];
64                                 ++unsafe_count;
65                                 is_safe = 0;
66                                 break;
67                         }
68                 }
69                 if (is_safe) {
70                         safe_costs += ou->costs[i];
71                         safe[safe_count++] = ou->nodes[i];
72                 }
73         }
74
75
76         /* now compute the best set out of the unsafe nodes*/
77         if (unsafe_count > MIS_HEUR_TRIGGER) {
78                 bitset_t *best = bitset_alloca(unsafe_count);
79                 /* Heuristik: Greedy trial and error form index 0 to unsafe_count-1 */
80                 for (i=0; i<unsafe_count; ++i) {
81                         bitset_set(best, i);
82                         /* check if it is a stable set */
83                         for (o=bitset_next_set(best, 0); o!=-1 && o<i; o=bitset_next_set(best, o+1))
84                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o])) {
85                                         bitset_clear(best, i); /* clear the bit and try next one */
86                                         break;
87                                 }
88                 }
89                 /* compute the weight */
90                 bitset_foreach(best, pos)
91                         best_weight += unsafe_costs[pos];
92         } else {
93                 /* Exact Algorithm: Brute force */
94                 curr = bitset_alloca(unsafe_count);
95                 bitset_set_all(curr);
96                 while ((max = bitset_popcnt(curr)) != 0) {
97                         /* check if curr is a stable set */
98                         for (i=bitset_next_set(curr, 0); i!=-1; i=bitset_next_set(curr, i+1))
99                                 for (o=bitset_next_set(curr, i+1); o!=-1; o=bitset_next_set(curr, o+1)) /* !!!!! difference to qnode_max_ind_set(): NOT (curr, i) */
100                                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o]))
101                                                         goto no_stable_set;
102
103                         /* if we arrive here, we have a stable set */
104                         /* compute the weigth of the stable set*/
105                         curr_weight = 0;
106                         bitset_foreach(curr, pos)
107                                 curr_weight += unsafe_costs[pos];
108
109                         /* any better ? */
110                         if (curr_weight > best_weight) {
111                                 best_weight = curr_weight;
112                         }
113
114         no_stable_set:
115                         bitset_minus1(curr);
116                 }
117         }
118
119         return safe_costs+best_weight;
120 }
121
122 static void co_collect_units(ir_node *irn, void *env) {
123         copy_opt_t *co = env;
124         unit_t *unit;
125         arch_register_req_t req;
126
127         if (!is_curr_reg_class(co, irn))
128                 return;
129         if (!co_is_optimizable(co->aenv, irn, &req))
130                 return;
131
132         /* Init a new unit */
133         unit = xcalloc(1, sizeof(*unit));
134         unit->co = co;
135         unit->node_count = 1;
136         INIT_LIST_HEAD(&unit->queue);
137
138         /* Phi with some/all of its arguments */
139         if (is_Reg_Phi(irn)) {
140                 int i, arity;
141
142                 /* init */
143                 arity = get_irn_arity(irn);
144                 unit->nodes = xmalloc((arity+1) * sizeof(*unit->nodes));
145                 unit->costs = xmalloc((arity+1) * sizeof(*unit->costs));
146                 unit->nodes[0] = irn;
147
148                 /* fill */
149                 for (i=0; i<arity; ++i) {
150                         int o, arg_pos;
151                         ir_node *arg = get_irn_n(irn, i);
152
153                         assert(is_curr_reg_class(co, arg) && "Argument not in same register class.");
154                         if (arg == irn)
155                                 continue;
156                         if (nodes_interfere(co->cenv, irn, arg)) {
157                                 unit->inevitable_costs += co->get_costs(irn, arg, i);
158                                 continue;
159                         }
160
161                         /* Else insert the argument of the phi to the members of this ou */
162                         DBG((dbg, LEVEL_1, "\t   Member: %+F\n", arg));
163
164                         /* Check if arg has occurred at a prior position in the arg/list */
165                         arg_pos = 0;
166                         for (o=0; o<unit->node_count; ++o)
167                                 if (unit->nodes[o] == arg) {
168                                         arg_pos = o;
169                                         break;
170                                 }
171
172                         if (!arg_pos) { /* a new argument */
173                                 /* insert node, set costs */
174                                 unit->nodes[unit->node_count] = arg;
175                                 unit->costs[unit->node_count] = co->get_costs(irn, arg, i);
176                                 unit->node_count++;
177                         } else { /* arg has occured before in same phi */
178                                 /* increase costs for existing arg */
179                                 unit->costs[arg_pos] += co->get_costs(irn, arg, i);
180                         }
181                 }
182                 unit->nodes = xrealloc(unit->nodes, unit->node_count * sizeof(*unit->nodes));
183                 unit->costs = xrealloc(unit->costs, unit->node_count * sizeof(*unit->costs));
184         } else
185
186         /* Proj of a perm with corresponding arg */
187         if (is_Perm_Proj(co->aenv, irn)) {
188                 assert(!nodes_interfere(co->cenv, irn, get_Copy_src(irn)));
189                 unit->nodes = xmalloc(2 * sizeof(*unit->nodes));
190                 unit->costs = xmalloc(2 * sizeof(*unit->costs));
191                 unit->node_count = 2;
192                 unit->nodes[0] = irn;
193                 unit->nodes[1] = get_Copy_src(irn);
194                 unit->costs[1] = co->get_costs(irn, unit->nodes[1], -1);
195         } else
196
197         /* Src == Tgt of a 2-addr-code instruction */
198         if (is_2addr_code(co->aenv, irn, &req)) {
199                 ir_node *other = req.other_same;
200                 if (!nodes_interfere(co->cenv, irn, other)) {
201                         unit->nodes = xmalloc(2 * sizeof(*unit->nodes));
202                         unit->costs = xmalloc(2 * sizeof(*unit->costs));
203                         unit->node_count = 2;
204                         unit->nodes[0] = irn;
205                         unit->nodes[1] = other;
206                         unit->costs[1] = co->get_costs(irn, other, -120480);
207                 }
208         } else
209                 assert(0 && "This is not an optimizable node!");
210
211         /* Insert the new unit at a position according to its costs */
212         if (unit->node_count > 1) {
213                 int i;
214                 struct list_head *tmp;
215
216                 /* Determine the maximum costs this unit can cause: all_nodes_cost */
217                 for(i=1; i<unit->node_count; ++i) {
218                         unit->sort_key = MAX(unit->sort_key, unit->costs[i]);
219                         unit->all_nodes_costs += unit->costs[i];
220                 }
221
222                 /* Determine the minimal costs this unit will cause: min_nodes_costs */
223                 unit->min_nodes_costs += unit->all_nodes_costs - ou_max_ind_set_costs(unit);
224
225                 /* Insert the new ou according to its sort_key */
226                 tmp = &co->units;
227                 while (tmp->next != &co->units && list_entry_units(tmp->next)->sort_key > unit->sort_key)
228                         tmp = tmp->next;
229                 list_add(&unit->units, tmp);
230         } else {
231                 free(unit);
232         }
233 }
234
235 void be_copy_opt_init(void) {
236         dbg = firm_dbg_register("ir.be.copyoptmain");
237 }
238
239 copy_opt_t *new_copy_opt(be_chordal_env_t *chordal_env, int (*get_costs)(ir_node*, ir_node*, int)) {
240         const char *s1, *s2, *s3;
241         int len;
242         copy_opt_t *co;
243
244         dbg = firm_dbg_register("ir.be.copyopt");
245
246         co = xcalloc(1, sizeof(*co));
247         co->cenv = chordal_env;
248         co->aenv = chordal_env->birg->main_env->arch_env;
249         co->irg = chordal_env->irg;
250         co->cls = chordal_env->cls;
251         co->get_costs = get_costs;
252
253         s1 = get_irp_prog_name();
254         s2 = get_entity_name(get_irg_entity(co->irg));
255         s3 = chordal_env->cls->name;
256         len = strlen(s1) + strlen(s2) + strlen(s3) + 5;
257         co->name = xmalloc(len);
258         snprintf(co->name, len, "%s__%s__%s", s1, s2, s3);
259
260         DBG((dbg, LEVEL_1, "\tCollecting optimization units\n"));
261         INIT_LIST_HEAD(&co->units);
262         irg_walk_graph(co->irg, co_collect_units, NULL, co);
263         return co;
264 }
265
266 void free_copy_opt(copy_opt_t *co) {
267         unit_t *curr, *tmp;
268         xfree(co->name);
269         list_for_each_entry_safe(unit_t, curr, tmp, &co->units, units) {
270                 xfree(curr->nodes);
271                 xfree(curr->costs);
272                 xfree(curr);
273         }
274 }
275
276 int co_is_optimizable_arg(const copy_opt_t *co, ir_node *irn) {
277         const ir_edge_t *edge;
278
279         if (arch_irn_is_ignore(co->aenv, irn))
280                 return 0;
281
282         foreach_out_edge(irn, edge) {
283                 ir_node *n = edge->src;
284
285                 if (!nodes_interfere(co->cenv, irn, n) || irn == n) {
286                         arch_register_req_t req;
287                         arch_get_register_req(co->aenv, &req, n, -1);
288
289                         if(is_Reg_Phi(n) ||
290                            is_Perm(co->aenv, n) ||
291                            (arch_register_req_is(&req, should_be_same) && req.other_same == irn)
292                           )
293                                 return 1;
294                 }
295         }
296
297         return 0;
298 }
299
300 int co_get_costs_loop_depth(ir_node *root, ir_node* arg, int pos) {
301         int cost = 0;
302         ir_loop *loop;
303         ir_node *root_block = get_nodes_block(root);
304
305         if (is_Phi(root)) {
306                 /* for phis the copies are placed in the corresponding pred-block */
307                 loop = get_irn_loop(get_Block_cfgpred_block(root_block, pos));
308         } else {
309                 /* a perm places the copy in the same block as it resides */
310                 loop = get_irn_loop(root_block);
311         }
312         if (loop) {
313                 int d = get_loop_depth(loop);
314                 cost = d*d;
315         }
316         return cost+1;
317 }
318
319 int co_get_costs_all_one(ir_node *root, ir_node* arg, int pos) {
320         return 1;
321 }
322
323 int co_get_max_copy_costs(const copy_opt_t *co) {
324         int i, res = 0;
325         unit_t *curr;
326
327         list_for_each_entry(unit_t, curr, &co->units, units) {
328                 res += curr->inevitable_costs;
329                 for (i=1; i<curr->node_count; ++i)
330                         res += curr->costs[i];
331         }
332         return res;
333 }
334
335 int co_get_inevit_copy_costs(const copy_opt_t *co) {
336         int res = 0;
337         unit_t *curr;
338
339         list_for_each_entry(unit_t, curr, &co->units, units)
340                 res += curr->inevitable_costs;
341         return res;
342 }
343
344 int co_get_copy_costs(const copy_opt_t *co) {
345         int i, res = 0;
346         unit_t *curr;
347
348         list_for_each_entry(unit_t, curr, &co->units, units) {
349                 int root_col = get_irn_col(co, curr->nodes[0]);
350                 DBG((dbg, LEVEL_1, "  %3d costs for root %+F color %d\n", curr->inevitable_costs, curr->nodes[0], root_col));
351                 res += curr->inevitable_costs;
352                 for (i=1; i<curr->node_count; ++i) {
353                         int arg_col = get_irn_col(co, curr->nodes[i]);
354                         if (root_col != arg_col) {
355                                 DBG((dbg, LEVEL_1, "  %3d for arg %+F color %d\n", curr->costs[i], curr->nodes[i], arg_col));
356                                 res += curr->costs[i];
357                         }
358                 }
359         }
360         return res;
361 }
362
363 int co_get_lower_bound(const copy_opt_t *co) {
364         int res = 0;
365         unit_t *curr;
366         list_for_each_entry(unit_t, curr, &co->units, units)
367                 res += curr->inevitable_costs + curr->min_nodes_costs;
368         return res;
369 }