common spilling things
[libfirm] / ir / be / bespillilp.c
1 /**
2  * @file   bespillilp.c
3  * @date   15.07.2005
4  * @author Sebastian Hack
5  *
6  * ILP based spilling
7  *
8  * Copyright (C) 2005 Universitaet Karlsruhe
9  * Released under the GPL
10  */
11 #include <math.h>
12
13 #include "hashptr.h"
14 #include "debug.h"
15 #include "obst.h"
16 #include "set.h"
17 #include "list.h"
18 #include "pmap.h"
19
20 #include "irprintf.h"
21 #include "irgwalk.h"
22 #include "irnode_t.h"
23 #include "ircons_t.h"
24
25 #include <lpp/lpp.h>
26 #include <lpp/lpp_net.h>
27 #include <lpp/lpp_cplex.h>
28
29 #include "be_t.h"
30 #include "belive_t.h"
31 #include "besched_t.h"
32 #include "beirgmod.h"
33 #include "bearch.h"
34 #include "benode_t.h"
35 #include "beutil.h"
36 #include "bespillilp.h"
37
38 #define BIGM 1000.0
39
40 #define MAX(a,b) ((a) > (b) ? (a) : (b))
41
42 #define DBG_LEVEL SET_LEVEL_0
43
44 #undef DUMP_SOLUTION
45 #undef DUMP_ILP
46
47 #define LPP_SERVER "i44pc52"
48 #define LPP_SOLVER "cplex"
49
50 #define COST_LOAD      10
51 #define COST_STORE     50
52 #define COST_REMAT     (-9)
53
54 #define is_end_of_block_use(lr) (is_Block((lr)->user))
55
56 typedef struct _spill_ilp_t {
57         const arch_register_class_t *cls;
58         firm_dbg_module_t *dbg;
59         lpp_t *lpp;
60         set *irn_use_heads;
61         set *live_ranges;
62         spill_env_t senv;
63 //      pset *remove_phis;
64         struct obstack *obst;
65         int enable_store : 1;
66         int enable_remat : 1;
67 } spill_ilp_t;
68
69 typedef struct _live_range_t live_range_t;
70
71 typedef struct _irn_use_head_t {
72         struct list_head head;
73         ir_node *irn;
74         int spill_var;
75         int n_uses;
76         live_range_t *closest_use;
77 } irn_use_head_t;
78
79 struct _live_range_t {
80   struct list_head list;
81         irn_use_head_t *use_head;
82   ir_node *user;
83   ir_node *irn;
84         int pos;
85   int in_mem_var;
86   int is_remat_var;
87 };
88
89 static int has_reg_class(const spill_ilp_t *si, const ir_node *irn)
90 {
91   return arch_irn_has_reg_class(si->senv.session->main_env->arch_env,
92       irn, arch_pos_make_out(0), si->cls);
93 }
94
95 static int register_demand(spill_ilp_t *si, const ir_node *irn)
96 {
97         const arch_env_t *arch_env = si->senv.session->main_env->arch_env;
98         int n_in = arch_get_n_operands(arch_env, irn, 0);
99         int n_out = arch_get_n_operands(arch_env, irn, -1);
100
101         return MAX(n_in, n_out);
102 }
103
104 static int cmp_live_range(const void *a, const void *b, size_t n)
105 {
106   const live_range_t *p = a;
107   const live_range_t *q = b;
108
109   return !(p->user == q->user && p->irn == q->irn && p->pos == q->pos);
110 }
111
112 static int cmp_irn_use_head(const void *a, const void *b, size_t n)
113 {
114   const irn_use_head_t *p = a;
115   const irn_use_head_t *q = b;
116
117         return !(p->irn == q->irn);
118 }
119
120 /**
121  * Checks, if a vertain node can be recomputed at a certain position.
122  * @param si    The spill ILP environment.
123  * @param irn   The node to recompute.
124  * @param live  The nodes live at the place where @p irn shall be
125  *              recomputed.
126  * @return      1, if irn can be recomputed, 0 if not.
127  */
128 static INLINE int can_remat(const spill_ilp_t *si, const ir_node *irn, pset *live)
129 {
130         int i, n;
131   const arch_env_t *arch_env    = si->senv.session->main_env->arch_env;
132         int remat = (arch_irn_get_flags(arch_env, irn) & arch_irn_flags_rematerializable) != 0;
133
134         for(i = 0, n = get_irn_arity(irn); i < n && remat; ++i) {
135                 ir_node *op = get_irn_n(irn, i);
136                 remat &= !has_reg_class(si, op) || pset_find_ptr(live, op);
137         }
138
139         return remat;
140 }
141
142 static live_range_t *get_live_range(spill_ilp_t *si, ir_node *irn, ir_node *user, int pos)
143 {
144   live_range_t lr, *res;
145         irn_use_head_t iuh, *head;
146         int is_new;
147   unsigned hash = HASH_COMBINE(HASH_PTR(irn), HASH_PTR(user));
148
149   lr.user    = user;
150   lr.irn     = irn;
151   lr.pos     = pos;
152   lr.in_mem_var = -1;
153         lr.is_remat_var = -1;
154
155   res = set_insert(si->live_ranges, &lr, sizeof(lr), hash);
156         is_new = res->in_mem_var == -1;
157
158   if(is_new) {
159     char buf[128];
160     ir_snprintf(buf, sizeof(buf), "m_%s%N_%N_%d",
161                                 is_Phi(irn) ? "phi_" : "", irn, user, MAX(pos, 0));
162     res->in_mem_var = lpp_add_var(si->lpp, buf, lpp_binary, pos >= 0 ? COST_LOAD : 0.0);
163   }
164
165         memset(&iuh, 0, sizeof(iuh));
166         iuh.irn = irn;
167         iuh.n_uses = -1;
168         head = set_insert(si->irn_use_heads, &iuh, sizeof(iuh), HASH_PTR(irn));
169         if(head->n_uses == -1) {
170                 head->n_uses = 0;
171                 INIT_LIST_HEAD(&head->head);
172         }
173
174         if(is_new) {
175                 list_add_tail(&res->list, &head->head);
176                 head->n_uses++;
177         }
178
179         res->use_head = head;
180
181   return res;
182 }
183
184 #if 0
185 static void annotate_live_ranges(ir_node *irn, void *data)
186 {
187         const ir_edge_t *edge;
188
189         foreach_out_edge(irn, edge) {
190                 pset *lr_set;
191
192                 ir_node *user = edge->use;
193                 int pos       = edge->pos;
194                 ir_node *bl   = get_nodes_block(user);
195
196                 if(is_Phi(user))
197                         bl = get_Block_cfgpred_block(bl, pos);
198
199                 lr_set = get_irn_link(bl);
200
201         }
202 }
203 #endif
204
205
206 static void process_block(ir_node *bl, void *data)
207 {
208         char buf[128];
209         int i, n;
210   spill_ilp_t *si  = data;
211   int step         = 0;
212   int n_regs       = arch_register_class_n_regs(si->cls);
213         int n_preds      = get_irn_arity(bl);
214   pset *live       = pset_new_ptr_default();
215   irn_live_t *li;
216   ir_node *irn;
217
218   /* as always, bring the live end nodes to life here */
219   live_foreach(bl, li) {
220     if(live_is_end(li) && has_reg_class(si, li->irn)) {
221       ir_node *irn = (ir_node *) li->irn;
222       pset_insert_ptr(live, irn);
223
224       /*
225        * The "user" of the live range to the end of a block
226        * is the block itself. This is quite arbitrary.
227        */
228       set_irn_link(irn, get_live_range(si, irn, bl, -1));
229     }
230   }
231
232         sched_foreach_reverse(bl, irn) {
233                 ir_node *l;
234                 int cst;
235                 int demand;
236                 int n_live;
237                 int must_be_in_mem;
238
239                 /* We handle phi togther with live ins after this loop (see below). */
240                 if(is_Phi(irn))
241                         break;
242
243                 if(has_reg_class(si, irn))
244                         pset_remove_ptr(live, irn);
245
246                 demand = register_demand(si, irn);
247                 n_live = pset_count(live);
248
249                 /*
250                  * Determine, how many values (which are not used at the label)
251                  * must be in memory.
252                  * demand means the number of registers, the operation will consume.
253                  * So there are n_regs - demand registers available to store values
254                  * which are not used at this label. The rest must reside in memory.
255                  */
256                 must_be_in_mem = MAX(n_live - (n_regs - demand), 0);
257
258                 if(must_be_in_mem > 0) {
259
260                         /*
261                          * The constraint limiting the pressure at this label to
262                          * the number of free registers.
263                          */
264                         ir_snprintf(buf, sizeof(buf), "cp_%N_%d", bl, step);
265                         cst = lpp_add_cst(si->lpp, buf, lpp_greater, must_be_in_mem);
266
267                         for(l = pset_first(live); l; l = pset_next(live)) {
268                                 live_range_t *lr = get_irn_link(l);
269                                 lpp_set_factor_fast(si->lpp, cst, lr->in_mem_var, 1.0);
270                         }
271                 }
272
273                 for(i = 0, n = get_irn_arity(irn); i < n; ++i) {
274                         ir_node *op = get_irn_n(irn, i);
275
276                         if(has_reg_class(si, op)) {
277                                 live_range_t *op_lr = get_live_range(si, op, irn, i);
278
279                                 set_irn_link(op, op_lr);
280
281                                 /*
282                                  * The operand is reloaded at its usage, so it must not occur
283                                  * in the constraint which determines which values live at the
284                                  * instruction must reside in memory.
285                                  */
286                                 if(must_be_in_mem > 0) {
287                                         lpp_set_factor_fast(si->lpp, cst, op_lr->in_mem_var, 0.0);
288                                 }
289
290                                 /*
291                                  * Check, if the node is a rematerializable node and
292                                  * if its operands are live here.
293                                  */
294                                 if(si->enable_remat && can_remat(si, op, live)) {
295                                         int cst;
296                                         int j, n;
297                                         int n_operands = 0;
298
299                                         for(j = 0, n = get_irn_arity(op); j < n; ++j)
300                                                 n_operands += has_reg_class(si, get_irn_n(op, j));
301
302                                         /* Make the remat constraint for this operand */
303                                         ir_snprintf(buf, sizeof(buf), "ce1_%N_%N_%d", op, irn, i);
304                                         cst = lpp_add_cst(si->lpp, buf, lpp_less, n_operands);
305
306                                         /* Make the rematerialize variable for the operand */
307                                         ir_snprintf(buf, sizeof(buf), "e_%N_%N_%d", op, irn, i);
308                                         op_lr->is_remat_var = lpp_add_var(si->lpp, buf, lpp_binary, COST_REMAT);
309                                         lpp_set_factor_fast(si->lpp, cst, op_lr->is_remat_var, n_operands);
310
311                                         for(j = 0, n = get_irn_arity(op); j < n; ++j) {
312                                                 ir_node *oop = get_irn_n(op, j);
313                                                 if(has_reg_class(si, oop)) {
314                                                         live_range_t *lr = get_irn_link(oop);
315                                                         lpp_set_factor_fast(si->lpp, cst, lr->in_mem_var, 1.0);
316                                                 }
317                                         }
318
319                                         ir_snprintf(buf, sizeof(buf), "ce2_%N_%N_%d", op, irn, i);
320                                         cst = lpp_add_cst(si->lpp, buf, lpp_less, 0.0);
321                                         lpp_set_factor_fast(si->lpp, cst, op_lr->is_remat_var, 1.0);
322                                         lpp_set_factor_fast(si->lpp, cst, op_lr->in_mem_var, -1.0);
323                                 }
324                         }
325                 }
326
327                         for(i = 0, n = get_irn_arity(irn); i < n; ++i) {
328                                 ir_node *op = get_irn_n(irn, i);
329                                 if(has_reg_class(si, op) && !is_Phi(irn))
330                                         pset_insert_ptr(live, op);
331                         }
332
333                 step++;
334         }
335
336         if(bl == get_irg_start_block(get_irn_irg(bl)))
337                 goto end;
338
339         /*
340          * Here, only the phis in the block and the values live in are in the
341          * live set.
342          *
343          * If a value is live in, it must be in a register in all predecessor
344          * blocks or in memory at the end of all predecessor blocks. Also, the
345          * closest use in the current block must then be from register or
346          * memory, respectively.
347          */
348         for(irn = pset_first(live); irn; irn = pset_next(live)) {
349                 live_range_t *lr = get_irn_link(irn);
350                 int is_phi = is_Phi(irn) && get_nodes_block(irn) == bl;
351                 int cst;
352
353                 if(is_phi)
354                         lr->use_head->closest_use = lr;
355
356                 assert(has_reg_class(si, irn));
357                 assert(is_Phi(irn) || is_live_in(bl, irn));
358
359 #if 0
360                 ir_snprintf(buf, sizeof(buf), "c%s_%N_%N", (is_phi ? "phi" : "li"), irn, bl);
361                 cst = lpp_add_cst(si->lpp, buf, lpp_equal, 0.0);
362                 lpp_set_factor_fast(si->lpp, cst, lr->in_mem_var, -n_preds);
363
364                 for(i = 0; i < n_preds; ++i) {
365                         ir_node *pred_bl     = get_Block_cfgpred_block(bl, i);
366                         ir_node *end_node    = is_phi ? get_irn_n(irn, i) : irn;
367                         live_range_t *op_lr  = get_live_range(si, end_node, pred_bl, -1);
368
369                         lpp_set_factor_fast(si->lpp, cst, op_lr->in_mem_var, 1.0);
370                 }
371 #endif
372
373                 for(i = 0; i < n_preds; ++i) {
374                         ir_node *pred_bl     = get_Block_cfgpred_block(bl, i);
375                         ir_node *end_node    = is_phi ? get_irn_n(irn, i) : irn;
376                         live_range_t *op_lr  = get_live_range(si, end_node, pred_bl, -1);
377
378                         ir_snprintf(buf, sizeof(buf), "cpred_%N_%N_%d", lr->irn, bl, i);
379                         cst = lpp_add_cst(si->lpp, buf, lpp_equal, 0.0);
380                         lpp_set_factor_fast(si->lpp, cst, op_lr->in_mem_var, 1.0);
381                         lpp_set_factor_fast(si->lpp, cst, lr->in_mem_var, -1.0);
382                 }
383         }
384
385 end:
386
387   del_pset(live);
388 }
389
390 /**
391  * Add the costs for a store.
392  *
393  * If one of the uses is from memory, add additional costs for the
394  * spill.
395  *
396  * m_1 + ... + m_n - M * s <= 0
397  *
398  * @param si The ILP spilling environment.
399  */
400 static void add_store_costs(spill_ilp_t *si)
401 {
402         char buf[64];
403         irn_use_head_t *uh;
404         double costs = si->enable_store ? COST_STORE : 0.0;
405
406         for(uh = set_first(si->irn_use_heads); uh; uh = set_next(si->irn_use_heads)) {
407                 int cst;
408                 live_range_t *lr;
409
410                 ir_snprintf(buf, sizeof(buf), "cs_%N", uh->irn);
411                 cst = lpp_add_cst(si->lpp, buf, lpp_less, 0);
412
413                 ir_snprintf(buf, sizeof(buf), "s_%N", uh->irn);
414                 uh->spill_var = lpp_add_var(si->lpp, buf, lpp_binary, costs);
415                 lpp_set_factor_fast(si->lpp, cst, uh->spill_var, -BIGM);
416
417     list_for_each_entry(live_range_t, lr, &uh->head, list)
418                         lpp_set_factor_fast(si->lpp, cst, lr->in_mem_var, 1.0);
419         }
420 }
421
422 static INLINE int is_zero(double x)
423 {
424   return fabs(x) < 0.00001;
425 }
426
427 static int is_spilled(const spill_ilp_t *si, const live_range_t *lr)
428 {
429         return !is_zero(lpp_get_var_sol(si->lpp, lr->in_mem_var));
430 }
431
432 static void writeback_results(spill_ilp_t *si)
433 {
434         const be_node_factory_t *fact = si->senv.session->main_env->node_factory;
435         irn_use_head_t *uh;
436         si->senv.mem_phis = pset_new_ptr_default();
437
438         for(uh = set_first(si->irn_use_heads); uh; uh = set_next(si->irn_use_heads)) {
439                 if(is_Phi(uh->irn) && is_spilled(si, uh->closest_use))
440                         pset_insert_ptr(si->senv.mem_phis, uh->irn);
441         }
442
443         /* Look at each node and examine the usages. */
444         for(uh = set_first(si->irn_use_heads); uh; uh = set_next(si->irn_use_heads)) {
445                 live_range_t *lr;
446                 ir_node **reloads;
447
448                 int n_reloads           = 0;
449                 ir_node *irn            = uh->irn;
450                 ir_mode *mode           = get_irn_mode(irn);
451
452                 /* Go through all live ranges of the node. */
453                 list_for_each_entry(live_range_t, lr, &uh->head, list) {
454                         int spilled = is_spilled(si, lr);
455                         // int rematd  = !is_zero(lpp_get_var_sol(si->lpp, lr->is_remat_var));
456
457                         if(spilled && !is_end_of_block_use(lr)) {
458                                 ir_node *bl      = get_nodes_block(lr->user);
459
460
461                                 ir_node *spill   = be_spill_node(&si->senv, lr->irn);
462                                 ir_node *reload  = new_Reload(fact, si->cls, si->senv.session->irg, bl, mode, spill);
463
464                                 obstack_ptr_grow(si->obst, reload);
465                                 n_reloads++;
466
467                                 sched_add_before(lr->user, reload);
468                         }
469                 }
470
471                 if(n_reloads > 0) {
472                         reloads = obstack_finish(si->obst);
473                         be_introduce_copies_ignore(si->senv.session->dom_front, irn, n_reloads, reloads, si->senv.mem_phis);
474                         obstack_free(si->obst, reloads);
475                 }
476         }
477         be_remove_spilled_phis(&si->senv);
478 }
479
480 void be_spill_ilp(const be_main_session_env_t *session_env,
481     const arch_register_class_t *cls)
482 {
483         char problem_name[256];
484         struct obstack obst;
485         spill_ilp_t si;
486
487         ir_snprintf(problem_name, sizeof(problem_name), "%F_%s", session_env->irg, cls->name);
488
489         obstack_init(&obst);
490         si.obst           = &obst;
491         si.dbg            = firm_dbg_register("be.ra.spillilp");
492         si.senv.session   = session_env;
493         si.cls            = cls;
494         si.lpp            = new_lpp(problem_name, lpp_minimize);
495         si.irn_use_heads  = new_set(cmp_irn_use_head, 4096);
496         si.live_ranges    = new_set(cmp_live_range, 16384);
497         si.senv.spill_ctxs= new_set(be_set_cmp_spillctx, 4096);
498         si.enable_remat   = 1;
499         si.enable_store   = 0;
500
501         firm_dbg_set_mask(si.dbg, DBG_LEVEL);
502         irg_block_walk_graph(session_env->irg, process_block, NULL, &si);
503         if(si.enable_store)
504                 add_store_costs(&si);
505
506 #ifdef DUMP_ILP
507         {
508                 FILE *f;
509                 char buf[256];
510
511                 ir_snprintf(buf, sizeof(buf), "spill-%s.ilp", problem_name);
512                 if((f = fopen(buf, "wt")) != NULL) {
513                         lpp_dump_plain(si.lpp, f);
514                         fclose(f);
515                 }
516         }
517 #endif
518
519         DBG((si.dbg, LEVEL_1, "%F\n", session_env->irg));
520 //      lpp_solve_net(si.lpp, LPP_SERVER, LPP_SOLVER);
521         lpp_solve_cplex(si.lpp);
522         assert(lpp_is_sol_valid(si.lpp) && "ILP not feasible");
523
524         assert(lpp_is_sol_valid(si.lpp) && "solution of ILP must be valid");
525
526         DBG((si.dbg, LEVEL_1, "\tnodes: %d, vars: %d, csts: %d\n",
527                                 set_count(si.irn_use_heads), si.lpp->var_next, si.lpp->cst_next));
528         DBG((si.dbg, LEVEL_1, "\titerations: %d, solution time: %g\n",
529                                 si.lpp->iterations, si.lpp->sol_time));
530
531 #ifdef DUMP_SOLUTION
532         {
533                 FILE *f;
534                 char buf[256];
535
536                 ir_snprintf(buf, sizeof(buf), "spill-%s.sol", problem_name);
537                 if((f = fopen(buf, "wt")) != NULL) {
538                         int i;
539                         for(i = 0; i < si.lpp->var_next; ++i) {
540                                 lpp_name_t *name = si.lpp->vars[i];
541                                 fprintf(f, "%10s %4d %10f\n", name->name, name->nr, name->value);
542                         }
543                         fclose(f);
544                 }
545         }
546 #endif
547   writeback_results(&si);
548
549   del_set(si.irn_use_heads);
550   del_set(si.live_ranges);
551   free_lpp(si.lpp);
552   obstack_free(&obst, NULL);
553 }