4 * @author Sebastian Hack
8 * Copyright (C) 2005 Universitaet Karlsruhe
9 * Released under the GPL
26 #include <lpp/lpp_net.h>
27 #include <lpp/lpp_cplex.h>
31 #include "besched_t.h"
39 #define MAX(a,b) ((a) > (b) ? (a) : (b))
41 #define DBG_LEVEL SET_LEVEL_0
46 #define LPP_SERVER "i44pc52"
47 #define LPP_SOLVER "cplex"
51 #define COST_REMAT (-9)
53 #define is_end_of_block_use(lr) (is_Block((lr)->user))
55 typedef struct _spill_ilp_t {
56 const be_main_session_env_t *session_env;
57 const arch_register_class_t *cls;
58 firm_dbg_module_t *dbg;
69 typedef struct _live_range_t live_range_t;
71 typedef struct _irn_use_head_t {
72 struct list_head head;
76 live_range_t *closest_use;
79 struct _live_range_t {
80 struct list_head list;
81 irn_use_head_t *use_head;
89 typedef struct _spill_ctx_t {
90 ir_node *spilled; /**< The spilled node. */
91 ir_node *user; /**< The node this spill is for. */
92 ir_node *spill; /**< The spill itself. */
95 static int has_reg_class(const spill_ilp_t *si, const ir_node *irn)
97 return arch_irn_has_reg_class(si->session_env->main_env->arch_env,
98 irn, arch_pos_make_out(0), si->cls);
101 static int register_demand(spill_ilp_t *si, const ir_node *irn)
103 const arch_env_t *arch_env = si->session_env->main_env->arch_env;
104 int n_in = arch_get_n_operands(arch_env, irn, 0);
105 int n_out = arch_get_n_operands(arch_env, irn, -1);
107 return MAX(n_in, n_out);
110 static int cmp_spill_ctx(const void *a, const void *b, size_t n)
112 const spill_ctx_t *p = a;
113 const spill_ctx_t *q = b;
114 return !(p->user == q->user && p->spilled == q->spilled);
117 static int cmp_live_range(const void *a, const void *b, size_t n)
119 const live_range_t *p = a;
120 const live_range_t *q = b;
122 return !(p->user == q->user && p->irn == q->irn && p->pos == q->pos);
125 static int cmp_irn_use_head(const void *a, const void *b, size_t n)
127 const irn_use_head_t *p = a;
128 const irn_use_head_t *q = b;
130 return !(p->irn == q->irn);
134 * Checks, if a vertain node can be recomputed at a certain position.
135 * @param si The spill ILP environment.
136 * @param irn The node to recompute.
137 * @param live The nodes live at the place where @p irn shall be
139 * @return 1, if irn can be recomputed, 0 if not.
141 static INLINE int can_remat(const spill_ilp_t *si, const ir_node *irn, pset *live)
144 const arch_env_t *arch_env = si->session_env->main_env->arch_env;
145 int remat = (arch_irn_get_flags(arch_env, irn) & arch_irn_flags_rematerializable) != 0;
147 for(i = 0, n = get_irn_arity(irn); i < n && remat; ++i) {
148 ir_node *op = get_irn_n(irn, i);
149 remat &= !has_reg_class(si, op) || pset_find_ptr(live, op);
155 static spill_ctx_t *get_spill_ctx(spill_ilp_t *si, ir_node *spilled, ir_node *ctx_irn)
157 spill_ctx_t templ, *res;
159 templ.spilled = spilled;
160 templ.user = ctx_irn;
163 res = set_insert(si->spill_ctx, &templ, sizeof(templ),
164 HASH_COMBINE(HASH_PTR(spilled), HASH_PTR(ctx_irn)));
169 static live_range_t *get_live_range(spill_ilp_t *si, ir_node *irn, ir_node *user, int pos)
171 live_range_t lr, *res;
172 irn_use_head_t iuh, *head;
174 unsigned hash = HASH_COMBINE(HASH_PTR(irn), HASH_PTR(user));
180 lr.is_remat_var = -1;
182 res = set_insert(si->live_ranges, &lr, sizeof(lr), hash);
183 is_new = res->in_mem_var == -1;
187 ir_snprintf(buf, sizeof(buf), "m_%s%N_%N_%d",
188 is_Phi(irn) ? "phi_" : "", irn, user, MAX(pos, 0));
189 res->in_mem_var = lpp_add_var(si->lpp, buf, lpp_binary, pos >= 0 ? COST_LOAD : 0.0);
192 memset(&iuh, 0, sizeof(iuh));
195 head = set_insert(si->irn_use_heads, &iuh, sizeof(iuh), HASH_PTR(irn));
196 if(head->n_uses == -1) {
198 INIT_LIST_HEAD(&head->head);
202 list_add_tail(&res->list, &head->head);
206 res->use_head = head;
211 static live_range_t *lookup_live_range(const spill_ilp_t *si, ir_node *irn,
212 ir_node *user, int pos)
215 unsigned hash = HASH_COMBINE(HASH_PTR(irn), HASH_PTR(user));
222 return set_find(si->live_ranges, &lr, sizeof(lr), hash);
225 static void create_block_live_range_sets(ir_node *bl, void *data)
227 assert(is_Block(bl));
228 set_irn_link(bl, pset_new_ptr_default());
231 static void delete_block_live_range_sets(ir_node *bl, void *data)
233 assert(is_Block(bl));
235 del_pset(get_irn_link(bl));
236 set_irn_link(bl, NULL);
240 static void annotate_live_ranges(ir_node *irn, void *data)
242 const ir_edge_t *edge;
244 foreach_out_edge(irn, edge) {
247 ir_node *user = edge->use;
249 ir_node *bl = get_nodes_block(user);
252 bl = get_Block_cfgpred_block(bl, pos);
254 lr_set = get_irn_link(bl);
261 static void process_block(ir_node *bl, void *data)
265 spill_ilp_t *si = data;
267 int n_regs = arch_register_class_n_regs(si->cls);
268 int n_preds = get_irn_arity(bl);
269 pset *live = pset_new_ptr_default();
273 /* as always, bring the live end nodes to life here */
274 live_foreach(bl, li) {
275 if(live_is_end(li) && has_reg_class(si, li->irn)) {
276 ir_node *irn = (ir_node *) li->irn;
277 pset_insert_ptr(live, irn);
280 * The "user" of the live range to the end of a block
281 * is the block itself. This is quite arbitrary.
283 set_irn_link(irn, get_live_range(si, irn, bl, -1));
287 sched_foreach_reverse(bl, irn) {
294 /* We handle phi togther with live ins after this loop (see below). */
298 if(has_reg_class(si, irn))
299 pset_remove_ptr(live, irn);
301 demand = register_demand(si, irn);
302 n_live = pset_count(live);
305 * Determine, how many values (which are not used at the label)
307 * demand means the number of registers, the operation will consume.
308 * So there are n_regs - demand registers available to store values
309 * which are not used at this label. The rest must reside in memory.
311 must_be_in_mem = MAX(n_live - (n_regs - demand), 0);
313 if(must_be_in_mem > 0) {
316 * The constraint limiting the pressure at this label to
317 * the number of free registers.
319 ir_snprintf(buf, sizeof(buf), "cp_%N_%d", bl, step);
320 cst = lpp_add_cst(si->lpp, buf, lpp_greater, must_be_in_mem);
322 for(l = pset_first(live); l; l = pset_next(live)) {
323 live_range_t *lr = get_irn_link(l);
324 lpp_set_factor_fast(si->lpp, cst, lr->in_mem_var, 1.0);
328 for(i = 0, n = get_irn_arity(irn); i < n; ++i) {
329 ir_node *op = get_irn_n(irn, i);
331 if(has_reg_class(si, op)) {
332 live_range_t *op_lr = get_live_range(si, op, irn, i);
334 set_irn_link(op, op_lr);
337 * The operand is reloaded at its usage, so it must not occur
338 * in the constraint which determines which values live at the
339 * instruction must reside in memory.
341 if(must_be_in_mem > 0) {
342 lpp_set_factor_fast(si->lpp, cst, op_lr->in_mem_var, 0.0);
346 * Check, if the node is a rematerializable node and
347 * if its operands are live here.
349 if(si->enable_remat && can_remat(si, op, live)) {
354 for(j = 0, n = get_irn_arity(op); j < n; ++j)
355 n_operands += has_reg_class(si, get_irn_n(op, j));
357 /* Make the remat constraint for this operand */
358 ir_snprintf(buf, sizeof(buf), "ce1_%N_%N_%d", op, irn, i);
359 cst = lpp_add_cst(si->lpp, buf, lpp_less, n_operands);
361 /* Make the rematerialize variable for the operand */
362 ir_snprintf(buf, sizeof(buf), "e_%N_%N_%d", op, irn, i);
363 op_lr->is_remat_var = lpp_add_var(si->lpp, buf, lpp_binary, COST_REMAT);
364 lpp_set_factor_fast(si->lpp, cst, op_lr->is_remat_var, n_operands);
366 for(j = 0, n = get_irn_arity(op); j < n; ++j) {
367 ir_node *oop = get_irn_n(op, j);
368 if(has_reg_class(si, oop)) {
369 live_range_t *lr = get_irn_link(oop);
370 lpp_set_factor_fast(si->lpp, cst, lr->in_mem_var, 1.0);
374 ir_snprintf(buf, sizeof(buf), "ce2_%N_%N_%d", op, irn, i);
375 cst = lpp_add_cst(si->lpp, buf, lpp_less, 0.0);
376 lpp_set_factor_fast(si->lpp, cst, op_lr->is_remat_var, 1.0);
377 lpp_set_factor_fast(si->lpp, cst, op_lr->in_mem_var, -1.0);
382 for(i = 0, n = get_irn_arity(irn); i < n; ++i) {
383 ir_node *op = get_irn_n(irn, i);
384 if(has_reg_class(si, op) && !is_Phi(irn))
385 pset_insert_ptr(live, op);
391 if(bl == get_irg_start_block(get_irn_irg(bl)))
395 * Here, only the phis in the block and the values live in are in the
398 * If a value is live in, it must be in a register in all predecessor
399 * blocks or in memory at the end of all predecessor blocks. Also, the
400 * closest use in the current block must then be from register or
401 * memory, respectively.
403 for(irn = pset_first(live); irn; irn = pset_next(live)) {
404 live_range_t *lr = get_irn_link(irn);
405 int is_phi = is_Phi(irn) && get_nodes_block(irn) == bl;
409 lr->use_head->closest_use = lr;
411 assert(has_reg_class(si, irn));
412 assert(is_Phi(irn) || is_live_in(bl, irn));
415 ir_snprintf(buf, sizeof(buf), "c%s_%N_%N", (is_phi ? "phi" : "li"), irn, bl);
416 cst = lpp_add_cst(si->lpp, buf, lpp_equal, 0.0);
417 lpp_set_factor_fast(si->lpp, cst, lr->in_mem_var, -n_preds);
419 for(i = 0; i < n_preds; ++i) {
420 ir_node *pred_bl = get_Block_cfgpred_block(bl, i);
421 ir_node *end_node = is_phi ? get_irn_n(irn, i) : irn;
422 live_range_t *op_lr = get_live_range(si, end_node, pred_bl, -1);
424 lpp_set_factor_fast(si->lpp, cst, op_lr->in_mem_var, 1.0);
428 for(i = 0; i < n_preds; ++i) {
429 ir_node *pred_bl = get_Block_cfgpred_block(bl, i);
430 ir_node *end_node = is_phi ? get_irn_n(irn, i) : irn;
431 live_range_t *op_lr = get_live_range(si, end_node, pred_bl, -1);
433 ir_snprintf(buf, sizeof(buf), "cpred_%N_%N_%d", lr->irn, bl, i);
434 cst = lpp_add_cst(si->lpp, buf, lpp_equal, 0.0);
435 lpp_set_factor_fast(si->lpp, cst, op_lr->in_mem_var, 1.0);
436 lpp_set_factor_fast(si->lpp, cst, lr->in_mem_var, -1.0);
446 * Add the costs for a store.
448 * If one of the uses is from memory, add additional costs for the
451 * m_1 + ... + m_n - M * s <= 0
453 * @param si The ILP spilling environment.
455 static void add_store_costs(spill_ilp_t *si)
459 double costs = si->enable_store ? COST_STORE : 0.0;
461 for(uh = set_first(si->irn_use_heads); uh; uh = set_next(si->irn_use_heads)) {
465 ir_snprintf(buf, sizeof(buf), "cs_%N", uh->irn);
466 cst = lpp_add_cst(si->lpp, buf, lpp_less, 0);
468 ir_snprintf(buf, sizeof(buf), "s_%N", uh->irn);
469 uh->spill_var = lpp_add_var(si->lpp, buf, lpp_binary, costs);
470 lpp_set_factor_fast(si->lpp, cst, uh->spill_var, -BIGM);
472 list_for_each_entry(live_range_t, lr, &uh->head, list)
473 lpp_set_factor_fast(si->lpp, cst, lr->in_mem_var, 1.0);
477 static INLINE int is_zero(double x)
479 return fabs(x) < 0.00001;
482 static int is_spilled(const spill_ilp_t *si, const live_range_t *lr)
484 return !is_zero(lpp_get_var_sol(si->lpp, lr->in_mem_var));
487 static ir_node *spill_irn(spill_ilp_t *si, ir_node *irn, ir_node *ctx_irn)
489 const be_node_factory_t *fact = si->session_env->main_env->node_factory;
490 const arch_env_t *arch_env = si->session_env->main_env->arch_env;
493 ctx = get_spill_ctx(si, irn, ctx_irn);
497 ctx->spill = be_spill(fact, arch_env, irn);
501 static ir_node *spill_phi(spill_ilp_t *si, ir_node *phi, ir_node *ctx_irn, pset *rem_phis)
504 ir_mode *mode = get_irn_mode(phi);
506 ir_node *bl = get_nodes_block(phi);
507 ir_graph *irg = get_irn_irg(bl);
512 ctx = get_spill_ctx(si, phi, ctx_irn);
516 n = get_irn_arity(phi);
517 ins = malloc(n * sizeof(ins[0]));
519 for(i = 0; i < n; ++i)
520 ins[i] = new_r_Unknown(irg, mode_M);
522 ctx->spill = new_r_Phi(irg, bl, n, ins, mode_M);
525 for(i = 0; i < n; ++i) {
526 ir_node *arg = get_irn_n(phi, i);
530 res = spill_phi(si, arg, ctx_irn, rem_phis);
532 res = spill_irn(si, arg, ctx_irn);
534 set_irn_n(ctx->spill, i, res);
540 static ir_node *spill_live_range(spill_ilp_t *si, live_range_t *lr, pset *rem_phis)
542 const live_range_t *closest = lr->use_head->closest_use;
544 if(is_Phi(lr->irn) && closest && is_spilled(si, closest))
545 return spill_phi(si, lr->irn, lr->irn, rem_phis);
547 return spill_irn(si, lr->irn, lr->irn);
551 static void writeback_results(spill_ilp_t *si)
553 const be_node_factory_t *fact = si->session_env->main_env->node_factory;
554 const arch_env_t *arch_env = si->session_env->main_env->arch_env;
557 pset *rem_phis = pset_new_ptr_default();
559 for(uh = set_first(si->irn_use_heads); uh; uh = set_next(si->irn_use_heads)) {
560 if(is_Phi(uh->irn) && is_spilled(si, uh->closest_use))
561 pset_insert_ptr(rem_phis, uh->irn);
564 /* Look at each node and examine the usages. */
565 for(uh = set_first(si->irn_use_heads); uh; uh = set_next(si->irn_use_heads)) {
570 ir_node *irn = uh->irn;
571 ir_mode *mode = get_irn_mode(irn);
573 /* Go through all live ranges of the node. */
574 list_for_each_entry(live_range_t, lr, &uh->head, list) {
575 int spilled = is_spilled(si, lr);
576 // int rematd = !is_zero(lpp_get_var_sol(si->lpp, lr->is_remat_var));
578 if(spilled && !is_end_of_block_use(lr)) {
579 ir_node *bl = get_nodes_block(lr->user);
580 ir_node *spill = spill_live_range(si, lr, rem_phis);
581 ir_node *reload = new_Reload(fact, si->cls,
582 si->session_env->irg, bl, mode, spill);
584 obstack_ptr_grow(si->obst, reload);
587 sched_add_before(lr->user, reload);
593 reloads = obstack_finish(si->obst);
594 be_introduce_copies_ignore(si->session_env->dom_front, irn,
595 n_reloads, reloads, rem_phis);
596 obstack_free(si->obst, reloads);
600 for(irn = pset_first(rem_phis); irn; irn = pset_next(rem_phis)) {
603 for(i = 0, n = get_irn_arity(irn); i < n; ++i)
604 set_irn_n(irn, i, new_r_Bad(si->session_env->irg));
609 void be_spill_ilp(const be_main_session_env_t *session_env,
610 const arch_register_class_t *cls)
613 char problem_name[256];
617 ir_snprintf(problem_name, sizeof(problem_name), "%F_%s", session_env->irg, cls->name);
621 si.dbg = firm_dbg_register("be.ra.spillilp");
622 si.session_env = session_env;
624 si.lpp = new_lpp(problem_name, lpp_minimize);
625 si.irn_use_heads = new_set(cmp_irn_use_head, 4096);
626 si.live_ranges = new_set(cmp_live_range, 16384);
627 si.spill_ctx = new_set(cmp_spill_ctx, 4096);
631 firm_dbg_set_mask(si.dbg, DBG_LEVEL);
632 irg_block_walk_graph(session_env->irg, process_block, NULL, &si);
634 add_store_costs(&si);
640 ir_snprintf(buf, sizeof(buf), "spill-%s.ilp", problem_name);
641 if((f = fopen(buf, "wt")) != NULL) {
642 lpp_dump_plain(si.lpp, f);
648 DBG((si.dbg, LEVEL_1, "%F\n", session_env->irg));
649 // lpp_solve_net(si.lpp, LPP_SERVER, LPP_SOLVER);
650 lpp_solve_cplex(si.lpp);
651 assert(lpp_is_sol_valid(si.lpp) && "ILP not feasible");
653 assert(lpp_is_sol_valid(si.lpp) && "solution of ILP must be valid");
655 DBG((si.dbg, LEVEL_1, "\tnodes: %d, vars: %d, csts: %d\n",
656 set_count(si.irn_use_heads), si.lpp->var_next, si.lpp->cst_next));
657 DBG((si.dbg, LEVEL_1, "\titerations: %d, solution time: %g\n",
658 si.lpp->iterations, si.lpp->sol_time));
664 ir_snprintf(buf, sizeof(buf), "spill-%s.sol", problem_name);
665 if((f = fopen(buf, "wt")) != NULL) {
667 for(i = 0; i < si.lpp->var_next; ++i) {
668 lpp_name_t *name = si.lpp->vars[i];
669 fprintf(f, "%10s %4d %10f\n", name->name, name->nr, name->value);
675 writeback_results(&si);
677 del_set(si.irn_use_heads);
678 del_set(si.live_ranges);
680 obstack_free(&obst, NULL);