1 /** vim: set sw=4 ts=4:
4 * @author Adam M. Szalkowski & Sebastian Hack
6 * ILP-based spilling & rematerialization
8 * Copyright (C) 2006 Universitaet Karlsruhe
9 * Released under the GPL
32 #include "execution_frequency.h"
36 #include <lpp/lpp_net.h>
37 #include <lpp/lpp_cplex.h>
38 //#include <lc_pset.h>
42 #include "besched_t.h"
47 #include "bespillappel.h"
49 #include "bessadestrsimple.h"
51 #include "bechordal_t.h"
/* Remote ILP solver endpoint (host name + solver backend) used by
 * lpp_solve_net() in be_spill_appel() when no local CPLEX solve is done. */
60 #define LPP_SERVER "i44pc52"
61 #define LPP_SOLVER "cplex"
/* Context object threaded through all walkers of the ILP spiller.
 * NOTE(review): only part of the struct is visible in this view; members
 * used below (lpp, senv, obst, edges, enable_remat, ...) are elided. */
69 typedef struct _spill_ilp_t {
70 const arch_register_class_t *cls; /* register class being spilled */
71 const be_chordal_env_t *chordal_env; /* chordal allocator environment */
75 DEBUG_ONLY(firm_dbg_module_t * dbg); /* debug module, debug builds only */
/* ILP variable and constraint handles are plain integer lpp indices. */
78 typedef int ilp_var_t;
79 typedef int ilp_cst_t;
/* Key/value pair stored in libfirm 'set's; see the set_*_keyval helpers
 * and cmp_keyval below. NOTE(review): members elided in this view. */
81 typedef struct _keyval_t {
/* Per-node spill record kept in spill_bb->ilp sets; compared by cmp_spill
 * (by irn only). NOTE(review): members elided in this view. */
86 typedef struct _spill_t {
/* Returns nonzero iff @p irn belongs to the register class this spiller
 * instance works on (delegates to the chordal environment). */
96 has_reg_class(const spill_ilp_t * si, const ir_node * irn)
98 return chordal_has_class(si->chordal_env, irn);
/* set compare function for spill_t entries: 0 (equal) iff same irn.
 * 'size' is required by the libfirm set interface but unused here. */
102 cmp_spill(const void *a, const void *b, size_t size)
104 const spill_t *p = a;
105 const spill_t *q = b;
/* comparing by (irn, bb) was deliberately reduced to irn only */
107 // return !(p->irn == q->irn && p->bb == q->bb);
108 return !(p->irn == q->irn);
/* Looks up the keyval_t with key @p key in @p set, hashing by the key
 * pointer. NOTE(review): construction of 'query' is elided in this view. */
112 set_find_keyval(set * set, void * key)
117 return set_find(set, &query, sizeof(query), HASH_PTR(key));
/* Inserts (or finds, if present) the pair (key, val) in @p set, hashed by
 * the key pointer. NOTE(review): 'query' setup elided in this view. */
121 set_insert_keyval(set * set, void * key, void * val)
127 return set_insert(set, &query, sizeof(query), HASH_PTR(key));
/* Iteration helper: advances the set's internal cursor and returns the
 * next keyval entry (NULL at the end, per libfirm set semantics). */
131 set_next_keyval(set * set)
135 result = set_next(set);
/* Iteration helper: resets the set's internal cursor and returns the
 * first keyval entry (NULL if the set is empty). */
144 set_first_keyval(set * set)
148 result = set_first(set);
/* Iteration macros over libfirm psets/sets and over the pre-/post-remat
 * chains. For the remat variants the first call seeds the walk from 's'
 * and subsequent iterations continue from the current element 'i'. */
156 #define pset_foreach(s,i) for((i)=pset_first((s)); (i); (i)=pset_next((s)))
157 #define set_foreach(s,i) for((i)=set_first((s)); (i); (i)=set_next((s)))
158 #define foreach_post_remat(s,i) for((i)=next_post_remat((s)); (i); (i)=next_post_remat((i)))
159 #define foreach_pre_remat(s,i) for((i)=next_pre_remat((s)); (i); (i)=next_pre_remat((i)))
/* set compare function for keyval_t: 0 (equal) iff identical key pointer.
 * 'size' is required by the libfirm set interface but unused here. */
162 cmp_keyval(const void *a, const void *b, size_t size)
164 const keyval_t *p = a;
165 const keyval_t *q = b;
167 return !(p->key == q->key);
/* Estimated execution frequency of @p irn: 10^loop_depth, computed as
 * expf(depth * ln 10). Two returns are visible: one treats irn as a block
 * directly, the other goes through get_nodes_block().
 * NOTE(review): the selecting condition (presumably is_Block(irn)) is not
 * visible in this view -- confirm against the full file. */
171 execution_frequency(ir_node * irn)
174 return expf((float)get_loop_depth(get_irn_loop(irn)) * logf(10));
176 return expf((float)get_loop_depth(get_irn_loop(get_nodes_block(irn))) * logf(10));
180  * Checks, whether node and its operands have suitable reg classes
183 is_rematerializable(const spill_ilp_t * si, const ir_node * irn)
187 const arch_env_t *arch_env = si->chordal_env->birg->main_env->arch_env;
/* the backend must flag the operation itself as rematerializable ... */
188 int remat = (arch_irn_get_flags(arch_env, irn) & arch_irn_flags_rematerializable) != 0;
192 ir_fprintf(stderr, " Node %+F is not rematerializable\n", irn);
/* ... and every operand must be in our register class, otherwise its
 * value may not be available when the node is re-executed */
195 for (i = 0, n = get_irn_arity(irn); i < n && remat; ++i) {
196 ir_node *op = get_irn_n(irn, i);
197 remat &= has_reg_class(si, op);
200 ir_fprintf(stderr, " Argument %d (%+F) of Node %+F has wrong regclass\n", i, op, irn);
/* Returns whether the value @p val is already defined when control reaches
 * @p pos. @p pos may itself be a block, meaning "at the end of that block".
 * NOTE(review): the branch structure between the three cases is partly
 * elided in this view. */
207 value_is_defined_before(const spill_ilp_t * si, const ir_node * pos, const ir_node * val)
210 ir_node *def_block = get_nodes_block(val);
216 /* if pos is at end of a basic block */
218 ret = (pos == def_block || block_dominates(def_block, pos));
219 ir_fprintf(stderr, "(def(bb)=%d) ", ret);
223 /* else if this is a normal operation */
224 block = get_nodes_block(pos);
225 if(block == def_block) {
/* same block: defined before iff val is scheduled earlier than pos */
226 ret = sched_comes_after(val, pos);
227 ir_fprintf(stderr, "(def(same block)=%d) ",ret);
/* different blocks: defined before iff def_block dominates pos' block */
231 ret = block_dominates(def_block, block);
232 ir_fprintf(stderr, "(def(other block)=%d) ", ret);
238  * Returns first non-Phi node of block @p bb
240 static INLINE ir_node *
241 sched_block_first_nonphi(const ir_node * bb)
/* '1' presumably selects forward skipping from the block head (matches
 * sched_next_op below) -- TODO confirm against sched_skip's contract */
243 return sched_skip((ir_node*)bb, 1, sched_skip_phi_predicator, NULL);
/* Predicate for sched_skip(): skip over Proj nodes. 'data' is unused. */
247 sched_skip_proj_predicator(const ir_node * irn, void * data)
249 return (is_Proj(irn));
253  * Returns next operation node (non-Proj) after @p irn
254  * or the basic block of this node
256 static INLINE ir_node *
257 sched_next_op(const ir_node * irn)
259 ir_node *next = sched_next(irn);
/* NOTE(review): an end-of-schedule check between these lines is elided
 * in this view (the doc comment promises the block is returned then). */
264 return sched_skip(next, 1, sched_skip_proj_predicator, NULL);
268  * Returns previous operation node (non-Proj) before @p irn
269  * or the basic block of this node
271 static INLINE ir_node *
272 sched_prev_op(const ir_node * irn)
274 ir_node *prev = sched_prev(irn);
/* NOTE(review): a begin-of-schedule check between these lines is elided
 * in this view; '0' walks backwards, mirroring sched_next_op. */
279 return sched_skip(prev, 0, sched_skip_proj_predicator, NULL);
284  * Preparation of blocks' ends for Luke Blockwalker(tm)(R)
287 luke_endwalker(ir_node * bb, void * data)
289 spill_ilp_t *si = (spill_ilp_t*)data;
294 const int n_regs = arch_register_class_n_regs(si->cls);
298 live = pset_new_ptr_default();
/* collect all values of our register class live at the end of bb */
300 live_foreach(bb, li) {
301 ir_node *irn = (ir_node *) li->irn;
302 if (live_is_end(li) && has_reg_class(si, irn)) {
303 pset_insert_ptr(live, irn);
/* constraint: at most n_regs live-end values may stay in registers */
308 ir_snprintf(buf, sizeof(buf), "check_end_%N", bb);
309 cst = lpp_add_cst(si->lpp, buf, lpp_less, n_regs);
310 spill_bb->ilp = new_set(cmp_spill, 16);
/* one binary reg_out variable per live-end value, summed into cst.
 * NOTE(review): the loop creating 'spill' entries is elided in this view. */
311 spill->reg_out = lpp_add_var(si->lpp, buf, lpp_binary, 0.0);
312 lpp_set_factor_fast(si->lpp, cst, spill->reg_out, 1.0);
320  * Walk all irg blocks and emit this ILP
/* NOTE(review): the body is almost entirely elided in this view; only the
 * setup (spiller context, class size, live pset) is visible. */
323 luke_blockwalker(ir_node * bb, void * data)
325 spill_ilp_t *si = (spill_ilp_t*)data;
330 const int n_regs = arch_register_class_n_regs(si->cls);
336 live = pset_new_ptr_default();
/* NOTE(review): body of an is_zero()-style helper whose signature is not
 * visible here; tolerance test for ILP solution values (used by is_spilled). */
344 return fabs(x) < 0.00001;
/* A live range counts as spilled iff its in_mem_var is (numerically)
 * nonzero in the computed ILP solution. */
349 is_spilled(const spill_ilp_t * si, const live_range_t * lr)
351 return !is_zero(lpp_get_var_sol(si->lpp, lr->in_mem_var));
/* Phi-spill predicate passed to be_new_spill_env(): decides whether a Phi
 * should live in memory. NOTE(review): the actual return statement is not
 * visible here; the commented line hints at the intended criterion. */
356 is_mem_phi(const ir_node * phi, void *data)
358 spill_ilp_t *si = data;
359 // return is_spilled(si, get_use_head(si, phi)->closest_use);
/**
 * Entry point of the Appel-style ILP spiller: sets up the spiller context,
 * builds the ILP with the two block walkers, solves it (locally via CPLEX
 * or remotely via the LPP net server) and dumps problem/solution files.
 * NOTE(review): several lines (obstack setup, #ifdef structure around the
 * solver calls, cleanup) are elided in this view.
 */
364 be_spill_appel(const be_chordal_env_t * chordal_env)
366 char problem_name[256];
370 ir_snprintf(problem_name, sizeof(problem_name), "%F_%s", chordal_env->irg, chordal_env->cls->name);
373 si.chordal_env = chordal_env;
375 si.senv = be_new_spill_env(chordal_env, is_mem_phi, &si);
376 si.cls = chordal_env->cls;
377 si.lpp = new_lpp(problem_name, lpp_minimize);
378 FIRM_DBG_REGISTER(si.dbg, "firm.be.ra.spillappel");
/* 0xFF: enable all debug levels for this module */
380 firm_dbg_set_mask(si.dbg,0xFF);
382 set_irg_link(chordal_env->irg, &si);
384 ir_fprintf(stderr, "\nProcessing %s\n\n", problem_name);
/* destruct SSA first: the ILP formulation works on the destructed graph */
386 ir_fprintf(stderr, "Initiating SSA destruction\n");
387 be_ssa_destr_simple(chordal_env->irg, chordal_env->birg->main_env->arch_env);
388 be_dump(chordal_env->irg, "-ssadestr-appel", dump_ir_block_graph_sched);
/* emit the ILP: block-end constraints first, then per-block constraints */
392 DBG((si.dbg, LEVEL_1, "\tBuilding ILP\n"));
393 DBG((si.dbg, LEVEL_2, "\t endwalker\n"));
394 irg_block_walk_graph(chordal_env->irg, luke_endwalker, NULL, &si);
396 DBG((si.dbg, LEVEL_2, "\t blockwalker\n"));
397 irg_block_walk_graph(chordal_env->irg, luke_blockwalker, NULL, &si);
/* dump the ILP in plain-text form for offline inspection */
404 ir_snprintf(buf, sizeof(buf), "%s-spillappel.ilp", problem_name);
405 if ((f = fopen(buf, "wt")) != NULL) {
406 lpp_dump_plain(si.lpp, f);
412 DBG((si.dbg, LEVEL_1, "\t%F\n", chordal_env->irg));
/* solve: local CPLEX or remote LPP server (LPP_SERVER/LPP_SOLVER above);
 * NOTE(review): the selecting #ifdef is elided in this view */
417 lpp_solve_cplex(si.lpp);
419 lpp_solve_net(si.lpp, LPP_SERVER, LPP_SOLVER);
421 assert(lpp_is_sol_valid(si.lpp)
422 && "solution of ILP must be valid");
424 DBG((si.dbg, LEVEL_1, "\titerations: %d, solution time: %g\n", si.lpp->iterations, si.lpp->sol_time));
/* dump the solution value of every ILP variable */
431 ir_snprintf(buf, sizeof(buf), "%s-spillappel.sol", problem_name);
432 if ((f = fopen(buf, "wt")) != NULL) {
434 for (i = 0; i < si.lpp->var_next; ++i) {
435 lpp_name_t *name = si.lpp->vars[i];
436 fprintf(f, "%20s %4d %10f\n", name->name, name->nr, name->value);
/* writing the results back into the graph is not yet enabled */
443 // writeback_results(&si);
446 firm_dbg_set_mask(si.dbg, 0);
449 obstack_free(&obst, NULL);
/* Dummy symbol emitted when WITH_ILP is not defined, so that this
 * translation unit is never empty (ISO C forbids empty TUs). */
456 only_that_you_can_compile_without_WITH_ILP_defined(void)
460 #endif /* WITH_ILP */
467 process_block(ir_node * bl, void *data)
473 spill_ilp_t *si = data;
475 int n_regs = arch_register_class_n_regs(si->cls);
476 int n_preds = get_irn_arity(bl);
477 pset *live = pset_new_ptr_default();
481 DBG((si->dbg, LEVEL_3, "\n"));
482 DBG((si->dbg, LEVEL_3, "Processing %+F\n", bl));
485 * Get all live-end values of this block
487 live_foreach(bl, li) {
488 if (live_is_end(li) && has_reg_class(si, li->irn)) {
489 ir_node *irn = (ir_node *) li->irn;
490 pset_insert_ptr(live, irn);
493 * The "user" of the live range to the end of a
494 * block is the block itself. This is quite
497 set_irn_link(irn, get_live_range(si, irn, bl, -1));
500 DBG((si->dbg, LEVEL_3, "Live-End:\n"));
501 print_live_set(si, live);
504 * Walk through the schedule of this block from end to begin.
505 * Phis are handled togther with live ins after this loop.
507 for (irn = sched_last(bl); !sched_is_begin(irn) && !is_Phi(irn); irn = sched_prev(irn)) {
518 * Determine the number of results
521 * Special handling of Projs
524 if (has_reg_class(si, irn)) {
525 assert(pset_find_ptr(live, irn)
526 && "node must be live");
527 pset_remove_ptr(live, irn);
531 DBG((si->dbg, LEVEL_2, "Skipped %+F\n", irn));
535 DBG((si->dbg, LEVEL_1, "Irn %+F\n", irn));
540 assert(get_irn_mode(irn) == mode_T && "node before projs must be tuple");
547 if (has_reg_class(si, irn)) {
548 assert(get_irn_mode(irn) != mode_T && "node must not be a tuple");
549 assert(pset_find_ptr(live, irn)
550 && "node must be live");
551 pset_remove_ptr(live, irn);
559 * cand holds the irns which may be spilled
561 cand = pset_new_ptr(8);
562 for (l = pset_first(live); l; l = pset_next(live))
563 pset_insert_ptr(cand, l);
566 * Determine number of arguments
569 for (i = 0, n = get_irn_arity(irn); i < n; ++i) {
570 ir_node *op = get_irn_n(irn, i);
571 if (has_reg_class(si, op)) {
572 DBG((si->dbg, LEVEL_2, " arg %+F\n", op));
576 * arguments must not be spilled
578 if (pset_find_ptr(cand, op))
579 pset_remove_ptr(cand, op);
584 * Determine, how many values must be in memory.
585 * We have 'n_regs' registers.
586 * The instr. needs 'demand'.
587 * So (R:= n_regs - demand) registers can be used for candidates 'cand'.
588 * The rest (M:= n_cand - R) must reside in memory.
590 demand = MAX(results, relevant_args);
591 n_cand = pset_count(cand);
592 must_be_in_mem = n_cand - (n_regs - demand);
594 DBG((si->dbg, LEVEL_1, " Demand: %d, Cands: %d, InMem: %d\n", demand, n_cand, must_be_in_mem));
595 DBG((si->dbg, LEVEL_3, " Cand-Set:\n"));
596 print_live_set(si, cand);
599 * Generate the corresponding constraint spilling
600 * enough candidates at this label.
602 if (must_be_in_mem > 0) {
603 ir_snprintf(buf, sizeof(buf), "cp_%N_%N_%d", bl, irn, step);
604 cst = lpp_add_cst(si->lpp, buf, lpp_greater, must_be_in_mem);
606 for (l = pset_first(cand); l; l = pset_next(cand)) {
607 live_range_t *lr = get_irn_link(l);
608 lpp_set_factor_fast(si->lpp, cst, lr->in_mem_var, 1.0);
612 for (i = 0, n = get_irn_arity(irn); i < n; ++i) {
613 ir_node *op = get_irn_n(irn, i);
615 if (has_reg_class(si, op)) {
616 live_range_t *op_lr = get_live_range(si, op, irn, i);
617 set_irn_link(op, op_lr);
621 * The operand is reloaded at its usage, so it must not occur
622 * in the constraint which determines which values live at the
623 * instruction must reside in memory.
625 if (must_be_in_mem > 0) {
626 DBG((si->dbg, LEVEL_3, " Resetting %+F to 0:\n", op));
627 lpp_set_factor_fast(si->lpp, cst, op_lr->in_mem_var, 0.0);
632 * Check, if the node is a rematerializable node and
633 * if its operands are live here.
635 if (si->enable_remat && can_remat(si, op, live)) {
641 for (j = 0, n = get_irn_arity(op); j < n; ++j)
642 n_operands += has_reg_class(si, get_irn_n(op, j));
645 * Make the remat constraint for
648 ir_snprintf(buf, sizeof(buf), "ce1_%N_%N_%d", op, irn, i);
649 cst = lpp_add_cst(si->lpp, buf, lpp_less, n_operands);
652 * Make the rematerialize variable
655 ir_snprintf(buf, sizeof(buf), "e_%N_%N_%d", op, irn, i);
656 op_lr->is_remat_var = lpp_add_var(si->lpp, buf, lpp_binary, COST_REMAT);
657 lpp_set_factor_fast(si->lpp, cst, op_lr->is_remat_var, n_operands);
659 for (j = 0, n = get_irn_arity(op); j < n; ++j) {
660 ir_node *oop = get_irn_n(op, j);
661 if (has_reg_class(si, oop)) {
662 live_range_t * lr = get_irn_link(oop);
663 lpp_set_factor_fast(si->lpp, cst, lr->in_mem_var, 1.0);
667 ir_snprintf(buf, sizeof(buf), "ce2_%N_%N_%d", op, irn, i);
668 cst = lpp_add_cst(si->lpp, buf, lpp_less, 0.0);
669 lpp_set_factor_fast(si->lpp, cst, op_lr->is_remat_var, 1.0);
670 lpp_set_factor_fast(si->lpp, cst, op_lr->in_mem_var, -1.0);
676 * Insert arguments of current instr into the live set
678 for (i = 0, n = get_irn_arity(irn); i < n; ++i) {
679 ir_node *op = get_irn_n(irn, i);
680 if (has_reg_class(si, op))
681 pset_insert_ptr(live, op);
688 if (bl == get_irg_start_block(get_irn_irg(bl)))
692 * Here, the live set contains
693 * - phis of the block
694 * - live-in values of the block
696 * TODO: comment is wrong
697 * If a value is live in, it must be in a register in all predecessor
698 * blocks or in memory at the end of all predecessor blocks. Also, the
699 * closest use in the current block must then be from register or
700 * memory, respectively.
702 for (irn = pset_first(live); irn; irn = pset_next(live)) {
703 live_range_t *lr = get_irn_link(irn);
704 int is_phi = is_Phi(irn)
705 && get_nodes_block(irn) == bl;
708 assert(has_reg_class(si, irn));
709 assert(is_Phi(irn) || is_live_in(bl, irn));
712 * Deprecated: Can be done with the first uses map
715 lr->use_head->closest_use = lr;
718 * Remind the liverange of the first use of a live (or phi) in the
721 add_first_use(si, bl, irn, lr);
723 for (i = 0; i < n_preds; ++i) {
724 ir_node *pred_bl = get_Block_cfgpred_block(bl, i);
725 ir_node *end_node = is_phi ? get_irn_n(irn, i) : irn;
726 live_range_t *op_lr = get_live_range(si, end_node, pred_bl,
728 edge_reload_t *edge = obstack_alloc(si->obst,
731 ir_snprintf(buf, sizeof(buf), "edge_b%N_p%N_i%N", bl, pred_bl, end_node);
732 edge->in_mem_var = lpp_add_var(si->lpp, buf, lpp_binary, get_weight(pred_bl) * COST_LOAD);
734 edge->irn = end_node;
736 edge->next = si->edges;
739 ir_snprintf(buf, sizeof(buf), "cedge_b%N_p%N_i%N", bl, pred_bl, end_node);
740 cst = lpp_add_cst(si->lpp, buf, lpp_less, 0.0);
741 lpp_set_factor_fast(si->lpp, cst, op_lr->in_mem_var, 1.0);
742 lpp_set_factor_fast(si->lpp, cst, lr->in_mem_var, -1.0);
743 lpp_set_factor_fast(si->lpp, cst, edge->in_mem_var, -1.0);