2 * Copyright (C) 1995-2007 University of Karlsruhe. All rights reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Beladys spillalgorithm version 2.
23 * @author Sebastian Hack, Matthias Braun, Daniel Grund
27 * The main differences to the original Belady are:
28 * - The workset is empty at the start of a block
29 * There is no attempt to fill it with variables which
30 * are not used in the block.
31 * - There is a global pass which tries to use the remaining
32 * capacity of the blocks to let global variables live through
43 #include "irnodeset.h"
45 #include "irprintf_t.h"
51 #include "iredges_t.h"
52 #include "irphase_t.h"
60 #include "bespillbelady.h"
61 #include "besched_t.h"
65 #include "bechordal_t.h"
66 #include "bespilloptions.h"
67 #include "beloopana.h"
71 #include <libcore/lc_opts.h>
72 #include <libcore/lc_opts_enum.h>
73 #include <libcore/lc_timing.h>
/* Debug-mask bits selecting workset-tracing and global-pass output. */
82 #define DBG_WORKSET 128
83 #define DBG_GLOBAL 256
/* Default multiplier for use-distances of values that were spilled before
 * (makes re-spilling them less attractive). */
85 #define ALREADY_SPILLED_FACTOR 2
/* Sentinel distances just below DEAD: value is live at block end / can be
 * rematerialized cheaply. */
88 #define LIVE_END (DEAD-1)
89 #define REMAT_DIST (DEAD-2)
/* Run-time tunables, exposed via the lc_opt table below
 * (be.spill.belady2.{asf,remat,global}). */
91 static int already_spilled_factor = 2;
92 static int remat_live_range_ext = 1;
93 static int global_pass_enabled = 1;
95 static const lc_opt_table_entry_t options[] = {
96 LC_OPT_ENT_INT ("asf", "already spilled factor", &already_spilled_factor),
97 LC_OPT_ENT_BOOL ("remat", "rematerializable ops get infinite long live ranges", &remat_live_range_ext),
98 LC_OPT_ENT_BOOL ("global", "enable/disable the global pass", &global_pass_enabled),
/* Debug module handle; only present in debug builds. */
102 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
105 * An association between a node and a point in time.
107 typedef struct _loc_t {
108 ir_node *irn; /**< A node. */
109 unsigned time; /**< A use time.
110 In the global pass this is used
111 as the version number and not as a time.
112 Only to save space...
/* A working set: the (at most n_regs) values currently assumed to be
 * in registers, each paired with its next-use distance. */
116 typedef struct _workset_t {
117 int len; /**< current length */
118 loc_t vals[0]; /**< inlined array of the values/distances in this working set */
/* Per-run environment shared by the local Belady pass and the global pass. */
121 typedef struct _belady_env_t {
124 const arch_env_t *arch;
125 const arch_register_class_t *cls;
129 ir_node **blocks; /**< Array of all blocks. */
130 int n_blocks; /**< Number of blocks in the graph. */
131 int n_regs; /**< number of regs in this reg-class */
132 workset_t *ws; /**< the main workset used while processing a block. ob-allocated */
133 ir_node *instr; /**< current instruction */
134 int instr_nr; /**< current instruction number (relative to block start) */
136 spill_env_t *senv; /**< see bespill.h */
137 bitset_t *spilled; /**< bitset to keep all the irns which have already been spilled. */
/* qsort comparator for loc_t: orders by ascending use time
 * (returns -1/0/+1 without risking integer overflow). */
141 static int loc_compare(const void *a, const void *b)
145 return (p->time > q->time) - (p->time < q->time);
/* Debug helper: dump each (node, time) pair of the workset to stderr. */
148 static INLINE void workset_print(const workset_t *w)
152 for(i = 0; i < w->len; ++i) {
153 ir_fprintf(stderr, "%+F %d\n", w->vals[i].irn, w->vals[i].time);
158 * Alloc a new workset on obstack @p ob with maximum size @p max
160 static INLINE workset_t *new_workset(belady_env_t *env, struct obstack *ob) {
/* header plus flexible array sized for n_regs entries */
162 size_t size = sizeof(*res) + (env->n_regs)*sizeof(res->vals[0]);
163 res = obstack_alloc(ob, size);
164 memset(res, 0, size);
169 * Alloc a new instance on obstack and make it equal to @param ws
171 static INLINE workset_t *workset_clone(belady_env_t *env, struct obstack *ob, workset_t *ws) {
/* always copy the full n_regs-sized array, not just ws->len entries */
173 size_t size = sizeof(*res) + (env->n_regs)*sizeof(res->vals[0]);
174 res = obstack_alloc(ob, size);
175 memcpy(res, ws, size);
180 * Do NOT alloc anything. Make @param tgt equal to @param src.
181 * returns @param tgt for convenience
/* Caller must guarantee tgt was allocated with capacity env->n_regs. */
183 static INLINE workset_t *workset_copy(belady_env_t *env, workset_t *tgt, workset_t *src) {
184 size_t size = sizeof(*src) + (env->n_regs)*sizeof(src->vals[0]);
185 memcpy(tgt, src, size);
190 * Overwrites the current content array of @param ws with the
191 * @param count locations given at memory @param locs.
192 * Set the length of @param ws to count.
194 static INLINE void workset_bulk_fill(workset_t *workset, int count, const loc_t *locs) {
195 workset->len = count;
196 memcpy(&(workset->vals[0]), locs, count * sizeof(locs[0]));
200 * Inserts the value @p val into the workset, iff it is not
201 * already contained. The workset must not be full.
203 static INLINE void workset_insert(belady_env_t *env, workset_t *ws, ir_node *val) {
205 /* values of other register classes are silently ignored */
206 if (!arch_irn_consider_in_reg_alloc(env->arch, env->cls, val)) {
207 DBG((dbg, DBG_WORKSET, "Skipped %+F\n", val));
211 /* check if val is already contained */
212 for(i=0; i<ws->len; ++i)
213 if (ws->vals[i].irn == val)
/* append at the end; time field is left untouched here */
217 assert(ws->len < env->n_regs && "Workset already full!");
218 ws->vals[ws->len++].irn = val;
222 * Removes all entries from this workset
224 static INLINE void workset_clear(workset_t *ws) {
229 * Removes the value @p val from the workset if present.
231 static INLINE void workset_remove(workset_t *ws, ir_node *val) {
233 for(i=0; i<ws->len; ++i) {
234 if (ws->vals[i].irn == val) {
/* unordered removal: move last element into the freed slot */
235 ws->vals[i] = ws->vals[--ws->len];
/* Linear search for @p val; returns its slot index (negative if absent,
 * see workset_contains below). */
241 static INLINE int workset_get_index(const workset_t *ws, const ir_node *val) {
243 for(i=0; i<ws->len; ++i) {
244 if (ws->vals[i].irn == val)
252 * Iterates over all values in the working set.
253 * @p ws The workset to iterate
254 * @p v A variable to put the current value in
255 * @p i An integer for internal use
257 #define workset_foreach(ws, v, i) for(i=0; \
258 v=(i < ws->len) ? ws->vals[i].irn : NULL, i < ws->len; \
/* Accessors for the time/length/value fields of a workset slot. */
261 #define workset_set_time(ws, i, t) (ws)->vals[i].time=t
262 #define workset_get_time(ws, i) (ws)->vals[i].time
263 #define workset_set_length(ws, length) (ws)->len = length
264 #define workset_get_length(ws) ((ws)->len)
265 #define workset_get_val(ws, i) ((ws)->vals[i].irn)
/* Sort entries by ascending next-use distance (see loc_compare). */
266 #define workset_sort(ws) qsort((ws)->vals, (ws)->len, sizeof((ws)->vals[0]), loc_compare);
267 #define workset_contains(ws, n) (workset_get_index(ws, n) >= 0)
/* Per-block results of the local Belady pass, consumed by the global pass. */
269 typedef struct _block_info_t {
272 workset_t *ws_start, *ws_end;
276 ir_node *first_non_in; /**< First node in block which is not a phi. */
277 ir_node *last_ins; /**< The instruction before which end of
278 block reloads will be inserted. */
280 workset_t *entrance_reg; /**< That set will contain all values
281 transported into the block which
282 are used before they are displaced.
283 That means, we later have to care to
284 bring them into the block in a register
285 or reload them at the entry of the block. */
287 int pressure; /**< The amount of registers which remain free
288 in this block. This capacity can be used to let
289 global variables, transported into other blocks,
290 live through this block. */
292 double exec_freq; /**< The execution frequency of this block. */
/* Allocate and zero-initialize a block_info_t for block @p id and attach it
 * to the block's link field (retrieved later via get_block_info). */
295 static INLINE void *new_block_info(belady_env_t *bel, int id)
297 ir_node *bl = bel->blocks[id];
298 block_info_t *res = obstack_alloc(&bel->ob, sizeof(*res));
299 memset(res, 0, sizeof(res[0]));
300 res->first_non_in = NULL;
/* last_ins is computed lazily in block_info_get_last_ins */
301 res->last_ins = NULL;
305 res->entrance_reg = new_workset(bel, &bel->ob);
306 res->exec_freq = get_block_execfreq(bel->ef, bl);
307 set_irn_link(bl, res);
/* The block info is stored in the block's link field (set in new_block_info). */
311 #define get_block_info(block) ((block_info_t *)get_irn_link(block))
312 #define set_block_info(block, info) set_irn_link(block, info)
/* Lazily compute and cache the insertion point for end-of-block reloads. */
314 static INLINE ir_node *block_info_get_last_ins(block_info_t *bi)
317 bi->last_ins = be_get_end_of_block_insertion_point(bi->bl);
/* Node of a per-value, per-block singly linked list of uses, ordered by
 * schedule time step (built back-to-front in build_next_uses). */
322 typedef struct _next_use_t {
323 unsigned is_first_use : 1; /**< Indicate that this use is the first
324 in the block. Needed to identify
325 transport in values for the global
327 int step; /**< The time step of the use. */
329 struct _next_use_t *next; /**< The next use in this block
/* Phase callback: per-node data is created explicitly, so init returns
 * nothing useful here. */
333 static void *next_use_init(ir_phase *phase, ir_node *irn, void *old)
/* Walk the block's schedule backwards and build, for every operand value,
 * the list of its uses inside this block (head = earliest remaining use). */
341 static void build_next_uses(block_info_t *bi)
/* make sched_get_time_step consistent for this block */
345 sched_renumber(bi->bl);
347 phase_init(&bi->next_uses, "next uses", bi->bel->irg, PHASE_DEFAULT_GROWTH, next_use_init, NULL);
348 sched_foreach_reverse(bi->bl, irn) {
354 for (i = get_irn_arity(irn) - 1; i >= 0; --i) {
355 ir_node *op = get_irn_n(irn, i);
/* prepend the new use; "curr" is the use list built so far (later uses) */
356 next_use_t *curr = phase_get_irn_data(&bi->next_uses, op);
357 next_use_t *use = phase_alloc(&bi->next_uses, sizeof(use[0]));
359 use->is_first_use = 1;
360 use->step = sched_get_time_step(irn);
/* a previously recorded use is no longer the first one in the block */
365 curr->is_first_use = 0;
366 assert(curr->step >= use->step);
369 phase_set_irn_data(&bi->next_uses, op, use);
/* Head of the use list for @p irn, i.e. its next use from the current point. */
374 #define get_current_use(bi, irn) phase_get_irn_data(&(bi)->next_uses, (irn))
/* Pop the head of @p irn's use list: the current use has been processed,
 * so the next list entry becomes the "current" use. */
376 static INLINE void advance_current_use(block_info_t *bi, const ir_node *irn)
378 next_use_t *use = get_current_use(bi, irn);
381 phase_set_irn_data(&bi->next_uses, irn, use->next);
/* Compute the next-use distance of @p irn relative to the current
 * instruction; larger means a better eviction candidate. Special values:
 * REMAT_DIST (cheaply rematerializable), LIVE_END (no further use in this
 * block but live at its end), DEAD (no further use at all). */
384 static INLINE unsigned get_curr_distance(block_info_t *bi, const ir_node *irn, int is_usage)
386 belady_env_t *env = bi->bel;
387 int curr_step = sched_get_time_step(env->instr);
388 next_use_t *use = get_current_use(bi, irn);
389 int flags = arch_irn_get_flags(env->arch, irn);
391 assert(!(flags & arch_irn_flags_ignore));
393 /* We have to keep nonspillable nodes in the workingset */
394 if(flags & arch_irn_flags_dont_spill)
/* a use at the current step only counts when we are processing a usage */
397 if (!is_usage && use && use->step == curr_step)
401 unsigned res = use->step - curr_step;
403 assert(use->step >= curr_step);
/* prefer evicting rematerializable values; penalize re-spilling */
406 if (remat_live_range_ext && be_is_rematerializable(env->senv, irn, use->irn))
408 else if (bitset_contains_irn(env->spilled, irn))
409 res *= already_spilled_factor;
415 return be_is_live_end(env->lv, bi->bl, irn) ? LIVE_END : DEAD;
/* A "local phi" is a Phi node whose home block is @p bl. */
418 static INLINE int is_local_phi(const ir_node *bl, const ir_node *irn)
420 return is_Phi(irn) && get_nodes_block(irn) == bl;
424 * Check, if the value is something that is transported into a block.
425 * That is, the value is defined elsewhere or defined by a Phi in the block.
426 * @param env The belady environment.
427 * @param bl The block in question.
428 * @param irn The node in question.
429 * @return 1, if node is something transported into @p bl, 0 if not.
430 * @note The function will only give correct answers in the case
431 * where @p irn is used in the block @p bl which is always
432 * the case in our usage scenario.
434 static INLINE int is_transport_in(const ir_node *bl, const ir_node *irn)
436 return is_local_phi(bl, irn) || get_nodes_block(irn) != bl;
440 * Performs the actions necessary to grant the request that:
441 * - new_vals can be held in registers
442 * - as few as possible other values are disposed
443 * - the worst values get disposed
445 * @p is_usage indicates that the values in new_vals are used (not defined)
446 * In this case reloads must be performed
448 static void displace(block_info_t *bi, workset_t *new_vals, int is_usage) {
449 belady_env_t *env = bi->bel;
450 workset_t *ws = env->ws;
451 ir_node **to_insert = alloca(env->n_regs * sizeof(to_insert[0]));
453 int i, len, max_allowed, demand, iter;
457 1. Identify the number of needed slots and the values to reload
460 workset_foreach(new_vals, val, iter) {
461 /* mark value as used */
463 if (! workset_contains(ws, val)) {
464 DBG((dbg, DBG_DECIDE, " insert %+F\n", val));
465 to_insert[demand++] = val;
467 int insert_reload = 1;
468 next_use_t *use = get_current_use(bi, val);
471 * if we use a value which is transported in this block, i.e. a
472 * phi defined here or a live in, for the first time, we check
473 * if there is room for that guy to survive from the block's
474 * entrance to here or not.
477 assert(sched_get_time_step(env->instr) == use->step);
478 if (is_transport_in(bi->bl, val) && use->is_first_use) {
479 DBG((dbg, DBG_DECIDE, "entrance node %+F, pressure %d:\n", val, bi->pressure));
/* room left: defer decision to the global pass via entrance_reg */
480 if (bi->pressure < env->n_regs) {
481 workset_insert(env, bi->entrance_reg, val);
484 DBG((dbg, DBG_DECIDE, "... no reload. must be considered at block start\n"));
/* otherwise reload right before the current instruction */
489 bitset_add_irn(env->spilled, val);
490 DBG((dbg, DBG_SPILL, "Reload %+F before %+F\n", val, env->instr));
491 be_add_reload(env->senv, val, env->instr, env->cls, 1);
495 assert(is_usage || "Defined value already in workset?!?");
496 DBG((dbg, DBG_DECIDE, " skip %+F\n", val));
499 DBG((dbg, DBG_DECIDE, " demand = %d\n", demand));
502 2. Make room for at least 'demand' slots
504 len = workset_get_length(ws);
505 max_allowed = env->n_regs - demand;
507 /* Only make more free room if we do not have enough */
508 if (len > max_allowed) {
509 DBG((dbg, DBG_DECIDE, " disposing %d values\n", len - max_allowed));
511 /* get current next-use distance */
512 for (i = 0; i < ws->len; ++i) {
513 ir_node *val = workset_get_val(ws, i);
514 unsigned dist = get_curr_distance(bi, val, is_usage);
515 workset_set_time(ws, i, dist);
518 /* sort entries by increasing nextuse-distance*/
521 /* kill the last 'demand' entries in the array */
522 workset_set_length(ws, max_allowed);
526 3. Insert the new values into the workset
527 Also, we update the pressure in the block info.
528 That is important for the global pass to decide
529 how many values can live through the block.
531 for (i = 0; i < demand; ++i)
532 workset_insert(env, env->ws, to_insert[i]);
/* record the maximum pressure seen in this block */
534 bi->pressure = MAX(bi->pressure, workset_get_length(env->ws));
538 * For the given block @p block, decide for each values
539 * whether it is used from a register or is reloaded
542 static void belady(belady_env_t *env, int id) {
543 block_info_t *block_info = new_block_info(env, id);
544 const ir_node *block = block_info->bl;
/* remember obstack state so per-block allocations can be released below */
545 void *obst_state = obstack_base(&env->ob);
551 DBG((dbg, DBG_WSETS, "Processing %+F...\n", block_info->bl));
552 new_vals = new_workset(env, &env->ob);
/* the workset starts empty at block entry (see file header comment) */
553 workset_clear(env->ws);
555 /* build the next use information for this block. */
556 build_next_uses(block_info);
559 block_info->first_non_in = NULL;
561 /* process the block from start to end */
562 sched_foreach(block, irn) {
564 assert(workset_get_length(env->ws) <= env->n_regs && "Too much values in workset!");
566 /* projs are handled with the tuple value.
567 * Phis are no real instr (see insert_starters())
568 * instr_nr does not increase */
569 if (is_Proj(irn) || is_Phi(irn)) {
570 DBG((dbg, DBG_DECIDE, " ...%+F skipped\n", irn));
573 DBG((dbg, DBG_DECIDE, " ...%+F\n", irn));
575 if (!block_info->first_non_in)
576 block_info->first_non_in = irn;
578 /* set instruction in the workset */
581 /* allocate all values _used_ by this instruction */
582 workset_clear(new_vals);
583 for(i = 0, arity = get_irn_arity(irn); i < arity; ++i) {
584 workset_insert(env, new_vals, get_irn_n(irn, i));
586 displace(block_info, new_vals, 1);
589 * set all used variables to the next use in their next_use_t list
590 * Also, kill all dead variables from the workset. They are only
591 * augmenting the pressure. Note, that a variable is dead
592 * if it has no further use in this block and is *not* live end
594 for(i = 0, arity = get_irn_arity(irn); i < arity; ++i) {
595 ir_node *op = get_irn_n(irn, i);
596 next_use_t *use = get_current_use(block_info, op);
599 if (!use->next && !be_is_live_end(env->lv, block, op))
600 workset_remove(env->ws, op);
602 advance_current_use(block_info, op);
605 /* allocate all values _defined_ by this instruction */
606 workset_clear(new_vals);
607 if (get_irn_mode(irn) == mode_T) { /* special handling for tuples and projs */
608 const ir_edge_t *edge;
610 foreach_out_edge(irn, edge) {
611 ir_node *proj = get_edge_src_irn(edge);
612 workset_insert(env, new_vals, proj);
615 workset_insert(env, new_vals, irn);
617 displace(block_info, new_vals, 0);
/* release per-block phase/obstack memory */
622 phase_free(&block_info->next_uses);
623 obstack_free(&env->ob, obst_state);
625 /* Remember end-workset for this block */
626 block_info->ws_end = workset_clone(env, &env->ob, env->ws);
627 DBG((dbg, DBG_WSETS, "End workset for %+F:\n", block));
628 workset_foreach(block_info->ws_end, irn, iter)
629 DBG((dbg, DBG_WSETS, " %+F (%u)\n", irn, workset_get_time(block_info->ws_end, iter)));
630 DBG((dbg, DBG_WSETS, "Max pressure in block: %d\n", block_info->pressure));
/* ================= The global pass ("global belady part") ================ */
635 |_ _| |__ ___ __ _| | ___ | |__ __ _| | | _ \ __ _ _ __| |_
636 | | | '_ \ / _ \ / _` | |/ _ \| '_ \ / _` | | | |_) / _` | '__| __|
637 | | | | | | __/ | (_| | | (_) | |_) | (_| | | | __/ (_| | | | |_
638 |_| |_| |_|\___| \__, |_|\___/|_.__/ \__,_|_| |_| \__,_|_| \__|
/* In the global pass the loc_t time field is reused as a version number. */
643 #define workset_set_version(ws, i, t) ((ws)->vals[(i)].time = (t))
644 #define workset_get_version(ws, i) ((ws)->vals[(i)].time)
/* Version arithmetic: older versions are "fixed", younger ones tentative. */
646 #define ver_oldest (0)
647 #define ver_youngest ((unsigned) -1)
648 #define ver_make_newer(v) ((v) + 1)
649 #define ver_is_older(v, w) ((v) < (w))
650 #define ver_is_younger(v, w) ((v) > (w))
/* qsort comparator over ir_node* blocks: sort by DESCENDING execution
 * frequency, so the global pass visits hot blocks first. */
652 static int block_freq_gt(const void *a, const void *b)
654 const ir_node * const *p = a;
655 const ir_node * const *q = b;
656 block_info_t *pi = get_block_info(*p);
657 block_info_t *qi = get_block_info(*q);
658 double diff = qi->exec_freq - pi->exec_freq;
659 return (diff > 0) - (diff < 0);
/* Tentative per-block end state during a global transaction; states are
 * stacked (next/next_intern) so trans_rollback can undo them. */
668 typedef struct _block_state_t {
669 struct _block_state_t *next;
670 struct _block_state_t *next_intern;
673 workset_t *end_state;
/* A pending action (reload at end / live-through) for one irn in one block. */
676 typedef struct _irn_action_t {
677 struct _irn_action_t *next;
/* Transaction state of the whole global pass. */
683 typedef struct _global_end_state_t {
691 unsigned *bs_tops_vers;
692 block_state_t **bs_tops;
693 block_state_t *bs_top;
694 irn_action_t *ia_top;
695 } global_end_state_t;
/* Snapshot of the transaction stacks, taken by trans_begin and restored by
 * trans_rollback. */
699 block_state_t *bs_top;
700 irn_action_t *ia_top;
/* Return the current tentative state for block @p bi, or NULL if this block
 * has no state belonging to the current version (i.e. untouched). */
703 static INLINE block_state_t *get_block_state(global_end_state_t *ges, block_info_t *bi)
706 assert(!ver_is_younger(ges->bs_tops_vers[id], ges->version));
707 return ver_is_older(ges->bs_tops_vers[id], ges->version) ? NULL : ges->bs_tops[bi->id];
/* End workset to reason about: the tentative one if present, else the
 * committed result of the local Belady pass. */
710 static INLINE const workset_t *get_end_state(global_end_state_t *ges, block_info_t *bi)
712 block_state_t *bs = get_block_state(ges, bi);
713 return bs ? bs->end_state : bi->ws_end;
/* Push a fresh tentative state for block @p bi, cloned from the previous
 * tentative state if one exists, else from the committed end state. */
716 static block_state_t *new_block_state(global_end_state_t *ges, block_info_t *bi)
718 block_state_t *bs = get_block_state(ges, bi);
719 block_state_t *nw = obstack_alloc(&ges->obst, sizeof(nw[0]));
721 nw->next_intern = bs;
722 nw->next = ges->bs_top;
726 nw->pressure = bs->pressure;
727 nw->end_state = workset_clone(ges->env, &ges->obst, bs->end_state);
730 nw->pressure = bi->pressure;
731 nw->end_state = workset_clone(ges->env, &ges->obst, bi->ws_end);
/* make this the top-of-stack state for the block, tagged with the version */
735 ges->bs_tops[bi->id] = nw;
736 ges->bs_tops_vers[bi->id] = ges->version;
/* Push a new pending action (initially irn_act_none) for @p irn in @p bl. */
740 static irn_action_t *new_irn_action(global_end_state_t *ges, ir_node *irn, const ir_node *bl)
742 irn_action_t *ia = obstack_alloc(&ges->obst, sizeof(ia[0]));
746 ia->act = irn_act_none;
747 ia->next = ges->ia_top;
/* Snapshot the transaction stacks/obstack so a failed attempt can be
 * undone with trans_rollback. */
752 static INLINE rollback_info_t trans_begin(global_end_state_t *ges)
755 rb.obst_level = obstack_base(&ges->obst);
756 rb.bs_top = ges->bs_top;
757 rb.ia_top = ges->ia_top;
/* Undo everything done since the matching trans_begin: pop the per-block
 * state stacks, restore the action/state tops and free obstack memory. */
761 static INLINE void trans_rollback(global_end_state_t *ges, rollback_info_t *rb)
765 /* unwind all the stacks indexed with the block number */
766 for (bs = ges->bs_top; bs != rb->bs_top; bs = bs->next) {
767 unsigned id = bs->bi->id;
768 ges->bs_tops[id] = bs->next_intern;
771 ges->ia_top = rb->ia_top;
772 ges->bs_top = rb->bs_top;
773 obstack_free(&ges->obst, rb->obst_level);
/* mutual recursion with can_make_available_at_end below */
777 static double can_bring_in(global_end_state_t *ges, ir_node *bl, ir_node *irn, double limit, int level);
/* Estimate the cost of having @p irn in a register at the end of @p bl;
 * returns the cost (HUGE_VAL if impossible), recording tentative block
 * states and actions along the way. @p limit is the cost bound above which
 * the attempt is abandoned; @p level is the recursion depth (for debug
 * indentation only). */
779 static double can_make_available_at_end(global_end_state_t *ges, ir_node *bl, ir_node *irn, double limit, int level)
781 block_info_t *bi = get_block_info(bl);
782 const workset_t *end = get_end_state(ges, bi);
786 DBG((dbg, DBG_GLOBAL, "\t%2Dcan make avail %+F at end of %+F\n", level, irn, bl));
789 * to make the value available at end,
790 * we have several cases here.
792 * - we already visited that block.
793 * - If the value is in the final end set, return 0.
794 * somebody else already allocated it there.
795 * - If not and the final end set is already full,
796 * we cannot make the value available at the end
797 * of this block. return INFINITY.
798 * - Else (value not in final end set and there is room):
799 * 1) The value is in a register at the end of the local Belady pass.
800 * Allocate a slot in the final end set and return 0.
801 * 2) The value is not in the Belady end set:
802 * If the block's capacity is < k then check what it costs
803 * to transport the value from upper blocks to this block.
804 * Compare that against the reload cost in this block. If
805 * cheaper, do the other thing. If not, reload it here.
808 /* if the end set contains it already, it is in a reg and it costs nothing
809 * to load it to one. */
810 index = workset_get_index(end, irn);
812 unsigned ver = workset_get_version(end, index);
813 DBG((dbg, DBG_GLOBAL, "\t%2Dnode is in the end set and is %s fixed\n",
814 level, ver_is_older(ver, ges->version) ? "already" : "not yet"));
817 * if the version is older, the value is already fixed
818 * and cannot be removed from the end set.
820 * If not, we will create a new block state for that block since
821 * we modify it by giving the end state a new version.
823 if (ver_is_younger(ver, ges->version)) {
824 block_state_t *bs = new_block_state(ges, bi);
825 workset_set_version(bs->end_state, index, ges->version);
833 * Now we have two options:
834 * 1) Reload the value at the end of the block.
835 * Therefore, perhaps, we have to erase another one from the workset.
836 * This may only be done if it has not been fixed.
837 * Since fixed means that a previous pass has decided that that value
838 * *has* to stay in the end set.
839 * 2) we can try, if the capacity of the block allows it, to let
840 * the value live through the block and make it available at
843 * First, we test the local (reload in this block) alternative
844 * and compare against the other alternative.
845 * Of course, we choose the cheaper one.
849 int n_regs = bi->bel->n_regs;
850 int len = workset_get_length(end);
857 * look if there is room in the end array
858 * for the variable. Note that this does not
859 * mean that the var can live through the block.
860 * There is just room at the *end*
863 DBG((dbg, DBG_GLOBAL, "\t%2Dthe end set has %d free slots\n", level, n_regs - len));
/* no free slot: search for a non-fixed (younger-version) entry to evict */
866 for (i = 0; i < len; ++i) {
867 unsigned ver = workset_get_version(end, i);
868 if (ver_is_younger(ver, ges->version))
873 DBG((dbg, DBG_GLOBAL, "\t%2D%+F (slot %d) can be erased from the end set\n",
874 level, end->vals[i].irn, i));
880 * finally there is some room. we can at least reload the value.
881 * but we will try to let it live through anyhow.
884 irn_action_t *vs = new_irn_action(ges, irn, bi->bl);
885 block_state_t *bs = new_block_state(ges, bi);
886 workset_t *end = bs->end_state;
887 ir_node *ins_before = block_info_get_last_ins(bi);
888 double reload_here = be_get_reload_costs(bi->bel->senv, irn, ins_before);
889 int pressure_ok = bs->pressure < (unsigned) n_regs;
892 * No matter what we do, the value will be in the end set
893 * if the block from now on (of course only regarding the
894 * current state). Enter it and set the new length
897 end->vals[slot].irn = irn;
898 workset_set_version(end, slot, ges->version);
899 workset_set_length(end, MAX(workset_get_length(end), slot + 1));
/* default/fallback action: reload at the end of this block */
901 vs->act = irn_act_reload;
904 DBG((dbg, DBG_GLOBAL, "\t%2Dthere is a free slot. capacity=%d, reload here=%f, pressure %s\n",
905 level, n_regs - bs->pressure, reload_here, pressure_ok ? "ok" : "insufficient"));
907 /* look if we can bring the value in. */
909 rollback_info_t rb = trans_begin(ges);
910 double new_limit = MIN(reload_here, limit);
912 vs->act = irn_act_live_through;
914 res = can_bring_in(ges, bl, irn, new_limit, level + 1);
917 * if bring in is too expensive re-adjust the pressure
918 * and roll back the state
920 if (res >= reload_here) {
922 vs->act = irn_act_reload;
923 trans_rollback(ges, &rb);
928 DBG((dbg, DBG_GLOBAL, "\t%2D%s\n", level,
929 vs->act == irn_act_reload ? "reloading" : "bringing in"));
934 DBG((dbg, DBG_GLOBAL, "\t%2D-> %f\n", level, res));
/* Estimate the cost of transporting @p irn into block @p bl via all of its
 * control-flow predecessors (summing can_make_available_at_end over them).
 * Returns HUGE_VAL and rolls back if the cost reaches @p limit. */
938 static double can_bring_in(global_end_state_t *ges, ir_node *bl, ir_node *irn, double limit, int level)
940 belady_env_t *env = ges->env;
941 double glob_costs = HUGE_VAL;
943 DBG((dbg, DBG_GLOBAL, "\t%2Dcan bring in (max %f) for %+F at block %+F\n", level, limit, irn, bl));
945 if (is_transport_in(bl, irn)) {
946 int i, n = get_irn_arity(bl);
947 ir_node **nodes = alloca(get_irn_arity(bl) * sizeof(nodes[0]));
948 rollback_info_t rb = trans_begin(ges);
952 for (i = 0; i < n; ++i) {
953 ir_node *pr = get_Block_cfgpred_block(bl, i);
/* for a local phi the value entering via pred i is the phi operand i */
954 ir_node *op = is_local_phi(bl, irn) ? get_irn_n(irn, i) : irn;
958 * there might be unknowns as operands of phis in that case
959 * we set the costs to zero, since they won't get spilled.
961 if (arch_irn_consider_in_reg_alloc(env->arch, env->cls, op))
962 c = can_make_available_at_end(ges, pr, op, limit - glob_costs, level + 1);
/* abort and undo once the accumulated cost hits the limit */
968 if (glob_costs >= limit) {
969 glob_costs = HUGE_VAL;
970 trans_rollback(ges, &rb);
977 DBG((dbg, DBG_GLOBAL, "\t%2D-> %f\n", level, glob_costs));
/* Make the tentative transaction real: perform all recorded irn actions
 * (keep live-through phis, insert end-of-block reloads) and copy each
 * block's tentative end state over the committed one. */
981 static void materialize_and_commit_end_state(global_end_state_t *ges)
983 belady_env_t *env = ges->env;
987 DBG((dbg, DBG_GLOBAL, "\tmaterializing\n"));
990 * Perform all the variable actions.
992 for (ia = ges->ia_top; ia != NULL; ia = ia->next) {
994 case irn_act_live_through:
995 if (is_local_phi(ia->bl, ia->irn)) {
/* remember the phi so global_assign does not spill it later */
996 bitset_add_irn(ges->succ_phis, ia->irn);
997 DBG((dbg, DBG_GLOBAL, "\t\tlive through phi kept alive: %+F\n", ia->irn));
1000 case irn_act_reload:
1001 be_add_reload_at_end(env->senv, ia->irn, ia->bl, env->cls, 1);
1002 DBG((dbg, DBG_GLOBAL, "\t\tadding reload of %+F at end of %+F\n", ia->irn, ia->bl));
1005 DBG((dbg, DBG_GLOBAL, "\t\t%+F is in the end set of %+F\n", ia->irn, ia->bl));
1010 * Commit the block end states
1012 for (bs = ges->bs_top; bs != NULL; bs = bs->next) {
1013 block_info_t *bi = bs->bi;
/* only the newest (topmost) state per block may be committed */
1015 if (!bitset_is_set(ges->committed, bi->id)) {
1016 DBG((dbg, DBG_GLOBAL, "\t\tcommiting workset of %+F with version %x\n", bi->bl, ges->version));
1017 // bes->bs->end_state->vals[idx].version = ges->version;
1018 workset_copy(env, bi->ws_end, bs->end_state);
1019 DBG((dbg, DBG_GLOBAL, "\t\told pressure: %d, new pressure: %d, end length: %d\n",
1020 bi->pressure, bs->pressure, workset_get_length(bs->end_state)));
1021 bi->pressure = bs->pressure;
1022 bitset_set(ges->committed, bi->id);
1026 /* clear the committed bitset. the next call is expecting it. */
1027 bitset_clear_all(ges->committed);
1031 * Examine all irns which shall be in regs at the beginning of the
1034 static void fix_block_borders(global_end_state_t *ges, ir_node *block) {
1035 block_info_t *bi = get_block_info(block);
1036 belady_env_t *env = ges->env;
/* remember obstack base so all transaction memory can be freed at the end */
1037 void *reset_level = obstack_base(&ges->obst);
/* mark all end-set entries of this block as not-yet-fixed */
1042 for (i = workset_get_length(bi->ws_end) - 1; i >= 0; --i)
1043 workset_set_version(bi->ws_end, i, ver_youngest);
1045 DBG((dbg, DBG_GLOBAL, "fixing block borders at %+F (%f)\n", block, bi->exec_freq));
1047 /* process all variables which shall be in a reg at
1048 * the beginning of the block in the order of the next use. */
1049 workset_foreach(bi->entrance_reg, irn, i) {
1050 double local_costs = be_get_reload_costs(env->senv, irn, bi->first_non_in);
1051 double bring_in_costs;
1053 /* reset the lists */
1057 DBG((dbg, DBG_GLOBAL, "\ttrans in var %+F, version %x\n", irn, ges->version));
1059 bring_in_costs = can_bring_in(ges, block, irn, local_costs, 1);
1061 DBG((dbg, DBG_GLOBAL, "\tbring in: %f, local: %f", bring_in_costs, local_costs));
1064 * we were not able to let the value arrive
1065 * in a register at the entrance of the block
1066 * or it is too costly, so we have to do the reload locally
1068 if (bring_in_costs >= local_costs) {
1069 DBG((dbg, DBG_GLOBAL, " -> do local reload\n"));
1070 be_add_reload(env->senv, irn, bi->first_non_in, env->cls, 1);
1073 * if the transport-in was a phi (that is actually used in block)
1074 * it will no longer remain and we have to spill it completely.
1076 if (is_local_phi(block, irn))
1077 bitset_add_irn(ges->succ_phis, irn);
1079 DBG((dbg, DBG_GLOBAL, " -> bring it in\n"));
/* commit the tentative states built up by can_bring_in */
1080 materialize_and_commit_end_state(ges);
1083 DBG((dbg, DBG_GLOBAL, "\n"));
1085 /* reset the obstack and create a new version. */
1086 obstack_free(&ges->obst, reset_level);
1087 ges->version = ver_make_newer(ges->version);
/* Driver of the global pass: initialize the transaction state, fix the
 * block borders (hot blocks first) and finally spill all phis that were
 * replaced by entry reloads. */
1091 static void global_assign(belady_env_t *env)
1093 global_end_state_t ges;
1097 * sort the blocks according to execution frequency.
1098 * That's not necessary for belady() but for the global pass later on.
1100 qsort(env->blocks, env->n_blocks, sizeof(env->blocks[0]), block_freq_gt);
1102 memset(&ges, 0, sizeof(ges));
1103 obstack_init(&ges.obst);
1105 ges.version = ver_make_newer(ver_oldest);
1106 ges.succ_phis = bitset_irg_obstack_alloc(&ges.obst, env->irg);
1107 ges.committed = bitset_obstack_alloc(&ges.obst, env->n_blocks);
1108 ges.bs_tops = obstack_alloc(&ges.obst, sizeof(ges.bs_tops[0]) * env->n_blocks);
1109 ges.bs_tops_vers = obstack_alloc(&ges.obst, sizeof(ges.bs_tops_vers[0]) * env->n_blocks);
/* mark every block's state stack as empty/outdated */
1111 for (i = 0; i < env->n_blocks; ++i)
1112 ges.bs_tops_vers[i] = ver_oldest;
1114 for (i = 0; i < env->n_blocks; ++i)
1115 fix_block_borders(&ges, env->blocks[i]);
1118 * Now we spill phis which cannot be kept since they were replaced
1119 * by reloads at the block entrances.
1121 for (i = 0; i < env->n_blocks; ++i) {
1122 ir_node *bl = env->blocks[i];
1125 sched_foreach(bl, irn) {
1129 if (arch_irn_consider_in_reg_alloc(env->arch, env->cls, irn)
1130 && !bitset_contains_irn(ges.succ_phis, irn))
1131 be_spill_phi(env->senv, irn);
/* Block-walker callback: append each block pointer to the obstack-grown
 * array that becomes env->blocks (see be_spill_belady). */
1136 static void collect_blocks(ir_node *bl, void *data)
1138 belady_env_t *env = data;
1140 obstack_ptr_grow(&env->ob, bl);
1144 * Do spilling for a register class on a graph using the belady heuristic.
1145 * In the transformed graph, the register pressure never exceeds the number
1146 * of available registers.
1148 * @param birg The backend graph
1149 * @param cls The register class to spill
1151 void be_spill_belady(be_irg_t *birg, const arch_register_class_t *cls)
1153 ir_graph *irg = be_get_birg_irg(birg);
1157 /* some special classes contain only ignore regs, nothing to do then */
1158 n_regs = cls->n_regs - be_put_ignore_regs(birg, cls, NULL);
/* the link fields are used to attach block_info_t structures */
1162 be_clear_links(irg);
1164 /* init belady env */
1165 obstack_init(&env.ob);
1167 env.arch = birg->main_env->arch_env;
1169 env.lv = be_get_birg_liveness(birg);
1170 env.n_regs = n_regs;
1171 env.ws = new_workset(&env, &env.ob);
1172 env.senv = be_new_spill_env(birg);
1173 env.ef = be_get_birg_exec_freq(birg);
1174 env.spilled = bitset_irg_obstack_alloc(&env.ob, irg);
/* collect all blocks into a NULL-terminated obstack array */
1177 irg_block_walk_graph(irg, NULL, collect_blocks, &env);
1178 obstack_ptr_grow(&env.ob, NULL);
1179 env.blocks = obstack_finish(&env.ob);
1181 /* Fix high register pressure in blocks with belady algorithm */
1182 for (i = 0; i < env.n_blocks; ++i)
1185 global_assign(&env);
1187 /* Insert spill/reload nodes into the graph and fix usages */
1188 be_insert_spills_reloads(env.senv);
/* release all per-run resources */
1191 be_delete_spill_env(env.senv);
1193 obstack_free(&env.ob, NULL);
/* Module constructor: register the "belady2" spiller, its option table
 * (be.spill.belady2.*) and its debug module. */
1196 void be_init_spillbelady2(void)
1198 lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
1199 lc_opt_entry_t *spill_grp = lc_opt_get_grp(be_grp, "spill");
1200 lc_opt_entry_t *bel2_grp = lc_opt_get_grp(spill_grp, "belady2");
1202 static be_spiller_t belady_spiller = {
1206 lc_opt_add_table(bel2_grp, options);
1207 be_register_spiller("belady2", &belady_spiller);
1208 FIRM_DBG_REGISTER(dbg, "firm.be.spill.belady2");
1211 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_spillbelady2);