2 * Copyright (C) 1995-2007 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Beladys spillalgorithm version 2.
23 * @author Sebastian Hack, Matthias Braun, Daniel Grund
27 * The main differences to the original Belady are:
28 * - The workset is empty at the start of a block
29 * There is no attempt to fill it with variables which
30 * are not used in the block.
31 * - There is a global pass which tries to use the remaining
32 * capacity of the blocks to let global variables live through
43 #include "irnodeset.h"
45 #include "irprintf_t.h"
51 #include "iredges_t.h"
52 #include "irphase_t.h"
60 #include "bespillbelady.h"
61 #include "besched_t.h"
65 #include "bechordal_t.h"
66 #include "bespilloptions.h"
67 #include "beloopana.h"
78 #define DBG_WORKSET 128
79 #define DBG_GLOBAL 256
82 #define LIVE_END (DEAD-1)
84 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
/* NOTE(review): this extract is line-sampled — struct terminators (`} loc_t;`,
 * `} workset_t;`) and possibly further fields are missing, and every line
 * carries a stray leading number from extraction. Verify against the real file. */
/* loc_t: pairs a node with its next-use time; in the global pass the `time`
 * field is reused as a version number (per the comment below). */
87 * An association between a node and a point in time.
89 typedef struct _loc_t {
90 ir_node *irn; /**< A node. */
91 unsigned time; /**< A use time.
92 In the global pass this is used
93 as the version number and not as a time.
/* workset_t: a variable-length set of loc_t entries; `vals[0]` is the old
 * zero-length-array idiom for trailing inline storage (sized at allocation). */
98 typedef struct _workset_t {
99 int len; /**< current length */
100 loc_t vals[0]; /**< inlined array of the values/distances in this working set */
/* belady_env_t: per-run state for one (irg, register class) spilling pass.
 * NOTE(review): fields and the struct terminator are missing from this extract
 * (e.g. the obstack `ob`, `irg`, `lv`, `ef` fields referenced elsewhere). */
103 typedef struct _belady_env_t {
106 const arch_env_t *arch;
107 const arch_register_class_t *cls;
111 ir_node **blocks; /**< Array of all blocks. */
112 int n_blocks; /**< Number of blocks in the graph. */
113 int n_regs; /**< number of regs in this reg-class */
114 workset_t *ws; /**< the main workset used while processing a block. ob-allocated */
115 ir_node *instr; /**< current instruction */
116 unsigned instr_nr; /**< current instruction number (relative to block start) */
118 spill_env_t *senv; /**< see bespill.h */
/* qsort comparator for loc_t: orders by ascending `time` (next-use distance);
 * used by the workset_sort() macro below. Returns -1/0/+1 via the
 * (a>b)-(a<b) idiom. NOTE(review): the declarations of p/q and the braces
 * are missing from this extract. */
122 static int loc_compare(const void *a, const void *b)
126 return (p->time > q->time) - (p->time < q->time);
/* Debug helper: dump each (node, time) pair of the workset to stderr. */
129 static INLINE void workset_print(const workset_t *w)
133 for(i = 0; i < w->len; ++i) {
134 ir_fprintf(stderr, "%+F %d\n", w->vals[i].irn, w->vals[i].time);
/* Workset constructors/copiers. All worksets are sized for env->n_regs
 * entries (header + inline vals array) and live on an obstack.
 * NOTE(review): `res` declarations and return statements are missing from
 * this line-sampled extract. */
139 * Alloc a new workset on obstack @p ob with maximum size @p max
141 static INLINE workset_t *new_workset(belady_env_t *env, struct obstack *ob) {
143 size_t size = sizeof(*res) + (env->n_regs)*sizeof(res->vals[0]);
144 res = obstack_alloc(ob, size);
145 memset(res, 0, size);
150 * Alloc a new instance on obstack and make it equal to @param ws
152 static INLINE workset_t *workset_clone(belady_env_t *env, struct obstack *ob, workset_t *ws) {
154 size_t size = sizeof(*res) + (env->n_regs)*sizeof(res->vals[0]);
155 res = obstack_alloc(ob, size);
156 memcpy(res, ws, size);
161 * Do NOT alloc anything. Make @param tgt equal to @param src.
162 * returns @param tgt for convenience
/* In-place copy: assumes tgt was allocated with the same n_regs capacity,
 * since the full size (header + n_regs entries) is memcpy'd. */
164 static INLINE workset_t *workset_copy(belady_env_t *env, workset_t *tgt, workset_t *src) {
165 size_t size = sizeof(*src) + (env->n_regs)*sizeof(src->vals[0]);
166 memcpy(tgt, src, size);
171 * Overwrites the current content array of @param ws with the
172 * @param count locations given at memory @param locs.
173 * Set the length of @param ws to count.
175 static INLINE void workset_bulk_fill(workset_t *workset, int count, const loc_t *locs) {
176 workset->len = count;
177 memcpy(&(workset->vals[0]), locs, count * sizeof(locs[0]));
/* Workset element operations. NOTE(review): loop-variable declarations,
 * early returns and closing braces are missing from this extract. */
181 * Inserts the value @p val into the workset, iff it is not
182 * already contained. The workset must not be full.
/* Values not belonging to the current register class are silently skipped. */
184 static INLINE void workset_insert(belady_env_t *env, workset_t *ws, ir_node *val) {
186 /* check for current regclass */
187 if (!arch_irn_consider_in_reg_alloc(env->arch, env->cls, val)) {
188 DBG((dbg, DBG_WORKSET, "Skipped %+F\n", val));
192 /* check if val is already contained */
193 for(i=0; i<ws->len; ++i)
194 if (ws->vals[i].irn == val)
198 assert(ws->len < env->n_regs && "Workset already full!");
199 ws->vals[ws->len++].irn = val;
203 * Removes all entries from this workset
205 static INLINE void workset_clear(workset_t *ws) {
210 * Removes the value @p val from the workset if present.
/* Removal is O(n): the hole is filled by moving the last entry down
 * (order within the set is not preserved). */
212 static INLINE void workset_remove(workset_t *ws, ir_node *val) {
214 for(i=0; i<ws->len; ++i) {
215 if (ws->vals[i].irn == val) {
216 ws->vals[i] = ws->vals[--ws->len];
/* Linear search; presumably returns the index of val or -1 if absent
 * (workset_contains() below tests the result with `>= 0`) — confirm against
 * the missing tail of this function. */
222 static INLINE int workset_get_index(const workset_t *ws, const ir_node *val) {
224 for(i=0; i<ws->len; ++i) {
225 if (ws->vals[i].irn == val)
/* Convenience macros over workset_t. workset_sort() uses the loc_compare
 * comparator above, i.e. ascending next-use distance. NOTE(review): the
 * increment clause of workset_foreach is missing from this extract. */
233 * Iterates over all values in the working set.
234 * @p ws The workset to iterate
235 * @p v A variable to put the current value in
236 * @p i An integer for internal use
238 #define workset_foreach(ws, v, i) for(i=0; \
239 v=(i < ws->len) ? ws->vals[i].irn : NULL, i < ws->len; \
242 #define workset_set_time(ws, i, t) (ws)->vals[i].time=t
243 #define workset_get_time(ws, i) (ws)->vals[i].time
244 #define workset_set_length(ws, length) (ws)->len = length
245 #define workset_get_length(ws) ((ws)->len)
246 #define workset_get_val(ws, i) ((ws)->vals[i].irn)
247 #define workset_sort(ws) qsort((ws)->vals, (ws)->len, sizeof((ws)->vals[0]), loc_compare);
248 #define workset_contains(ws, n) (workset_get_index(ws, n) >= 0)
/* block_info_t: per-block results of the local Belady pass, consumed by the
 * global pass. Attached to the block via its link field (see get_block_info).
 * NOTE(review): several fields (e.g. `bl`, `id`, `next_uses` phase) and the
 * struct terminator are missing from this extract. */
250 typedef struct _block_info_t {
253 workset_t *ws_start, *ws_end;
257 ir_node *first_non_in; /**< First node in block which is not a phi. */
258 ir_node *last_ins; /**< The instruction before which end of
259 block reloads will be inserted. */
261 workset_t *entrance_reg; /**< That set will contain all values
262 transported into the block which
263 are used before they are displaced.
264 That means, we later have to care to
265 bring them into the block in a register
266 or reload them at the entry of the block. */
268 int pressure; /**< The amount of registers which remain free
269 in this block. This capacity can be used to let
270 global variables, transported into other blocks,
271 live through this block. */
273 double exec_freq; /**< The execution frequency of this block. */
/* Allocate and zero a block_info_t for block bel->blocks[id], populate its
 * entrance workset and execution frequency, and attach it to the block's
 * link field. NOTE(review): the return statement and some field
 * initialisations are missing from this extract. */
276 static INLINE void *new_block_info(belady_env_t *bel, int id)
278 ir_node *bl = bel->blocks[id];
279 block_info_t *res = obstack_alloc(&bel->ob, sizeof(*res));
280 memset(res, 0, sizeof(res[0]));
281 res->first_non_in = NULL;
282 res->last_ins = NULL;
286 res->entrance_reg = new_workset(bel, &bel->ob);
287 res->exec_freq = get_block_execfreq(bel->ef, bl);
288 set_irn_link(bl, res);
292 #define get_block_info(block) ((block_info_t *)get_irn_link(block))
293 #define set_block_info(block, info) set_irn_link(block, info)
/* Lazily compute/cache the insertion point for end-of-block reloads. */
295 static INLINE ir_node *block_info_get_last_ins(block_info_t *bi)
298 bi->last_ins = be_get_end_of_block_insertion_point(bi->bl);
/* Per-node next-use chains inside a block, built by a reverse sweep over the
 * schedule so each node's uses end up ordered front-to-back.
 * NOTE(review): list fields, the phase-callback body and chain-linking lines
 * are missing from this extract. */
303 typedef struct _next_use_t {
304 unsigned is_first_use : 1; /**< Indicate that this use is the first
305 in the block. Needed to identify
306 transport in values for the global
308 int step; /**< The time step of the use. */
309 struct _next_use_t *next; /**< The next use int this block
/* ir_phase init callback for the next-use data; body not visible here. */
313 static void *next_use_init(ir_phase *phase, ir_node *irn, void *old)
/* Walk the block's schedule backwards; for every operand, push a new use
 * record in front of the existing chain. The newest (i.e. earliest) record
 * is flagged is_first_use; the flag of the previously-front record is
 * cleared. */
321 static void build_next_uses(block_info_t *bi)
325 phase_init(&bi->next_uses, "next uses", bi->bel->irg, PHASE_DEFAULT_GROWTH, next_use_init, NULL);
326 sched_foreach_reverse(bi->bl, irn) {
327 int i, step = sched_get_time_step(irn);
332 for (i = get_irn_arity(irn) - 1; i >= 0; --i) {
333 ir_node *op = get_irn_n(irn, i);
334 next_use_t *curr = phase_get_irn_data(&bi->next_uses, op);
335 next_use_t *use = phase_alloc(&bi->next_uses, sizeof(use[0]));
338 use->is_first_use = 1;
343 curr->is_first_use = 0;
345 phase_set_irn_data(&bi->next_uses, op, use);
350 #define get_current_use(bi, irn) phase_get_irn_data(&(bi)->next_uses, (irn))
/* Pop the front use record of irn's chain, making the following use current. */
352 static INLINE void advance_current_use(block_info_t *bi, const ir_node *irn)
354 next_use_t *use = get_current_use(bi, irn);
357 phase_set_irn_data(&bi->next_uses, irn, use->next);
/* Next-use distance of irn relative to the current instruction, used as the
 * eviction key: non-spillable nodes are pinned (branch at flags check),
 * values with no further use in the block map to LIVE_END or DEAD depending
 * on liveness at the block end. NOTE(review): several branches (the
 * dont_spill return value, the use->next advance for the !is_usage case)
 * are missing from this extract. */
360 static INLINE unsigned get_curr_distance(block_info_t *bi, const ir_node *irn, int is_usage)
362 belady_env_t *env = bi->bel;
363 next_use_t *use = get_current_use(bi, irn);
364 int curr_step = sched_get_time_step(env->instr);
365 int flags = arch_irn_get_flags(env->arch, irn);
367 assert(!(flags & arch_irn_flags_ignore));
369 /* We have to keep nonspillable nodes in the workingset */
370 if(flags & arch_irn_flags_dont_spill)
373 if (!is_usage && use && use->step == curr_step)
377 assert(use->step >= curr_step);
378 return use->step - curr_step;
381 return be_is_live_end(env->lv, bi->bl, irn) ? LIVE_END : DEAD;
/* True iff irn is a Phi whose home block is bl. */
384 static INLINE int is_local_phi(const ir_node *bl, const ir_node *irn)
386 return is_Phi(irn) && get_nodes_block(irn) == bl;
390 * Check, if the value is something that is transported into a block.
391 * That is, the value is defined elsewhere or defined by a Phi in the block.
392 * @param env The belady environment.
393 * @param bl The block in question.
394 * @param irn The node in question.
395 * @return 1, if node is something transported into @p bl, 0 if not.
396 * @note The function will only give correct answers in the case
397 * where @p irn is unsed in the block @p bl which is always
398 * the case in our usage scenario.
400 static INLINE int is_transport_in(const ir_node *bl, const ir_node *irn)
402 return is_local_phi(bl, irn) || get_nodes_block(irn) != bl;
/* Core Belady step: make room in the workset for the values in new_vals,
 * evicting those with the greatest next-use distance first (sorted via
 * loc_compare), and record reloads / entrance candidates as needed.
 * NOTE(review): this extract is line-sampled — the `demand = 0` init,
 * the workset_sort call, several closing braces and else-branches are not
 * visible. Do not treat the visible lines as a complete body. */
406 * Performs the actions necessary to grant the request that:
407 * - new_vals can be held in registers
408 * - as few as possible other values are disposed
409 * - the worst values get disposed
411 * @p is_usage indicates that the values in new_vals are used (not defined)
412 * In this case reloads must be performed
414 static void displace(block_info_t *bi, workset_t *new_vals, int is_usage) {
415 belady_env_t *env = bi->bel;
416 workset_t *ws = env->ws;
417 ir_node **to_insert = alloca(env->n_regs * sizeof(to_insert[0]));
419 int i, len, max_allowed, demand, iter;
423 1. Identify the number of needed slots and the values to reload
426 workset_foreach(new_vals, val, iter) {
427 /* mark value as used */
429 if (! workset_contains(ws, val)) {
430 DBG((dbg, DBG_DECIDE, " insert %+F\n", val));
431 to_insert[demand++] = val;
433 int insert_reload = 1;
434 next_use_t *use = get_current_use(bi, val);
437 * if we use a value which is transported in this block, i.e. a
438 * phi defined here or a live in, for the first time, we check
439 * if there is room for that guy to survive from the block's
440 * entrance to here or not.
443 assert(sched_get_time_step(env->instr) == use->step);
444 if (is_transport_in(bi->bl, val) && use->is_first_use) {
445 DBG((dbg, DBG_DECIDE, "entrance node %+F, pressure %d:\n", val, bi->pressure));
/* If the block still has free capacity the value is deferred to the
 * global pass via entrance_reg instead of reloading here. */
446 if (bi->pressure < env->n_regs) {
447 workset_insert(env, bi->entrance_reg, val);
450 DBG((dbg, DBG_DECIDE, "... no reload. must be considered at block start\n"));
455 DBG((dbg, DBG_SPILL, "Reload %+F before %+F\n", val, env->instr));
456 be_add_reload(env->senv, val, env->instr, env->cls, 1);
460 assert(is_usage || "Defined value already in workset?!?");
461 DBG((dbg, DBG_DECIDE, " skip %+F\n", val));
464 DBG((dbg, DBG_DECIDE, " demand = %d\n", demand));
467 2. Make room for at least 'demand' slots
469 len = workset_get_length(ws);
470 max_allowed = env->n_regs - demand;
472 /* Only make more free room if we do not have enough */
473 if (len > max_allowed) {
474 // int curr_step = sched_get_time_step(env->instr);
476 DBG((dbg, DBG_DECIDE, " disposing %d values\n", len - max_allowed));
478 /* get current next-use distance */
479 for (i = 0; i < ws->len; ++i) {
480 ir_node *val = workset_get_val(ws, i);
481 unsigned dist = get_curr_distance(bi, val, is_usage);
482 workset_set_time(ws, i, dist);
485 /* sort entries by increasing nextuse-distance*/
488 /* kill the last 'demand' entries in the array */
489 workset_set_length(ws, max_allowed);
493 3. Insert the new values into the workset
494 Also, we update the pressure in the block info.
495 That is important for the global pass to decide
496 how many values can live through the block.
498 for (i = 0; i < demand; ++i)
499 workset_insert(env, env->ws, to_insert[i]);
/* pressure tracks the block's peak workset occupancy. */
501 bi->pressure = MAX(bi->pressure, workset_get_length(env->ws));
/* Local Belady pass over one block: sweep the schedule forward, calling
 * displace() once for the uses and once for the definitions of each real
 * instruction, pruning dead operands in between. Records ws_end for the
 * global pass. NOTE(review): line-sampled — variable declarations
 * (new_vals, irn, i, arity, iter), the env->instr assignment, `continue`
 * after the Proj/Phi skip and several closing braces are not visible. */
505 * For the given block @p block, decide for each values
506 * whether it is used from a register or is reloaded
509 static void belady(belady_env_t *env, int id) {
510 block_info_t *block_info = new_block_info(env, id);
511 const ir_node *block = block_info->bl;
512 void *obst_state = obstack_base(&env->ob);
518 DBG((dbg, DBG_WSETS, "Processing %+F...\n", block_info->bl));
519 new_vals = new_workset(env, &env->ob);
520 workset_clear(env->ws);
522 /* build the next use information for this block. */
523 build_next_uses(block_info);
526 block_info->first_non_in = NULL;
528 /* process the block from start to end */
529 sched_foreach(block, irn) {
531 assert(workset_get_length(env->ws) <= env->n_regs && "Too much values in workset!");
533 /* projs are handled with the tuple value.
534 * Phis are no real instr (see insert_starters())
535 * instr_nr does not increase */
536 if (is_Proj(irn) || is_Phi(irn)) {
537 DBG((dbg, DBG_DECIDE, " ...%+F skipped\n", irn));
540 DBG((dbg, DBG_DECIDE, " ...%+F\n", irn));
542 if (!block_info->first_non_in)
543 block_info->first_non_in = irn;
545 /* set instruction in the workset */
548 /* allocate all values _used_ by this instruction */
549 workset_clear(new_vals);
550 for(i = 0, arity = get_irn_arity(irn); i < arity; ++i) {
551 workset_insert(env, new_vals, get_irn_n(irn, i));
553 displace(block_info, new_vals, 1);
556 * set all used variables to the next use in their next_use_t list
557 * Also, kill all dead variables from the workset. They are only
558 * augmenting the pressure. Note, that a variable is dead
559 * if it has no further use in this block and is *not* live end
561 for(i = 0, arity = get_irn_arity(irn); i < arity; ++i) {
562 ir_node *op = get_irn_n(irn, i);
563 next_use_t *use = get_current_use(block_info, op);
566 if (!use->next && !be_is_live_end(env->lv, block, op))
567 workset_remove(env->ws, op);
569 advance_current_use(block_info, op);
572 /* allocate all values _defined_ by this instruction */
573 workset_clear(new_vals);
574 if (get_irn_mode(irn) == mode_T) { /* special handling for tuples and projs */
575 const ir_edge_t *edge;
577 foreach_out_edge(irn, edge) {
578 ir_node *proj = get_edge_src_irn(edge);
579 workset_insert(env, new_vals, proj);
582 workset_insert(env, new_vals, irn);
584 displace(block_info, new_vals, 0);
/* next-use phase data and per-block obstack scratch are released here;
 * ws_end is cloned onto the env obstack before that state is reused. */
589 phase_free(&block_info->next_uses);
590 obstack_free(&env->ob, obst_state);
592 /* Remember end-workset for this block */
593 block_info->ws_end = workset_clone(env, &env->ob, env->ws);
594 DBG((dbg, DBG_WSETS, "End workset for %+F:\n", block));
595 workset_foreach(block_info->ws_end, irn, iter)
596 DBG((dbg, DBG_WSETS, " %+F (%u)\n", irn, workset_get_time(block_info->ws_end, iter)));
597 DBG((dbg, DBG_WSETS, "Max pressure in block: %d\n", block_info->pressure));
/* ---- The global pass ---- (ASCII-art banner below is partially garbled
 * by line sampling). In this pass loc_t.time is reused as a version stamp:
 * ver_oldest entries are "fixed", ver_youngest are still free (see the
 * version macros). */
602 |_ _| |__ ___ __ _| | ___ | |__ __ _| | | _ \ __ _ _ __| |_
603 | | | '_ \ / _ \ / _` | |/ _ \| '_ \ / _` | | | |_) / _` | '__| __|
604 | | | | | | __/ | (_| | | (_) | |_) | (_| | | | __/ (_| | | | |_
605 |_| |_| |_|\___| \__, |_|\___/|_.__/ \__,_|_| |_| \__,_|_| \__|
610 #define workset_set_version(ws, i, t) ((ws)->vals[(i)].time = (t))
611 #define workset_get_version(ws, i) ((ws)->vals[(i)].time)
613 #define ver_oldest (0)
614 #define ver_youngest ((unsigned) -1)
615 #define ver_make_newer(v) ((v) + 1)
616 #define ver_is_older(v, w) ((v) < (w))
617 #define ver_is_younger(v, w) ((v) > (w))
/* qsort comparator: sorts blocks by DESCENDING execution frequency
 * (note qi - pi), so the hottest blocks are processed first in
 * global_assign(). */
619 static int block_freq_gt(const void *a, const void *b)
621 const ir_node * const *p = a;
622 const ir_node * const *q = b;
623 block_info_t *pi = get_block_info(*p);
624 block_info_t *qi = get_block_info(*q);
625 double diff = qi->exec_freq - pi->exec_freq;
626 return (diff > 0) - (diff < 0);
/* Speculative-transaction state for the global pass: block_state_t stacks
 * record tentative end-worksets/pressure per block, irn_action_t records
 * tentative reload/live-through decisions; both can be rolled back via
 * trans_rollback(). NOTE(review): most fields (bi, pressure, irn/bl/act,
 * env, obst, version, succ_phis, committed) and the rollback_info_t
 * declaration header are missing from this line-sampled extract. */
635 typedef struct _block_state_t {
636 struct _block_state_t *next;
637 struct _block_state_t *next_intern;
640 workset_t *end_state;
643 typedef struct _irn_action_t {
644 struct _irn_action_t *next;
650 typedef struct _global_end_state_t {
658 unsigned *bs_tops_vers;
659 block_state_t **bs_tops;
660 block_state_t *bs_top;
661 irn_action_t *ia_top;
662 } global_end_state_t;
/* NOTE(review): these two fields presumably belong to rollback_info_t,
 * whose opening lines are missing here — confirm against the real file. */
666 block_state_t *bs_top;
667 irn_action_t *ia_top;
/* Current tentative block state for bi, or NULL if none was created in the
 * current version (bs_tops_vers acts as a lazily-reset validity stamp). */
670 static INLINE block_state_t *get_block_state(global_end_state_t *ges, block_info_t *bi)
673 assert(!ver_is_younger(ges->bs_tops_vers[id], ges->version));
674 return ver_is_older(ges->bs_tops_vers[id], ges->version) ? NULL : ges->bs_tops[bi->id];
/* Effective end workset: the tentative one if present, else the committed
 * ws_end from the local pass. */
677 static INLINE const workset_t *get_end_state(global_end_state_t *ges, block_info_t *bi)
679 block_state_t *bs = get_block_state(ges, bi);
680 return bs ? bs->end_state : bi->ws_end;
/* Push a fresh tentative state for bi, cloning either the previous tentative
 * state or the committed one. Linked both onto the global undo list (next)
 * and the per-block stack (next_intern). NOTE(review): the else keyword,
 * return and braces are missing from this extract. */
683 static block_state_t *new_block_state(global_end_state_t *ges, block_info_t *bi)
685 block_state_t *bs = get_block_state(ges, bi);
686 block_state_t *nw = obstack_alloc(&ges->obst, sizeof(nw[0]));
688 nw->next_intern = bs;
689 nw->next = ges->bs_top;
693 nw->pressure = bs->pressure;
694 nw->end_state = workset_clone(ges->env, &ges->obst, bs->end_state);
697 nw->pressure = bi->pressure;
698 nw->end_state = workset_clone(ges->env, &ges->obst, bi->ws_end);
702 ges->bs_tops[bi->id] = nw;
703 ges->bs_tops_vers[bi->id] = ges->version;
/* Record a tentative action (initially irn_act_none) for irn in block bl. */
707 static irn_action_t *new_irn_action(global_end_state_t *ges, ir_node *irn, const ir_node *bl)
709 irn_action_t *ia = obstack_alloc(&ges->obst, sizeof(ia[0]));
713 ia->act = irn_act_none;
714 ia->next = ges->ia_top;
/* Snapshot obstack level and both stack tops so a failed speculation can be
 * undone exactly. */
719 static INLINE rollback_info_t trans_begin(global_end_state_t *ges)
722 rb.obst_level = obstack_base(&ges->obst);
723 rb.bs_top = ges->bs_top;
724 rb.ia_top = ges->ia_top;
/* Undo everything pushed since the matching trans_begin: pop per-block
 * stacks, restore stack tops, free obstack memory. */
728 static INLINE void trans_rollback(global_end_state_t *ges, rollback_info_t *rb)
732 /* unwind all the stacks indiced with the block number */
733 for (bs = ges->bs_top; bs != rb->bs_top; bs = bs->next) {
734 unsigned id = bs->bi->id;
735 ges->bs_tops[id] = bs->next_intern;
738 ges->ia_top = rb->ia_top;
739 ges->bs_top = rb->bs_top;
740 obstack_free(&ges->obst, rb->obst_level);
744 static double can_bring_in(global_end_state_t *ges, ir_node *bl, ir_node *irn, double limit, int level);
/* Mutually recursive with can_bring_in(): estimate the cost (scaled by
 * execution frequency, presumably — confirm in missing lines) of having
 * `irn` in a register at the end of `bl`, either by reloading it at the
 * block end or by letting it live through the block (recursing into the
 * predecessors). Returns the chosen cost, or HUGE_VAL when impossible;
 * `limit` bounds the search. NOTE(review): this extract is heavily
 * line-sampled — returns, several declarations (index, res, slot, i) and
 * whole branches are missing; treat the visible control flow as partial. */
746 static double can_make_available_at_end(global_end_state_t *ges, ir_node *bl, ir_node *irn, double limit, int level)
748 block_info_t *bi = get_block_info(bl);
749 const workset_t *end = get_end_state(ges, bi);
753 DBG((dbg, DBG_GLOBAL, "\t%2Dcan make avail %+F at end of %+F\n", level, irn, bl));
756 * to make the value available at end,
757 * we have several cases here.
759 * - we already visited that block.
760 * - If the value is in the final end set, return 0.
761 * somebody else already allocated it there.
762 * - If not and the final end set is already full,
763 * we cannot make the value available at the end
764 * of this block. return INFINITY.
765 * - Else (value not in final end set and there is room):
766 * 1) The value is in a register at the end of the local Belady pass.
767 * Allocate a slot in the final end set and return 0.
768 * 2) The value is not in the Belady end set:
769 * If the block's capacity is < k then check what it costs
770 * to transport the value from upper blocks to this block.
771 * Compare that against the reload cost in this block. If
772 * cheaper, do the other thing. If not, reload it here.
775 /* if the end set contains it already, it is in a reg and it costs nothing
776 * to load it to one. */
777 index = workset_get_index(end, irn);
779 unsigned ver = workset_get_version(end, index);
780 DBG((dbg, DBG_GLOBAL, "\t%2Dnode is in the end set and is %s fixed\n",
781 level, ver_is_older(ver, ges->version) ? "already" : "not yet"));
784 * if the version is older, the value is already fixed
785 * and cannot be removed from the end set.
787 * If not, we will create a new block state for that block since
788 * we modify it by giving the end state a new version.
790 if (ver_is_younger(ver, ges->version)) {
791 block_state_t *bs = new_block_state(ges, bi);
792 workset_set_version(bs->end_state, index, ges->version);
800 * Now we have two options:
801 * 1) Reload the value at the end of the block.
802 * Therefore, perhaps, we have to erase another one from the workset.
803 * This may only be done if it has not been fixed.
804 * Since fixed means that a previous pass has decided that that value
805 * *has* to stay in the end set.
806 * 2) we can try, if the capacity of the block allows it, to let
807 * the value live through the block and make it available at
810 * First, we test the local (reload in this block) alternative
811 * and compare against the other alternative.
812 * Of course, we chose the cheaper one.
816 int n_regs = bi->bel->n_regs;
817 int len = workset_get_length(end);
824 * look if there is room in the end array
825 * for the variable. Note that this does not
826 * mean that the var can live through the block.
827 * There is just room at the *end*
830 DBG((dbg, DBG_GLOBAL, "\t%2Dthe end set has %d free slots\n", level, n_regs - len));
/* Full end set: search for a not-yet-fixed (younger-version) victim slot. */
833 for (i = 0; i < len; ++i) {
834 unsigned ver = workset_get_version(end, i);
835 if (ver_is_younger(ver, ges->version))
840 DBG((dbg, DBG_GLOBAL, "\t%2D%+F (slot %d) can be erased from the end set\n",
841 level, end->vals[i].irn, i));
847 * finally there is some room. we can at least reload the value.
848 * but we will try to let ot live through anyhow.
851 irn_action_t *vs = new_irn_action(ges, irn, bi->bl);
852 block_state_t *bs = new_block_state(ges, bi);
853 workset_t *end = bs->end_state;
854 ir_node *ins_before = block_info_get_last_ins(bi);
855 double reload_here = be_get_reload_costs(bi->bel->senv, irn, ins_before);
856 int pressure_ok = bs->pressure < (unsigned) n_regs;
859 * No matter what we do, the value will be in the end set
860 * if the block from now on (of course only regarding the
861 * current state). Enter it and set the new length
864 end->vals[slot].irn = irn;
865 workset_set_version(end, slot, ges->version);
866 workset_set_length(end, MAX(workset_get_length(end), slot + 1));
868 vs->act = irn_act_reload;
871 DBG((dbg, DBG_GLOBAL, "\t%2Dthere is a free slot. capacity=%d, reload here=%f, pressure %s\n",
872 level, n_regs - bs->pressure, reload_here, pressure_ok ? "ok" : "insufficient"));
874 /* look if we can bring the value in. */
876 rollback_info_t rb = trans_begin(ges);
877 double new_limit = MIN(reload_here, limit);
879 vs->act = irn_act_live_through;
881 res = can_bring_in(ges, bl, irn, new_limit, level + 1);
884 * if bring in is too expensive re-adjust the pressure
885 * and roll back the state
/* bring-in not cheaper than reloading: keep the reload decision and
 * discard the speculative predecessor states. */
887 if (res >= reload_here) {
889 vs->act = irn_act_reload;
890 trans_rollback(ges, &rb);
895 DBG((dbg, DBG_GLOBAL, "\t%2D%s\n", level,
896 vs->act == irn_act_reload ? "reloading" : "bringing in"));
901 DBG((dbg, DBG_GLOBAL, "\t%2D-> %f\n", level, res));
/* Cost of transporting irn into bl in a register: if the value is a
 * transport-in (live-in or local Phi), sum the cost of making the correct
 * operand available at the end of every predecessor; abort (HUGE_VAL) and
 * roll back once the accumulated cost reaches `limit`.
 * NOTE(review): line-sampled — the glob_costs accumulation/initialisation
 * inside the loop, the else-branch setting c, and the return are missing. */
905 static double can_bring_in(global_end_state_t *ges, ir_node *bl, ir_node *irn, double limit, int level)
907 belady_env_t *env = ges->env;
908 double glob_costs = HUGE_VAL;
910 DBG((dbg, DBG_GLOBAL, "\t%2Dcan bring in (max %f) for %+F at block %+F\n", level, limit, irn, bl));
912 if (is_transport_in(bl, irn)) {
913 int i, n = get_irn_arity(bl);
914 ir_node **nodes = alloca(get_irn_arity(bl) * sizeof(nodes[0]));
915 rollback_info_t rb = trans_begin(ges);
919 for (i = 0; i < n; ++i) {
920 ir_node *pr = get_Block_cfgpred_block(bl, i);
/* For a local Phi the i-th operand must reach the i-th predecessor;
 * for a plain live-in the value itself must. */
921 ir_node *op = is_local_phi(bl, irn) ? get_irn_n(irn, i) : irn;
925 * there might by unknwons as operands of phis in that case
926 * we set the costs to zero, since they won't get spilled.
928 if (arch_irn_consider_in_reg_alloc(env->arch, env->cls, op))
929 c = can_make_available_at_end(ges, pr, op, limit - glob_costs, level + 1);
935 if (glob_costs >= limit) {
936 glob_costs = HUGE_VAL;
937 trans_rollback(ges, &rb);
944 DBG((dbg, DBG_GLOBAL, "\t%2D-> %f\n", level, glob_costs));
/* Commit a successful speculation: walk the irn actions (keep live-through
 * phis alive / insert end-of-block reloads) and copy each tentative block
 * end state and pressure back into the committed block_info. The committed
 * bitset deduplicates per-block commits within one call and is cleared for
 * the next call. NOTE(review): the declarations of ia/bs, the switch
 * statement header and break statements are missing from this extract. */
948 static void materialize_and_commit_end_state(global_end_state_t *ges)
950 belady_env_t *env = ges->env;
954 DBG((dbg, DBG_GLOBAL, "\tmaterializing\n"));
957 * Perform all the variable actions.
959 for (ia = ges->ia_top; ia != NULL; ia = ia->next) {
961 case irn_act_live_through:
962 if (is_local_phi(ia->bl, ia->irn)) {
/* Phis recorded in succ_phis survive; the rest are spilled in
 * global_assign() afterwards. */
963 bitset_add_irn(ges->succ_phis, ia->irn);
964 DBG((dbg, DBG_GLOBAL, "\t\tlive through phi kept alive: %+F\n", ia->irn));
968 be_add_reload_at_end(env->senv, ia->irn, ia->bl, env->cls, 1);
969 DBG((dbg, DBG_GLOBAL, "\t\tadding reload of %+F at end of %+F\n", ia->irn, ia->bl));
972 DBG((dbg, DBG_GLOBAL, "\t\t%+F is in the end set of %+F\n", ia->irn, ia->bl));
977 * Commit the block end states
979 for (bs = ges->bs_top; bs != NULL; bs = bs->next) {
980 block_info_t *bi = bs->bi;
982 if (!bitset_is_set(ges->committed, bi->id)) {
983 DBG((dbg, DBG_GLOBAL, "\t\tcommiting workset of %+F with version %x\n", bi->bl, ges->version));
984 // bes->bs->end_state->vals[idx].version = ges->version;
985 workset_copy(env, bi->ws_end, bs->end_state);
986 DBG((dbg, DBG_GLOBAL, "\t\told pressure: %d, new pressure: %d, end length: %d\n",
987 bi->pressure, bs->pressure, workset_get_length(bs->end_state)));
988 bi->pressure = bs->pressure;
989 bitset_set(ges->committed, bi->id);
993 /* clear the committed bitset. the next call is expecting it. */
994 bitset_clear_all(ges->committed);
998 * Examine all irns which shall be in regs at the beginning of the
1001 static void fix_block_borders(global_end_state_t *ges, ir_node *block) {
1002 block_info_t *bi = get_block_info(block);
1003 belady_env_t *env = ges->env;
1004 void *reset_level = obstack_base(&ges->obst);
1009 for (i = workset_get_length(bi->ws_end) - 1; i >= 0; --i)
1010 workset_set_version(bi->ws_end, i, ver_youngest);
1012 DBG((dbg, DBG_GLOBAL, "fixing block borders at %+F (%f)\n", block, bi->exec_freq));
1014 /* process all variables which shall be in a reg at
1015 * the beginning of the block in the order of the next use. */
1016 workset_foreach(bi->entrance_reg, irn, i) {
1017 double local_costs = be_get_reload_costs(env->senv, irn, bi->first_non_in);
1018 double bring_in_costs;
1020 /* reset the lists */
1024 DBG((dbg, DBG_GLOBAL, "\ttrans in var %+F, version %x\n", irn, ges->version));
1026 bring_in_costs = can_bring_in(ges, block, irn, local_costs, 1);
1028 DBG((dbg, DBG_GLOBAL, "\tbring in: %f, local: %f", bring_in_costs, local_costs));
1031 * we were not able to let the value arrive
1032 * in a register at the entrance of the block
1033 * or it is too costly, so we have to do the reload locally
1035 if (bring_in_costs >= local_costs) {
1036 DBG((dbg, DBG_GLOBAL, " -> do local reload\n"));
1037 be_add_reload(env->senv, irn, bi->first_non_in, env->cls, 1);
1040 * if the transport-in was a phi (that is actually used in block)
1041 * it will no longer remain and we have to spill it completely.
1043 if (is_local_phi(block, irn))
1044 bitset_add_irn(ges->succ_phis, irn);
1046 DBG((dbg, DBG_GLOBAL, " -> bring it in\n"));
1047 materialize_and_commit_end_state(ges);
1050 DBG((dbg, DBG_GLOBAL, "\n"));
1052 /* reset the obstack and create a new version. */
1053 obstack_free(&ges->obst, reset_level);
1054 ges->version = ver_make_newer(ges->version);
/* Driver of the global pass: sort blocks hottest-first (block_freq_gt),
 * initialise the global_end_state_t, run fix_block_borders over every
 * block, then spill all Phis of this register class that were NOT kept
 * alive in succ_phis (they were replaced by reloads at block entrances).
 * NOTE(review): the `int i` declaration, the is_Phi guard inside the last
 * loop, obstack_free for ges.obst and closing braces are missing from
 * this extract. */
1058 static void global_assign(belady_env_t *env)
1060 global_end_state_t ges;
1064 * sort the blocks according to execution frequency.
1065 * That's not necessary for belady() but for the global pass later on.
1067 qsort(env->blocks, env->n_blocks, sizeof(env->blocks[0]), block_freq_gt);
1069 memset(&ges, 0, sizeof(ges));
1070 obstack_init(&ges.obst);
1072 ges.version = ver_make_newer(ver_oldest);
1073 ges.succ_phis = bitset_irg_obstack_alloc(&ges.obst, env->irg);
1074 ges.committed = bitset_obstack_alloc(&ges.obst, env->n_blocks);
1075 ges.bs_tops = obstack_alloc(&ges.obst, sizeof(ges.bs_tops[0]) * env->n_blocks);
1076 ges.bs_tops_vers = obstack_alloc(&ges.obst, sizeof(ges.bs_tops_vers[0]) * env->n_blocks);
1078 for (i = 0; i < env->n_blocks; ++i)
1079 ges.bs_tops_vers[i] = ver_oldest;
1081 for (i = 0; i < env->n_blocks; ++i)
1082 fix_block_borders(&ges, env->blocks[i]);
1085 * Now we spill phis which cannot be kept since they were replaced
1086 * by reloads at the block entrances.
1088 for (i = 0; i < env->n_blocks; ++i) {
1089 ir_node *bl = env->blocks[i];
1092 sched_foreach(bl, irn) {
1096 if (arch_irn_consider_in_reg_alloc(env->arch, env->cls, irn)
1097 && !bitset_contains_irn(ges.succ_phis, irn))
1098 be_spill_phi(env->senv, irn);
/* Block walker callback: collects block pointers onto env->ob (see
 * be_spill_belady_spill_env2). NOTE(review): the n_blocks increment is
 * presumably in a missing line — confirm against the real file. */
1103 static void collect_blocks(ir_node *bl, void *data)
1105 belady_env_t *env = data;
1107 obstack_ptr_grow(&env->ob, bl);
/* Public entry point: run the two-phase Belady spiller for one register
 * class. When spill_env is NULL a fresh one is created and destroyed here;
 * otherwise the caller's env is used and kept. NOTE(review): line-sampled —
 * declarations (env, i, n_regs), the early return for all-ignore classes,
 * the env.cls/irg assignments, the belady(&env, i) call in the pressure
 * loop, and several braces are missing from this extract. */
1110 void be_spill_belady_spill_env2(be_irg_t *birg, const arch_register_class_t *cls, spill_env_t *spill_env) {
1111 ir_graph *irg = be_get_birg_irg(birg);
1115 /* some special classes contain only ignore regs, nothing to do then */
1116 n_regs = cls->n_regs - be_put_ignore_regs(birg, cls, NULL);
1120 be_clear_links(irg);
1122 /* init belady env */
1123 obstack_init(&env.ob);
1125 env.arch = birg->main_env->arch_env;
1127 env.lv = be_get_birg_liveness(birg);
1128 env.n_regs = n_regs;
1129 env.ws = new_workset(&env, &env.ob);
1130 env.senv = spill_env ? spill_env : be_new_spill_env(birg);
1131 env.ef = be_get_birg_exec_freq(birg);
1134 irg_block_walk_graph(irg, NULL, collect_blocks, &env);
/* NULL-terminate the grown pointer array before finishing the obstack. */
1135 obstack_ptr_grow(&env.ob, NULL);
1136 env.blocks = obstack_finish(&env.ob);
1138 /* Fix high register pressure in blocks with belady algorithm */
1139 for (i = 0; i < env.n_blocks; ++i)
1142 global_assign(&env);
1144 /* Insert spill/reload nodes into the graph and fix usages */
1145 be_insert_spills_reloads(env.senv);
1148 if(spill_env == NULL)
1149 be_delete_spill_env(env.senv);
1151 obstack_free(&env.ob, NULL);
1156 * Do spilling for a register class on a graph using the belady heuristic.
1157 * In the transformed graph, the register pressure never exceeds the number
1158 * of available registers.
1160 * @param birg The backend graph
1161 * @param cls The register class to spill
/* Thin wrapper used as the registered spiller callback below. */
1163 static void be_spill_belady(be_irg_t *birg, const arch_register_class_t *cls) {
1164 be_spill_belady_spill_env2(birg, cls, NULL);
/* Module constructor: registers this spiller under the name "belady2" and
 * sets up its debug module. NOTE(review): the spiller struct initialiser
 * body is missing from this extract. */
1168 void be_init_spillbelady2(void)
1170 static be_spiller_t belady_spiller = {
1174 be_register_spiller("belady2", &belady_spiller);
1175 FIRM_DBG_REGISTER(dbg, "firm.be.spill.belady2");
1178 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_spillbelady2);