2 * Copyright (C) 1995-2007 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Beladys spillalgorithm version 2.
23 * @author Daniel Grund, Matthias Braun, Sebastian Hack
27 * The main differences to the original Belady are:
28 * - The workset is empty at the start of a block
29 * There is no attempt to fill it with variables which
30 * are not used in the block.
31 * - There is a global pass which tries to use the remaining
32 * capacity of the blocks to let global variables live through
43 #include "irnodeset.h"
45 #include "irprintf_t.h"
51 #include "iredges_t.h"
52 #include "irphase_t.h"
60 #include "bespillbelady.h"
61 #include "besched_t.h"
65 #include "bechordal_t.h"
66 #include "bespilloptions.h"
67 #include "beloopana.h"
78 #define DBG_WORKSET 128
79 #define DBG_GLOBAL 256
82 #define LIVE_END (DEAD-1)
84 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
87 * An association between a node and a point in time.
89 typedef struct _loc_t {
90 ir_node *irn; /**< A node. */
91 unsigned time; /**< A use time (see beuses.h). */
92 unsigned version; /**< That is used in the global pass below.
93 For usage see the comments below.
94 In the local belady pass, this is not important. */
97 typedef struct _workset_t {
98 int len; /**< current length */
99 loc_t vals[0]; /**< inlined array of the values/distances in this working set */
102 typedef struct _belady_env_t {
105 const arch_env_t *arch;
106 const arch_register_class_t *cls;
110 ir_node **blocks; /**< Array of all blocks. */
111 int n_blocks; /**< Number of blocks in the graph. */
112 int n_regs; /**< number of regs in this reg-class */
113 workset_t *ws; /**< the main workset used while processing a block. ob-allocated */
114 ir_node *instr; /**< current instruction */
115 unsigned instr_nr; /**< current instruction number (relative to block start) */
117 spill_env_t *senv; /**< see bespill.h */
121 static int loc_compare(const void *a, const void *b)
125 return (p->time > q->time) - (p->time < q->time);
128 static INLINE void workset_print(const workset_t *w)
132 for(i = 0; i < w->len; ++i) {
133 ir_fprintf(stderr, "%+F %d\n", w->vals[i].irn, w->vals[i].time);
138 * Alloc a new workset on obstack @p ob with maximum size @p max
140 static INLINE workset_t *new_workset(belady_env_t *env, struct obstack *ob) {
142 size_t size = sizeof(*res) + (env->n_regs)*sizeof(res->vals[0]);
143 res = obstack_alloc(ob, size);
144 memset(res, 0, size);
149 * Alloc a new instance on obstack and make it equal to @param ws
151 static INLINE workset_t *workset_clone(belady_env_t *env, struct obstack *ob, workset_t *ws) {
153 size_t size = sizeof(*res) + (env->n_regs)*sizeof(res->vals[0]);
154 res = obstack_alloc(ob, size);
155 memcpy(res, ws, size);
160 * Do NOT alloc anything. Make @param tgt equal to @param src.
161 * returns @param tgt for convenience
163 static INLINE workset_t *workset_copy(belady_env_t *env, workset_t *tgt, workset_t *src) {
164 size_t size = sizeof(*src) + (env->n_regs)*sizeof(src->vals[0]);
165 memcpy(tgt, src, size);
170 * Overwrites the current content array of @param ws with the
171 * @param count locations given at memory @param locs.
172 * Set the length of @param ws to count.
174 static INLINE void workset_bulk_fill(workset_t *workset, int count, const loc_t *locs) {
175 workset->len = count;
176 memcpy(&(workset->vals[0]), locs, count * sizeof(locs[0]));
180 * Inserts the value @p val into the workset, iff it is not
181 * already contained. The workset must not be full.
183 static INLINE void workset_insert(belady_env_t *env, workset_t *ws, ir_node *val) {
185 /* check for current regclass */
186 if (!arch_irn_consider_in_reg_alloc(env->arch, env->cls, val)) {
187 DBG((dbg, DBG_WORKSET, "Skipped %+F\n", val));
191 /* check if val is already contained */
192 for(i=0; i<ws->len; ++i)
193 if (ws->vals[i].irn == val)
197 assert(ws->len < env->n_regs && "Workset already full!");
198 ws->vals[ws->len++].irn = val;
202 * Removes all entries from this workset
204 static INLINE void workset_clear(workset_t *ws) {
209 * Removes the value @p val from the workset if present.
211 static INLINE void workset_remove(workset_t *ws, ir_node *val) {
213 for(i=0; i<ws->len; ++i) {
214 if (ws->vals[i].irn == val) {
215 ws->vals[i] = ws->vals[--ws->len];
221 static INLINE int workset_get_index(const workset_t *ws, const ir_node *val) {
223 for(i=0; i<ws->len; ++i) {
224 if (ws->vals[i].irn == val)
232 * Iterates over all values in the working set.
233 * @p ws The workset to iterate
234 * @p v A variable to put the current value in
235 * @p i An integer for internal use
237 #define workset_foreach(ws, v, i) for(i=0; \
238 v=(i < ws->len) ? ws->vals[i].irn : NULL, i < ws->len; \
241 #define workset_set_time(ws, i, t) (ws)->vals[i].time=t
242 #define workset_get_time(ws, i) (ws)->vals[i].time
243 #define workset_set_length(ws, length) (ws)->len = length
244 #define workset_get_length(ws) ((ws)->len)
245 #define workset_get_val(ws, i) ((ws)->vals[i].irn)
246 #define workset_sort(ws) qsort((ws)->vals, (ws)->len, sizeof((ws)->vals[0]), loc_compare);
247 #define workset_contains(ws, n) (workset_get_index(ws, n) >= 0)
249 typedef struct _block_info_t {
252 workset_t *ws_start, *ws_end;
255 ir_node *first_non_in; /**< First node in block which is not a phi. */
256 ir_node *last_ins; /**< The instruction before which end of
257 block reloads will be inserted. */
259 workset_t *entrance_reg; /**< That set will contain all values
260 transported into the block which
261 are used before they are displaced.
262 That means, we later have to care to
263 bring them into the block in a register
264 or reload them at the entry of the block. */
266 int pressure; /**< The amount of registers which remain free
267 in this block. This capacity can be used to let
268 global variables, transported into other blocks,
269 live through this block. */
271 double exec_freq; /**< The execution frequency of this block. */
274 static INLINE void *new_block_info(belady_env_t *bel, ir_node *bl) {
275 block_info_t *res = obstack_alloc(&bel->ob, sizeof(*res));
276 memset(res, 0, sizeof(res[0]));
277 res->first_non_in = NULL;
278 res->last_ins = NULL;
281 res->entrance_reg = new_workset(bel, &bel->ob);
282 res->exec_freq = get_block_execfreq(bel->ef, bl);
283 set_irn_link(bl, res);
287 #define get_block_info(block) ((block_info_t *)get_irn_link(block))
288 #define set_block_info(block, info) set_irn_link(block, info)
290 static INLINE ir_node *block_info_get_last_ins(block_info_t *bi)
293 bi->last_ins = be_get_end_of_block_insertion_point(bi->bl);
298 typedef struct _next_use_t {
299 unsigned is_first_use : 1; /**< Indicate that this use is the first
300 in the block. Needed to identify
301 transport in values for the global
303 int step; /**< The time step of the use. */
304 struct _next_use_t *next; /**< The next use int this block
308 static void *next_use_init(ir_phase *phase, ir_node *irn, void *old)
316 static void build_next_uses(block_info_t *bi)
320 phase_init(&bi->next_uses, "next uses", bi->bel->irg, PHASE_DEFAULT_GROWTH, next_use_init, NULL);
321 sched_foreach_reverse(bi->bl, irn) {
322 int i, step = sched_get_time_step(irn);
327 for (i = get_irn_arity(irn) - 1; i >= 0; --i) {
328 ir_node *op = get_irn_n(irn, i);
329 next_use_t *curr = phase_get_irn_data(&bi->next_uses, op);
330 next_use_t *use = phase_alloc(&bi->next_uses, sizeof(use[0]));
333 use->is_first_use = 1;
338 curr->is_first_use = 0;
340 phase_set_irn_data(&bi->next_uses, op, use);
345 #define get_current_use(bi, irn) phase_get_irn_data(&(bi)->next_uses, (irn))
347 static INLINE void advance_current_use(block_info_t *bi, const ir_node *irn)
349 next_use_t *use = get_current_use(bi, irn);
352 phase_set_irn_data(&bi->next_uses, irn, use->next);
355 static INLINE unsigned get_curr_distance(block_info_t *bi, const ir_node *irn, int is_usage)
357 belady_env_t *env = bi->bel;
358 next_use_t *use = get_current_use(bi, irn);
359 int curr_step = sched_get_time_step(env->instr);
360 int flags = arch_irn_get_flags(env->arch, irn);
362 assert(!(flags & arch_irn_flags_ignore));
364 /* We have to keep nonspillable nodes in the workingset */
365 if(flags & arch_irn_flags_dont_spill)
368 if (!is_usage && use && use->step == curr_step)
372 assert(use->step >= curr_step);
373 return use->step - curr_step;
376 return be_is_live_end(env->lv, bi->bl, irn) ? LIVE_END : DEAD;
379 static INLINE int is_local_phi(const ir_node *bl, const ir_node *irn)
381 return is_Phi(irn) && get_nodes_block(irn) == bl;
385 * Check, if the value is something that is transported into a block.
386 * That is, the value is defined elsewhere or defined by a Phi in the block.
387 * @param env The belady environment.
388 * @param bl The block in question.
389 * @param irn The node in question.
390 * @return 1, if node is something transported into @p bl, 0 if not.
391 * @note The function will only give correct answers in the case
392 * where @p irn is unsed in the block @p bl which is always
393 * the case in our usage scenario.
395 static INLINE int is_transport_in(const ir_node *bl, const ir_node *irn)
397 return is_local_phi(bl, irn) || get_nodes_block(irn) != bl;
401 * Performs the actions necessary to grant the request that:
402 * - new_vals can be held in registers
403 * - as few as possible other values are disposed
404 * - the worst values get disposed
406 * @p is_usage indicates that the values in new_vals are used (not defined)
407 * In this case reloads must be performed
409 static void displace(block_info_t *bi, workset_t *new_vals, int is_usage) {
410 belady_env_t *env = bi->bel;
411 workset_t *ws = env->ws;
412 ir_node **to_insert = alloca(env->n_regs * sizeof(to_insert[0]));
414 int i, len, max_allowed, demand, iter;
418 1. Identify the number of needed slots and the values to reload
421 workset_foreach(new_vals, val, iter) {
422 /* mark value as used */
424 if (! workset_contains(ws, val)) {
425 DBG((dbg, DBG_DECIDE, " insert %+F\n", val));
426 to_insert[demand++] = val;
428 int insert_reload = 1;
429 next_use_t *use = get_current_use(bi, val);
432 * if we use a value which is transported in this block, i.e. a
433 * phi defined here or a live in, for the first time, we check
434 * if there is room for that guy to survive from the block's
435 * entrance to here or not.
438 assert(sched_get_time_step(env->instr) == use->step);
439 if (is_transport_in(bi->bl, val) && use->is_first_use) {
440 DBG((dbg, DBG_DECIDE, "entrance node %+F, pressure %d:\n", val, bi->pressure));
441 if (bi->pressure < env->n_regs) {
442 workset_insert(env, bi->entrance_reg, val);
445 DBG((dbg, DBG_DECIDE, "... no reload. must be considered at block start\n"));
450 DBG((dbg, DBG_SPILL, "Reload %+F before %+F\n", val, env->instr));
451 be_add_reload(env->senv, val, env->instr, env->cls, 1);
455 assert(is_usage || "Defined value already in workset?!?");
456 DBG((dbg, DBG_DECIDE, " skip %+F\n", val));
459 DBG((dbg, DBG_DECIDE, " demand = %d\n", demand));
462 2. Make room for at least 'demand' slots
464 len = workset_get_length(ws);
465 max_allowed = env->n_regs - demand;
467 /* Only make more free room if we do not have enough */
468 if (len > max_allowed) {
469 // int curr_step = sched_get_time_step(env->instr);
471 DBG((dbg, DBG_DECIDE, " disposing %d values\n", len - max_allowed));
473 /* get current next-use distance */
474 for (i = 0; i < ws->len; ++i) {
475 ir_node *val = workset_get_val(ws, i);
476 unsigned dist = get_curr_distance(bi, val, is_usage);
477 workset_set_time(ws, i, dist);
480 /* sort entries by increasing nextuse-distance*/
483 /* kill the last 'demand' entries in the array */
484 workset_set_length(ws, max_allowed);
488 3. Insert the new values into the workset
489 Also, we update the pressure in the block info.
490 That is important for the global pass to decide
491 how many values can live through the block.
493 for (i = 0; i < demand; ++i)
494 workset_insert(env, env->ws, to_insert[i]);
496 bi->pressure = MAX(bi->pressure, workset_get_length(env->ws));
501 * For the given block @p block, decide for each values
502 * whether it is used from a register or is reloaded
505 static void belady(ir_node *block, void *data) {
506 belady_env_t *env = data;
507 block_info_t *block_info = new_block_info(env, block);
508 void *obst_state = obstack_base(&env->ob);
514 DBG((dbg, DBG_WSETS, "Processing %+F...\n", block_info->bl));
515 new_vals = new_workset(env, &env->ob);
516 workset_clear(env->ws);
518 /* build the next use information for this block. */
519 build_next_uses(block_info);
522 block_info->first_non_in = NULL;
524 /* process the block from start to end */
525 sched_foreach(block, irn) {
527 assert(workset_get_length(env->ws) <= env->n_regs && "Too much values in workset!");
529 /* projs are handled with the tuple value.
530 * Phis are no real instr (see insert_starters())
531 * instr_nr does not increase */
532 if (is_Proj(irn) || is_Phi(irn)) {
533 DBG((dbg, DBG_DECIDE, " ...%+F skipped\n", irn));
536 DBG((dbg, DBG_DECIDE, " ...%+F\n", irn));
538 if (!block_info->first_non_in)
539 block_info->first_non_in = irn;
541 /* set instruction in the workset */
544 /* allocate all values _used_ by this instruction */
545 workset_clear(new_vals);
546 for(i = 0, arity = get_irn_arity(irn); i < arity; ++i) {
547 workset_insert(env, new_vals, get_irn_n(irn, i));
549 displace(block_info, new_vals, 1);
552 * set all used variables to the next use in their next_use_t list
553 * Also, kill all dead variables from the workset. They are only
554 * augmenting the pressure. Note, that a variable is dead
555 * if it has no further use in this block and is *not* live end
557 for(i = 0, arity = get_irn_arity(irn); i < arity; ++i) {
558 ir_node *op = get_irn_n(irn, i);
559 next_use_t *use = get_current_use(block_info, op);
562 if (!use->next && !be_is_live_end(env->lv, block, op))
563 workset_remove(env->ws, op);
565 advance_current_use(block_info, op);
568 /* allocate all values _defined_ by this instruction */
569 workset_clear(new_vals);
570 if (get_irn_mode(irn) == mode_T) { /* special handling for tuples and projs */
571 const ir_edge_t *edge;
573 foreach_out_edge(irn, edge) {
574 ir_node *proj = get_edge_src_irn(edge);
575 workset_insert(env, new_vals, proj);
578 workset_insert(env, new_vals, irn);
580 displace(block_info, new_vals, 0);
585 phase_free(&block_info->next_uses);
586 obstack_free(&env->ob, obst_state);
588 /* Remember end-workset for this block */
589 block_info->ws_end = workset_clone(env, &env->ob, env->ws);
590 DBG((dbg, DBG_WSETS, "End workset for %+F:\n", block));
591 workset_foreach(block_info->ws_end, irn, iter)
592 DBG((dbg, DBG_WSETS, " %+F (%u)\n", irn, workset_get_time(block_info->ws_end, iter)));
593 DBG((dbg, DBG_WSETS, "Max pressure in block: %d\n", block_info->pressure));
598 |_ _| |__ ___ __ _| | ___ | |__ __ _| | | _ \ __ _ _ __| |_
599 | | | '_ \ / _ \ / _` | |/ _ \| '_ \ / _` | | | |_) / _` | '__| __|
600 | | | | | | __/ | (_| | | (_) | |_) | (_| | | | __/ (_| | | | |_
601 |_| |_| |_|\___| \__, |_|\___/|_.__/ \__,_|_| |_| \__,_|_| \__|
606 static int block_freq_gt(const void *a, const void *b)
608 const ir_node * const *p = a;
609 const ir_node * const *q = b;
610 block_info_t *pi = get_block_info(*p);
611 block_info_t *qi = get_block_info(*q);
612 double diff = qi->exec_freq - pi->exec_freq;
613 return (diff > 0) - (diff < 0);
616 typedef struct _block_end_state_t {
620 workset_t *end_state;
621 unsigned reload_at_end : 1;
622 unsigned live_through : 1;
625 typedef struct _global_end_state_t {
629 block_end_state_t *end_info;
632 } global_end_state_t;
639 static INLINE rollback_info_t trans_begin(global_end_state_t *ges)
642 rb.obst_level = obstack_base(&ges->obst);
643 rb.gauge = ges->gauge;
647 static INLINE void trans_rollback(global_end_state_t *ges, rollback_info_t *rb)
649 ges->gauge = rb->gauge;
650 obstack_free(&ges->obst, rb->obst_level);
653 static unsigned get_block_end_state(global_end_state_t *state, ir_node *bl, ir_node *irn)
657 for (i = 0; i < state->gauge; ++i) {
658 block_end_state_t *bei = &state->end_info[i];
659 if (bei->bl == bl && bei->irn == irn)
664 block_info_t *bi = get_block_info(bl);
665 block_end_state_t *curr;
669 /* make sure we have room in the array */
670 ARR_EXTO(block_end_state_t, state->end_info, (int) state->gauge);
672 curr = &state->end_info[state->gauge];
674 memset(curr, 0, sizeof(curr[0]));
677 curr->end_state = workset_clone(state->env, &state->obst, bi->ws_end);
684 static double can_bring_in(global_end_state_t *ges, ir_node *bl, ir_node *irn, double limit, int level);
686 static double can_make_available_at_end(global_end_state_t *ges, ir_node *bl, ir_node *irn, double limit, int level)
688 unsigned bes_index = get_block_end_state(ges, bl, irn);
689 block_end_state_t *bes = &ges->end_info[bes_index];
690 workset_t *end = bes->end_state;
691 block_info_t *bi = get_block_info(bl);
692 int n_regs = bi->bel->n_regs;
695 DBG((dbg, DBG_GLOBAL, "\t%2Dcan make avail %+F at end of %+F (pressure %d)\n",
696 level, irn, bl, bi->pressure));
699 * to make the value available at end,
700 * we have several cases here.
702 * - we already visited that block.
703 * - If the value is in the final end set, return 0.
704 * somebody else already allocated it there.
705 * - If not and the final end set is already full,
706 * we cannot make the value available at the end
707 * of this block. return INFINITY.
708 * - Else (value not in final end set and there is room):
709 * 1) The value is in a register at the end of the local Belady pass.
710 * Allocate a slot in the final end set and return 0.
711 * 2) The value is not in the Belady end set:
712 * If the block's capacity is < k then check what it costs
713 * to transport the value from upper blocks to this block.
714 * Compare that against the reload cost in this block. If
715 * cheaper, do the other thing. If not, reload it here.
719 * we have been here before and already figured out some costs.
720 * so we can exit safely.
722 if (bes->costs >= 0.0) {
723 DBG((dbg, DBG_GLOBAL, "\t%2Dwe\'ve been here before\n", level));
727 /* if the end set contains it already, it is in a reg and it costs nothing
728 * to load it to one. */
729 index = workset_get_index(end, irn);
731 unsigned ver = end->vals[index].version;
732 DBG((dbg, DBG_GLOBAL, "\t%2Dnode is in the end set and is %s fixed\n",
733 level, ver > ges->version ? "already" : "not yet"));
736 * if the version is older, the value is already fixed
737 * and cannot be removed from the end set. If not,
738 * we fix it here by giving it our version.
740 if (ver < ges->version)
741 end->vals[index].version = ges->version;
748 * Now we have two options:
749 * 1) Reload the value at the end of the block.
750 * Therefore, perhaps, we have to erase another one from the workset.
751 * This may only be done if it has not been fixed.
752 * Since fixed means that a previous pass has decided that that value
753 * *has* to stay in the end set.
754 * 2) we can try, if the capacity of the block allows it, to let
755 * the value live through the block and make it available at
758 * First, we test the local (reload in this block) alternative
759 * and compare against the other alternative.
760 * Of course, we chose the cheaper one.
764 int len = workset_get_length(end);
768 bes->costs = HUGE_VAL;
771 * look if there is room in the end array
772 * for the variable. Note that this does not
773 * means that the var is living through the block.
774 * There is just room at the *end*
777 DBG((dbg, DBG_GLOBAL, "\t%2Dthe end set has %d free slots\n",
778 level, n_regs - len));
781 for (i = 0; i < len; ++i)
782 if (end->vals[i].version < ges->version)
786 DBG((dbg, DBG_GLOBAL, "\t%2D%+F (slot %d) can be erased from the end set\n",
787 level, end->vals[i].irn, i));
793 rollback_info_t rb = trans_begin(ges);
794 ir_node *ins_before = block_info_get_last_ins(bi);
795 double reload_here = be_get_reload_costs(bi->bel->senv, irn, ins_before);
796 double bring_in = HUGE_VAL;
798 /* allocate a slot before recursively descending. */
799 end->vals[slot].irn = irn;
800 end->vals[slot].version = ges->version;
801 end->len = MAX(end->len, slot + 1);
803 /* look if we can bring the value in. */
804 if (bi->pressure < n_regs) {
805 double new_limit = MIN(reload_here, limit);
806 bring_in = can_bring_in(ges, bl, irn, new_limit, level + 1);
810 * re-read the bes descriptor since meanwhile, the
811 * array could have been displaced by recursive calls
813 assert(bes_index < ges->gauge);
814 bes = &ges->end_info[bes_index];
815 DBG((dbg, DBG_GLOBAL, "\t%2Dthere is a free slot. capacity=%d, reload here=%f, bring in=%f\n",
816 level, n_regs - bi->pressure, reload_here, bring_in));
819 * reloading here pays off; bringing the value in from elsewhere
820 * is too expensive, hence we drop that search by resetting
823 if (reload_here <= bring_in) {
824 trans_rollback(ges, &rb);
825 bes->costs = reload_here;
826 bes->reload_at_end = 1;
827 DBG((dbg, DBG_GLOBAL, "\t%2Ddoing a reload %p\n", level, bes));
829 bes->live_through = 1;
830 bes->costs = bring_in;
837 DBG((dbg, DBG_GLOBAL, "\t%2D-> %f\n", level, bes->costs));
841 static double can_bring_in(global_end_state_t *ges, ir_node *bl, ir_node *irn, double limit, int level)
843 belady_env_t *env = ges->env;
844 double glob_costs = HUGE_VAL;
846 DBG((dbg, DBG_GLOBAL, "\t%2Dcan bring in (max %f) for %+F at block %+F\n", level, limit, irn, bl));
848 if (is_transport_in(bl, irn)) {
849 int i, n = get_irn_arity(bl);
850 ir_node **nodes = alloca(get_irn_arity(bl) * sizeof(nodes[0]));
851 rollback_info_t rb = trans_begin(ges);
855 for (i = 0; i < n; ++i) {
856 ir_node *pr = get_Block_cfgpred_block(bl, i);
857 ir_node *op = is_local_phi(bl, irn) ? get_irn_n(irn, i) : irn;
861 * there might by unknwons as operands of phis in that case
862 * we set the costs to zero, since they won't get spilled.
864 if (arch_irn_consider_in_reg_alloc(env->arch, env->cls, op))
865 c = can_make_available_at_end(ges, pr, op, limit - glob_costs, level + 1);
871 if (glob_costs >= limit) {
872 glob_costs = HUGE_VAL;
873 trans_rollback(ges, &rb);
880 DBG((dbg, DBG_GLOBAL, "\t%2D-> %f\n", level, glob_costs));
884 static void materialize_and_commit_end_state(global_end_state_t *ges)
886 belady_env_t *env = ges->env;
889 DBG((dbg, DBG_GLOBAL, "\tmaterializing\n"));
890 for (i = 0; i < ges->gauge; ++i) {
891 block_end_state_t *bes = &ges->end_info[i];
892 block_info_t *bi = get_block_info(bes->bl);
893 int idx, end_pressure;
895 DBG((dbg, DBG_GLOBAL, "\t\t%+F in %+F, cost: %f through: %d, rel: %d %p\n",
896 bes->irn, bes->bl, bes->costs, bes->live_through, bes->reload_at_end, bes));
898 /* insert the reload if the val was reloaded at the block's end */
899 if (bes->reload_at_end) {
900 be_add_reload_at_end(env->senv, bes->irn, bes->bl, env->cls, 1);
901 DBG((dbg, DBG_GLOBAL, "\t\tadding reload of %+F at end of %+F\n", bes->irn, bes->bl));
904 idx = workset_get_index(bes->end_state, bes->irn);
906 if (is_local_phi(bes->bl, bes->irn) && bes->live_through)
907 bitset_add_irn(ges->succ_phis, bes->irn);
910 * set the version number in the workset.
911 * That will mark this value as fixed in the end set
912 * and will prevent further investigations from removing
914 * Also "commit" the workset;
915 * by copying it back to the block's end workset.
918 DBG((dbg, DBG_GLOBAL, "\t\tcommiting workset of %+F with version %x\n", bes->bl, ges->version));
919 bes->end_state->vals[idx].version = ges->version;
920 workset_copy(env, bi->ws_end, bes->end_state);
924 for (idx = workset_get_length(bes->end_state) - 1; idx >= 0; --idx)
925 if (bes->end_state->vals[idx].version >= ges->version)
929 * if the variable is live through the block,
930 * update the pressure indicator.
932 DBG((dbg, DBG_GLOBAL, "\t\told pressure %d, ", bi->pressure));
934 bi->pressure = MAX(bi->pressure + bes->live_through, end_pressure);
936 DBG((dbg, DBG_GLOBAL, "new pressure: %d, end pressure: %d, end length: %d\n",
937 bi->pressure, end_pressure, workset_get_length(bes->end_state)));
943 * Examine all irns which shall be in regs at the beginning of the
946 static void fix_block_borders(global_end_state_t *ges, ir_node *block) {
947 block_info_t *bi = get_block_info(block);
948 belady_env_t *env = ges->env;
953 DBG((dbg, DBG_GLOBAL, "fixing block borders at %+F (%f)\n", block, bi->exec_freq));
955 /* process all variables which shall be in a reg at
956 * the beginning of the block in the order of the next use. */
957 workset_foreach(bi->entrance_reg, irn, i) {
958 double local_costs = be_get_reload_costs(env->senv, irn, bi->first_non_in);
959 double bring_in_costs;
961 /* reset the gauge and create a new version. */
965 DBG((dbg, DBG_GLOBAL, "\ttrans in var %+F, version %x\n", irn, ges->version));
967 bring_in_costs = can_bring_in(ges, block, irn, local_costs, 1);
969 DBG((dbg, DBG_GLOBAL, "\tbring in: %f, local: %f", bring_in_costs, local_costs));
972 * we were not able to let the value arrive
973 * in a register at the entrance of the block
974 * or it is too costly, so we have to do the reload locally
976 if (bring_in_costs >= local_costs) {
977 DBG((dbg, DBG_GLOBAL, " -> do local reload\n"));
978 be_add_reload(env->senv, irn, bi->first_non_in, env->cls, 1);
981 * if the transport-in was a phi (that is actually used in block)
982 * it will no longer remain and we have to spill it completely.
984 if (is_local_phi(block, irn))
985 bitset_add_irn(ges->succ_phis, irn);
987 DBG((dbg, DBG_GLOBAL, " -> do remote reload\n"));
988 materialize_and_commit_end_state(ges);
991 DBG((dbg, DBG_GLOBAL, "\n"));
995 static void global_assign(belady_env_t *env)
997 global_end_state_t ges;
1000 obstack_init(&ges.obst);
1004 ges.end_info = NEW_ARR_F(block_end_state_t, env->n_blocks);
1005 ges.succ_phis = bitset_irg_obstack_alloc(&env->ob, env->irg);
1008 * sort the blocks according to execution frequency.
1009 * That's not necessary for belady() but for the global pass later on.
1011 qsort(env->blocks, env->n_blocks, sizeof(env->blocks[0]), block_freq_gt);
1013 for (i = 0; i < env->n_blocks; ++i)
1014 fix_block_borders(&ges, env->blocks[i]);
1017 * Now we spill phis which cannot be kept since they were replaced
1018 * by reloads at the block entrances.
1020 for (i = 0; i < env->n_blocks; ++i) {
1021 ir_node *bl = env->blocks[i];
1024 sched_foreach(bl, irn) {
1028 if (arch_irn_consider_in_reg_alloc(env->arch, env->cls, irn)
1029 && !bitset_contains_irn(ges.succ_phis, irn))
1030 be_spill_phi(env->senv, irn);
1034 DEL_ARR_F(ges.end_info);
1037 static void collect_blocks(ir_node *bl, void *data)
1039 belady_env_t *env = data;
1041 obstack_ptr_grow(&env->ob, bl);
1044 void be_spill_belady_spill_env2(be_irg_t *birg, const arch_register_class_t *cls, spill_env_t *spill_env) {
1045 ir_graph *irg = be_get_birg_irg(birg);
1049 /* some special classes contain only ignore regs, nothing to do then */
1050 n_regs = cls->n_regs - be_put_ignore_regs(birg, cls, NULL);
1054 be_clear_links(irg);
1056 /* init belady env */
1057 obstack_init(&env.ob);
1059 env.arch = birg->main_env->arch_env;
1061 env.lv = be_get_birg_liveness(birg);
1062 env.n_regs = n_regs;
1063 env.ws = new_workset(&env, &env.ob);
1064 env.senv = spill_env ? spill_env : be_new_spill_env(birg);
1065 env.ef = be_get_birg_exec_freq(birg);
1068 irg_block_walk_graph(irg, NULL, collect_blocks, &env);
1069 obstack_ptr_grow(&env.ob, NULL);
1070 env.blocks = obstack_finish(&env.ob);
1072 /* Fix high register pressure in blocks with belady algorithm */
1073 for (i = 0; i < env.n_blocks; ++i)
1074 belady(env.blocks[i], &env);
1076 global_assign(&env);
1078 /* Insert spill/reload nodes into the graph and fix usages */
1079 be_insert_spills_reloads(env.senv);
1082 if(spill_env == NULL)
1083 be_delete_spill_env(env.senv);
1085 obstack_free(&env.ob, NULL);
1090 * Do spilling for a register class on a graph using the belady heuristic.
1091 * In the transformed graph, the register pressure never exceeds the number
1092 * of available registers.
1094 * @param birg The backend graph
1095 * @param cls The register class to spill
1097 static void be_spill_belady(be_irg_t *birg, const arch_register_class_t *cls) {
1098 be_spill_belady_spill_env2(birg, cls, NULL);
1102 void be_init_spillbelady2(void)
1104 static be_spiller_t belady_spiller = {
1108 be_register_spiller("belady2", &belady_spiller);
1109 FIRM_DBG_REGISTER(dbg, "firm.be.spill.belady2");
1112 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_spillbelady2);