2 * Copyright (C) 1995-2007 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Belady's spill algorithm version 2.
23 * @author Sebastian Hack, Matthias Braun, Daniel Grund
27 * The main differences to the original Belady are:
28 * - The workset is empty at the start of a block
29 * There is no attempt to fill it with variables which
30 * are not used in the block.
31 * - There is a global pass which tries to use the remaining
32 * capacity of the blocks to let global variables live through
43 #include "irnodeset.h"
45 #include "irprintf_t.h"
51 #include "iredges_t.h"
52 #include "irphase_t.h"
61 #include "bespillbelady.h"
62 #include "besched_t.h"
66 #include "bechordal_t.h"
67 #include "bespilloptions.h"
68 #include "beloopana.h"
72 #include <libcore/lc_opts.h>
73 #include <libcore/lc_opts_enum.h>
74 #include <libcore/lc_timing.h>
83 #define DBG_WORKSET 128
84 #define DBG_GLOBAL 256
86 #define ALREADY_SPILLED_FACTOR 2
89 #define LIVE_END (DEAD-1)
90 #define REMAT_DIST (DEAD-2)
92 static int already_spilled_factor = 2;
93 static int remat_live_range_ext = 1;
94 static int global_pass_enabled = 1;
96 static const lc_opt_table_entry_t options[] = {
97 LC_OPT_ENT_INT ("asf", "already spilled factor", &already_spilled_factor),
98 LC_OPT_ENT_BOOL ("remat", "rematerializable ops get infinite long live ranges", &remat_live_range_ext),
99 LC_OPT_ENT_BOOL ("global", "enable/disable the global pass", &global_pass_enabled),
103 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
106 * An association between a node and a point in time.
108 typedef struct _loc_t {
109 ir_node *irn; /**< A node. */
110 unsigned time; /**< A use time.
111 In the global pass this is used
112 as the version number and not as a time.
113 Only to save space...
117 typedef struct _workset_t {
118 int len; /**< current length */
119 loc_t vals[0]; /**< inlined array of the values/distances in this working set */
122 typedef struct _belady_env_t {
126 const arch_env_t *arch;
127 const arch_register_class_t *cls;
131 ir_node **blocks; /**< Array of all blocks. */
132 int n_blocks; /**< Number of blocks in the graph. */
133 int n_regs; /**< number of regs in this reg-class */
134 workset_t *ws; /**< the main workset used while processing a block. ob-allocated */
135 ir_node *instr; /**< current instruction */
136 int instr_nr; /**< current instruction number (relative to block start) */
138 spill_env_t *senv; /**< see bespill.h */
139 bitset_t *spilled; /**< bitset to keep all the irns which have already been spilled. */
143 static int loc_compare(const void *a, const void *b)
147 return (p->time > q->time) - (p->time < q->time);
150 static INLINE void workset_print(const workset_t *w)
154 for(i = 0; i < w->len; ++i) {
155 ir_fprintf(stderr, "%+F %d\n", w->vals[i].irn, w->vals[i].time);
160 * Alloc a new workset on obstack @p ob with maximum size @p max
162 static INLINE workset_t *new_workset(belady_env_t *env, struct obstack *ob) {
164 size_t size = sizeof(*res) + (env->n_regs)*sizeof(res->vals[0]);
165 res = obstack_alloc(ob, size);
166 memset(res, 0, size);
171 * Alloc a new instance on obstack and make it equal to @param ws
173 static INLINE workset_t *workset_clone(belady_env_t *env, struct obstack *ob, workset_t *ws) {
175 size_t size = sizeof(*res) + (env->n_regs)*sizeof(res->vals[0]);
176 res = obstack_alloc(ob, size);
177 memcpy(res, ws, size);
182 * Do NOT alloc anything. Make @param tgt equal to @param src.
183 * returns @param tgt for convenience
185 static INLINE workset_t *workset_copy(belady_env_t *env, workset_t *tgt, workset_t *src) {
186 size_t size = sizeof(*src) + (env->n_regs)*sizeof(src->vals[0]);
187 memcpy(tgt, src, size);
192 * Overwrites the current content array of @param ws with the
193 * @param count locations given at memory @param locs.
194 * Set the length of @param ws to count.
196 static INLINE void workset_bulk_fill(workset_t *workset, int count, const loc_t *locs) {
197 workset->len = count;
198 memcpy(&(workset->vals[0]), locs, count * sizeof(locs[0]));
202 * Inserts the value @p val into the workset, iff it is not
203 * already contained. The workset must not be full.
205 static INLINE void workset_insert(belady_env_t *env, workset_t *ws, ir_node *val) {
207 /* check for current regclass */
208 if (!arch_irn_consider_in_reg_alloc(env->arch, env->cls, val)) {
209 DBG((dbg, DBG_WORKSET, "Skipped %+F\n", val));
213 /* check if val is already contained */
214 for(i=0; i<ws->len; ++i)
215 if (ws->vals[i].irn == val)
219 assert(ws->len < env->n_regs && "Workset already full!");
220 ws->vals[ws->len++].irn = val;
224 * Removes all entries from this workset
226 static INLINE void workset_clear(workset_t *ws) {
231 * Removes the value @p val from the workset if present.
233 static INLINE void workset_remove(workset_t *ws, ir_node *val) {
235 for(i=0; i<ws->len; ++i) {
236 if (ws->vals[i].irn == val) {
237 ws->vals[i] = ws->vals[--ws->len];
243 static INLINE int workset_get_index(const workset_t *ws, const ir_node *val) {
245 for(i=0; i<ws->len; ++i) {
246 if (ws->vals[i].irn == val)
254 * Iterates over all values in the working set.
255 * @p ws The workset to iterate
256 * @p v A variable to put the current value in
257 * @p i An integer for internal use
259 #define workset_foreach(ws, v, i) for(i=0; \
260 v=(i < ws->len) ? ws->vals[i].irn : NULL, i < ws->len; \
263 #define workset_set_time(ws, i, t) (ws)->vals[i].time=t
264 #define workset_get_time(ws, i) (ws)->vals[i].time
265 #define workset_set_length(ws, length) (ws)->len = length
266 #define workset_get_length(ws) ((ws)->len)
267 #define workset_get_val(ws, i) ((ws)->vals[i].irn)
268 #define workset_sort(ws) qsort((ws)->vals, (ws)->len, sizeof((ws)->vals[0]), loc_compare);
269 #define workset_contains(ws, n) (workset_get_index(ws, n) >= 0)
271 typedef struct _block_info_t {
274 workset_t *ws_start, *ws_end;
278 ir_node *first_non_in; /**< First node in block which is not a phi. */
279 ir_node *last_ins; /**< The instruction before which end of
280 block reloads will be inserted. */
282 workset_t *entrance_reg; /**< That set will contain all values
283 transported into the block which
284 are used before they are displaced.
285 That means, we later have to care to
286 bring them into the block in a register
287 or reload them at the entry of the block. */
289 int pressure; /**< The amount of registers which remain free
290 in this block. This capacity can be used to let
291 global variables, transported into other blocks,
292 live through this block. */
294 double exec_freq; /**< The execution frequency of this block. */
297 static INLINE void *new_block_info(belady_env_t *bel, int id)
299 ir_node *bl = bel->blocks[id];
300 block_info_t *res = obstack_alloc(&bel->ob, sizeof(*res));
301 memset(res, 0, sizeof(res[0]));
302 res->first_non_in = NULL;
303 res->last_ins = NULL;
307 res->entrance_reg = new_workset(bel, &bel->ob);
308 res->exec_freq = get_block_execfreq(bel->ef, bl);
309 set_irn_link(bl, res);
313 #define get_block_info(block) ((block_info_t *)get_irn_link(block))
314 #define set_block_info(block, info) set_irn_link(block, info)
316 static INLINE ir_node *block_info_get_last_ins(block_info_t *bi)
319 bi->last_ins = be_get_end_of_block_insertion_point(bi->bl);
324 typedef struct _next_use_t {
325 unsigned is_first_use : 1; /**< Indicate that this use is the first
326 in the block. Needed to identify
327 transport in values for the global
329 int step; /**< The time step of the use. */
331 struct _next_use_t *next; /**< The next use in this block
335 static void *next_use_init(ir_phase *phase, ir_node *irn, void *old)
343 static void build_next_uses(block_info_t *bi)
347 sched_renumber(bi->bl);
349 phase_init(&bi->next_uses, "next uses", bi->bel->irg, PHASE_DEFAULT_GROWTH, next_use_init, NULL);
350 sched_foreach_reverse(bi->bl, irn) {
356 for (i = get_irn_arity(irn) - 1; i >= 0; --i) {
357 ir_node *op = get_irn_n(irn, i);
358 next_use_t *curr = phase_get_irn_data(&bi->next_uses, op);
359 next_use_t *use = phase_alloc(&bi->next_uses, sizeof(use[0]));
361 use->is_first_use = 1;
362 use->step = sched_get_time_step(irn);
367 curr->is_first_use = 0;
368 assert(curr->step >= use->step);
371 phase_set_irn_data(&bi->next_uses, op, use);
376 #define get_current_use(bi, irn) phase_get_irn_data(&(bi)->next_uses, (irn))
378 static INLINE void advance_current_use(block_info_t *bi, const ir_node *irn)
380 next_use_t *use = get_current_use(bi, irn);
383 phase_set_irn_data(&bi->next_uses, irn, use->next);
386 static INLINE unsigned get_curr_distance(block_info_t *bi, const ir_node *irn, int is_usage)
388 belady_env_t *env = bi->bel;
389 int curr_step = sched_get_time_step(env->instr);
390 next_use_t *use = get_current_use(bi, irn);
391 int flags = arch_irn_get_flags(env->arch, irn);
393 assert(!(flags & arch_irn_flags_ignore));
395 /* We have to keep nonspillable nodes in the workingset */
396 if(flags & arch_irn_flags_dont_spill)
399 if (!is_usage && use && use->step == curr_step)
403 unsigned res = use->step - curr_step;
405 assert(use->step >= curr_step);
408 if (remat_live_range_ext && be_is_rematerializable(env->senv, irn, use->irn))
410 else if (bitset_contains_irn(env->spilled, irn))
411 res *= already_spilled_factor;
417 return be_is_live_end(env->lv, bi->bl, irn) ? LIVE_END : DEAD;
420 static INLINE int is_local_phi(const ir_node *bl, const ir_node *irn)
422 return is_Phi(irn) && get_nodes_block(irn) == bl;
426 * Check, if the value is something that is transported into a block.
427 * That is, the value is defined elsewhere or defined by a Phi in the block.
428 * @param env The belady environment.
429 * @param bl The block in question.
430 * @param irn The node in question.
431 * @return 1, if node is something transported into @p bl, 0 if not.
432 * @note The function will only give correct answers in the case
433 * where @p irn is used in the block @p bl which is always
434 * the case in our usage scenario.
436 static INLINE int is_transport_in(const ir_node *bl, const ir_node *irn)
438 return is_local_phi(bl, irn) || get_nodes_block(irn) != bl;
442 * Performs the actions necessary to grant the request that:
443 * - new_vals can be held in registers
444 * - as few as possible other values are disposed
445 * - the worst values get disposed
447 * @p is_usage indicates that the values in new_vals are used (not defined)
448 * In this case reloads must be performed
450 static void displace(block_info_t *bi, workset_t *new_vals, int is_usage) {
451 belady_env_t *env = bi->bel;
452 workset_t *ws = env->ws;
453 ir_node **to_insert = alloca(env->n_regs * sizeof(to_insert[0]));
455 int i, len, max_allowed, demand, iter;
459 1. Identify the number of needed slots and the values to reload
462 workset_foreach(new_vals, val, iter) {
463 /* mark value as used */
465 if (! workset_contains(ws, val)) {
466 DBG((dbg, DBG_DECIDE, " insert %+F\n", val));
467 to_insert[demand++] = val;
469 int insert_reload = 1;
470 next_use_t *use = get_current_use(bi, val);
473 * if we use a value which is transported in this block, i.e. a
474 * phi defined here or a live in, for the first time, we check
475 * if there is room for that guy to survive from the block's
476 * entrance to here or not.
479 assert(sched_get_time_step(env->instr) == use->step);
480 if (is_transport_in(bi->bl, val) && use->is_first_use) {
481 DBG((dbg, DBG_DECIDE, "entrance node %+F, pressure %d:\n", val, bi->pressure));
482 if (bi->pressure < env->n_regs) {
483 workset_insert(env, bi->entrance_reg, val);
486 DBG((dbg, DBG_DECIDE, "... no reload. must be considered at block start\n"));
491 bitset_add_irn(env->spilled, val);
492 DBG((dbg, DBG_SPILL, "Reload %+F before %+F\n", val, env->instr));
493 be_add_reload(env->senv, val, env->instr, env->cls, 1);
497 assert(is_usage || "Defined value already in workset?!?");
498 DBG((dbg, DBG_DECIDE, " skip %+F\n", val));
501 DBG((dbg, DBG_DECIDE, " demand = %d\n", demand));
504 2. Make room for at least 'demand' slots
506 len = workset_get_length(ws);
507 max_allowed = env->n_regs - demand;
509 /* Only make more free room if we do not have enough */
510 if (len > max_allowed) {
511 DBG((dbg, DBG_DECIDE, " disposing %d values\n", len - max_allowed));
513 /* get current next-use distance */
514 for (i = 0; i < ws->len; ++i) {
515 ir_node *val = workset_get_val(ws, i);
516 unsigned dist = get_curr_distance(bi, val, is_usage);
517 workset_set_time(ws, i, dist);
520 /* sort entries by increasing nextuse-distance*/
523 /* kill the last 'demand' entries in the array */
524 workset_set_length(ws, max_allowed);
528 3. Insert the new values into the workset
529 Also, we update the pressure in the block info.
530 That is important for the global pass to decide
531 how many values can live through the block.
533 for (i = 0; i < demand; ++i)
534 workset_insert(env, env->ws, to_insert[i]);
536 bi->pressure = MAX(bi->pressure, workset_get_length(env->ws));
540 * For the given block @p block, decide for each values
541 * whether it is used from a register or is reloaded
544 static void belady(belady_env_t *env, int id) {
545 block_info_t *block_info = new_block_info(env, id);
546 const ir_node *block = block_info->bl;
547 void *obst_state = obstack_base(&env->ob);
553 DBG((dbg, DBG_WSETS, "Processing %+F...\n", block_info->bl));
554 new_vals = new_workset(env, &env->ob);
555 workset_clear(env->ws);
557 /* build the next use information for this block. */
558 build_next_uses(block_info);
561 block_info->first_non_in = NULL;
563 /* process the block from start to end */
564 sched_foreach(block, irn) {
566 assert(workset_get_length(env->ws) <= env->n_regs && "Too much values in workset!");
568 /* projs are handled with the tuple value.
569 * Phis are no real instr (see insert_starters())
570 * instr_nr does not increase */
571 if (is_Proj(irn) || is_Phi(irn)) {
572 DBG((dbg, DBG_DECIDE, " ...%+F skipped\n", irn));
575 DBG((dbg, DBG_DECIDE, " ...%+F\n", irn));
577 if (!block_info->first_non_in)
578 block_info->first_non_in = irn;
580 /* set instruction in the workset */
583 /* allocate all values _used_ by this instruction */
584 workset_clear(new_vals);
585 for(i = 0, arity = get_irn_arity(irn); i < arity; ++i) {
586 workset_insert(env, new_vals, get_irn_n(irn, i));
588 displace(block_info, new_vals, 1);
591 * set all used variables to the next use in their next_use_t list
592 * Also, kill all dead variables from the workset. They are only
593 * augmenting the pressure. Note, that a variable is dead
594 * if it has no further use in this block and is *not* live end
596 for(i = 0, arity = get_irn_arity(irn); i < arity; ++i) {
597 ir_node *op = get_irn_n(irn, i);
598 next_use_t *use = get_current_use(block_info, op);
601 if (!use->next && !be_is_live_end(env->lv, block, op))
602 workset_remove(env->ws, op);
604 advance_current_use(block_info, op);
607 /* allocate all values _defined_ by this instruction */
608 workset_clear(new_vals);
609 if (get_irn_mode(irn) == mode_T) { /* special handling for tuples and projs */
610 const ir_edge_t *edge;
612 foreach_out_edge(irn, edge) {
613 ir_node *proj = get_edge_src_irn(edge);
614 workset_insert(env, new_vals, proj);
617 workset_insert(env, new_vals, irn);
619 displace(block_info, new_vals, 0);
624 phase_free(&block_info->next_uses);
625 obstack_free(&env->ob, obst_state);
627 /* Remember end-workset for this block */
628 block_info->ws_end = workset_clone(env, &env->ob, env->ws);
629 DBG((dbg, DBG_WSETS, "End workset for %+F:\n", block));
630 workset_foreach(block_info->ws_end, irn, iter)
631 DBG((dbg, DBG_WSETS, " %+F (%u)\n", irn, workset_get_time(block_info->ws_end, iter)));
632 DBG((dbg, DBG_WSETS, "Max pressure in block: %d\n", block_info->pressure));
637 |_ _| |__ ___ __ _| | ___ | |__ __ _| | | _ \ __ _ _ __| |_
638 | | | '_ \ / _ \ / _` | |/ _ \| '_ \ / _` | | | |_) / _` | '__| __|
639 | | | | | | __/ | (_| | | (_) | |_) | (_| | | | __/ (_| | | | |_
640 |_| |_| |_|\___| \__, |_|\___/|_.__/ \__,_|_| |_| \__,_|_| \__|
645 #define workset_set_version(ws, i, t) ((ws)->vals[(i)].time = (t))
646 #define workset_get_version(ws, i) ((ws)->vals[(i)].time)
648 #define ver_oldest (0)
649 #define ver_youngest ((unsigned) -1)
650 #define ver_make_newer(v) ((v) + 1)
651 #define ver_is_older(v, w) ((v) < (w))
652 #define ver_is_younger(v, w) ((v) > (w))
655 static int block_freq_gt(const void *a, const void *b)
657 const ir_node * const *p = a;
658 const ir_node * const *q = b;
659 block_info_t *pi = get_block_info(*p);
660 block_info_t *qi = get_block_info(*q);
661 double diff = qi->exec_freq - pi->exec_freq;
662 return (diff > 0) - (diff < 0);
666 static int block_order(const void *a, const void *b)
668 const ir_node * const *p = a;
669 const ir_node * const *q = b;
670 block_info_t *pi = get_block_info(*p);
671 block_info_t *qi = get_block_info(*q);
674 if (pi->exec_freq > 1.0 && qi->exec_freq > 1.0) {
675 const dfs_t *dfs = pi->bel->dfs;
676 int pp = dfs_get_post_num(dfs, pi->bl);
677 int pq = dfs_get_post_num(dfs, qi->bl);
681 diff = qi->exec_freq - pi->exec_freq;
682 return (diff > 0) - (diff < 0);
691 typedef struct _block_state_t {
692 struct _block_state_t *next;
693 struct _block_state_t *next_intern;
696 workset_t *end_state;
699 typedef struct _irn_action_t {
700 struct _irn_action_t *next;
706 typedef struct _global_end_state_t {
714 unsigned *bs_tops_vers;
715 block_state_t **bs_tops;
716 block_state_t *bs_top;
717 irn_action_t *ia_top;
718 } global_end_state_t;
722 block_state_t *bs_top;
723 irn_action_t *ia_top;
726 static INLINE block_state_t *get_block_state(global_end_state_t *ges, block_info_t *bi)
729 assert(!ver_is_younger(ges->bs_tops_vers[id], ges->version));
730 return ver_is_older(ges->bs_tops_vers[id], ges->version) ? NULL : ges->bs_tops[bi->id];
733 static INLINE const workset_t *get_end_state(global_end_state_t *ges, block_info_t *bi)
735 block_state_t *bs = get_block_state(ges, bi);
736 return bs ? bs->end_state : bi->ws_end;
739 static block_state_t *new_block_state(global_end_state_t *ges, block_info_t *bi)
741 block_state_t *bs = get_block_state(ges, bi);
742 block_state_t *nw = obstack_alloc(&ges->obst, sizeof(nw[0]));
744 nw->next_intern = bs;
745 nw->next = ges->bs_top;
749 nw->pressure = bs->pressure;
750 nw->end_state = workset_clone(ges->env, &ges->obst, bs->end_state);
753 nw->pressure = bi->pressure;
754 nw->end_state = workset_clone(ges->env, &ges->obst, bi->ws_end);
758 ges->bs_tops[bi->id] = nw;
759 ges->bs_tops_vers[bi->id] = ges->version;
763 static irn_action_t *new_irn_action(global_end_state_t *ges, ir_node *irn, const ir_node *bl)
765 irn_action_t *ia = obstack_alloc(&ges->obst, sizeof(ia[0]));
769 ia->act = irn_act_none;
770 ia->next = ges->ia_top;
775 static INLINE rollback_info_t trans_begin(global_end_state_t *ges)
778 rb.obst_level = obstack_base(&ges->obst);
779 rb.bs_top = ges->bs_top;
780 rb.ia_top = ges->ia_top;
784 static INLINE void trans_rollback(global_end_state_t *ges, rollback_info_t *rb)
788 /* unwind all the stacks indexed by the block number */
789 for (bs = ges->bs_top; bs != rb->bs_top; bs = bs->next) {
790 unsigned id = bs->bi->id;
791 ges->bs_tops[id] = bs->next_intern;
794 ges->ia_top = rb->ia_top;
795 ges->bs_top = rb->bs_top;
796 obstack_free(&ges->obst, rb->obst_level);
800 static double can_bring_in(global_end_state_t *ges, ir_node *bl, ir_node *irn, double limit, int level);
802 static double can_make_available_at_end(global_end_state_t *ges, ir_node *bl, ir_node *irn, double limit, int level)
804 block_info_t *bi = get_block_info(bl);
805 const workset_t *end = get_end_state(ges, bi);
809 DBG((dbg, DBG_GLOBAL, "\t%2Dcan make avail %+F at end of %+F\n", level, irn, bl));
812 * to make the value available at end,
813 * we have several cases here.
815 * - we already visited that block.
816 * - If the value is in the final end set, return 0.
817 * somebody else already allocated it there.
818 * - If not and the final end set is already full,
819 * we cannot make the value available at the end
820 * of this block. return INFINITY.
821 * - Else (value not in final end set and there is room):
822 * 1) The value is in a register at the end of the local Belady pass.
823 * Allocate a slot in the final end set and return 0.
824 * 2) The value is not in the Belady end set:
825 * If the block's capacity is < k then check what it costs
826 * to transport the value from upper blocks to this block.
827 * Compare that against the reload cost in this block. If
828 * cheaper, do the other thing. If not, reload it here.
831 /* if the end set contains it already, it is in a reg and it costs nothing
832 * to load it to one. */
833 index = workset_get_index(end, irn);
835 unsigned ver = workset_get_version(end, index);
836 DBG((dbg, DBG_GLOBAL, "\t%2Dnode is in the end set and is %s fixed\n",
837 level, ver_is_older(ver, ges->version) ? "already" : "not yet"));
840 * if the version is older, the value is already fixed
841 * and cannot be removed from the end set.
843 * If not, we will create a new block state for that block since
844 * we modify it by giving the end state a new version.
846 if (ver_is_younger(ver, ges->version)) {
847 block_state_t *bs = new_block_state(ges, bi);
848 workset_set_version(bs->end_state, index, ges->version);
856 * Now we have two options:
857 * 1) Reload the value at the end of the block.
858 * Therefore, perhaps, we have to erase another one from the workset.
859 * This may only be done if it has not been fixed.
860 * Since fixed means that a previous pass has decided that that value
861 * *has* to stay in the end set.
862 * 2) we can try, if the capacity of the block allows it, to let
863 * the value live through the block and make it available at
866 * First, we test the local (reload in this block) alternative
867 * and compare against the other alternative.
868 * Of course, we chose the cheaper one.
872 int n_regs = bi->bel->n_regs;
873 int len = workset_get_length(end);
880 * look if there is room in the end array
881 * for the variable. Note that this does not
882 * mean that the var can live through the block.
883 * There is just room at the *end*
886 DBG((dbg, DBG_GLOBAL, "\t%2Dthe end set has %d free slots\n", level, n_regs - len));
889 for (i = 0; i < len; ++i) {
890 unsigned ver = workset_get_version(end, i);
891 if (ver_is_younger(ver, ges->version))
896 DBG((dbg, DBG_GLOBAL, "\t%2D%+F (slot %d) can be erased from the end set\n",
897 level, end->vals[i].irn, i));
903 * finally there is some room. we can at least reload the value.
904 * but we will try to let it live through anyhow.
907 irn_action_t *vs = new_irn_action(ges, irn, bi->bl);
908 block_state_t *bs = new_block_state(ges, bi);
909 workset_t *end = bs->end_state;
910 ir_node *ins_before = block_info_get_last_ins(bi);
911 double reload_here = be_get_reload_costs(bi->bel->senv, irn, ins_before);
912 int pressure_ok = bs->pressure < (unsigned) n_regs;
915 * No matter what we do, the value will be in the end set
916 * if the block from now on (of course only regarding the
917 * current state). Enter it and set the new length
920 end->vals[slot].irn = irn;
921 workset_set_version(end, slot, ges->version);
922 workset_set_length(end, MAX(workset_get_length(end), slot + 1));
924 vs->act = irn_act_reload;
927 DBG((dbg, DBG_GLOBAL, "\t%2Dthere is a free slot. capacity=%d, reload here=%f, pressure %s\n",
928 level, n_regs - bs->pressure, reload_here, pressure_ok ? "ok" : "insufficient"));
930 /* look if we can bring the value in. */
932 rollback_info_t rb = trans_begin(ges);
933 double new_limit = MIN(reload_here, limit);
935 vs->act = irn_act_live_through;
937 res = can_bring_in(ges, bl, irn, new_limit, level + 1);
940 * if bring in is too expensive re-adjust the pressure
941 * and roll back the state
943 if (res >= reload_here) {
945 vs->act = irn_act_reload;
946 trans_rollback(ges, &rb);
951 DBG((dbg, DBG_GLOBAL, "\t%2D%s\n", level,
952 vs->act == irn_act_reload ? "reloading" : "bringing in"));
957 DBG((dbg, DBG_GLOBAL, "\t%2D-> %f\n", level, res));
961 static double can_bring_in(global_end_state_t *ges, ir_node *bl, ir_node *irn, double limit, int level)
963 belady_env_t *env = ges->env;
964 double glob_costs = HUGE_VAL;
966 DBG((dbg, DBG_GLOBAL, "\t%2Dcan bring in (max %f) for %+F at block %+F\n", level, limit, irn, bl));
968 if (is_transport_in(bl, irn)) {
969 int i, n = get_irn_arity(bl);
970 ir_node **nodes = alloca(get_irn_arity(bl) * sizeof(nodes[0]));
971 rollback_info_t rb = trans_begin(ges);
975 for (i = 0; i < n; ++i) {
976 ir_node *pr = get_Block_cfgpred_block(bl, i);
977 ir_node *op = is_local_phi(bl, irn) ? get_irn_n(irn, i) : irn;
981 * there might be unknowns as operands of phis in that case
982 * we set the costs to zero, since they won't get spilled.
984 if (arch_irn_consider_in_reg_alloc(env->arch, env->cls, op))
985 c = can_make_available_at_end(ges, pr, op, limit - glob_costs, level + 1);
991 if (glob_costs >= limit) {
992 glob_costs = HUGE_VAL;
993 trans_rollback(ges, &rb);
1000 DBG((dbg, DBG_GLOBAL, "\t%2D-> %f\n", level, glob_costs));
1004 static void materialize_and_commit_end_state(global_end_state_t *ges)
1006 belady_env_t *env = ges->env;
1010 DBG((dbg, DBG_GLOBAL, "\tmaterializing\n"));
1013 * Perform all the variable actions.
1015 for (ia = ges->ia_top; ia != NULL; ia = ia->next) {
1017 case irn_act_live_through:
1018 if (is_local_phi(ia->bl, ia->irn)) {
1019 bitset_add_irn(ges->succ_phis, ia->irn);
1020 DBG((dbg, DBG_GLOBAL, "\t\tlive through phi kept alive: %+F\n", ia->irn));
1023 case irn_act_reload:
1024 be_add_reload_at_end(env->senv, ia->irn, ia->bl, env->cls, 1);
1025 DBG((dbg, DBG_GLOBAL, "\t\tadding reload of %+F at end of %+F\n", ia->irn, ia->bl));
1028 DBG((dbg, DBG_GLOBAL, "\t\t%+F is in the end set of %+F\n", ia->irn, ia->bl));
1033 * Commit the block end states
1035 for (bs = ges->bs_top; bs != NULL; bs = bs->next) {
1036 block_info_t *bi = bs->bi;
1038 if (!bitset_is_set(ges->committed, bi->id)) {
1039 DBG((dbg, DBG_GLOBAL, "\t\tcommiting workset of %+F with version %x\n", bi->bl, ges->version));
1040 // bes->bs->end_state->vals[idx].version = ges->version;
1041 workset_copy(env, bi->ws_end, bs->end_state);
1042 DBG((dbg, DBG_GLOBAL, "\t\told pressure: %d, new pressure: %d, end length: %d\n",
1043 bi->pressure, bs->pressure, workset_get_length(bs->end_state)));
1044 bi->pressure = bs->pressure;
1045 bitset_set(ges->committed, bi->id);
1049 /* clear the committed bitset. the next call is expecting it. */
1050 bitset_clear_all(ges->committed);
1054 * Examine all irns which shall be in regs at the beginning of the
1057 static void fix_block_borders(global_end_state_t *ges, ir_node *block) {
1058 block_info_t *bi = get_block_info(block);
1059 belady_env_t *env = ges->env;
1060 void *reset_level = obstack_base(&ges->obst);
1065 for (i = workset_get_length(bi->ws_end) - 1; i >= 0; --i)
1066 workset_set_version(bi->ws_end, i, ver_youngest);
1068 DBG((dbg, DBG_GLOBAL, "fixing block borders at %+F (%f)\n", block, bi->exec_freq));
1070 /* process all variables which shall be in a reg at
1071 * the beginning of the block in the order of the next use. */
1072 workset_foreach(bi->entrance_reg, irn, i) {
1073 double local_costs = be_get_reload_costs(env->senv, irn, bi->first_non_in);
1074 double bring_in_costs;
1076 /* reset the lists */
1080 DBG((dbg, DBG_GLOBAL, "\ttrans in var %+F, version %x\n", irn, ges->version));
1082 bring_in_costs = can_bring_in(ges, block, irn, local_costs, 1);
1084 DBG((dbg, DBG_GLOBAL, "\tbring in: %f, local: %f", bring_in_costs, local_costs));
1087 * we were not able to let the value arrive
1088 * in a register at the entrance of the block
1089 * or it is too costly, so we have to do the reload locally
1091 if (bring_in_costs >= local_costs) {
1092 DBG((dbg, DBG_GLOBAL, " -> do local reload\n"));
1093 be_add_reload(env->senv, irn, bi->first_non_in, env->cls, 1);
1096 * if the transport-in was a phi (that is actually used in block)
1097 * it will no longer remain and we have to spill it completely.
1099 if (is_local_phi(block, irn))
1100 bitset_add_irn(ges->succ_phis, irn);
1102 DBG((dbg, DBG_GLOBAL, " -> bring it in\n"));
1103 materialize_and_commit_end_state(ges);
1106 DBG((dbg, DBG_GLOBAL, "\n"));
1108 /* reset the obstack and create a new version. */
1109 obstack_free(&ges->obst, reset_level);
1110 ges->version = ver_make_newer(ges->version);
1114 static void global_assign(belady_env_t *env)
1116 global_end_state_t ges;
1120 * sort the blocks according to execution frequency.
1121 * That's not necessary for belady() but for the global pass later on.
1123 qsort(env->blocks, env->n_blocks, sizeof(env->blocks[0]), block_order);
1125 memset(&ges, 0, sizeof(ges));
1126 obstack_init(&ges.obst);
1128 ges.version = ver_make_newer(ver_oldest);
1129 ges.succ_phis = bitset_irg_obstack_alloc(&ges.obst, env->irg);
1130 ges.committed = bitset_obstack_alloc(&ges.obst, env->n_blocks);
1131 ges.bs_tops = obstack_alloc(&ges.obst, sizeof(ges.bs_tops[0]) * env->n_blocks);
1132 ges.bs_tops_vers = obstack_alloc(&ges.obst, sizeof(ges.bs_tops_vers[0]) * env->n_blocks);
1134 for (i = 0; i < env->n_blocks; ++i)
1135 ges.bs_tops_vers[i] = ver_oldest;
1137 for (i = 0; i < env->n_blocks; ++i)
1138 fix_block_borders(&ges, env->blocks[i]);
1141 * Now we spill phis which cannot be kept since they were replaced
1142 * by reloads at the block entrances.
1144 for (i = 0; i < env->n_blocks; ++i) {
1145 ir_node *bl = env->blocks[i];
1148 sched_foreach(bl, irn) {
1152 if (arch_irn_consider_in_reg_alloc(env->arch, env->cls, irn)
1153 && !bitset_contains_irn(ges.succ_phis, irn))
1154 be_spill_phi(env->senv, irn);
1159 static void collect_blocks(ir_node *bl, void *data)
1161 belady_env_t *env = data;
1163 obstack_ptr_grow(&env->ob, bl);
1167 * Do spilling for a register class on a graph using the belady heuristic.
1168 * In the transformed graph, the register pressure never exceeds the number
1169 * of available registers.
1171 * @param birg The backend graph
1172 * @param cls The register class to spill
1174 void be_spill_belady(be_irg_t *birg, const arch_register_class_t *cls)
1176 ir_graph *irg = be_get_birg_irg(birg);
1180 /* some special classes contain only ignore regs, nothing to do then */
1181 n_regs = cls->n_regs - be_put_ignore_regs(birg, cls, NULL);
1185 be_clear_links(irg);
1187 /* init belady env */
1188 obstack_init(&env.ob);
1190 env.arch = be_get_birg_arch_env(birg);
1192 env.lv = be_get_birg_liveness(birg);
1193 env.dfs = env.lv->dfs;
1194 env.n_regs = n_regs;
1195 env.ws = new_workset(&env, &env.ob);
1196 env.senv = be_new_spill_env(birg);
1197 env.ef = be_get_birg_exec_freq(birg);
1198 env.spilled = bitset_irg_obstack_alloc(&env.ob, irg);
1201 irg_block_walk_graph(irg, NULL, collect_blocks, &env);
1202 obstack_ptr_grow(&env.ob, NULL);
1203 env.blocks = obstack_finish(&env.ob);
1205 /* Fix high register pressure in blocks with belady algorithm */
1206 for (i = 0; i < env.n_blocks; ++i)
1209 global_assign(&env);
1211 /* Insert spill/reload nodes into the graph and fix usages */
1212 be_insert_spills_reloads(env.senv);
1215 be_delete_spill_env(env.senv);
1217 obstack_free(&env.ob, NULL);
1220 void be_init_spillbelady2(void)
1222 lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
1223 lc_opt_entry_t *spill_grp = lc_opt_get_grp(be_grp, "spill");
1224 lc_opt_entry_t *bel2_grp = lc_opt_get_grp(spill_grp, "belady2");
1226 static be_spiller_t belady_spiller = {
1230 lc_opt_add_table(bel2_grp, options);
1231 be_register_spiller("belady2", &belady_spiller);
1232 FIRM_DBG_REGISTER(dbg, "firm.be.spill.belady2");
1235 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_spillbelady2);